; PGSC_DM_v4.03-gff.ini

[ProcessRun]
; RunDescription (optional): a custom description for this run.
; If it is not specified, "Added annotation for {MapSet path} from {Sources}." will be used.
RunDescription="Loaded annotations for PGSC_DM_v4.03 from http://solanaceae.plantbiology.msu.edu/pgsc_download.shtml"

[MapSet]
; Identifies the target map set. Either MapSetId or MapSetPath is required.
; MapSetId: ID of the target map set.
;MapSetId=247848026
; MapSetPath: path of the target map set.
MapSetPath="/Solanum tuberosum/DM_v4.03"

[Method]
; To add a new annotation method, specify a method name and a color to be shown in the annotation track.
; You don't need to provide this info if the method already exists in the database.
; Supported color names are listed at http://www.flounder.com/csharp_color_table.htm
; If the method already exists, it will be updated. Otherwise, a new method will be added.
;Name={NamedColor|HTML hex code|R,G,B}
; Color examples: Red, #FF0000, or 255,0,0

GLEAN=Orange

[Annotation]
; GFF specification: http://www.sequenceontology.org/gff3.shtml
; Sources (required): GFF or GTF file(s) for annotations, accessible locally or remotely via URL.
; It is important to keep the file name extension consistent with the file format.
; Files can be compressed with gzip.
; The $DATA variable, defined in psh.exe.config as DataDir, can be used in the path.
; NOTE(review): the source below ends in ".zip" while the comment above mentions gzip only - confirm the loader accepts this archive.
Sources="$DATA/potato/PGSC_DM_V403_genes.gff.zip"
; Tracks (required): comma delimited. Provide the name of the section below that describes each track.
; At least one track section needs to be specified.
Tracks="GLEAN"
; MaskedParentTypes: parent GFF types (column 3) that, together with their child records, should be ignored.
; This saves some memory during GFF parsing, especially if the GFF file contains many records of this type.
;MaskedParentTypes="biological_region","repeat_region"
; The GFF files are expected to have their records grouped by map. This means that all records of a chromosome should come
; in one continuous block, followed by a block for another chromosome, etc. The records within the block can be unsorted (typical for Ensembl GFFs);
; the program will sort them in memory.
; SkipInconsistentRecords: indicates how to treat conflicting records. SkipLine - try to create the annotation without
; the individual conflicting lines; SkipMRNA - skip the entire mRNA if at least one line of the record conflicts with the others;
; None (default) - nothing is skipped. The value is not case sensitive.
SkipInconsistentRecords=SkipMRNA

; AbuttingExons: exons that have an intron of size 0 between them. We can throw an Error (default), AddBoth or Merge such exons.
;AbuttingExons=AddBoth

; CommitFrequency: indicates how often the process commits annotations (every N annotations).
CommitFrequency=1000

[GLEAN]
; Method (required): annotation method. If new, it should be specified in the [Method] section.
Method="GLEAN"

; Column2: while parsing the GFF file, only those lines will be analyzed that have the listed values in GFF column 2.
; Normally, this column shows the method or the source of the annotation.
; If the GFF file has multiple sources in column 2, setting this filter will reduce the amount of data to be parsed and significantly improve the performance.
;Column2="maker"

; TrackName (required): name of the track
TrackName="GLEAN"
; TrackDescription: track description
TrackDescription="Gene models predicted by GLEAN"

; Type: GFF type (column 3) of annotation items. If not specified, both exon (SO:0000147) and CDS (SO:0000316) will be parsed.
; This line is typically left commented out.
; The types of one-exon features can be defined here. To load multiple types, e.g., lncRNA, tRNA, into one track,
; list them separated by commas. These items will be loaded as they are - no child records will be analyzed.
; In that case, ParentType should be commented out.
;Type=lnc_RNA,tRNA

; ParentType (optional): GFF type of the parent items that group annotation items;
; for protein-coding genes this is typically "mRNA". The child items, such as "exon" or "CDS", that have this common parent
; will be grouped and form gene models. Types like "five_prime_UTR" or "three_prime_UTR" are not required but, if present,
; will be checked for consistency with the exon and CDS records.
; If ParentType is omitted, the parent will be auto-detected by taking the direct parents of exon and CDS records.
ParentType="mRNA"

; QualifierTypes: GFF types of the parent items that contain qualifiers, typically "mRNA" or "gene" or both.
QualifierTypes="gene"
; QualifierAttributes: qualifiers to be loaded from the GFF attributes (the last column of GFF).
; If QualifierTypes lists more than one type, to disambiguate the qualifiers with identical names,
; use a "fully-qualified" name that includes the QualifierType. For example, if "gene" and "mRNA" records, requested in QualifierTypes, both have qualifier "Name",
; to load only "Name" from the mRNA line, type "mRNA:Name"
;QualifierAttributes="Name"
; For qualifiers that need renaming or assigning their type or label, use the more flexible QualifierAttributeKey instruction.
; The attribute key in GFF can be stored as a qualifier with a different name. For example, to disambiguate gene:Name and mRNA:Name,
; store them as two different qualifiers, "geneName" and "transcriptName", using this construct:
; QualifierAttributeKey.gene:Name=geneName
; QualifierAttributeKey.mRNA:Name=transcriptName
; Qualifiers can also have a type (integer, int, double, string, long) and a format for printing. Assign the type and format like this:
; QualifierAttributeKey.gene:score=Score:Prediction score,double,0.00
; The line above instructs to find the "score" attribute in the "gene" line of GFF and store it as a qualifier "Score" of type "double".
; Persephone will show it as "Prediction score" with the value having two digits after the decimal point.
;QualifierAttributeKey.[Type:]AttributeKey=qualifierName[:New label][,type,format]
;QualifierAttributeKey.Note=Function
; The line below stores attribute "ID" as qualifier "Id"
QualifierAttributeKey.ID="Id"
; The line below stores attribute "name" as qualifier "Name"
QualifierAttributeKey.name="Name"

; New.: create new qualifiers by extracting substrings from existing GFF attributes using a regular expression.
; The general form of the instruction is
;New.newQualifierName=[Type:]attributeKey,regex
;New.Pfam=gene:Dbxref,Pfam:(\w+)
;New.Prosite=gene:Dbxref,Prosite:(\w+)
;New.InterPro=gene:Dbxref,InterPro:(IPR\w+)
;New.GO=gene:Ontology_term,(GO:\w+)
; The line above creates a new qualifier 'GO' by extracting a substring from the attribute 'Ontology_term'.

; ParentGroupName: the new track will be placed under a parent node with this name.
; To reduce the number of track nodes on the top level, group the tracks of similar type.
;ParentGroupName=gene models
; IsShownFirst: if false, the track will not be shown by default when the map is opened for the first time.
;IsShownFirst=false
; IsSearchable: if true (default), the track data will be indexed for search. If false, the indexing will be skipped.
;IsSearchable=false

[QualifierLinks]
; Some qualifiers can be shown as hyperlinks.
; Link a qualifier name-value pair to an external source.
; %s in the link is where the qualifier value is placed.
;QUALIFIER_NAME=PLACEHOLDER_URL[|regex]
;ID="http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name=%s"
; The line above would result in the qualifier "ID" shown as a hyperlink. For example, if ID="Os1g123", the URL would be "http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name=Os1g123"
Id="http://spuddb.uga.edu/cgi-bin/annotation_report.cgi?gene_id=%s"

; Optional: a qualifier link can be embedded into a longer text of the qualifier value. For example, 'Dbxref' can contain multiple identifiers, each of which
; can be used to construct a hyperlink. Use a regular expression to extract a substring that will be converted into the hyperlink inside the text. The regular expression
; should be appended using a pipe '|' symbol:
;Dbxref=https://www.ebi.ac.uk/interpro/entry/InterPro/%s/|Interpro:(IPR\d+)
; https://pfam.xfam.org/family/%s|(PF\d+)
; The command above contains two URL-regex pairs, separated by a new line, applied to the qualifier key Dbxref.
; The first one instructs to analyze the qualifier value stored under key 'Dbxref', find a substring that starts with 'Interpro:'
; and extract the part that has 'IPR' as the first letters followed by digits.
; The second one will extract substrings that start with PF and contain digits, and plug them into the placeholder %s of the URL that points to the Pfam site.
; Later, you can use the psh command 'add qualifier_link' to add more URLs and regular expressions.

[AnnotationSearches]
; Assign qualifiers to the search terms {GeneName, GeneFunction}.
; Some qualifiers can have a special meaning: they represent the gene name and the gene function.
; This will help narrow down the text search specifically for gene name or function in Persephone.
; This section establishes the distinction. The keyword GeneName tells which qualifier is designated as the gene name.
; Important: use qualifier names here, which can be different from the GFF attribute names. For example, if you have introduced a qualifier "transcriptName" by
; renaming the GFF attribute mRNA:Name, then type GeneName=transcriptName.
;SEARCH_TERM=QUALIFIER_NAME
GeneName="Id"
GeneFunction="Name"

[MapMapping]
; Normally, the map names in the GFF files should correspond (case sensitive) to the map names in the database. In such a case, no map name matching is necessary and this section can be empty.
; If the map names in the file are different from those in the DB, you should map each MAP_NAME in the file to a MAP_ID, an ACCESSION_NO or a MAP_NAME in the DB.
; Manual mapping below:
;MAP_NAME_IN_FILE=MAP_NAME or MAP_ID or ACCESSION_NO in DB
;Chr1=Chr.1
;Chr2=Chr.2
;Chr3=Chr.3
; Check the 'printmapping' command, which may help in generating the name mapping tables.

; Alternatively, use MapsIdentifiedBy
; MapsIdentifiedBy: if all maps in the file are identified not by the map name but by alternative IDs like MAP_ID, ACCESSION_NO or GENOME_DNA_ID,
; provide the mapping with just one line using either MapName, MapId, AccessionNo or GenomeDnaId, for example:
; MapsIdentifiedBy: one of MapId, AccessionNo, GenomeDnaId, MapName (default)
;MapsIdentifiedBy=AccessionNo

; LoadListedMapsOnly: if true, only data for the maps listed in this section will be added.
; If false, PersephoneShell will still try to match names from the file to maps in the database
; using MAP_NAME, MAP_ID or ACCESSION_NO, and if the map is not found, the annotation line will be skipped.
;LoadListedMapsOnly=true

[DbSequences]
; This section applies to Oracle only.
; The ID columns below are used in loading annotations.
; If there is no sequence/trigger assigned to these columns, you must specify a sequence for them.
;PROCESS_RUN.RUN_ID=ID_SEQ
;GDNA_ANNOT.ANNOT_ID=ID_SEQ
;DESCRIPTION.DESCR_ID=ID_SEQ
;TRACK.TRACK_ID=ID_SEQ