; MSU_osa1r7-uga.ini

[ProcessRun]
; RunDescription (optional): custom description to be used for this run.
; If not specified, "Added annotation for {MapSet AccessionNo} from {Sources}." will be used.
RunDescription="Loaded MSU rice MSU_osa1r7"

[MapSet]
; Identifies the target map set. Either MapSetId or MapSetPath is required.
; MapSetId: ID of the target map set.
;MapSetId=247582504
; MapSetPath: path of the target map set.
MapSetPath="/Oryza sativa/MSU_osa1r7"

[Method]
; To add a new annotation method, specify a method name and the color to be shown in the annotation track.
; You don't need to provide this info if the method already exists in the database.
; If the method already exists, it will be updated. Otherwise, a new method will be added.
; Supported color names are listed at http://www.flounder.com/csharp_color_table.htm
; Color examples: Red, #FF0000, or 255,0,0
;Name={NamedColor|HTML hex code|R,G,B}
MSU=100,200,255

[Annotation]
; GFF specification at http://www.sequenceontology.org/gff3.shtml
; Sources (required): GFF file(s) with annotations, accessible locally or remotely via URL.
; Files can be compressed with gzip.
; The $DATA variable, defined in psh.exe.config as DataDir, can be used in the path.
Sources=$DATA/rice/all.gff3

; Tracks (required): comma-delimited. Provide the name(s) of the section(s) below that describe the track(s).
; At least one track section needs to be specified.
Tracks="MSU"
; MaskedParentTypes: parent GFF types (column 3) that, together with their child records, should be ignored.
; This saves some memory during GFF parsing, especially if the GFF file contains many records of these types.
;MaskedParentTypes="biological_region","repeat_region"
; The GFF files are expected to have their records grouped by map. This means that all records of a chromosome should come
; in one continuous block, followed by a block for another chromosome, etc. The records within a block can be unsorted (typical for Ensembl GFFs);
; the program will sort them in memory.
; SkipInconsistentRecords: indicates how to treat conflicting records. The value is not case-sensitive.
;   SkipLine - try to create the annotation without the individual conflicting lines;
;   SkipMRNA - skip the entire mRNA if at least one line of the record conflicts with the others;
;   None (default) - nothing is skipped.
SkipInconsistentRecords=SkipMRNA
; CommitFrequency: indicates how often the process commits annotations (every N annotations).
CommitFrequency=1000
; RebuildSearchIndex: if true (default), the search index on Oracle will be rebuilt.
; The setting has no effect for MySql.
; One reason to set it to false is when adding annotations in a batch, leaving the index rebuilding to the very last command.
;RebuildSearchIndex=false

; AbuttingExons: exons that have an intron of size 0 between them. For such exons the program can throw an Error (default), AddBoth or Merge.
;AbuttingExons=AddBoth

[MSU]
; Method (required): annotation method. If new, it should be specified in the [Method] section.
Method="MSU"

; Column2: while parsing the GFF file, only those lines that have one of the listed values in GFF column 2 will be analyzed.
; Normally, this column shows the method or the source of the annotation.
; If the GFF file has multiple sources in column 2, setting this filter will reduce the amount of data to be parsed and significantly improve performance.
;Column2="maker"

; TrackName (required): name of the track.
TrackName="MSU gene models"
; TrackDescription: track description.
; NOTE(review): the value below spans several lines inside one pair of double quotes and writes inner quotes as \";
; presumably the psh parser accepts this form - confirm before reformatting it.
TrackDescription="Gene annotation from
<a href=\"http://rice.uga.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3\">
http://rice.uga.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3</a>"
; Type: GFF type (column 3) of annotation items. If not specified, both exon (SO:0000147) and CDS (SO:0000316) will be parsed.
; This line is typically left commented out.
;Type="CDS"
; ParentType (required if CDS or exon is implied under Type): GFF type of the parent items that group annotation items; for protein-coding genes this is typically "mRNA".
ParentType="mRNA"
; QualifierTypes: GFF types of the parent items that contain qualifiers, typically "mRNA" or "gene" or both.
QualifierTypes=gene,mRNA
; QualifierAttributes: qualifiers to be loaded from the GFF attributes (the last column of GFF).
; If QualifierTypes lists more than one type, disambiguate qualifiers with identical names by using
; the "fully-qualified" name that includes the QualifierType. For example, if the "gene" and "mRNA" records requested in QualifierTypes both have a qualifier "Name",
; type "mRNA:Name" to load only "Name" from the mRNA line.
QualifierAttributes="gene:Note"
; For qualifiers that need renaming, or need a type or label assigned, use the more flexible QualifierAttributeKey instruction.
; The attribute key in GFF can be stored as a qualifier with a different name. For example, to disambiguate gene:Name and mRNA:Name,
; store them as two different qualifiers, "geneName" and "transcriptName", using this construct:
QualifierAttributeKey.gene:Name=geneName
QualifierAttributeKey.mRNA:Name=transcriptName
; Qualifiers can also have a label, a type (integer, int, double, string, long) and a format for printing. Assign the type and format like this:
;QualifierAttributeKey.[Type:]AttributeKey=qualifierName[:New label][,type,format]
;QualifierAttributeKey.gene:score=Score:Prediction score,double,0.00
; The line above instructs the program to find the "score" attribute in the "gene" line of GFF and store it as a qualifier "Score" of type "double".
; Persephone will show it as "Prediction score" with the value having two digits after the decimal point.
;QualifierAttributeKey.Note=Function
; The line above stores attribute "Note" as qualifier "Function".

;QualifierAttributeKey._AED=AED,double,0.00
;QualifierAttributeKey._eAED=eAED,double,0.00

; New.<qualifierName>: create new qualifiers by extracting substrings from existing GFF attributes using a regular expression.
; The general form of the instruction is
;New.newQualifierName=[Type:]attributeKey,regex
;New.Pfam=gene:Dbxref,Pfam:(\w+)
;New.Prosite=gene:Dbxref,Prosite:(\w+)
;New.InterPro=gene:Dbxref,InterPro:(IPR\w+)
;New.GO=gene:Ontology_term,(GO:\w+)
; The line above creates a new qualifier 'GO' by extracting a substring from the attribute 'Ontology_term'.

; ParentGroupName: the new track will be placed under a parent node with this name.
; To reduce the number of track nodes on the top level, group the tracks of similar type.
;ParentGroupName=gene models

; IsShownFirst: if false, the track will not be shown by default when the map is opened for the first time.
;IsShownFirst=false

; IsSearchable: if true (default), the track data will be indexed for search. If false, the indexing will be skipped.
;IsSearchable=false

; OrderNo: the track's order number in the list of tracks for each map. By default, if it is not specified, it will be assigned sequentially,
; thus preserving the order of track loading. To force a track to be shown as the first one on a map, you can use a negative number for OrderNo.
;OrderNo=-1

; GroupMaxFeatures: all tracks in this loading procedure should have a number of features less than this value. If even one track has a higher feature count,
; the grouping will not be applied.
; The grouping is necessary for the tracks with gene models that will be used for finding orthologs. The ortholog pairs are recorded between the groups, not between
; individual genes. If a track contains features that have CDS but are not supposed to be grouped, avoid grouping by setting GroupMaxFeatures to a low number, e.g., 1000.
; This will result in the tracks in this batch not being grouped, and thus save you a lot of processing time.
; If GroupMaxFeatures is not used, the default value of 400000 will be used. To change the default value, modify the section <appSettings> in psh.exe.config:
; <appSettings><add key="MaxGenesOnTrackToGroup" value="300000" /></appSettings>.
;GroupMaxFeatures=100000

[QualifierLinks]
; Some qualifiers can be shown as hyperlinks.
; Link a qualifier name-value pair to an external source.
; %s in the link is where the qualifier value is positioned.
;QUALIFIER_NAME=PLACEHOLDER_URL[|regex]
;ID="http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name=%s"
; The line above would result in the qualifier "ID" shown as a hyperlink.
; For example, if ID="Os1g123", the URL would be "http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name=Os1g123"

; Optional: a qualifier link can be embedded into a longer text of the qualifier value. For example, 'Dbxref' can contain multiple identifiers, each of which
; can be used to construct a hyperlink. Use a regular expression to extract the substring that will be converted into the hyperlink inside the text. The regular expression
; should be appended using a pipe '|' symbol:
;Dbxref=https://www.ebi.ac.uk/interpro/entry/InterPro/%s/|Interpro:(IPR\d+)
; https://pfam.xfam.org/family/%s|(PF\d+)
; The command above contains two URL-regex pairs, separated by a new line, applied to the qualifier key Dbxref.
; The first one instructs the program to analyze the qualifier value stored under key 'Dbxref', find a substring that starts with 'Interpro:'
; and extract the part that has 'IPR' as the first letters followed by digits.
; The second one will extract substrings that start with PF and contain digits, and plug them into the placeholder %s of the URL that points to the Pfam site.
; Later, you can use the psh command 'add qualifier_link' to add more URLs and regular expressions.
; Active link: the value of qualifier "transcriptName" replaces %s in the rice.uga.edu ORF_infopage URL below.
transcriptName=http://rice.uga.edu/cgi-bin/ORF_infopage.cgi?orf=%s

[AnnotationSearches]
; Assign a qualifier to a search term {GeneName, GeneFunction}.
; Some qualifiers can have a special meaning: they represent the gene name and the gene function.
; This helps narrow down the text search specifically to gene name or function in Persephone.
; This section establishes the distinction. The keyword GeneName tells which qualifier is designated as the gene name.
; Important: use qualifier names here, which can be different from the GFF attribute names. For example, if you have introduced a qualifier "transcriptName" by
; renaming the GFF attribute mRNA:Name, then type GeneName=transcriptName.
;SEARCH_TERM=QUALIFIER_NAME
GeneName=transcriptName
GeneFunction=Note

[MapMapping]
; See Control Files for more details.
; Normally, the map names in the GFF files should correspond (case-sensitive) to the map names in the database.
; In that case, no map name matching is necessary and this section can be empty.
; If map names in the file are different from those in the DB, you should map each MAP_NAME in the file to a MAP_ID, an ACCESSION_NO or a MAP_NAME in the DB.
; Manual mapping below:
;MAP_NAME_IN_FILE=MAP_NAME or MAP_ID or ACCESSION_NO in DB
;Chr1=Chr.1
;Chr2=Chr.2
;Chr3=Chr.3
; Check the 'printmapping' command, which may help in generating the name mapping tables.
; Alternatively, use MapsIdentifiedBy.
; MapsIdentifiedBy: if all maps in the file are identified not by the map name but by an alternative ID like MAP_ID, ACCESSION_NO or GENOME_DNA_ID,
; provide the mapping with just one line.
; Allowed values: MapId, AccessionNo, GenomeDnaId, MapName (default). For example:
;MapsIdentifiedBy=AccessionNo
; LoadListedMapsOnly: if true, only data for the maps listed in this section will be added.
; If false, PersephoneShell will still try to match names from the file to maps in the database
; using MAP_NAME, MAP_ID or ACCESSION_NO, and if the map is not found, the annotation line will be skipped.
;LoadListedMapsOnly=true