; wheat-v2.1-ncbi.ini

[ProcessRun]
; RunDescription (optional): if specified, this custom description will be used. It is ignored if a RunId is specified.
; Otherwise, "Added annotations for {MapSet Accession No.} from {Sources}." will be used.
;RunDescription="Load Gnomon annotations for IWGSC2.1 from NCBI (https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/018/294/505/GCF_018294505.1_IWGSC_CS_RefSeq_v2.1/GCF_018294505.1_IWGSC_CS_RefSeq_v2.1_genomic.gff.gz)"

[MapSet]
; Identifies the target map set. Either MapSetId or MapSetPath is required.
; MapSetId: ID of the target map set.
;MapSetId=247848026
; MapSetPath: path of the target map set.
MapSetPath=/Triticum aestivum/IWGSC2.1

[Method]
; To add an annotation method, specify a method name, CDS flag and a color to be shown in the annotation track.
; The CDS flag is a boolean value that indicates whether the annotation method is for CDS or not.
; NOTE(review): the format line below has no slot for the CDS flag - confirm how (or whether) it is specified here.
; Supported color names are listed at http://www.flounder.com/csharp_color_table.htm
; If a method already exists, it will be updated. Otherwise, it will be added.
;Name={NamedColor|HTML hex code|R,G,B}
; Method "Gnomon" (referenced by the [Genes] track section) with its color given as R,G,B.
Gnomon=100,255,160

[Annotation]
; GFF specification: http://www.sequenceontology.org/gff3.shtml
; Sources (required): GFF file(s) for annotations, accessible locally or remotely via URL.
; Files can be compressed with gzip.
; The $DATA variable, defined in psh.exe.config as DataDir, can be used in the path.
Sources=$DATA/wheat/GCF_018294505.1_IWGSC_CS_RefSeq_v2.1_genomic.gff.gz

; Tracks (required): comma delimited. Provide the name(s) of the section(s) below that describe the track(s).
; At least one track section needs to be specified. Here, "Genes" refers to the [Genes] section.
Tracks="Genes"
; MaskedParentTypes: parent GFF types (column 3) that, together with their child records, should be ignored.
; This saves some memory during GFF parsing, especially if the GFF file contains many records of this type.
;MaskedParentTypes="biological_region","repeat_region"
; The GFF files are expected to have their records grouped by map. This means that all records of a chromosome should come
; in one continuous block, followed by a block for another chromosome, etc. The records within the block can be unsorted (typical for Ensembl GFFs);
; the program will sort them in memory.
; SkipInconsistentRecords: indicates how to treat conflicting records. SkipLine - try to create the annotation without the
; individual conflicting lines; SkipMRNA - skip the entire mRNA if at least one line of the record conflicts with the others;
; None (default) - nothing is skipped. The value is not case sensitive.
;SkipInconsistentRecords=SkipMRNA
; CommitFrequency: indicates how often the process commits annotations (every N annotations).
CommitFrequency=1000
; RebuildSearchIndex: if true (default), the search index on Oracle will be rebuilt.
; The setting has no effect for MySql.
; One reason to set it to false is when adding annotations in a batch, leaving the index rebuilding to the very last command.
;RebuildSearchIndex=false

; AbuttingExons: exons that have an intron of size 0 between them. We can throw an Error (default), AddBoth or Merge such exons.
;AbuttingExons=AddBoth

[Genes]
; Track section referenced by Tracks in [Annotation].
; Method (required): annotation method. If new, it should be specified in the [Method] section.
Method="Gnomon"

; Column2: while parsing the gff file, only those lines will be analyzed that have the listed values in the gff column 2.
; Normally, this column shows the method or the source of the annotation.
; If the gff file has multiple sources in column 2, setting this filter will reduce the amount of data to be parsed and significantly improve the performance.
;Column2="maker"

; TrackName (required): name of the track
TrackName="Gnomon gene models"
; TrackDescription: track description
TrackDescription="Gnomon annotation from NCBI
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/018/294/505/GCF_018294505.1_IWGSC_CS_RefSeq_v2.1/GCF_018294505.1_IWGSC_CS_RefSeq_v2.1_genomic.gff.gz
"
; Type: GFF type (column 3) of annotation items. If not specified, both exon (SO:0000147) and CDS (SO:0000316) will be parsed.
; This line is typically left commented out
;Type="CDS"
; ParentType (required if CDS or exon is implied under Type): GFF type of parent items that group annotation items; for protein-coding genes this is typically "mRNA"
ParentType="mRNA"
; QualifierType: GFF type of parent items that contain qualifiers, typically "mRNA" or "gene" or both.
QualifierType="mRNA","gene"
; QualifierAttributes: qualifiers to be loaded from the GFF attributes (the last column of GFF).
; If QualifierType lists more than one type, to disambiguate the qualifiers with identical names,
; use a "fully-qualified" name that includes the QualifierType. For example, if "gene" and "mRNA" records, requested in QualifierType, both have qualifier "Name",
; to load only "Name" from the mRNA line, type "mRNA:Name"
QualifierAttributes=mRNA:partial,mRNA:Note,mRNA:exception
; For the qualifiers that need renaming or assigning their type or label, use a more flexible way of loading qualifiers engaging the QualifierAttributeKey instruction.
; The attribute key in GFF can be stored as a qualifier with a different name. For example, to disambiguate gene:Name and mRNA:Name,
; store them as two different qualifiers, "geneName" and "transcriptName" using this construct:
QualifierAttributeKey.gene:Name=geneName
QualifierAttributeKey.mRNA:Name=transcriptName
; Qualifiers can also have a label, a type (integer, int, double, string, long) and a format for printing. Assign the type and format like this:
;QualifierAttributeKey.[Type:]AttributeKey=qualifierName[:New label][,type,format]
;QualifierAttributeKey.gene:score=Score:Prediction score,double,0.00
; The line above instructs to find the "score" attribute in the "gene" line of GFF and store it as a qualifier "Score" of type "double".
; Persephone will show it as "Prediction score" with the value having two digits after the decimal point.
;QualifierAttributeKey.Note=Function
; The line above stores attribute "Note" as qualifier "Function"
QualifierAttributeKey.mRNA:model_evidence=model_evidence
QualifierAttributeKey.mRNA:product=product
QualifierAttributeKey.mRNA:transcript_id=transcriptId

;QualifierAttributeKey._AED=AED,double,0.00
;QualifierAttributeKey._eAED=eAED,double,0.00

; New.: Create new qualifiers by extracting substrings from existing gff attributes using a regular expression.
; The general form of the instruction is
;New.newQualifierName=[Type:]attributeKey,regex
;New.Pfam=gene:Dbxref,Pfam:(\w+)
;New.Prosite=gene:Dbxref,Prosite:(\w+)
;New.InterPro=gene:Dbxref,InterPro:(IPR\w+)
;New.GO=gene:Ontology_term,(GO:\w+)
; The line above creates a new qualifier 'GO' by extracting a substring from an attribute 'Ontology_term'.

; ParentGroupName: the new track will be placed under a parent node with this name.
; To reduce the number of track nodes on the top level, group the tracks of similar type.
;ParentGroupName=gene models

; IsShownFirst: if false, the track will not be shown by default when the map is opened for the first time
;IsShownFirst=false

; IsSearchable: If true (default), the track data will be indexed for search. If false, the indexing will be skipped
;IsSearchable=false

; OrderNo: track's order number in the list of tracks for each map. By default, if it is not specified, it will be automatically assigned sequentially
; thus preserving the order of track loading. Sometimes, to force showing a track as the first one on a map, you can use a negative number for OrderNo
;OrderNo=-1

; GroupMaxFeatures - All tracks in this loading procedure should have the number of features less than this value. If even one track has a higher feature count,
; the grouping will not be applied.
; The grouping is necessary for the tracks with gene models that will be used for finding orthologs. The ortholog pairs are recorded between the groups, not between
; individual genes. If a track contains features that have CDS but are not supposed to be grouped, avoid grouping by setting GroupMaxFeatures to a low number, e.g., 1000.
; This will result in the tracks in this batch not being grouped, and thus save you a lot of processing time.
; If GroupMaxFeatures is not used, the default value of 400000 will be used. To change the default value, modify the section <appSettings> in psh.exe.config:
; <appSettings><add key="MaxGenesOnTrackToGroup" value="300000" /></appSettings>.
;GroupMaxFeatures=100000

[QualifierLinks]
; Some qualifiers can be shown as hyperlinks.
; Link qualifier name-value to external sources.
; %s in the link is where the qualifier value is positioned.
;QUALIFIER_NAME=PLACEHOLDER_URL[|regex]
;ID="http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name=%s"
; The line above would result in the qualifier "ID" shown as a hyperlink. For example, if ID="Os1g123", the URL would be "http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name=Os1g123"
; "transcriptId" is the qualifier name assigned to the mRNA:transcript_id attribute in the [Genes] section.
transcriptId="https://www.ncbi.nlm.nih.gov/search/all/?term=%s"

; Optional: a qualifier link can be embedded into a longer text of the qualifier value. For example, 'Dbxref' can contain multiple identifiers, each of which
; can be used to construct a hyperlink. Use a regular expression to extract a substring that will be converted into the hyperlink inside the text. The regular expression
; should be appended using a pipe '|' symbol:
;Dbxref=https://www.ebi.ac.uk/interpro/entry/InterPro/%s/|Interpro:(IPR\d+)
; https://pfam.xfam.org/family/%s|(PF\d+)
; The command above contains two URL-regex pairs applied to the qualifier key Dbxref, separated by a new line.
; The first one instructs to analyze the qualifier value stored under key 'Dbxref', find a substring that starts with 'Interpro:'
; and extract the part that has 'IPR' as the first letters followed by digits.
; The second one will extract substrings that start with PF and contain digits, and plug them into the placeholder %s of the URL that points to the Pfam site.
; Later, you can use the psh command 'add qualifier_link' to add more URLs and regular expressions.

[AnnotationSearches]
; Add qualifier name-value to a search term {GeneName, GeneFunction}
; Some qualifiers can have a special meaning: they represent gene name and gene function.
; This will help narrow down the text search specifically for gene name or function in Persephone.
; This section establishes the distinction. The keyword GeneName tells which qualifier is designated as the gene name.
; Important: use qualifier names here, which can be different from the GFF attribute names. For example, if you have introduced a qualifier "transcriptName" by
; renaming the GFF attribute mRNA:Name, then type GeneName=transcriptName.
;SEARCH_TERM=QUALIFIER_NAME
; "transcriptId" and "product" are the qualifier names assigned via QualifierAttributeKey in the [Genes] section.
GeneName="transcriptId"
GeneFunction=product
;GeneFunction=Description

[MapMapping]
; Normally, the map names in the GFF files should correspond (case sensitive) to the map names in the database. In such a case, no map name matching is necessary and this section can be empty.
; If map names in the file are different from those in the DB, you should map each MAP_NAME in the file to a MAP_ID, an ACCESSION_NO or a MAP_NAME in the DB.
; Manual mapping below:
;MAP_NAME_IN_FILE=MAP_NAME or MAP_ID or ACCESSION_NO in DB
;Chr1=Chr.1
;Chr2=Chr.2
;Chr3=Chr.3
; Check the 'printmapping' command that may help generate the name mapping tables.

; Alternatively, use MapsIdentifiedBy
; MapsIdentifiedBy: if all maps in the file are identified not by the map name but by an alternative ID like MAP_ID, ACCESSION_NO or GENOME_DNA_ID,
; provide the mapping with just one line using one of MapId, AccessionNo, GenomeDnaId or MapName (default), for example:
;MapsIdentifiedBy=AccessionNo

; LoadListedMapsOnly: if true, only data for the maps listed in this section will be added.
; If false, PersephoneShell will still try to match names from the file to maps in the database
; using MAP_NAME, MAP_ID or ACCESSION_NO, and if the map is not found, the annotation line will be skipped
LoadListedMapsOnly=true
;MAP_NAME in file=MAP_NAME or ACCESSION_NO in DB
; Left: sequence identifier as used in the GFF file. Right: the corresponding map in the database.
NC_057794.1=Chr1A
NC_057795.1=Chr1B
NC_057796.1=Chr1D
NC_057797.1=Chr2A
NC_057798.1=Chr2B
NC_057799.1=Chr2D
NC_057800.1=Chr3A
NC_057801.1=Chr3B
NC_057802.1=Chr3D
NC_057803.1=Chr4A
NC_057804.1=Chr4B
NC_057805.1=Chr4D
NC_057806.1=Chr5A
NC_057807.1=Chr5B
NC_057808.1=Chr5D
NC_057809.1=Chr6A
NC_057810.1=Chr6B
NC_057811.1=Chr6D
NC_057812.1=Chr7A
NC_057813.1=Chr7B
NC_057814.1=Chr7D

[DbSequences]
; The ID columns below are used in loading annotations.
; If there is no sequence/trigger assigned to these columns, you must specify a sequence for them.
; All entries below are commented out, so no sequences are specified in this file.
;PROCESS_RUN.RUN_ID=ID_SEQ
;GDNA_ANNOT.ANNOT_ID=ID_SEQ
;DESCRIPTION.DESCR_ID=ID_SEQ
;TRACK.TRACK_ID=ID_SEQ