Name: gff-js
Owner: Generic Model Organism Database Project
Description: parse and format streams of GFF3, for node or webpack
Created: 2018-04-05 23:32:53.0
Updated: 2018-04-28 20:51:33.0
Pushed: 2018-04-28 20:51:31.0
Homepage: null
Size: 177
Language: JavaScript
GitHub Committers
User | Most Recent Commit | # Commits |
---|
Other Committers
User | Most Recent Commit | # Commits |
---|
Read and write GFF3 data performantly. This module aims to be a complete implementation of the GFF3 specification.
Parent and Derives_from relationships
Parent and Derives_from relationships
$ npm install --save @gmod/gff
const gff = require('@gmod/gff').default
// or in ES6 (recommended)
import gff from '@gmod/gff'
// parse a file from a file name
// parses only features and sequences by default,
// set options to parse directives and/or comments
gff.parseFile('path/to/my/file.gff3', { parseAll: true })
.on('data', data => {
  if (data.directive) {
    console.log('got a directive',data)
  }
  else if (data.comment) {
    console.log('got a comment',data)
  }
  else if (data.sequence) {
    console.log('got a sequence from a FASTA section')
  }
  else {
    console.log('got a feature',data)
  }
})
// parse a stream of GFF3 text
const fs = require('fs')
fs.createReadStream('path/to/my/file.gff3')
.pipe(gff.parseStream())
.on('data', data => {
  console.log('got item',data)
  return data
})
.on('end', () => {
  console.log('done parsing!')
})
// parse a string of gff3 synchronously
let stringOfGFF3 = fs
  .readFileSync('my_annotations.gff3')
  .toString()
let arrayOfThings = gff.parseStringSync(stringOfGFF3)
// format an array of items to a string
stringOfGFF3 = gff.formatSync(arrayOfThings)
// format a stream of things to a stream of text.
// inserts sync marks automatically.
myStreamOfGFF3Objects
.pipe(gff.formatStream())
.pipe(fs.createWriteStream('my_new.gff3'))
// format a stream of things and write it to
// a gff3 file. inserts sync marks and a
// '##gff-version 3' header if one is not
// already present
gff.formatFile(myStreamOfGFF3Objects, 'path/to/destination.gff3')
In GFF3, features can have more than one location. We parse features
as arrays of all the lines that share that feature's ID.
Values that are "." in the GFF3 are null in the output.
A simple feature that's located in just one place:
[
  {
    "seq_id": "ctg123",
    "source": null,
    "type": "gene",
    "start": 1000,
    "end": 9000,
    "score": null,
    "strand": "+",
    "phase": null,
    "attributes": {
      "ID": [
        "gene00001"
      ],
      "Name": [
        "EDEN"
      ]
    },
    "child_features": [],
    "derived_features": []
  }
]
A CDS called cds00001 located in two places:
[
  {
    "seq_id": "ctg123",
    "source": null,
    "type": "CDS",
    "start": 1201,
    "end": 1500,
    "score": null,
    "strand": "+",
    "phase": "0",
    "attributes": {
      "ID": [
        "cds00001"
      ],
      "Parent": [
        "mRNA00001"
      ]
    },
    "child_features": [],
    "derived_features": []
  },
  {
    "seq_id": "ctg123",
    "source": null,
    "type": "CDS",
    "start": 3000,
    "end": 3902,
    "score": null,
    "strand": "+",
    "phase": "0",
    "attributes": {
      "ID": [
        "cds00001"
      ],
      "Parent": [
        "mRNA00001"
      ]
    },
    "child_features": [],
    "derived_features": []
  }
]
parseDirective("##gff-version 3\n")
// returns
{
  "directive": "gff-version",
  "value": "3"
}
parseDirective('##sequence-region ctg123 1 1497228\n')
// returns
{
  "directive": "sequence-region",
  "value": "ctg123 1 1497228",
  "seq_id": "ctg123",
  "start": "1",
  "end": "1497228"
}
parseComment('# hi this is a comment\n')
// returns
{
  "comment": "hi this is a comment"
}
These come from any embedded ##FASTA section in the GFF3 file.
{
  "id": "ctgA",
  "description": "test contig",
  "sequence": "ACTGACTAGCTAGCATCAGCGTCGTAGCTATTATATTACGGTAGCCA"
}
Parse a stream of text data into a stream of feature, directive, and comment objects.
Parameters
options Object optional options object (optional, default {})
options.encoding string text encoding of the input GFF3. default 'utf8'
options.parseAll boolean default false. if true, will parse all items. overrides other flags
options.parseFeatures boolean default true
options.parseDirectives boolean default false
options.parseComments boolean default false
options.parseSequences boolean default true
options.bufferSize Number maximum number of GFF3 lines to buffer. defaults to 1000
Returns ReadableStream stream (in objectMode) of parsed items
Read and parse a GFF3 file from the filesystem.
Parameters
filename string the filename of the file to parse
options Object optional options object
options.encoding string the file's string encoding, defaults to 'utf8'
options.parseAll boolean default false. if true, will parse all items. overrides other flags
options.parseFeatures boolean default true
options.parseDirectives boolean default false
options.parseComments boolean default false
options.parseSequences boolean default true
options.bufferSize Number maximum number of GFF3 lines to buffer. defaults to 1000
Returns ReadableStream stream (in objectMode) of parsed items
Synchronously parse a string containing GFF3 and return an array of the parsed items.
Parameters
Returns Array array of parsed features, directives, and/or comments
Format an array of GFF3 items (features, directives, comments) into a string of GFF3. Does not insert synchronization (###) marks.
Parameters
items
Returns String the formatted GFF3
Format a stream of items (of the type produced by this script) into a stream of GFF3 text.
Inserts synchronization (###) marks automatically.
Parameters
options
Object
Format a stream of items (of the type produced by this script) into a GFF3 file and write it to the filesystem.
Inserts synchronization (###) marks and a ##gff-version directive automatically (if one is not already present).
Parameters
stream ReadableStream the stream to write to the file
filename String the file path to write to
options Object (optional, default {})
Returns Promise promise for the written filename
There is also a util module that contains super-low-level functions for dealing with lines and parts of lines.
// non-ES6
const util = require('@gmod/gff').default.util
// or, with ES6
import gff from '@gmod/gff'
const util = gff.util
const gff3Lines = util.formatItem({
  seq_id: 'ctgA',
  ...
})
Unescape a string value used in a GFF3 attribute.
Parameters
s String
Returns String
Escape a value for use in a GFF3 attribute value.
Parameters
s String
Returns String
Parse the 9th column (attributes) of a GFF3 feature line.
Parameters
attrString String
Returns Object
Parse a GFF3 feature line
Parameters
line String
Parse a GFF3 directive line.
Parameters
line String
Returns Object the information in the directive
Format an attributes object into a string suitable for the 9th column of GFF3.
Parameters
attrs Object
Format a feature object or array of feature objects into one or more lines of GFF3.
Parameters
featureOrFeatures
Format a directive into a line of GFF3.
Parameters
directive Object
Returns String
Format a comment into a GFF3 comment. Yes I know this is just adding a # and a newline.
Parameters
comment Object
Returns String
Format a sequence object as FASTA
Parameters
seq Object
Returns String formatted single FASTA sequence
Format a directive, comment, or feature, or array of such items, into one or more lines of GFF3.
Parameters
MIT © Robert Buels