Skip to content

Commit

Permalink
Add support for Gz'd flat files (#79)
Browse files Browse the repository at this point in the history
* Adds func for reading multi and flat genbank files

* Fixed comment problems

* Reads, not parses

* Added feature for Sequences from Gzipped flat files

* Fixed error handling
  • Loading branch information
Koeng101 authored Dec 4, 2020
1 parent bb702bc commit a3f9b1d
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 30 deletions.
Binary file added data/flatGbk_test.seq.gz
Binary file not shown.
50 changes: 20 additions & 30 deletions io.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package poly

import (
"bytes"
"compress/gzip"
"encoding/json"
"io/ioutil"
"log"
Expand Down Expand Up @@ -325,13 +326,9 @@ func BuildGff(sequence Sequence) []byte {

// ReadGff takes in a filepath for a .gffv3 file and parses it into an Annotated Sequence struct.
func ReadGff(path string) Sequence {
file, err := ioutil.ReadFile(path)
file, _ := ioutil.ReadFile(path)
var sequence Sequence
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
} else {
sequence = ParseGff(file)
}
sequence = ParseGff(file)
return sequence
}

Expand Down Expand Up @@ -368,10 +365,7 @@ func ParseJSON(file []byte) Sequence {

// ReadJSON reads a Sequence JSON file.
func ReadJSON(path string) Sequence {
file, err := ioutil.ReadFile(path)
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
}
file, _ := ioutil.ReadFile(path)
sequence := ParseJSON(file)
return sequence
}
Expand Down Expand Up @@ -499,10 +493,7 @@ func BuildFASTA(sequence Sequence) []byte {

// ReadFASTA reads a Sequence struct from a FASTA file.
func ReadFASTA(path string) Sequence {
file, err := ioutil.ReadFile(path)
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
}
file, _ := ioutil.ReadFile(path)
sequence := ParseFASTA(file)
return sequence
}
Expand Down Expand Up @@ -715,14 +706,9 @@ func BuildGbk(sequence Sequence) []byte {

// ReadGbk reads a Gbk from path and parses into an Annotated sequence struct.
func ReadGbk(path string) Sequence {
file, err := ioutil.ReadFile(path)
file, _ := ioutil.ReadFile(path)
var sequence Sequence
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
} else {
sequence = ParseGbk(file)

}
sequence = ParseGbk(file)
return sequence
}

Expand Down Expand Up @@ -1451,24 +1437,28 @@ func ParseGbkFlat(file []byte) []Sequence {

// ReadGbkMulti reads multiple genbank files from a single file
func ReadGbkMulti(path string) []Sequence {
file, err := ioutil.ReadFile(path)
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
}
file, _ := ioutil.ReadFile(path)
sequences := ParseGbkMulti(file)
return sequences
}

// ReadGbkFlat reads flat genbank files, like the ones provided by the NCBI FTP server
// ReadGbkFlat reads flat genbank files, like the ones provided by the NCBI FTP server (after decompression)
func ReadGbkFlat(path string) []Sequence {
file, err := ioutil.ReadFile(path)
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
}
file, _ := ioutil.ReadFile(path)
sequences := ParseGbkFlat(file)
return sequences
}

// ReadGbkFlatGz reads flat gzip'd genbank files, like the ones provided by the NCBI FTP server.
//
// The file at path is read fully into memory, decompressed, and the
// decompressed bytes are handed to ParseGbkFlat. Because the signature has no
// error return, a failure to read or decompress the file is logged and a nil
// slice is returned instead of parsing missing or partial data.
func ReadGbkFlatGz(path string) []Sequence {
	file, err := ioutil.ReadFile(path)
	if err != nil {
		log.Printf("reading gzipped genbank flat file %q: %v", path, err)
		return nil
	}

	// gzip.NewReader hands back a nil reader when the gzip header is missing
	// or malformed; reading from that nil reader would panic, so stop here.
	r, err := gzip.NewReader(bytes.NewReader(file))
	if err != nil {
		log.Printf("opening gzip stream of %q: %v", path, err)
		return nil
	}
	defer r.Close()

	s, err := ioutil.ReadAll(r)
	if err != nil {
		log.Printf("decompressing %q: %v", path, err)
		return nil
	}
	return ParseGbkFlat(s)
}

/******************************************************************************
Genbank Flat specific IO related things end here.
Expand Down
11 changes: 11 additions & 0 deletions io_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,17 @@ func ExampleReadGbkFlat() {
// Output: AB000100, AB000106
}

// ExampleReadGbkFlatGz reads the gzipped flat genbank test file and prints
// the locus name of every sequence it yields, comma separated.
func ExampleReadGbkFlatGz() {
	records := ReadGbkFlatGz("data/flatGbk_test.seq.gz")

	names := make([]string, 0, len(records))
	for i := range records {
		names = append(names, records[i].Meta.Locus.Name)
	}

	joined := strings.Join(names, ", ")
	fmt.Println(joined)
	// Output: AB000100, AB000106
}

func ExampleParseGbkMulti() {
file, _ := ioutil.ReadFile("data/multiGbk_test.seq")
sequences := ParseGbkMulti(file)
Expand Down

0 comments on commit a3f9b1d

Please sign in to comment.