diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index d967d014..b4f66a9e 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -1,22 +1,23 @@ name: golangci-lint -# on: -# push: -# tags: -# - v* -# branches: -# - main -# pull_request: -# jobs: -# golangci: -# name: lint -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v2 -# - name: Run golangci-lint -# uses: golangci/golangci-lint-action@v2.5.2 -# with: -# # Optional: version of golangci-lint to use in form of v1.2 or v1.2.3 or `latest` to use the latest version -# version: latest + +on: + push: + tags: + - v* + branches: + - main + pull_request: +jobs: + golangci: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run golangci-lint + uses: golangci/golangci-lint-action@v2.5.2 + with: + # Optional: version of golangci-lint to use in form of v1.2 or v1.2.3 or `latest` to use the latest version + version: latest # Optional: working directory, useful for monorepos # working-directory: somedir diff --git a/.github/workflows/notify.yml b/.github/workflows/notify.yml index 494e9995..2923423a 100644 --- a/.github/workflows/notify.yml +++ b/.github/workflows/notify.yml @@ -4,9 +4,8 @@ on: release: issues: pull_request: - issue_comment: - pull_request_review: - pull_request_review_comment: + types: [opened] + issue_comment: discussion: discussion_comment: push: @@ -23,4 +22,4 @@ jobs: - name: Actions for Discord env: DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }} - uses: Ilshidur/action-discord@0.3.2 \ No newline at end of file + uses: Ilshidur/action-discord@0.3.2 diff --git a/io/fasta/fasta.go b/io/fasta/fasta.go index 3ef150de..61b4cdbc 100644 --- a/io/fasta/fasta.go +++ b/io/fasta/fasta.go @@ -16,7 +16,6 @@ import ( "bytes" "compress/gzip" "io" - "io/ioutil" "os" "strings" ) @@ -193,5 +192,5 @@ func Write(fastas []Fasta, path string) error { if err != nil { return err } - return 
ioutil.WriteFile(path, fastaBytes, 0644) + return os.WriteFile(path, fastaBytes, 0644) } diff --git a/io/genbank/example_test.go b/io/genbank/example_test.go index 153acd91..8fd52d0b 100644 --- a/io/genbank/example_test.go +++ b/io/genbank/example_test.go @@ -3,7 +3,6 @@ package genbank_test import ( "bytes" "fmt" - "io/ioutil" "os" "path/filepath" @@ -29,7 +28,7 @@ func ExampleRead() { } func ExampleWrite() { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { fmt.Println(err.Error()) } @@ -74,7 +73,7 @@ func ExampleReadMulti() { } func ExampleWriteMulti() { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { fmt.Println(err.Error()) } diff --git a/io/genbank/genbank.go b/io/genbank/genbank.go index 7507094a..9002d972 100644 --- a/io/genbank/genbank.go +++ b/io/genbank/genbank.go @@ -15,7 +15,6 @@ import ( "bytes" "fmt" "io" - "io/ioutil" "os" "regexp" "strconv" @@ -33,7 +32,7 @@ GBK specific IO related things begin here. ******************************************************************************/ var ( - readFileFn = ioutil.ReadFile + readFileFn = os.ReadFile parseMultiNthFn = ParseMultiNth parseReferencesFn = parseReferences ) @@ -187,7 +186,7 @@ func Write(sequences Genbank, path string) error { // add error handling in the future. gbk, _ := Build(sequences) - err := ioutil.WriteFile(path, gbk, 0644) + err := os.WriteFile(path, gbk, 0644) return err } @@ -198,7 +197,7 @@ func WriteMulti(sequences []Genbank, path string) error { // add error handling in the future. 
gbk, _ := BuildMulti(sequences) - err := ioutil.WriteFile(path, gbk, 0644) + err := os.WriteFile(path, gbk, 0644) return err } diff --git a/io/genbank/genbank_test.go b/io/genbank/genbank_test.go index f6e4b4f8..8455d89f 100644 --- a/io/genbank/genbank_test.go +++ b/io/genbank/genbank_test.go @@ -3,7 +3,6 @@ package genbank import ( "errors" "io" - "io/ioutil" "os" "path/filepath" "strings" @@ -34,7 +33,7 @@ var singleGbkPaths = []string{ } func TestGbkIO(t *testing.T) { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { t.Error(err) } @@ -68,7 +67,7 @@ func TestMultiLineFeatureParse(t *testing.T) { } func TestMultiGenbankIO(t *testing.T) { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { t.Error(err) } @@ -89,7 +88,7 @@ func TestMultiGenbankIO(t *testing.T) { } func TestGbkLocationStringBuilder(t *testing.T) { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { t.Error(err) } @@ -117,7 +116,7 @@ func TestGbkLocationStringBuilder(t *testing.T) { } func TestGbLocationStringBuilder(t *testing.T) { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { t.Error(err) } diff --git a/io/gff/example_test.go b/io/gff/example_test.go index 04d778ba..e5361434 100644 --- a/io/gff/example_test.go +++ b/io/gff/example_test.go @@ -3,7 +3,6 @@ package gff_test import ( "bytes" "fmt" - "io/ioutil" "os" "path/filepath" "testing" @@ -49,7 +48,7 @@ func ExampleBuild() { } func ExampleWrite() { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { fmt.Println(err.Error()) } diff --git a/io/gff/gff.go b/io/gff/gff.go index 9ea606a2..0f0cbba3 100644 --- a/io/gff/gff.go +++ b/io/gff/gff.go @@ -14,7 +14,6 @@ package gff import ( "bytes" "io" - "io/ioutil" "os" "sort" "strconv" @@ -26,7 
+25,7 @@ import ( ) var ( - readAllFn = ioutil.ReadAll + readAllFn = io.ReadAll atoiFn = strconv.Atoi openFn = os.Open ) @@ -305,5 +304,5 @@ func Read(path string) (Gff, error) { // Write takes an poly.Sequence struct and a path string and writes out a gff to that path. func Write(sequence Gff, path string) error { gff, _ := Build(sequence) - return ioutil.WriteFile(path, gff, 0644) + return os.WriteFile(path, gff, 0644) } diff --git a/io/gff/gff_test.go b/io/gff/gff_test.go index 6c80d2e1..42b79966 100644 --- a/io/gff/gff_test.go +++ b/io/gff/gff_test.go @@ -4,7 +4,6 @@ import ( "bytes" "errors" "io" - "io/ioutil" "os" "path/filepath" "strings" @@ -24,7 +23,7 @@ Gff related tests and benchmarks begin here. // TODO should delete output files. func TestGffIO(t *testing.T) { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { t.Error(err) } @@ -42,8 +41,8 @@ func TestGffIO(t *testing.T) { t.Errorf("Parsing the output of Build() does not produce the same output as parsing the original file read with ReadGff(). 
Got this diff:\n%s", diff) } - original, _ := ioutil.ReadFile(testInputPath) - builtOutput, _ := ioutil.ReadFile(tmpGffFilePath) + original, _ := os.ReadFile(testInputPath) + builtOutput, _ := os.ReadFile(tmpGffFilePath) gffDiff := difflib.UnifiedDiff{ A: difflib.SplitLines(string(original)), B: difflib.SplitLines(string(builtOutput)), diff --git a/io/polyjson/example_test.go b/io/polyjson/example_test.go index e54140f6..37989fb3 100644 --- a/io/polyjson/example_test.go +++ b/io/polyjson/example_test.go @@ -2,7 +2,6 @@ package polyjson_test import ( "fmt" - "io/ioutil" "os" "path/filepath" "time" @@ -14,7 +13,7 @@ import ( func Example() { // this example also is run by the poly's test suite so this just sets up a temporary directory for writing files - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { fmt.Println(err.Error()) } @@ -74,7 +73,7 @@ func ExampleParse() { } func ExampleWrite() { - tmpDataDir, err := ioutil.TempDir("", "data-*") + tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { fmt.Println(err.Error()) } diff --git a/io/polyjson/polyjson.go b/io/polyjson/polyjson.go index 1cae5584..81370480 100644 --- a/io/polyjson/polyjson.go +++ b/io/polyjson/polyjson.go @@ -10,7 +10,6 @@ import ( "bytes" "encoding/json" "io" - "io/ioutil" "os" "time" @@ -151,7 +150,7 @@ func Write(sequence Poly, path string) error { if err != nil { return err } - return ioutil.WriteFile(path, file, 0644) + return os.WriteFile(path, file, 0644) } /****************************************************************************** diff --git a/io/rebase/rebase.go b/io/rebase/rebase.go index 1f8f106c..04e0816b 100644 --- a/io/rebase/rebase.go +++ b/io/rebase/rebase.go @@ -21,112 +21,113 @@ header with the commercial suppliers format and an example enzyme. 
``` REBASE version 104 withrefm.104 - =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= - REBASE, The Restriction Enzyme Database http://rebase.neb.com - Copyright (c) Dr. Richard J. Roberts, 2021. All rights reserved. - =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= - -Rich Roberts Mar 31 2021 + =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + REBASE, The Restriction Enzyme Database http://rebase.neb.com + Copyright (c) Dr. Richard J. Roberts, 2021. All rights reserved. + =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +# Rich Roberts Mar 31 2021 Restriction enzyme name. Other enzymes with this specificity. - These are written from 5' to 3', only one strand being given. - If the point of cleavage has been determined, the precise site - is marked with ^. For enzymes such as HgaI, MboII etc., which - cleave away from their recognition sequence the cleavage sites - are indicated in parentheses. - For example HgaI GACGC (5/10) indicates cleavage as follows: - 5' GACGCNNNNN^ 3' - 3' CTGCGNNNNNNNNNN^ 5' + These are written from 5' to 3', only one strand being given. + If the point of cleavage has been determined, the precise site + is marked with ^. For enzymes such as HgaI, MboII etc., which + cleave away from their recognition sequence the cleavage sites + are indicated in parentheses. + + For example HgaI GACGC (5/10) indicates cleavage as follows: + 5' GACGCNNNNN^ 3' + 3' CTGCGNNNNNNNNNN^ 5' - In all cases the recognition sequences are oriented so that - the cleavage sites lie on their 3' side. + In all cases the recognition sequences are oriented so that + the cleavage sites lie on their 3' side. - REBASE Recognition sequences representations use the standard - abbreviations (Eur. J. Biochem. 150: 1-5, 1985) to represent - ambiguity. 
- R = G or A - Y = C or T - M = A or C - K = G or T - S = G or C - W = A or T - B = not A (C or G or T) - D = not C (A or G or T) - H = not G (A or C or T) - V = not T (A or C or G) - N = A or C or G or T + REBASE Recognition sequences representations use the standard + abbreviations (Eur. J. Biochem. 150: 1-5, 1985) to represent + ambiguity. + R = G or A + Y = C or T + M = A or C + K = G or T + S = G or C + W = A or T + B = not A (C or G or T) + D = not C (A or G or T) + H = not G (A or C or T) + V = not T (A or C or G) + N = A or C or G or T - ENZYMES WITH UNUSUAL CLEAVAGE PROPERTIES: + ENZYMES WITH UNUSUAL CLEAVAGE PROPERTIES: - Enzymes that cut on both sides of their recognition sequences, - such as BcgI, Bsp24I, CjeI and CjePI, have 4 cleavage sites - each instead of 2. + Enzymes that cut on both sides of their recognition sequences, + such as BcgI, Bsp24I, CjeI and CjePI, have 4 cleavage sites + each instead of 2. - Bsp24I - 5' ^NNNNNNNNGACNNNNNNTGGNNNNNNNNNNNN^ 3' - 3' ^NNNNNNNNNNNNNCTGNNNNNNACCNNNNNNN^ 5' + Bsp24I + 5' ^NNNNNNNNGACNNNNNNTGGNNNNNNNNNNNN^ 3' + 3' ^NNNNNNNNNNNNNCTGNNNNNNACCNNNNNNN^ 5' - This will be described in some REBASE reports as: + This will be described in some REBASE reports as: - Bsp24I (8/13)GACNNNNNNTGG(12/7) + Bsp24I (8/13)GACNNNNNNTGG(12/7) - The site of methylation by the cognate methylase when known - is indicated X(Y) or X,X2(Y,Y2), where X is the base within - the recognition sequence that is modified. A negative number - indicates the complementary strand, numbered from the 5' base - of that strand, and Y is the specific type of methylation - involved: - (6) = N6-methyladenosine - (5) = 5-methylcytosine - (4) = N4-methylcytosine - - If the methylation information is different for the 3' strand, - X2 and Y2 are given as well. + + The site of methylation by the cognate methylase when known + is indicated X(Y) or X,X2(Y,Y2), where X is the base within + the recognition sequence that is modified. 
A negative number + indicates the complementary strand, numbered from the 5' base + of that strand, and Y is the specific type of methylation + involved: + (6) = N6-methyladenosine + (5) = 5-methylcytosine + (4) = N4-methylcytosine + + If the methylation information is different for the 3' strand, + X2 and Y2 are given as well. Organism from which this enzyme had been isolated. Either an individual or a National Culture Collection. - Each commercial source of restriction enzymes and/or methylases - listed in REBASE is assigned a single character abbreviation - code. For example: - K Takara (1/98) - M Boehringer Mannheim (10/97) - N New England Biolabs (4/98) + Each commercial source of restriction enzymes and/or methylases + listed in REBASE is assigned a single character abbreviation + code. For example: + + K Takara (1/98) + M Boehringer Mannheim (10/97) + N New England Biolabs (4/98) - The date in parentheses indicates the most recent update of - that organization's listings in REBASE. + The date in parentheses indicates the most recent update of + that organization's listings in REBASE. only the primary references for the isolation and/or purification of the restriction enzyme or methylase, the determination of the recognition sequence and cleavage site or the methylation specificity are given. - REBASE codes for commercial sources of enzymes - B Life Technologies (3/21) - C Minotech Biotechnology (3/21) - E Agilent Technologies (8/20) - I SibEnzyme Ltd. (3/21) - J Nippon Gene Co., Ltd. (3/21) - K Takara Bio Inc. (6/18) - M Roche Applied Science (4/18) - N New England Biolabs (3/21) - O Toyobo Biochemicals (8/14) - Q Molecular Biology Resources - CHIMERx (3/21) - R Promega Corporation (11/20) - S Sigma Chemical Corporation (3/21) - V Vivantis Technologies (1/18) - X EURx Ltd. (1/21) - Y SinaClon BioScience Co. (1/18) + B Life Technologies (3/21) + C Minotech Biotechnology (3/21) + E Agilent Technologies (8/20) + I SibEnzyme Ltd. (3/21) + J Nippon Gene Co., Ltd. 
(3/21) + K Takara Bio Inc. (6/18) + M Roche Applied Science (4/18) + N New England Biolabs (3/21) + O Toyobo Biochemicals (8/14) + Q Molecular Biology Resources - CHIMERx (3/21) + R Promega Corporation (11/20) + S Sigma Chemical Corporation (3/21) + V Vivantis Technologies (1/18) + X EURx Ltd. (1/21) + Y SinaClon BioScience Co. (1/18) <1>AaaI <2>XmaIII,BseX3I,BsoDI,BstZI,EagI,EclXI,Eco52I,SenPT16I,TauII,Tsp504I @@ -144,13 +145,12 @@ package rebase import ( "encoding/json" "io" - "io/ioutil" "os" "strings" ) var ( - readAllFn = ioutil.ReadAll + readAllFn = io.ReadAll parseFn = Parse marshallFn = json.Marshal ) diff --git a/seqhash/example_test.go b/seqhash/example_test.go index 50eb59e5..02629c85 100644 --- a/seqhash/example_test.go +++ b/seqhash/example_test.go @@ -10,7 +10,7 @@ import ( // This example shows how to seqhash a sequence. func Example_basic() { sequence := "ATGC" - sequenceType := "DNA" + sequenceType := seqhash.DNA circular := false doubleStranded := true @@ -21,7 +21,7 @@ func Example_basic() { func ExampleHash() { sequence := "ATGC" - sequenceType := "DNA" + sequenceType := seqhash.DNA circular := false doubleStranded := true diff --git a/seqhash/seqhash.go b/seqhash/seqhash.go index 40e15887..2b5f62fd 100644 --- a/seqhash/seqhash.go +++ b/seqhash/seqhash.go @@ -66,6 +66,15 @@ import ( "lukechampine.com/blake3" ) +// SequenceType is the string type naming the kind of sequence passed to Hash: DNA, RNA, or PROTEIN. +type SequenceType string + +const ( + DNA SequenceType = "DNA" + RNA SequenceType = "RNA" + PROTEIN SequenceType = "PROTEIN" +) + // boothLeastRotation gets the least rotation of a circular string. func boothLeastRotation(sequence string) int { @@ -133,27 +142,27 @@ func RotateSequence(sequence string) string { } // Hash is a function to create Seqhashes, a specific kind of identifier. 
-func Hash(sequence string, sequenceType string, circular bool, doubleStranded bool) (string, error) { +func Hash(sequence string, sequenceType SequenceType, circular bool, doubleStranded bool) (string, error) { // By definition, Seqhashes are of uppercase sequences sequence = strings.ToUpper(sequence) // If RNA, convert to a DNA sequence. The hash itself between a DNA and RNA sequence will not // be different, but their Seqhash will have a different metadata string (R vs D) - if sequenceType == "RNA" { + if sequenceType == SequenceType("RNA") { sequence = strings.ReplaceAll(sequence, "U", "T") } // Run checks on the input - if sequenceType != "DNA" && sequenceType != "RNA" && sequenceType != "PROTEIN" { - return "", errors.New("Only sequenceTypes of DNA, RNA, or PROTEIN allowed. Got sequenceType: " + sequenceType) + if sequenceType != DNA && sequenceType != RNA && sequenceType != PROTEIN { + return "", errors.New("Only sequenceTypes of DNA, RNA, or PROTEIN allowed. Got sequenceType: " + string(sequenceType)) } - if sequenceType == "DNA" || sequenceType == "RNA" { + if sequenceType == DNA || sequenceType == RNA { for _, char := range sequence { if !strings.Contains("ATUGCYRSWKMBDHVNZ", string(char)) { return "", errors.New("Only letters ATUGCYRSWKMBDHVNZ are allowed for DNA/RNA. 
Got letter: " + string(char)) } } } - if sequenceType == "PROTEIN" { + if sequenceType == PROTEIN { for _, char := range sequence { // Selenocysteine (Sec; U) and pyrrolysine (Pyl; O) are added // in accordance with https://www.uniprot.org/help/sequences @@ -166,7 +175,7 @@ func Hash(sequence string, sequenceType string, circular bool, doubleStranded bo } } // There is no check for circular proteins since proteins can be circular - if sequenceType == "PROTEIN" && doubleStranded { + if sequenceType == PROTEIN && doubleStranded { return "", errors.New("Proteins cannot be double stranded") } @@ -193,11 +202,11 @@ func Hash(sequence string, sequenceType string, circular bool, doubleStranded bo var doubleStrandedLetter string // Get first letter. D for DNA, R for RNA, and P for Protein switch sequenceType { - case "DNA": + case DNA: sequenceTypeLetter = "D" - case "RNA": + case RNA: sequenceTypeLetter = "R" - case "PROTEIN": + case PROTEIN: sequenceTypeLetter = "P" } // Get 2nd letter. C for circular, L for Linear diff --git a/synthesis/codon/codon.go b/synthesis/codon/codon.go index 07b3b913..5365ba57 100644 --- a/synthesis/codon/codon.go +++ b/synthesis/codon/codon.go @@ -21,8 +21,8 @@ import ( "encoding/json" "errors" "fmt" - "io/ioutil" "math/rand" + "os" "strings" "time" @@ -429,7 +429,7 @@ func ParseCodonJSON(file []byte) Table { // ReadCodonJSON reads a codonTable JSON file. func ReadCodonJSON(path string) Table { - file, _ := ioutil.ReadFile(path) + file, _ := os.ReadFile(path) codonTable := ParseCodonJSON(file) return codonTable } @@ -437,7 +437,7 @@ func ReadCodonJSON(path string) Table { // WriteCodonJSON writes a codonTable struct out to JSON. 
func WriteCodonJSON(codonTable Table, path string) { file, _ := json.MarshalIndent(codonTable, "", " ") - _ = ioutil.WriteFile(path, file, 0644) + _ = os.WriteFile(path, file, 0644) } /****************************************************************************** diff --git a/synthesis/codon/example_test.go b/synthesis/codon/example_test.go index 150350b9..42c79103 100644 --- a/synthesis/codon/example_test.go +++ b/synthesis/codon/example_test.go @@ -2,7 +2,6 @@ package codon_test import ( "fmt" - "io/ioutil" "os" "strings" @@ -59,7 +58,7 @@ func ExampleReadCodonJSON() { } func ExampleParseCodonJSON() { - file, _ := ioutil.ReadFile("../../data/bsub_codon_test.json") + file, _ := os.ReadFile("../../data/bsub_codon_test.json") codontable := codon.ParseCodonJSON(file) fmt.Println(codontable.GetAminoAcids()[0].Codons[0].Weight)