diff --git a/scripts/helper_scripts/TaxonsUniprots2Tables.jar b/scripts/helper_scripts/TaxonsUniprots2Tables.jar index ff0bcb9..bf9f13e 100644 Binary files a/scripts/helper_scripts/TaxonsUniprots2Tables.jar and b/scripts/helper_scripts/TaxonsUniprots2Tables.jar differ diff --git a/scripts/helper_scripts/parser/src/storage/TableWriter.java b/scripts/helper_scripts/parser/src/storage/TableWriter.java index 2325667..98de7a3 100755 --- a/scripts/helper_scripts/parser/src/storage/TableWriter.java +++ b/scripts/helper_scripts/parser/src/storage/TableWriter.java @@ -41,8 +41,6 @@ public class TableWriter implements UniprotObserver { // csv files private CSV.IndexedWriter peptides; private CSV.IndexedWriter uniprotEntries; - private CSV.IndexedWriter refseqCrossReferences; - private CSV.IndexedWriter emblCrossReferences; private CSV.IndexedWriter goCrossReferences; private CSV.IndexedWriter ecCrossReferences; private CSV.IndexedWriter interProCrossReferences; @@ -80,19 +78,15 @@ public void store(UniprotEntry entry) { long uniprotEntryId = addUniprotEntry(entry.getUniprotAccessionNumber(), entry.getVersion(), entry.getTaxonId(), entry.getType(), entry.getName(), entry.getSequence()); if (uniprotEntryId != -1) { // failed to add entry - - // todo make cleaner String faSummary = Stream.of( entry.getGOReferences().stream().map(UniprotGORef::getId), - entry.getECReferences().stream().map(x->"EC:"+x.getId()), - entry.getInterProReferences().stream().map(x->"IPR:"+x.getId()) + entry.getECReferences().stream().filter(x -> !x.getId().isEmpty()).map(x->"EC:"+x.getId()), + entry.getInterProReferences().stream().filter(x -> !x.getId().isEmpty()).map(x->"IPR:"+x.getId()) ).flatMap(i -> i).collect(Collectors.joining(";")); for(String sequence : entry.digest()) { addData(sequence.replace('I', 'L'), uniprotEntryId, sequence, faSummary); } - for (UniprotDbRef ref : entry.getDbReferences()) - addDbRef(ref, uniprotEntryId); for (UniprotGORef ref : entry.getGOReferences()) addGORef(ref, uniprotEntryId); for (UniprotECRef ref : entry.getECReferences()) @@ -124,12 +118,13 @@ public long addUniprotEntry(String uniprotAccessionNumber, int version, int taxo if(0 <= taxonId && taxonId < taxonList.size() && taxonList.get(taxonId) != null) { try { uniprotEntries.write( - uniprotAccessionNumber, - Integer.toString(version), - Integer.toString(taxonId), - type, - name, - sequence); + uniprotAccessionNumber, + Integer.toString(version), + Integer.toString(taxonId), + type, + name, + sequence + ); return uniprotEntries.index(); } catch(IOException e) { System.err.println(new Timestamp(System.currentTimeMillis()) @@ -163,11 +158,11 @@ public long addUniprotEntry(String uniprotAccessionNumber, int version, int taxo public void addData(String unifiedSequence, long uniprotEntryId, String originalSequence, String functionalAnnotations) { try { peptides.write( - unifiedSequence, - originalSequence, - Long.toString(uniprotEntryId), - functionalAnnotations - ); + unifiedSequence, + originalSequence, + Long.toString(uniprotEntryId), + functionalAnnotations + ); } catch(IOException e) { System.err.println(new Timestamp(System.currentTimeMillis()) + " Error adding this peptide to the database: " + unifiedSequence); @@ -175,29 +170,6 @@ public void addData(String unifiedSequence, long uniprotEntryId, String original } } - /** - * Adds a uniprot entry cross reference to the database - * - * @param ref - * The uniprot cross reference to add - * @param uniprotEntryId - * The uniprotEntry of the cross reference - */ - public void addDbRef(UniprotDbRef ref, long uniprotEntryId) { - try { - CSV.Writer w = (ref.getType().equals("EMBL")) - ? emblCrossReferences - : refseqCrossReferences; - w.write(Long.toString(uniprotEntryId), - ref.getProteinId(), - ref.getSequenceId()); - } catch (IOException e) { - System.err.println(new Timestamp(System.currentTimeMillis()) - + " Error adding this cross reference to the database."); - e.printStackTrace(); - } - } - /** * Adds a uniprot entry GO reference to the database * diff --git a/scripts/helper_scripts/parser/src/tools/NamesNodes2TaxonsLineages.java b/scripts/helper_scripts/parser/src/tools/NamesNodes2TaxonsLineages.java index 1151018..06a72e5 100755 --- a/scripts/helper_scripts/parser/src/tools/NamesNodes2TaxonsLineages.java +++ b/scripts/helper_scripts/parser/src/tools/NamesNodes2TaxonsLineages.java @@ -18,7 +18,7 @@ public class NamesNodes2TaxonsLineages { * Parse a list of taxons and their lineages from the NCBI dumps. * * This program will parse the first two argument files, and create the next - * two. The first two arguments are the nodes.dmp en names.dmp files + * two. The first two arguments are the nodes.dmp and names.dmp files * downloaded from the NCBI. TSV-dumps of the parsed taxons and lineages * will be written to the third and fourth parameter. */