loadedClusters = new ArrayList<>(1000);
+
+ while (iterator.hasNext()) {
+ ICluster cluster = iterator.next();
+
+ // all clusters are primarily stored in the cluster storage
+ clusterStorage.put(cluster.getId(), cluster);
+
+ // only retain the basic properties
+ loadedClusters.add(cluster.getProperties());
+ }
- log.debug(String.format("Results written in %d seconds",
- Duration.between(clusteringCompleteTime, LocalDateTime.now()).getSeconds()));
- System.out.println("Results written to " + thisResult.toString());
+ // some nice output
+ LocalDateTime loadingCompleteTime = LocalDateTime.now();
+ log.debug(String.format("Loaded %d spectra in %d seconds", loadedClusters.size(),
+ Duration.between(startTime, loadingCompleteTime).getSeconds()));
- log.debug(String.format("Process completed in %d seconds", Duration.between(startTime,
- LocalDateTime.now()).getSeconds()));
+ return loadedClusters.toArray(new IClusterProperties[0]);
+ }
- // Close the property storage and delete folders and property files.
- localStorage.close();
+ /**
+ * Create a new unique directory name. In case the target name already exists, a new directory with the name
+ * {target name}-{N} is created.
+ *
+ * @param targetDirectoryName The target directory name / path.
+ * @return The File object representing the finally created unique directory.
+ * @throws IOException Thrown if creating the directory failed.
+ */
+ private File createUniqueDirectory(File targetDirectoryName) throws IOException {
+ int iteration = 1;
+ String orgPath = targetDirectoryName.getAbsolutePath();
+
+ while (targetDirectoryName.exists()) {
+ targetDirectoryName = new File(orgPath + "-" + String.valueOf(iteration));
+ iteration++;
+ }
- System.exit(0);
- } catch (MissingParameterException e) {
- System.out.println("Error: " + e.getMessage() + "\n\n");
- printUsage();
+ // now that the directory does not yet exist, create it
+ if (!targetDirectoryName.mkdir()) {
+ throw new IOException("Failed to create directory " + targetDirectoryName.getAbsolutePath());
+ }
- System.exit(1);
- } catch (Exception e) {
- e.printStackTrace();
- System.out.println("Error: " + e.getMessage());
+ return targetDirectoryName;
+ }
- System.exit(1);
+ /**
+ * Ensures that the set user parameters are valid. In case they are not, an Exception is thrown.
+ *
+ * @param clusteringParameters The set parameters
+ * @throws Exception In case something is invalid.
+ * @throws MissingParameterException In case the required output path option was not set.
+ */
+ private void checkParameterValidity(ClusteringParameters clusteringParameters) throws Exception, MissingParameterException {
+ // RESULT FILE PATH
+ if (clusteringParameters.getOutputFile() == null)
+ throw new MissingParameterException("Missing required option " +
+ CliOptions.OPTIONS.OUTPUT_PATH.getValue());
+
+ // ensure that the output file does not exist
+ if (clusteringParameters.getOutputFile().exists())
+ throw new Exception("Result file " + clusteringParameters.getOutputFile().getAbsolutePath() + " already exists");
+
+ // check whether the fragment tolerance is valid
+ if (!"high".equalsIgnoreCase(clusteringParameters.getFragmentIonPrecision()) &&
+ !"low".equalsIgnoreCase(clusteringParameters.getFragmentIonPrecision())) {
+ throw new Exception("Invalid fragment precision set. Allowed values are 'low' and 'high'");
}
}
+ /**
+
+ TODO: update print function
private void printSettings(File finalResultFile, int nMajorPeakJobs, float startThreshold,
float endThreshold, int rounds, boolean keepBinaryFiles, File binaryTmpDirectory,
String[] peaklistFilenames, boolean reUseBinaryFiles, boolean fastMode,
@@ -278,7 +297,7 @@ private void printSettings(File finalResultFile, int nMajorPeakJobs, float start
System.out.println("Using fast mode: " + (fastMode ? "yes" : "no"));
System.out.println("\nOther settings:");
- System.out.println("Precursor tolerance: " + defaultParameters.getPrecursorIonTolerance());
+ System.out.println("Precursor tolerance: " + clusteringParameters.getPrecursorIonTolerance());
System.out.println("Fragment ion precision: " + fragmentPrecision);
// used filters
@@ -296,7 +315,7 @@ private void printSettings(File finalResultFile, int nMajorPeakJobs, float start
// System.out.println("Minimum number of comparisons: " + Defaults.getMinNumberComparisons());
System.out.println();
- }
+ }*/
private void printUsage() {
HelpFormatter formatter = new HelpFormatter();
diff --git a/src/main/java/org/spectra/cluster/util/ClusteringParameters.java b/src/main/java/org/spectra/cluster/util/ClusteringParameters.java
new file mode 100644
index 0000000..5d7ce6a
--- /dev/null
+++ b/src/main/java/org/spectra/cluster/util/ClusteringParameters.java
@@ -0,0 +1,246 @@
+package org.spectra.cluster.util;
+
+import lombok.Data;
+import org.apache.commons.cli.CommandLine;
+import org.spectra.cluster.cdf.SpectraPerBinNumberComparisonAssessor;
+import org.spectra.cluster.engine.GreedyClusteringEngine;
+import org.spectra.cluster.filter.rawpeaks.*;
+import org.spectra.cluster.model.cluster.ICluster;
+import org.spectra.cluster.normalizer.BasicIntegerNormalizer;
+import org.spectra.cluster.normalizer.HighResolutionMzBinner;
+import org.spectra.cluster.normalizer.IMzBinner;
+import org.spectra.cluster.normalizer.TideBinner;
+import org.spectra.cluster.predicates.IComparisonPredicate;
+import org.spectra.cluster.predicates.SameChargePredicate;
+import org.spectra.cluster.predicates.ShareNComparisonPeaksPredicate;
+import org.spectra.cluster.similarity.CombinedFisherIntensityTest;
+import org.spectra.cluster.tools.CliOptions;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.util.Properties;
+
+/**
+ * This code is licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
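+ * ==Overview==
+ * Holds all user-adjustable clustering settings. Defaults are read from the bundled
+ * "application.properties" and can be overridden by a configuration file or by command
+ * line arguments. The class also creates the clustering objects matching these settings.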
+ *
+ * @author ypriverol on 18/10/2018.
+ */
+@Data
+public class ClusteringParameters {
+
+ private String binaryDirectory; // property "binary.temp.directory" / TEMP_DIRECTORY option
+ private boolean reuseBinary; // property "reuse.binary.files"
+ private boolean fastMode; // property "cluster.fast.mode"
+ private Integer clusterRounds; // property "cluster.rounds" / ROUNDS option
+ private boolean filterReportPeaks; // property "filters.remove.reporter.peaks"
+ private Integer numberHigherPeaks; // property "number.higher.peaks"; passed to KeepNHighestRawPeaks
+ private Double precursorIonTolerance; // property "precursor.tolerance" / PRECURSOR_TOLERANCE option
+ private String fragmentIonPrecision; // property "fragment.precision" / FRAGMENT_PRECISION option; compared against "high"
+ private boolean ignoreCharge; // property "ignore.charge" / IGNORE_CHARGE option
+
+ private Float thresholdStart; // property "threshold.start" / START_THRESHOLD option
+ private Float thresholdEnd; // property "threshold.end" / END_THRESHOLD option
+ private int nInitiallySharedPeaks; // property "initially.shared.peaks" / ADVANCED_MIN_INITIAL_PEAKS option
+ private int minNumberOfComparisons; // property "x.min.comparisons" / ADVANCED_MIN_NUMBER_COMPARISONS option
+
+ private File outputFile; // no property key; set from the OUTPUT_PATH command line option
+ private boolean outputMsp; // property "output.msp" / OUTPUT_MSP option
+
+ private int nThreads; // property "n.threads" / N_THREADS option
+
+
+    /**
+     * Creates a new parameter object pre-filled with the default values read
+     * from the bundled "application.properties".
+     */
+    public ClusteringParameters() {
+        try {
+            // load the defaults and copy them into the fields in one go
+            setProperties(readProperties());
+        } catch (URISyntaxException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Copies all known settings found in the passed properties into the matching fields.
+     * Settings that are not present in {@code properties} keep their current value.
+     *
+     * @param properties The properties to take the settings from.
+     */
+    private void setProperties(Properties properties) {
+        if (properties.containsKey("precursor.tolerance"))
+            this.precursorIonTolerance = Double.parseDouble(properties.getProperty("precursor.tolerance").trim());
+        if (properties.containsKey("fragment.precision"))
+            this.fragmentIonPrecision = properties.getProperty("fragment.precision");
+        if (properties.containsKey("n.threads"))
+            this.nThreads = Integer.parseInt(properties.getProperty("n.threads"));
+        if (properties.containsKey("threshold.start"))
+            this.thresholdStart = Float.parseFloat(properties.getProperty("threshold.start"));
+        if (properties.containsKey("threshold.end"))
+            this.thresholdEnd = Float.parseFloat(properties.getProperty("threshold.end"));
+        if (properties.containsKey("number.higher.peaks"))
+            this.numberHigherPeaks = Integer.parseInt(properties.getProperty("number.higher.peaks"));
+        if (properties.containsKey("cluster.rounds"))
+            this.clusterRounds = Integer.parseInt(properties.getProperty("cluster.rounds"));
+        if (properties.containsKey("binary.temp.directory"))
+            this.binaryDirectory = properties.getProperty("binary.temp.directory");
+        if (properties.containsKey("reuse.binary.files"))
+            this.reuseBinary = Boolean.parseBoolean(properties.getProperty("reuse.binary.files"));
+        if (properties.containsKey("ignore.charge"))
+            this.ignoreCharge = Boolean.parseBoolean(properties.getProperty("ignore.charge"));
+        if (properties.containsKey("cluster.fast.mode"))
+            this.fastMode = Boolean.parseBoolean(properties.getProperty("cluster.fast.mode"));
+        if (properties.containsKey("filters.remove.reporter.peaks"))
+            this.filterReportPeaks = Boolean.parseBoolean(properties.getProperty("filters.remove.reporter.peaks"));
+        if (properties.containsKey("initially.shared.peaks"))
+            this.nInitiallySharedPeaks = Integer.parseInt(properties.getProperty("initially.shared.peaks"));
+        if (properties.containsKey("x.min.comparisons"))
+            this.minNumberOfComparisons = Integer.parseInt(properties.getProperty("x.min.comparisons"));
+        // containsKey() is required here: contains() searches the values, not the keys,
+        // so the "output.msp" setting would effectively never be picked up
+        if (properties.containsKey("output.msp"))
+            this.outputMsp = Boolean.parseBoolean(properties.getProperty("output.msp"));
+    }
+
+    /**
+     * Reads the default settings from the "application.properties" resource found
+     * through the class loader.
+     *
+     * An {@link IOException} raised while reading is printed and whatever was loaded
+     * up to that point is returned.
+     *
+     * @return The loaded properties.
+     * @throws URISyntaxException Never thrown by the current implementation; kept in the
+     *                            signature for existing callers.
+     */
+    public Properties readProperties() throws URISyntaxException {
+        Properties properties = new Properties();
+
+        // try-with-resources makes sure the resource stream is always closed again
+        try (InputStream input = getClass().getClassLoader().getResourceAsStream("application.properties")) {
+            properties.load(input);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+        return properties;
+    }
+
+    /**
+     * Overrides the current settings with the values passed on the command line.
+     *
+     * Options that carry a value only replace the current setting when they are present.
+     * The two flags (ignore charge, MSP output) are always set to whether the respective
+     * flag was passed.
+     *
+     * @param commandLine The parsed command line.
+     */
+    public void mergeCommandLineArgs(CommandLine commandLine) {
+        // RESULT FILE PATH
+        if (commandLine.hasOption(CliOptions.OPTIONS.OUTPUT_PATH.getValue())) {
+            this.outputFile = new File(commandLine.getOptionValue(CliOptions.OPTIONS.OUTPUT_PATH.getValue()));
+        }
+
+        // NUMBER OF ROUNDS
+        if (commandLine.hasOption(CliOptions.OPTIONS.ROUNDS.getValue())) {
+            this.clusterRounds = Integer.parseInt(commandLine.getOptionValue(CliOptions.OPTIONS.ROUNDS.getValue()));
+        }
+
+        // SIMILARITY THRESHOLDS
+        if (commandLine.hasOption(CliOptions.OPTIONS.START_THRESHOLD.getValue())) {
+            this.thresholdStart = Float.parseFloat(commandLine.getOptionValue(CliOptions.OPTIONS.START_THRESHOLD.getValue()));
+        }
+
+        if (commandLine.hasOption(CliOptions.OPTIONS.END_THRESHOLD.getValue())) {
+            this.thresholdEnd = Float.parseFloat(commandLine.getOptionValue(CliOptions.OPTIONS.END_THRESHOLD.getValue()));
+        }
+
+        // TOLERANCES
+        if (commandLine.hasOption(CliOptions.OPTIONS.PRECURSOR_TOLERANCE.getValue())) {
+            this.precursorIonTolerance = Double.parseDouble(commandLine.getOptionValue(CliOptions.OPTIONS.PRECURSOR_TOLERANCE.getValue()));
+        }
+
+        if (commandLine.hasOption(CliOptions.OPTIONS.FRAGMENT_PRECISION.getValue())) {
+            this.fragmentIonPrecision = commandLine.getOptionValue(CliOptions.OPTIONS.FRAGMENT_PRECISION.getValue());
+        }
+
+        // NOTE(review): assigned unconditionally, so a value taken from a properties file
+        // is replaced by "false" whenever the flag is absent - confirm this is intended
+        this.ignoreCharge = commandLine.hasOption(CliOptions.OPTIONS.IGNORE_CHARGE.getValue());
+
+        // TEMPORARY DIRECTORY
+        if (commandLine.hasOption(CliOptions.OPTIONS.TEMP_DIRECTORY.getValue())) {
+            this.binaryDirectory = commandLine.getOptionValue(CliOptions.OPTIONS.TEMP_DIRECTORY.getValue());
+        }
+
+        // ADVANCED OPTIONS
+        if (commandLine.hasOption(CliOptions.OPTIONS.ADVANCED_MIN_NUMBER_COMPARISONS.getValue())) {
+            this.minNumberOfComparisons = Integer.parseInt(commandLine.getOptionValue(CliOptions.OPTIONS.ADVANCED_MIN_NUMBER_COMPARISONS.getValue()));
+        }
+
+        if (commandLine.hasOption(CliOptions.OPTIONS.ADVANCED_MIN_INITIAL_PEAKS.getValue())) {
+            this.nInitiallySharedPeaks = Integer.parseInt(commandLine.getOptionValue(CliOptions.OPTIONS.ADVANCED_MIN_INITIAL_PEAKS.getValue()));
+        }
+
+        // NUMBER OF THREADS
+        if (commandLine.hasOption(CliOptions.OPTIONS.N_THREADS.getValue())) {
+            this.nThreads = Integer.parseInt(commandLine.getOptionValue(CliOptions.OPTIONS.N_THREADS.getValue()));
+        }
+
+        // NOTE(review): like ignoreCharge above, this flag always replaces the configured value
+        this.outputMsp = commandLine.hasOption(CliOptions.OPTIONS.OUTPUT_MSP.getValue());
+    }
+
+    /**
+     * Loads the settings from the passed properties file and applies them on top
+     * of the currently set values. Settings missing in the file are not changed.
+     *
+     * @param configFile Path to the properties file.
+     * @throws IOException If the file cannot be opened or read.
+     */
+    public void mergeParameters(String configFile) throws IOException {
+        Properties newProperties = new Properties();
+
+        // try-with-resources closes the file handle even if loading fails
+        try (InputStream input = new FileInputStream(new File(configFile))) {
+            newProperties.load(input);
+        }
+
+        setProperties(newProperties);
+    }
+
+ // ----------
+ // A collection of functions to return clustering objects matching the
+ // parameters
+
+    /**
+     * Converts the precursor tolerance into its integer representation by multiplying
+     * it with {@link BasicIntegerNormalizer#MZ_CONSTANT} and rounding the result.
+     *
+     * @return The scaled precursor tolerance as an int.
+     */
+    public int getIntPrecursorTolerance() {
+        double scaledTolerance = precursorIonTolerance * (double) BasicIntegerNormalizer.MZ_CONSTANT;
+
+        return (int) Math.round(scaledTolerance);
+    }
+
+    /**
+     * Builds a GreedyClusteringEngine configured with the current settings.
+     *
+     * Note: The settings are used as they are. Their validity has to be checked
+     * before this function is called.
+     *
+     * @return The newly created GreedyClusteringEngine.
+     * @throws Exception Declared by this method; presumably raised when one of the
+     *                   required objects cannot be created (to be confirmed).
+     */
+    public GreedyClusteringEngine createGreedyClusteringEngine() throws Exception {
+        int tolerance = getIntPrecursorTolerance();
+
+        SpectraPerBinNumberComparisonAssessor comparisonAssessor = new SpectraPerBinNumberComparisonAssessor(
+                tolerance * 2, minNumberOfComparisons, BasicIntegerNormalizer.MZ_CONSTANT * 2500);
+
+        // first round predicate: ShareNComparisonPeaksPredicate, combined with a
+        // SameChargePredicate unless the charge is ignored
+        IComparisonPredicate firstRoundPredicate = new ShareNComparisonPeaksPredicate(nInitiallySharedPeaks);
+        if (!ignoreCharge) {
+            firstRoundPredicate = new SameChargePredicate().and(firstRoundPredicate);
+        }
+
+        // noise filter window size: 3000 for "high" fragment precision, 100 otherwise
+        int noiseFilterWindowSize;
+        if (fragmentIonPrecision.equalsIgnoreCase("high")) {
+            noiseFilterWindowSize = 3000;
+        } else {
+            noiseFilterWindowSize = 100;
+        }
+
+        return new GreedyClusteringEngine(
+                tolerance,
+                thresholdStart, thresholdEnd, clusterRounds, new CombinedFisherIntensityTest(),
+                comparisonAssessor, firstRoundPredicate,
+                noiseFilterWindowSize);
+    }
+
+    /**
+     * Builds the filter chain applied to raw spectra while loading, based on the
+     * current settings.
+     *
+     * Note: The settings are used as they are. Their validity has to be checked
+     * before this function is called.
+     *
+     * @return A new IRawSpectrumFunction chaining all loading filters.
+     */
+    public IRawSpectrumFunction createLoadingFilter() {
+        // approximate fragment tolerance used by the filters: 0.01 for "high" precision, 0.5 otherwise
+        double approximateTolerance;
+        if (fragmentIonPrecision.equalsIgnoreCase("high")) {
+            approximateTolerance = 0.01;
+        } else {
+            approximateTolerance = 0.5;
+        }
+
+        return new RemoveImpossiblyHighPeaksFunction()
+                .specAndThen(new RemovePrecursorPeaksFunction(approximateTolerance))
+                .specAndThen(new RawPeaksWrapperFunction(new KeepNHighestRawPeaks(numberHigherPeaks)));
+    }
+
+    /**
+     * Returns the m/z binner matching the set fragment ion precision.
+     *
+     * Note: The settings are used as they are. Their validity has to be checked
+     * before this function is called.
+     *
+     * @return A HighResolutionMzBinner for "high" fragment precision, a TideBinner otherwise.
+     */
+    public IMzBinner createMzBinner() {
+        if (fragmentIonPrecision.equalsIgnoreCase("high")) {
+            return new HighResolutionMzBinner();
+        }
+
+        return new TideBinner();
+    }
+}
diff --git a/src/main/java/org/spectra/cluster/util/DefaultParameters.java b/src/main/java/org/spectra/cluster/util/DefaultParameters.java
deleted file mode 100644
index 2adc120..0000000
--- a/src/main/java/org/spectra/cluster/util/DefaultParameters.java
+++ /dev/null
@@ -1,103 +0,0 @@
-package org.spectra.cluster.util;
-
-import lombok.Data;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URISyntaxException;
-import java.util.Properties;
-
-/**
- * This code is licensed under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * ==Overview==
- *
- * @author ypriverol on 18/10/2018.
- */
-@Data
-public class DefaultParameters {
-
- private String binaryDirectory;
- private boolean reuseBinary;
- private boolean fastMode;
- private Integer clusterRounds;
- private boolean filterReportPeaks;
- private Integer numberHigherPeaks;
- private Double precursorIonTolerance;
- private String fragmentIonPrecision;
- private boolean ignoreCharge;
-
- private Float thresholdStart;
- private Float thresholdEnd;
- private int nInitiallySharedPeaks;
- private int minNumberOfComparisons;
-
-
- public DefaultParameters(){
-
- try {
- Properties properties = readProperties();
- setProperties(properties);
-
- } catch (URISyntaxException e) {
- e.printStackTrace();
- }
-
- }
-
- private void setProperties(Properties properties) {
- if(properties.containsKey("precursor.tolerance"))
- this.precursorIonTolerance = Double.parseDouble(properties.getProperty("precursor.tolerance").trim());
- if(properties.containsKey("fragment.precision"))
- this.fragmentIonPrecision = properties.getProperty("fragment.precision");
- if(properties.containsKey("threshold.start"))
- this.thresholdStart = Float.parseFloat(properties.getProperty("threshold.start"));
- if(properties.containsKey("threshold.end"))
- this.thresholdEnd = Float.parseFloat(properties.getProperty("threshold.end"));
- if(properties.containsKey("number.higher.peaks"))
- this.numberHigherPeaks = Integer.parseInt(properties.getProperty("number.higher.peaks"));
- if(properties.containsKey("cluster.rounds"))
- this.clusterRounds = Integer.parseInt(properties.getProperty("cluster.rounds"));
- if(properties.containsKey("binary.temp.directory"))
- this.binaryDirectory = properties.getProperty("binary.temp.directory");
- if(properties.containsKey("reuse.binary.files"))
- this.reuseBinary = Boolean.parseBoolean(properties.getProperty("reuse.binary.files"));
- if(properties.containsKey("ignore.charge"))
- this.ignoreCharge = Boolean.parseBoolean(properties.getProperty("ignore.charge"));
- if(properties.containsKey("cluster.fast.mode"))
- this.fastMode = Boolean.parseBoolean(properties.getProperty("cluster.fast.mode"));
- if(properties.containsKey("filters.remove.reporter.peaks"))
- this.filterReportPeaks = Boolean.parseBoolean(properties.getProperty("filters.remove.reporter.peaks"));
- if(properties.containsKey("initially.shared.peaks"))
- this.nInitiallySharedPeaks = Integer.parseInt(properties.getProperty("initially.shared.peaks"));
- if(properties.containsKey("x.min.comparisons"))
- this.minNumberOfComparisons = Integer.parseInt(properties.getProperty("x.min.comparisons"));
- }
-
- public Properties readProperties() throws URISyntaxException {
- Properties properties = new Properties();
- InputStream output;
-
- try {
- output = getClass().getClassLoader().getResourceAsStream("application.properties");
- properties.load(output);
- } catch (IOException e) {
- e.printStackTrace();
- }
- return properties;
- }
-
- public void mergeParameters(String configFile) throws IOException {
- File propertiesFactoryBean = new File(configFile);
- Properties newProperties = new Properties();
- InputStream output = new FileInputStream(propertiesFactoryBean);
- newProperties.load(output);
- setProperties(newProperties);
- }
-}
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
index cb28b27..3149508 100644
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@@ -1,7 +1,6 @@
precursor.tolerance=1
fragment.precision=low
-
# Clustering execution parameters
threshold.start=1.0F
threshold.end=0.99F
@@ -14,11 +13,17 @@ reuse.binary.files=false
cluster.fast.mode=false
ignore.charge=false
+# also write an MSP file containing all consensus spectra
+output.msp=true
+
# Number of peaks that will be used to perform the comparison
number.higher.peaks=40
initially.shared.peaks=5
filters.remove.reporter.peaks=true
+# number of parallel jobs
+n.threads = 2
+
# The minimum number of comparisons is derived from the data. The set number
# is used as an additional minimum
x.min.comparisons=0
\ No newline at end of file
diff --git a/src/test/java/org/spectra/cluster/consensus/AverageConsensusSpectrumBuilderTest.java b/src/test/java/org/spectra/cluster/consensus/AverageConsensusSpectrumBuilderTest.java
new file mode 100644
index 0000000..6887231
--- /dev/null
+++ b/src/test/java/org/spectra/cluster/consensus/AverageConsensusSpectrumBuilderTest.java
@@ -0,0 +1,70 @@
+package org.spectra.cluster.consensus;
+
+import io.github.bigbio.pgatk.io.common.spectra.Spectrum;
+import io.github.bigbio.pgatk.io.properties.IPropertyStorage;
+import io.github.bigbio.pgatk.io.properties.InMemoryPropertyStorage;
+import org.junit.Assert;
+import org.junit.Test;
+import org.spectra.cluster.cdf.MinNumberComparisonsAssessor;
+import org.spectra.cluster.engine.GreedyClusteringEngine;
+import org.spectra.cluster.engine.IClusteringEngine;
+import org.spectra.cluster.filter.binaryspectrum.HighestPeakPerBinFunction;
+import org.spectra.cluster.io.spectra.MzSpectraReader;
+import org.spectra.cluster.model.cluster.ICluster;
+import org.spectra.cluster.model.consensus.GreedyConsensusSpectrum;
+import org.spectra.cluster.normalizer.BasicIntegerNormalizer;
+import org.spectra.cluster.normalizer.MaxPeakNormalizer;
+import org.spectra.cluster.normalizer.TideBinner;
+import org.spectra.cluster.predicates.ShareHighestPeaksClusterPredicate;
+import org.spectra.cluster.similarity.CombinedFisherIntensityTest;
+import org.spectra.cluster.util.ClusteringParameters;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Tests the creation of consensus spectra through the AverageConsensusSpectrumBuilder.
+ */
+public class AverageConsensusSpectrumBuilderTest {
+    /**
+     * Loads the spectra of "imp_single_cluster.mgf", clusters them and checks the
+     * consensus spectrum created for the single resulting cluster.
+     */
+    @Test
+    public void testAverageSpectrum() throws Exception {
+        ClusteringParameters params = new ClusteringParameters();
+        IConsensusSpectrumBuilder builder = new AverageConsensusSpectrumBuilder(params);
+
+        // load the spectra
+        File mgfFile = new File(getClass().getClassLoader().getResource("imp_single_cluster.mgf").toURI());
+        IPropertyStorage localStorage = new InMemoryPropertyStorage();
+        MzSpectraReader reader = new MzSpectraReader(mgfFile, new TideBinner(), new MaxPeakNormalizer(),
+                new BasicIntegerNormalizer(), new HighestPeakPerBinFunction(), params.createLoadingFilter(),
+                GreedyClusteringEngine.COMPARISON_FILTER, params.createGreedyClusteringEngine());
+
+        // the element type is required: sorting by ICluster::getPrecursorMz and the
+        // typed toArray call below cannot work on raw collections
+        Iterator<ICluster> iterator = reader.readClusterIterator(localStorage);
+        List<ICluster> spectra = new ArrayList<>(1_000);
+
+        while (iterator.hasNext()) {
+            spectra.add(iterator.next());
+        }
+
+        // sort according to m/z
+        spectra.sort(Comparator.comparingInt(ICluster::getPrecursorMz));
+
+        // cluster everything
+        IClusteringEngine engine = new GreedyClusteringEngine(BasicIntegerNormalizer.MZ_CONSTANT,
+                1, 0.99f, 5, new CombinedFisherIntensityTest(),
+                new MinNumberComparisonsAssessor(10_000), new ShareHighestPeaksClusterPredicate(5),
+                GreedyConsensusSpectrum.NOISE_FILTER_INCREMENT);
+
+        ICluster[] clusters = engine.clusterSpectra(spectra.toArray(new ICluster[spectra.size()]));
+
+        Assert.assertEquals(1, clusters.length);
+
+        // create the consensus spectrum
+        Spectrum consensusSpectrum = builder.createConsensusSpectrum(clusters[0], localStorage);
+
+        Assert.assertNotNull(consensusSpectrum);
+        Assert.assertEquals(2, (int) consensusSpectrum.getPrecursorCharge());
+        Assert.assertEquals(2, (int) consensusSpectrum.getMsLevel());
+        Assert.assertEquals(69, consensusSpectrum.getPeakList().size());
+        Assert.assertEquals(402.717, consensusSpectrum.getPrecursorMZ(), 0.001);
+    }
+}
diff --git a/src/test/java/org/spectra/cluster/consensus/TestAbstractConsensusBuilder.java b/src/test/java/org/spectra/cluster/consensus/TestAbstractConsensusBuilder.java
new file mode 100644
index 0000000..82fe678
--- /dev/null
+++ b/src/test/java/org/spectra/cluster/consensus/TestAbstractConsensusBuilder.java
@@ -0,0 +1,97 @@
+package org.spectra.cluster.consensus;
+
+import io.github.bigbio.pgatk.io.properties.IPropertyStorage;
+import io.github.bigbio.pgatk.io.properties.InMemoryPropertyStorage;
+import org.junit.Assert;
+import org.junit.Test;
+import org.spectra.cluster.cdf.MinNumberComparisonsAssessor;
+import org.spectra.cluster.engine.GreedyClusteringEngine;
+import org.spectra.cluster.engine.IClusteringEngine;
+import org.spectra.cluster.io.spectra.MzSpectraReader;
+import org.spectra.cluster.model.cluster.ICluster;
+import org.spectra.cluster.model.consensus.GreedyConsensusSpectrum;
+import org.spectra.cluster.normalizer.BasicIntegerNormalizer;
+import org.spectra.cluster.normalizer.MaxPeakNormalizer;
+import org.spectra.cluster.predicates.ShareHighestPeaksClusterPredicate;
+import org.spectra.cluster.similarity.CombinedFisherIntensityTest;
+
+import java.io.File;
+import java.net.URI;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Tests the static helper functions of the AbstractConsensusSpectrumBuilder.
+ */
+public class TestAbstractConsensusBuilder {
+    /**
+     * Reads the clusters of "single-spectra.mgf" and asserts that exactly two are found.
+     *
+     * @param propertyStorage The storage handed to the reader while loading.
+     * @return The loaded clusters.
+     * @throws Exception If the test file cannot be located or read.
+     */
+    private List<ICluster> loadClusters(IPropertyStorage propertyStorage) throws Exception {
+        // open the file
+        URI uri = Objects.requireNonNull(getClass().getClassLoader().getResource("single-spectra.mgf")).toURI();
+        File mgfFile = new File(uri);
+
+        IClusteringEngine engine = new GreedyClusteringEngine(BasicIntegerNormalizer.MZ_CONSTANT,
+                1, 0.99f, 5, new CombinedFisherIntensityTest(),
+                new MinNumberComparisonsAssessor(10000), new ShareHighestPeaksClusterPredicate(5),
+                GreedyConsensusSpectrum.NOISE_FILTER_INCREMENT);
+
+        MzSpectraReader spectraReader = new MzSpectraReader(mgfFile, GreedyClusteringEngine.COMPARISON_FILTER, engine);
+
+        // read the spectra
+        Iterator<ICluster> clusterIterator = spectraReader.readClusterIterator(propertyStorage);
+        List<ICluster> clusters = new ArrayList<>(20);
+
+        while (clusterIterator.hasNext()) {
+            clusters.add(clusterIterator.next());
+        }
+
+        Assert.assertEquals(2, clusters.size());
+
+        return clusters;
+    }
+
+    /**
+     * Checks the number of original peaks loaded per cluster and their maximum intensity.
+     */
+    @Test
+    public void testLoadOriginalPeaks() throws Exception {
+        IPropertyStorage propertyStorage = new InMemoryPropertyStorage();
+        List<ICluster> clusters = loadClusters(propertyStorage);
+
+        // get the original peaks
+        for (ICluster cluster : clusters) {
+            List<ConsensusPeak> rawPeaks = AbstractConsensusSpectrumBuilder.loadOriginalPeaks(cluster, propertyStorage, true);
+
+            Assert.assertEquals(50, rawPeaks.size());
+
+            // max peak must always be the same
+            double maxValue = rawPeaks.stream().mapToDouble(ConsensusPeak::getIntensity).max().getAsDouble();
+
+            Assert.assertEquals(MaxPeakNormalizer.MAX_INTENSITY, maxValue, 0);
+        }
+    }
+
+    /**
+     * Checks the average precursor m/z calculated for every loaded cluster.
+     */
+    @Test
+    public void testAveragePrecursorMz () throws Exception {
+        IPropertyStorage propertyStorage = new InMemoryPropertyStorage();
+        List<ICluster> clusters = loadClusters(propertyStorage);
+
+        // get the average precursor m/z for every cluster
+        for (ICluster cluster : clusters) {
+            Double averageMz = AbstractConsensusSpectrumBuilder.getAveragePrecursorMz(cluster, propertyStorage);
+
+            Assert.assertEquals(400.29, averageMz, 0.01);
+        }
+    }
+
+    /**
+     * Merges three peaks with a tolerance of 0.5 and expects the first two to be combined.
+     */
+    @Test
+    public void testMergePeaks() throws Exception {
+        ConsensusPeak p1 = new ConsensusPeak(10.0, 1.0);
+        ConsensusPeak p2 = new ConsensusPeak(10.1, 2.0);
+        ConsensusPeak p3 = new ConsensusPeak(11.0, 1.0);
+
+        List<ConsensusPeak> peaks = Arrays.stream(new ConsensusPeak[]{p1, p2, p3}).collect(Collectors.toList());
+
+        List<ConsensusPeak> mergedPeaks = AbstractConsensusSpectrumBuilder.mergeConsensusPeaks(peaks, 0.5);
+
+        Assert.assertEquals(2, mergedPeaks.size());
+        Assert.assertEquals(10.05, mergedPeaks.get(0).getMz(), 0);
+        Assert.assertEquals(2, mergedPeaks.get(0).getCount());
+        Assert.assertEquals(1.5, mergedPeaks.get(0).getIntensity(), 0);
+        Assert.assertEquals(1, mergedPeaks.get(1).getCount());
+    }
+}
diff --git a/src/test/java/org/spectra/cluster/consensus/TestConsensusPeak.java b/src/test/java/org/spectra/cluster/consensus/TestConsensusPeak.java
new file mode 100644
index 0000000..12b500f
--- /dev/null
+++ b/src/test/java/org/spectra/cluster/consensus/TestConsensusPeak.java
@@ -0,0 +1,26 @@
+package org.spectra.cluster.consensus;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests the merging behaviour of a single ConsensusPeak.
+ */
+public class TestConsensusPeak {
+    /**
+     * Merges two further peaks into a peak, one after the other. The expected values
+     * correspond to the plain average of all peaks merged so far.
+     */
+    @Test
+    public void basicConsensusPeakTest() {
+        ConsensusPeak mergedPeak = new ConsensusPeak(1.0, 1.0);
+
+        // first merge: two peaks in total
+        mergedPeak.mergePeak(new ConsensusPeak(2.0, 0.5));
+
+        Assert.assertEquals(1.5, mergedPeak.getMz(), 0);
+        Assert.assertEquals(0.75, mergedPeak.getIntensity(), 0);
+        Assert.assertEquals(2, mergedPeak.getCount());
+
+        // second merge: three peaks in total
+        mergedPeak.mergePeak(new ConsensusPeak(3.0, 4.0));
+
+        Assert.assertEquals(2.0, mergedPeak.getMz(), 0);
+        Assert.assertEquals(1.833, mergedPeak.getIntensity(), 0.001);
+        Assert.assertEquals(3, mergedPeak.getCount());
+    }
+}
diff --git a/src/test/java/org/spectra/cluster/io/MapClusterStorageBenchmarkTest.java b/src/test/java/org/spectra/cluster/io/MapClusterStorageBenchmarkTest.java
index bf6bb9c..02f14df 100644
--- a/src/test/java/org/spectra/cluster/io/MapClusterStorageBenchmarkTest.java
+++ b/src/test/java/org/spectra/cluster/io/MapClusterStorageBenchmarkTest.java
@@ -112,12 +112,8 @@ public void storeBigClusterStatic() throws IOException, SpectraClusterException,
time = System.currentTimeMillis();
IntStream.range(0, MAX_READING_VALUE).forEach(x -> {
- try {
- int key = random.nextInt(clusters.length);
- ICluster value = clusterStorage.get(clusters[key].getId() + String.valueOf(x));
- }catch (PgatkIOException ex){
- log.error("Error reading entry -- " + x);
- }
+ int key = random.nextInt(clusters.length);
+ ICluster value = clusterStorage.get(clusters[key].getId() + String.valueOf(x));
});
System.out.println("ChronicleMap: Reading 200'000 Clusters -- " + (System.currentTimeMillis() - time) / 1000);
@@ -152,11 +148,7 @@ public void storeBigClusterDynamic() throws IOException, SpectraClusterException
time = System.currentTimeMillis();
IntStream.range(0, MAX_READING_VALUE).forEach(x -> {
- try {
- ICluster value = clusterStorage.get(clusters[0].getId() + "-" + String.valueOf(x));
- }catch (PgatkIOException ex){
- log.error("Error reading entry -- " + x);
- }
+ ICluster value = clusterStorage.get(clusters[0].getId() + "-" + String.valueOf(x));
});
System.out.println("Sparkey: Reading 200'000 Clusters -- " + (System.currentTimeMillis() - time) / 1000);
diff --git a/src/test/java/org/spectra/cluster/io/MzSpectraReaderTest.java b/src/test/java/org/spectra/cluster/io/MzSpectraReaderTest.java
index f51253a..e58c4fa 100644
--- a/src/test/java/org/spectra/cluster/io/MzSpectraReaderTest.java
+++ b/src/test/java/org/spectra/cluster/io/MzSpectraReaderTest.java
@@ -117,7 +117,7 @@ public void testPropertyLoading() throws Exception {
Assert.assertEquals(136, nIdentified);
- Assert.assertEquals(7, storage.getAvailableProperties().size());
+ Assert.assertEquals(9, storage.getAvailableProperties().size());
}
@Test
diff --git a/src/test/java/org/spectra/cluster/io/result/MspWriterTest.java b/src/test/java/org/spectra/cluster/io/result/MspWriterTest.java
new file mode 100644
index 0000000..b338439
--- /dev/null
+++ b/src/test/java/org/spectra/cluster/io/result/MspWriterTest.java
@@ -0,0 +1,127 @@
+package org.spectra.cluster.io.result;
+
+import io.github.bigbio.pgatk.io.objectdb.LongObject;
+import io.github.bigbio.pgatk.io.objectdb.ObjectsDB;
+import io.github.bigbio.pgatk.io.properties.IPropertyStorage;
+import io.github.bigbio.pgatk.io.properties.InMemoryPropertyStorage;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.spectra.cluster.cdf.MinNumberComparisonsAssessor;
+import org.spectra.cluster.consensus.AverageConsensusSpectrumBuilder;
+import org.spectra.cluster.engine.GreedyClusteringEngine;
+import org.spectra.cluster.filter.binaryspectrum.HighestPeakPerBinFunction;
+import org.spectra.cluster.filter.rawpeaks.*;
+import org.spectra.cluster.io.cluster.ObjectDBGreedyClusterStorage;
+import org.spectra.cluster.io.spectra.MzSpectraReader;
+import org.spectra.cluster.model.cluster.GreedySpectralCluster;
+import org.spectra.cluster.model.cluster.ICluster;
+import org.spectra.cluster.model.cluster.IClusterProperties;
+import org.spectra.cluster.model.consensus.GreedyConsensusSpectrum;
+import org.spectra.cluster.normalizer.BasicIntegerNormalizer;
+import org.spectra.cluster.normalizer.MaxPeakNormalizer;
+import org.spectra.cluster.normalizer.TideBinner;
+import org.spectra.cluster.predicates.ShareHighestPeaksClusterPredicate;
+import org.spectra.cluster.similarity.CombinedFisherIntensityTest;
+import org.spectra.cluster.util.ClusteringParameters;
+
+import java.io.File;
+import java.net.URI;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/** Tests for {@link MspWriter}: MSP file output and the modification annotation derived from a sequence. */
+public class MspWriterTest {
+    /** Sequence shared by the modification tests; it carries three "+42.011" deltas. */
+    private static final String MODIFIED_SEQUENCE = "+42.011EVQLVET+42.011GGGLIQPGGSLR+42.011";
+
+    /** Scratch directory for the files a test writes; a fresh one is created before every test. */
+    private Path testDir;
+
+    @Before
+    public void setUp() throws Exception {
+        testDir = Files.createTempDirectory("clusters-");
+    }
+
+    /** Builds the writer under test on a consensus builder created from a no-argument ClusteringParameters. */
+    private static MspWriter newWriter() {
+        return new MspWriter(new AverageConsensusSpectrumBuilder(new ClusteringParameters()));
+    }
+
+    /** Reads two MGF resources, stores the clusters and checks the first three lines of the written MSP file. */
+    @Test
+    public void testMspWriting() throws Exception {
+        // ignore the property storage for now
+        IPropertyStorage propertyStorage = new InMemoryPropertyStorage();
+
+        IRawSpectrumFunction loadingFilter = new RemoveImpossiblyHighPeaksFunction()
+                .specAndThen(new RemovePrecursorPeaksFunction(0.5))
+                .specAndThen(new RawPeaksWrapperFunction(new KeepNHighestRawPeaks(40)));
+
+        // create a basic clustering engine for testing
+        GreedyClusteringEngine engine = new GreedyClusteringEngine(BasicIntegerNormalizer.MZ_CONSTANT,
+                1, 0.99f, 5, new CombinedFisherIntensityTest(),
+                new MinNumberComparisonsAssessor(10000), new ShareHighestPeaksClusterPredicate(5),
+                GreedyConsensusSpectrum.NOISE_FILTER_INCREMENT);
+
+        URI[] mgfFiles = new URI[] {
+                getClass().getClassLoader().getResource("same_sequence_cluster.mgf").toURI(),
+                getClass().getClassLoader().getResource("synthetic_mixed_runs.mgf").toURI()};
+        File[] inFiles = Arrays.stream(mgfFiles).map(File::new).toArray(File[]::new);
+
+        // read all files at once
+        MzSpectraReader reader = new MzSpectraReader(new TideBinner(), new MaxPeakNormalizer(),
+                new BasicIntegerNormalizer(), new HighestPeakPerBinFunction(), loadingFilter,
+                GreedyClusteringEngine.COMPARISON_FILTER, engine, inFiles);
+
+        // create the iterator (typed, so the element type is checked by the compiler)
+        Iterator<ICluster> iterator = reader.readClusterIterator(propertyStorage);
+
+        // create the output file
+        Path clusteringResult = testDir.resolve("clustering_result.cls");
+        ObjectDBGreedyClusterStorage clusterStorage = new ObjectDBGreedyClusterStorage(new ObjectsDB(clusteringResult.toString(), true));
+
+        while (iterator.hasNext()) {
+            GreedySpectralCluster c = (GreedySpectralCluster) iterator.next();
+            clusterStorage.addGreedySpectralCluster(LongObject.asLongHash(c.getId()), c);
+        }
+
+        clusterStorage.writeDBMode();
+        clusterStorage.flush();
+
+        // convert
+        Path mspFile = testDir.resolve("clusters.msp");
+        newWriter().writeResult(mspFile, clusterStorage, propertyStorage);
+
+        // check that everything worked
+        Assert.assertTrue(Files.exists(mspFile));
+
+        List<String> lines = Files.readAllLines(mspFile);
+        Assert.assertEquals("Name: +42.011EVQLVETGGGLIQPGGSLR/2", lines.get(0));
+        Assert.assertEquals("Comment: Spec=Consensus Parent=977.0230 Mods=1(0,[,Acetyl) Nreps=1 Naa=26 MaxRatio=1.000", lines.get(1));
+        Assert.assertEquals("Num peaks: 50", lines.get(2));
+    }
+
+    /** Three modifications are extracted; the first is at position 0, on "[", and is named "Acetyl". */
+    @Test
+    public void testExtractPtms() {
+        MspWriter writer = newWriter();
+        // a raw List exposes no element getters, so each element is read through the call's own static type
+        Assert.assertEquals(3, writer.extractModsFromSequence(MODIFIED_SEQUENCE).size());
+        Assert.assertEquals(0, writer.extractModsFromSequence(MODIFIED_SEQUENCE).get(0).getPosition());
+        Assert.assertEquals("Acetyl", writer.extractModsFromSequence(MODIFIED_SEQUENCE).get(0).getName());
+        Assert.assertEquals("[", writer.extractModsFromSequence(MODIFIED_SEQUENCE).get(0).getAminoAcid());
+    }
+
+    /** The expected modification string is the count 3 followed by one parenthesised entry per delta. */
+    @Test
+    public void testGetModString() {
+        String modString = newWriter().getModString(MODIFIED_SEQUENCE);
+        Assert.assertEquals("3(0,[,Acetyl)(7,T,Acetyl)(19,],Acetyl)", modString);
+    }
+}
diff --git a/src/test/java/org/spectra/cluster/tools/LocalParallelBinnedClusteringToolTest.java b/src/test/java/org/spectra/cluster/tools/LocalParallelBinnedClusteringToolTest.java
index 5a98978..247d00e 100644
--- a/src/test/java/org/spectra/cluster/tools/LocalParallelBinnedClusteringToolTest.java
+++ b/src/test/java/org/spectra/cluster/tools/LocalParallelBinnedClusteringToolTest.java
@@ -24,6 +24,7 @@
import org.spectra.cluster.normalizer.TideBinner;
import org.spectra.cluster.predicates.ShareHighestPeaksClusterPredicate;
import org.spectra.cluster.similarity.CombinedFisherIntensityTest;
+import org.spectra.cluster.util.ClusteringParameters;
import java.io.File;
import java.net.URI;
@@ -108,11 +109,18 @@ public void testParallelClustering() throws Exception {
File finalResultFile = new File(testDir.toFile(), "result.bcs");
- clusterer.runClustering(testClusters, clusterStorage, finalResultFile,
- BasicIntegerNormalizer.MZ_CONSTANT,
- 1, 0.99f, 5, new CombinedFisherIntensityTest(),
- new MinNumberComparisonsAssessor(10000), new ShareHighestPeaksClusterPredicate(5),
- GreedyConsensusSpectrum.NOISE_FILTER_INCREMENT);
+ ClusteringParameters clusteringParameters = new ClusteringParameters();
+ clusteringParameters.setThresholdStart(1f);
+ clusteringParameters.setThresholdEnd(0.99f);
+ clusteringParameters.setClusterRounds(5);
+ clusteringParameters.setFragmentIonPrecision("high");
+ clusteringParameters.setPrecursorIonTolerance((double) 1);
+ clusteringParameters.setIgnoreCharge(false);
+ clusteringParameters.setNInitiallySharedPeaks(5);
+ clusteringParameters.setNThreads(1);
+ clusteringParameters.setOutputFile(finalResultFile);
+
+ clusterer.runClustering(testClusters, clusterStorage, clusteringParameters);
// make sure the final result file exists
Assert.assertTrue(finalResultFile.exists());
@@ -131,7 +139,7 @@ public void testParallelClustering() throws Exception {
totalSpectra += cluster.getClusteredSpectraCount();
}
- Assert.assertEquals(192, totalClusters);
+ Assert.assertEquals(95, totalClusters);
Assert.assertEquals(testClusters.length, totalSpectra);
}
}