diff --git a/src/Acoustics.Shared/Extensions/ArrayExtensions.cs b/src/Acoustics.Shared/Extensions/ArrayExtensions.cs
index 1ca7d5947..093e2ba7e 100644
--- a/src/Acoustics.Shared/Extensions/ArrayExtensions.cs
+++ b/src/Acoustics.Shared/Extensions/ArrayExtensions.cs
@@ -165,5 +165,22 @@ public static double GetMaxValue(this double[] data)
return max;
}
+
+ /// <summary>
+ /// Returns the smallest value in <paramref name="data"/>, found with a single linear scan.
+ /// </summary>
+ public static double GetMinValue(this double[] data)
+ {
+ double min = data[0]; // element 0 is read unconditionally, so data must hold at least one value
+ for (int i = 1; i < data.Length; i++)
+ {
+ if (data[i] < min)
+ {
+ min = data[i];
+ }
+ }
+
+ return min;
+ }
}
}
diff --git a/src/Acoustics.Shared/Extensions/DoubleSquareArrayExtensions.cs b/src/Acoustics.Shared/Extensions/DoubleSquareArrayExtensions.cs
index 840f89db5..d9abb32cb 100644
--- a/src/Acoustics.Shared/Extensions/DoubleSquareArrayExtensions.cs
+++ b/src/Acoustics.Shared/Extensions/DoubleSquareArrayExtensions.cs
@@ -221,18 +221,18 @@ public static void MinMax(this double[,] data, out double min, out double max)
///
/// returns an empty matrix with the same number of rows and columns of the input matrix.
///
- public static double[,] EmptyCopy(this double[,] matrix)
+ public static T[,] EmptyCopy(this T[,] matrix)
{
- return new double[matrix.GetLength(0), matrix.GetLength(1)];
+ return new T[matrix.GetLength(0), matrix.GetLength(1)];
}
///
/// retrieving a full column of a matrix
/// columnIndex is the column we want to access
///
- public static double[] GetColumn(this double[,] matrix, int columnIndex)
+ public static T[] GetColumn(this T[,] matrix, int columnIndex)
{
- double[] column = new double[matrix.GetLength(0)];
+ T[] column = new T[matrix.GetLength(0)];
for (int row = 0; row < matrix.GetLength(0); row++)
{
column[row] = matrix[row, columnIndex];
@@ -245,9 +245,9 @@ public static double[] GetColumn(this double[,] matrix, int columnIndex)
/// retrieving a full row of a matrix
/// rowIndex is the row we want to access
///
- public static double[] GetRow(this double[,] matrix, int rowIndex)
+ public static T[] GetRow(this T[,] matrix, int rowIndex)
{
- double[] row = new double[matrix.GetLength(1)];
+ T[] row = new T[matrix.GetLength(1)];
for (int column = 0; column < matrix.GetLength(1); column++)
{
row[column] = matrix[rowIndex, column];
@@ -270,7 +270,7 @@ public enum MergingDirection
/// adding a 2D-array to another 2D-array either by "column" or by "row"
///
- public static void AddToArray(double[,] result, double[,] array, MergingDirection mergingDirection, int start = 0)
+ public static void AddToArray(this T[,] result, T[,] array, MergingDirection mergingDirection, int start = 0)
{
for (int i = 0; i < array.GetLength(0); i++)
{
diff --git a/src/AnalysisConfigFiles/FeatureLearningConfig.yml b/src/AnalysisConfigFiles/FeatureLearningConfig.yml
new file mode 100644
index 000000000..9badeae82
--- /dev/null
+++ b/src/AnalysisConfigFiles/FeatureLearningConfig.yml
@@ -0,0 +1,53 @@
+---
+# Summary: Generates Clustering Features
+#
+# This analysis outputs:
+# 1. an image of the clusters' centroids
+# 2. a csv file that contains the vectors of the clusters' centroids
+# 3. a csv file that contains clusters' id and size
+# 4. a csv file that contains feature vectors
+#
+# The feature vectors can be used in training the machine learning models.
+
+# The directory for the output of parallel job running on MahnooshSandpit
+# OutputDirectory: "D:\Mahnoosh\Liz\ParallelJobs\"
+
+ # Properties that control the frequency scale of the spectrogram (here: Mel)
+FrequencyScaleType: Mel
+# HertzInterval: 1000
+FrameSize: 1024
+FinalBinCount: 128
+
+ # The default values for minFreqBin and maxFreqBin are 1 and FinalBinCount.
+ # For any other frequency bin bounds, these two parameters must be set manually.
+MinFreqBin: 24
+MaxFreqBin: 82
+
+ # The number of frequency bands used in the feature generation process
+numFreqBand: 1
+
+ # The width and height of the patches to be taken from the patch sampling set.
+ # The default patch is a single full-band frame, i.e. patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand and patchHeight = 1.
+# PatchWidth: 5
+PatchHeight: 1
+
+ # The number of frames whose feature vectors will be concatenated in order to preserve temporal information.
+FrameWindowLength : 1
+
+ # The step size used when forming successive windows of frames
+StepSize : 1
+
+# The number of patches to be selected from each recording of the patch sampling set
+NumRandomPatches: 1000
+
+# the number of clusters to be generated from the selected patch set
+NumClusters: 256
+
+ # Noise reduction and whitening are applied when these options are set to 'true'
+DoNoiseReduction: true
+DoWhitening: true
+
+ # The factor by which the data is downsampled using max pooling
+MaxPoolingFactor: 6
+...
+
diff --git a/src/AnalysisPrograms/AnalysisPrograms.csproj b/src/AnalysisPrograms/AnalysisPrograms.csproj
index 13454ff93..170d3a8a2 100644
--- a/src/AnalysisPrograms/AnalysisPrograms.csproj
+++ b/src/AnalysisPrograms/AnalysisPrograms.csproj
@@ -52,14 +52,20 @@
latest
-
- ..\..\packages\Accord.2.12.0.0\lib\Accord.dll
+
+ ..\..\packages\Accord.3.8.0\lib\net462\Accord.dll
-
- ..\..\packages\Accord.Math.2.12.0.0\lib\Accord.Math.dll
+
+ ..\..\packages\Accord.MachineLearning.3.8.0\lib\net462\Accord.MachineLearning.dll
-
- ..\..\packages\Accord.Statistics.2.12.0.0\lib\Accord.Statistics.dll
+
+ ..\..\packages\Accord.Math.3.8.0\lib\net462\Accord.Math.dll
+
+
+ ..\..\packages\Accord.Math.3.8.0\lib\net462\Accord.Math.Core.dll
+
+
+ ..\..\packages\Accord.Statistics.3.8.0\lib\net462\Accord.Statistics.dll
..\..\packages\AForge.2.2.5\lib\AForge.dll
@@ -94,8 +100,8 @@
..\..\packages\MathNet.Numerics.3.20.2\lib\net40\MathNet.Numerics.dll
-
- ..\..\packages\McMaster.Extensions.CommandLineUtils.2.2.5\lib\net45\McMaster.Extensions.CommandLineUtils.dll
+
+ ..\..\packages\McMaster.Extensions.CommandLineUtils.2.3.0-alpha\lib\net45\McMaster.Extensions.CommandLineUtils.dll
@@ -287,6 +293,7 @@
+
@@ -488,10 +495,12 @@
+
+
\ No newline at end of file
diff --git a/src/AnalysisPrograms/Audio2Sonogram.cs b/src/AnalysisPrograms/Audio2Sonogram.cs
index 04d1d9079..87db6dca2 100644
--- a/src/AnalysisPrograms/Audio2Sonogram.cs
+++ b/src/AnalysisPrograms/Audio2Sonogram.cs
@@ -171,6 +171,10 @@ private static Dictionary GetConfigDictionary(FileInfo configFil
return configDict;
}
+ /// <summary>
+ /// Passing configuration as a Dictionary{string, string} (configDict) is obsolete.
+ /// Calls to this method should be avoided.
+ /// </summary>
public static AudioToSonogramResult GenerateFourSpectrogramImages(
FileInfo sourceRecording,
FileInfo path2SoxSpectrogram,
diff --git a/src/AnalysisPrograms/MahnooshSandpit.cs b/src/AnalysisPrograms/MahnooshSandpit.cs
new file mode 100644
index 000000000..769e13f7a
--- /dev/null
+++ b/src/AnalysisPrograms/MahnooshSandpit.cs
@@ -0,0 +1,351 @@
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
+
+namespace AnalysisPrograms
+{
+ using System;
+ using System.Collections.Generic;
+ using System.Drawing;
+ using System.Drawing.Imaging;
+ using System.IO;
+ using System.Linq;
+ using System.Threading.Tasks;
+ using Accord.Math;
+ using Acoustics.Shared;
+ using Acoustics.Shared.ConfigFile;
+ using Acoustics.Shared.Csv;
+ using AudioAnalysisTools.DSP;
+ using AudioAnalysisTools.StandardSpectrograms;
+ using AudioAnalysisTools.WavTools;
+ using McMaster.Extensions.CommandLineUtils;
+ using Production.Arguments;
+ using TowseyLibrary;
+
+ public class MahnooshSandpit
+ {
+ public const string CommandName = "MahnooshSandpit";
+ /// <summary>Unsupervised feature-learning run over hard-coded folders: learns cluster centroids, writes them as CSV/BMP files, then extracts features. <paramref name="arguments"/> is not read.</summary>
+ public void Execute(Arguments arguments)
+ {
+ LoggedConsole.WriteLine("feature learning process...");
+
+ var inputDir = @"D:\Mahnoosh\Liz\Least_Bittern\";
+ var inputPath = Path.Combine(inputDir, "TrainSet\\one_min_recordings");
+ var trainSetPath = Path.Combine(inputDir, "TrainSet\\train_data");
+ // var testSetPath = Path.Combine(inputDir, "TestSet");
+ var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml";
+ var resultDir = Path.Combine(inputDir, "FeatureLearning");
+ Directory.CreateDirectory(resultDir);
+
+ // var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
+ // var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
+ // var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
+ // var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png");
+ // var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp");
+
+ // ++++ patch sampling from 1-min recordings (presumably performed inside UnsupervisedFeatureLearning below) ++++
+
+ var configFile = configPath.ToFileInfo();
+
+ if (configFile == null)
+ {
+ throw new FileNotFoundException("No config file argument provided"); // NOTE(review): FileNotFoundException for a missing argument and ArgumentException for a missing file look swapped
+ }
+ else if (!configFile.Exists)
+ {
+ throw new ArgumentException($"Config file {configFile.FullName} not found");
+ }
+
+ var configuration = ConfigFile.Deserialize(configFile);
+ int patchWidth =
+ (configuration.MaxFreqBin - configuration.MinFreqBin + 1) / configuration.NumFreqBand; // number of frequency bins per band
+
+ var clusteringOutputList = FeatureLearning.UnsupervisedFeatureLearning(configuration, inputPath);
+
+ List allBandsCentroids = new List();
+ for (int i = 0; i < clusteringOutputList.Count; i++)
+ {
+ var clusteringOutput = clusteringOutputList[i];
+
+ // Write the centroids of this clustering output to a CSV file.
+ // Author's note: Csv.WriteToCsv can't write data types like dictionary (problems with arrays),
+ // so the dictionary values are converted to a matrix and written with Csv.WriteMatrixToCsv.
+ // There might be a better way to do this.
+ string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv");
+ var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray();
+ Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix());
+
+ // Sort the clusters by size; the resulting order is used below when the centroids are drawn
+ Dictionary clusterIdSize = clusteringOutput.ClusterIdSize;
+ int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);
+
+ // Write cluster ID and size to a CSV file
+ string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv");
+ Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);
+
+ // Collect the centroid vectors from the clustering output (used for the cluster image and for feature extraction)
+ List> list = clusteringOutput.ClusterIdCentroid.ToList();
+ double[][] centroids = new double[list.Count][];
+
+ for (int j = 0; j < list.Count; j++)
+ {
+ centroids[j] = list[j].Value;
+ }
+
+ allBandsCentroids.Add(centroids);
+
+ List allCentroids = new List();
+ for (int k = 0; k < centroids.Length; k++)
+ {
+ // Convert each centroid to a matrix. Alternative kept for reference, in order of cluster ID:
+ // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight);
+ // Variant in use: in order of cluster size (via sortOrder)
+ double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, configuration.PatchHeight);
+
+ // normalize each centroid
+ double[,] normCent = DataTools.normalise(cent);
+
+ // add a row to each centroid (a row of zeros, per the original author's note)
+ double[,] cent2 = PatchSampling.AddRow(normCent);
+
+ allCentroids.Add(cent2);
+ }
+
+ // concatenate all centroids into a single matrix
+ double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);
+
+ // Draw the clusters, rotate the image by 270 degrees and save it as a BMP file
+ var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
+ clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);
+ var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp");
+ clusterImage.Save(outputClusteringImage);
+ }
+
+ // extract features for the recordings in trainSetPath using all collected centroids
+ FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, trainSetPath, resultDir);
+ LoggedConsole.WriteLine("Done...");
+ }
+ /// <summary>Command-line sub-command for this sandpit; its Execute currently runs only BuildSemisupervisedClusteringFeatures.</summary>
+ [Command(
+ CommandName,
+ Description = "Temporary entry point for unsupervised and semi-supervised feature learning")]
+ public class Arguments : SubCommandBase
+ {
+ public override Task Execute(CommandLineApplication app)
+ {
+ //var instance = new MahnooshSandpit();
+ //instance.Execute(this);
+ //GenerateSpectrograms();
+ //ExtractClusteringFeatures();
+ BuildSemisupervisedClusteringFeatures(); // the only step currently enabled; the alternatives above are commented out
+
+ return this.Ok();
+ }
+ }
+
+ /// <summary>Semi-supervised feature-learning run over hard-coded folders: learns cluster centroids using a CSV of positive frames, writes them as CSV/BMP files, then extracts features.</summary>
+ public static void BuildSemisupervisedClusteringFeatures()
+ {
+ LoggedConsole.WriteLine("semi-supervised feature learning process...");
+ var inputDir = @"D:\Mahnoosh\Liz\Least_Bittern\";
+ var inputPath = Path.Combine(inputDir, "TrainSet\\one_min_recordings");
+
+ // the info file contains the information about the frames of interest for supervised feature learning.
+ var frameInfoFilePath = @"D:\Mahnoosh\Liz\Least_Bittern\TrainSet\positive_frames.csv";
+ var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml";
+ var resultDir = Path.Combine(inputDir, "SemisupervisedClusteringFeatures");
+ Directory.CreateDirectory(resultDir);
+
+ var configFile = configPath.ToFileInfo();
+
+ if (configFile == null)
+ {
+ throw new FileNotFoundException("No config file argument provided"); // NOTE(review): FileNotFoundException for a missing argument and ArgumentException for a missing file look swapped
+ }
+ else if (!configFile.Exists)
+ {
+ throw new ArgumentException($"Config file {configFile.FullName} not found");
+ }
+
+ var configuration = ConfigFile.Deserialize(configFile);
+
+ int patchWidth =
+ (configuration.MaxFreqBin - configuration.MinFreqBin + 1) / configuration.NumFreqBand; // number of frequency bins per band
+
+ var frameInfoFile = frameInfoFilePath.ToFileInfo();
+
+ if (frameInfoFile == null)
+ {
+ throw new FileNotFoundException("No information file argument provided");
+ }
+ else if (!frameInfoFile.Exists)
+ {
+ throw new ArgumentException($"Info file {frameInfoFile.FullName} not found");
+ }
+
+ // frame info contains information about positive frames; it is read as a matrix of strings
+ string[,] frameInfo = Csv.ReadMatrixFromCsv(frameInfoFile, TwoDimensionalArray.None);
+
+ var clusteringOutputList = FeatureLearning.SemisupervisedFeatureLearning(configuration, inputPath, frameInfo);
+
+ List allBandsCentroids = new List();
+ for (int i = 0; i < clusteringOutputList.Count; i++)
+ {
+ var clusteringOutput = clusteringOutputList[i];
+
+ // Write the centroids of this clustering output to a CSV file.
+ // Author's note: Csv.WriteToCsv can't write data types like dictionary (problems with arrays),
+ // so the dictionary values are converted to a matrix and written with Csv.WriteMatrixToCsv.
+ // There might be a better way to do this.
+ string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv");
+ var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray();
+ Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix());
+
+ // Sort the clusters by size; the resulting order is used below when the centroids are drawn
+ Dictionary clusterIdSize = clusteringOutput.ClusterIdSize;
+ int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);
+
+ // Write cluster ID and size to a CSV file
+ string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv");
+ Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);
+
+ // Collect the centroid vectors from the clustering output (used for the cluster image and for feature extraction)
+ List> list = clusteringOutput.ClusterIdCentroid.ToList();
+ double[][] centroids = new double[list.Count][];
+
+ for (int j = 0; j < list.Count; j++)
+ {
+ centroids[j] = list[j].Value;
+ }
+
+ allBandsCentroids.Add(centroids);
+
+ List allCentroids = new List();
+ for (int k = 0; k < centroids.Length; k++)
+ {
+ // Convert each centroid to a matrix. Alternative kept for reference, in order of cluster ID:
+ // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight);
+ // Variant in use: in order of cluster size (via sortOrder)
+ double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, configuration.PatchHeight);
+
+ // normalize each centroid
+ double[,] normCent = DataTools.normalise(cent);
+
+ // add a row to each centroid (a row of zeros, per the original author's note)
+ double[,] cent2 = PatchSampling.AddRow(normCent);
+
+ allCentroids.Add(cent2);
+ }
+
+ // concatenate all centroids into a single matrix
+ double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);
+
+ // Draw the clusters, rotate the image by 270 degrees and save it as a BMP file
+ var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
+ clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);
+ var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp");
+ clusterImage.Save(outputClusteringImage);
+ }
+
+ // extract features; NOTE(review): this runs on inputPath, the same folder the centroids were learned from - confirm intended
+ FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, inputPath, resultDir);
+ LoggedConsole.WriteLine("Done...");
+ }
+ /// <summary>Feature extraction only: reads previously written centroids from ClusterCentroids0.csv and extracts features for the hard-coded test-set folder.</summary>
+ public static void ExtractClusteringFeatures()
+ {
+ LoggedConsole.WriteLine("feature extraction process...");
+ var inputDir = @"D:\Mahnoosh\Liz\Least_Bittern\";
+ var resultDir = Path.Combine(inputDir, "FeatureLearning");
+ //var trainSetPath = Path.Combine(inputDir, "TrainSet");
+ var testSetPath = Path.Combine(inputDir, "TestSet\\one_min_recordings");
+ var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml";
+ var centroidsPath = Path.Combine(resultDir, "ClusterCentroids0.csv"); // only the centroid file with index 0 is loaded
+
+ var configFile = configPath.ToFileInfo();
+
+ if (configFile == null)
+ {
+ throw new FileNotFoundException("No config file argument provided"); // NOTE(review): FileNotFoundException for a missing argument and ArgumentException for a missing file look swapped
+ }
+ else if (!configFile.Exists)
+ {
+ throw new ArgumentException($"Config file {configFile.FullName} not found");
+ }
+
+ var configuration = ConfigFile.Deserialize(configFile);
+
+ List centroids = new List();
+ centroids.Add(Csv.ReadMatrixFromCsv(centroidsPath.ToFileInfo(), TwoDimensionalArray.None).ToJagged()); // CSV matrix converted to a jagged array
+ FeatureExtraction.UnsupervisedFeatureExtraction(configuration, centroids, testSetPath, resultDir);
+ LoggedConsole.WriteLine("Done...");
+ }
+ /// <summary>For every non-empty .wav file in a hard-coded folder, builds a noise-reduced decibel spectrogram and saves it as an annotated BMP image and as a CSV matrix.</summary>
+ public static void GenerateSpectrograms()
+ {
+ var recordingDir = @"M:\Liz\SupervisedPatchSamplingSet\Recordings\";
+ var resultDir = @"M:\Liz\SupervisedPatchSamplingSet\";
+
+ // check whether there is any file in the folder/subfolders (NOTE(review): recursive, any extension - the loop below only reads top-level *.wav files)
+ if (Directory.GetFiles(recordingDir, "*", SearchOption.AllDirectories).Length == 0)
+ {
+ throw new ArgumentException("The folder of recordings is empty...");
+ }
+
+ int frameSize = 1024;
+ int finalBinCount = 256;
+ FreqScaleType scaleType = FreqScaleType.Mel;
+ var settings = new SpectrogramSettings()
+ {
+ WindowSize = frameSize,
+
+ // Author's note: with the default frame size (1024) each frame lasts 0.04644 seconds
+ // (NOTE(review): that figure corresponds to a 22050 Hz sample rate - confirm).
+ // "WindowOverlap" answers the question of how many single frames (patch height 1) form one second:
+ // with the value below, 24 single frames span 1 second.
+ // The "WindowOverlap" value must be recalculated if the frame size is changed;
+ // this has not yet been considered in the config file!
+ WindowOverlap = 0.10725204,
+ DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, // the "? true : false" is redundant
+ MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
+ NoiseReductionType = NoiseReductionType.None,
+ NoiseReductionParameter = 0.0,
+ };
+
+ foreach (string filePath in Directory.GetFiles(recordingDir, "*.wav"))
+ {
+ FileInfo fileInfo = filePath.ToFileInfo();
+
+ // process the wav file only if it is not empty (zero-length files are skipped silently)
+ if (fileInfo.Length != 0)
+ {
+ var recording = new AudioRecording(filePath);
+ settings.SourceFileName = recording.BaseName;
+
+ var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recording.WavReader);
+
+ var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);
+
+ // Noise reduction: done here explicitly because NoiseReductionType is None in the settings above
+ decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
+
+ // draw the spectrogram, reusing the Nyquist frequency and duration of the decibel spectrogram
+ var attributes = new SpectrogramAttributes()
+ {
+ NyquistFrequency = decibelSpectrogram.Attributes.NyquistFrequency,
+ Duration = decibelSpectrogram.Attributes.Duration,
+ };
+
+ Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(decibelSpectrogram.Data, settings, attributes);
+ string pathToSpectrogramFiles = Path.Combine(resultDir, "Spectrograms", settings.SourceFileName + ".bmp");
+ image.Save(pathToSpectrogramFiles, ImageFormat.Bmp); // NOTE(review): image is never disposed
+
+ // write the (noise-reduced) spectrogram matrix to a csv file
+ string pathToMatrixFiles = Path.Combine(resultDir, "Matrices", settings.SourceFileName + ".csv");
+ Csv.WriteMatrixToCsv(pathToMatrixFiles.ToFileInfo(), decibelSpectrogram.Data);
+ }
+ }
+ }
+ }
+}
diff --git a/src/AnalysisPrograms/Production/Arguments/MainArgs.cs b/src/AnalysisPrograms/Production/Arguments/MainArgs.cs
index 4403d8a51..59a2ceb8a 100644
--- a/src/AnalysisPrograms/Production/Arguments/MainArgs.cs
+++ b/src/AnalysisPrograms/Production/Arguments/MainArgs.cs
@@ -52,6 +52,7 @@ namespace AnalysisPrograms.Production.Arguments
[Subcommand(DummyAnalysis.CommandName, typeof(DummyAnalysis.Arguments))]
[Subcommand(FileRenamer.CommandName, typeof(FileRenamer.Arguments))]
[Subcommand(Sandpit.CommandName, typeof(Sandpit.Arguments))]
+ [Subcommand(MahnooshSandpit.CommandName, typeof(MahnooshSandpit.Arguments))]
public class MainArgs
{
private async Task OnExecuteAsync(CommandLineApplication app)
diff --git a/src/AnalysisPrograms/Sandpit.cs b/src/AnalysisPrograms/Sandpit.cs
index 7cc87ac08..31ac351b3 100644
--- a/src/AnalysisPrograms/Sandpit.cs
+++ b/src/AnalysisPrograms/Sandpit.cs
@@ -15,7 +15,8 @@ namespace AnalysisPrograms
using System.Threading.Tasks;
using Acoustics.Shared;
using Acoustics.Shared.Csv;
- using AnalysisPrograms.AnalyseLongRecordings;
+ using Acoustics.Tools.Wav;
+ using AnalyseLongRecordings;
using AudioAnalysisTools;
using AudioAnalysisTools.DSP;
using AudioAnalysisTools.Indices;
@@ -23,7 +24,7 @@ namespace AnalysisPrograms
using AudioAnalysisTools.StandardSpectrograms;
using AudioAnalysisTools.WavTools;
using McMaster.Extensions.CommandLineUtils;
- using AnalysisPrograms.Production.Arguments;
+ using Production.Arguments;
using TowseyLibrary;
///
@@ -78,6 +79,7 @@ public override Task Execute(CommandLineApplication app)
//CubeHelixDrawTestImage();
//DrawLongDurationSpectrogram();
//DrawClusterSequence();
+ DrawStandardSpectrograms();
//ExtractSpectralFeatures();
//HerveGlotinMethods();
@@ -510,6 +512,33 @@ public static void Audio2CsvOverOneFile()
AnalyseLongRecording.Execute(arguments);
}
+ /// <summary>
+ /// Sandpit helper: builds an EnergySpectrogram from a hard-coded test recording and calls DrawLogPsd with a hard-coded output path.
+ /// </summary>
+ public static void DrawStandardSpectrograms()
+ {
+ var audioFile = @"C:\Ecoacoustics\WavFiles\TestRecordings\BAC\BAC2_20071008-085040.wav";
+ var recording = new WavReader(audioFile);
+
+ var settings = new SpectrogramSettings()
+ {
+ SourceFileName = "BAC2_20071008-085040",
+ WindowSize = 1024,
+ WindowOverlap = 0.0,
+ DoMelScale = false,
+ MelBinCount = 256,
+ NoiseReductionType = NoiseReductionType.Median,
+ NoiseReductionParameter = 0.0,
+ };
+
+ // Alternative spectrogram types, currently disabled:
+ //var dbSpectrogram = new DecibelSpectrogram(settings, recording);
+ //dbSpectrogram.DrawSpectrogram(@"C:\Ecoacoustics\WavFiles\TestRecordings\BAC\BAC2_20071008-085040_MelMedian.png");
+
+ var energySpectro = new EnergySpectrogram(settings, recording);
+ energySpectro.DrawLogPsd(@"C:\Ecoacoustics\WavFiles\TestRecordings\BAC\BAC2_20071008-085040_LogPSD.png");
+ }
+
public static void DrawLongDurationSpectrogram()
{
// the default ld fc spectrogram config file
diff --git a/src/AnalysisPrograms/SpectralPeakTracking/SpectralPeakTrackingEntry.cs b/src/AnalysisPrograms/SpectralPeakTracking/SpectralPeakTrackingEntry.cs
index f9f320689..43dee7b95 100644
--- a/src/AnalysisPrograms/SpectralPeakTracking/SpectralPeakTrackingEntry.cs
+++ b/src/AnalysisPrograms/SpectralPeakTracking/SpectralPeakTrackingEntry.cs
@@ -82,7 +82,8 @@ public static void Execute(Arguments arguments)
//var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
- var energySpectrogram = new EnergySpectrogram(amplitudeSpectrogram);
+ // Broken in merge b7e03070a9cd72ab0632789a3412967a6cc54cd6
+ //var energySpectrogram = new EnergySpectrogram(amplitudeSpectrogram);
var decibelSpectrogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
double frameStepSize = sonoConfig.GetFrameOffset();
diff --git a/src/AnalysisPrograms/packages.config b/src/AnalysisPrograms/packages.config
index 4718d0024..d9918a803 100644
--- a/src/AnalysisPrograms/packages.config
+++ b/src/AnalysisPrograms/packages.config
@@ -1,8 +1,9 @@
-
-
-
+
+
+
+
@@ -15,7 +16,7 @@
-
+
diff --git a/src/AudioAnalysisTools/AudioAnalysisTools.csproj b/src/AudioAnalysisTools/AudioAnalysisTools.csproj
index 89ca2f5ee..9b50bd6b0 100644
--- a/src/AudioAnalysisTools/AudioAnalysisTools.csproj
+++ b/src/AudioAnalysisTools/AudioAnalysisTools.csproj
@@ -241,6 +241,9 @@
+
+
+
@@ -251,6 +254,7 @@
+
@@ -294,9 +298,12 @@
-
+
+
+
+
diff --git a/src/AudioAnalysisTools/DSP/FeatureExtraction.cs b/src/AudioAnalysisTools/DSP/FeatureExtraction.cs
new file mode 100644
index 000000000..c9f385ab3
--- /dev/null
+++ b/src/AudioAnalysisTools/DSP/FeatureExtraction.cs
@@ -0,0 +1,482 @@
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
+
+namespace AudioAnalysisTools.DSP
+{
+ using System;
+ using System.Collections.Generic;
+ using System.IO;
+ using System.Linq;
+ using System.Text;
+ using Accord.Math;
+ using Accord.Statistics;
+ using Acoustics.Shared.Csv;
+ using NeuralNets;
+ using StandardSpectrograms;
+ using TowseyLibrary;
+ using WavTools;
+
+ ///
+ /// This class is designed to extract clustering features for target input recordings.
+ ///
+ public class FeatureExtraction
+ {
+ /// <summary>
+ /// Apply feature learning process on a set of target (1-minute) recordings (inputPath)
+ /// according to a set of centroids learned using the feature learning process.
+ /// Output feature vectors (outputPath)
+ /// </summary>
+ public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config, List allCentroids,
+ string inputPath, string outputPath)
+ {
+ var simVecDir = Directory.CreateDirectory(Path.Combine(outputPath, "SimilarityVectors"));
+ int frameSize = config.FrameSize;
+ int finalBinCount = config.FinalBinCount;
+ FreqScaleType scaleType = config.FrequencyScaleType;
+ var settings = new SpectrogramSettings()
+ {
+ WindowSize = frameSize,
+
+ // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds
+ // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second
+ // The "WindowOverlap" is calculated to answer this question
+ // each 24 single-frames duration is equal to 1 second
+ // note that the "WindowOverlap" value should be recalculated if frame size is changed
+ // this has not yet been considered in the Config file!
+ WindowOverlap = 0.10725204,
+ DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false,
+ MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
+ NoiseReductionType = NoiseReductionType.None,
+ NoiseReductionParameter = 0.0,
+ };
+ double frameStep = frameSize * (1 - settings.WindowOverlap);
+ int minFreqBin = config.MinFreqBin;
+ int maxFreqBin = config.MaxFreqBin;
+ int numFreqBand = config.NumFreqBand;
+ int patchWidth =
+ (maxFreqBin - minFreqBin + 1) / numFreqBand;
+ int patchHeight = config.PatchHeight;
+
+ // the number of frames that their feature vectors will be concatenated in order to preserve temporal information.
+ int frameWindowLength = config.FrameWindowLength;
+
+ // the step size to make a window of frames
+ int stepSize = config.StepSize;
+
+ // the factor of downsampling
+ int maxPoolingFactor = config.MaxPoolingFactor;
+
+ // check whether there is any file in the folder/subfolders
+ if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
+ {
+ throw new ArgumentException("The folder of recordings is empty...");
+ }
+
+ //*****
+ // lists of features for all processing files
+ // the key is the file name, and the value is the features for different bands
+ Dictionary> allFilesMinFeatureVectors = new Dictionary>();
+ Dictionary> allFilesMeanFeatureVectors = new Dictionary>();
+ Dictionary> allFilesMaxFeatureVectors = new Dictionary>();
+ Dictionary> allFilesStdFeatureVectors = new Dictionary>();
+ Dictionary> allFilesSkewnessFeatureVectors = new Dictionary>();
+
+ double[,] inputMatrix;
+ List recordings = new List();
+
+ foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
+ {
+ FileInfo fileInfo = filePath.ToFileInfo();
+
+ // process the wav file if it is not empty
+ if (fileInfo.Length != 0)
+ {
+ var recording = new AudioRecording(filePath);
+ settings.SourceFileName = recording.BaseName;
+
+ if (config.DoSegmentation)
+ {
+ recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep);
+ }
+ else
+ {
+ recordings.Add(recording);
+ }
+
+ for (int s = 0; s < recordings.Count; s++)
+ {
+ string pathToSimilarityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + "-" + s.ToString() + ".csv");
+ var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[s].WavReader);
+ var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);
+
+ // DO RMS NORMALIZATION
+ //sonogram.Data = SNR.RmsNormalization(sonogram.Data);
+
+ // DO NOISE REDUCTION
+ if (config.DoNoiseReduction)
+ {
+ decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
+ }
+
+ // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
+ if (minFreqBin != 1 || maxFreqBin != finalBinCount)
+ {
+ inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
+ }
+ else
+ {
+ inputMatrix = decibelSpectrogram.Data;
+ }
+
+ // creating matrices from different freq bands of the source spectrogram
+ List allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
+ double[][,] matrices2 = allSubmatrices2.ToArray();
+ List allSequentialPatchMatrix = new List();
+ for (int i = 0; i < matrices2.GetLength(0); i++)
+ {
+ // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
+ double[,] downsampledMatrix = FeatureLearning.MaxPooling(matrices2[i], config.MaxPoolingFactor);
+
+ int rows = downsampledMatrix.GetLength(0);
+ int columns = downsampledMatrix.GetLength(1);
+ var sequentialPatches = PatchSampling.GetPatches(downsampledMatrix, patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
+ allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
+ }
+
+ // +++++++++++++++++++++++++++++++++++Feature Transformation
+ // to do the feature transformation, we normalize centroids and
+ // sequential patches from the input spectrogram to unit length
+ // Then, we calculate the dot product of each patch with the centroids' matrix
+
+ List allNormCentroids = new List();
+ for (int i = 0; i < allCentroids.Count; i++)
+ {
+ // double check the index of the list
+ double[][] normCentroids = new double[allCentroids.ToArray()[i].GetLength(0)][];
+ for (int j = 0; j < allCentroids.ToArray()[i].GetLength(0); j++)
+ {
+ normCentroids[j] = ART_2A.NormaliseVector(allCentroids.ToArray()[i][j]);
+ }
+
+ allNormCentroids.Add(normCentroids);
+ }
+
+ List allFeatureTransVectors = new List();
+
+ // processing the sequential patch matrix for each band
+ for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
+ {
+ List featureTransVectors = new List();
+ double[][] similarityVectors = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][];
+
+ for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++)
+ {
+ // normalize each patch to unit length
+ var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j];
+ var normVector = inputVector;
+
+ // to avoid vectors with NaN values, only normalize those that their norm is not equal to zero.
+ if (inputVector.Euclidean() != 0)
+ {
+ normVector = ART_2A.NormaliseVector(inputVector);
+ }
+
+ similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
+ }
+
+ Csv.WriteMatrixToCsv(pathToSimilarityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix());
+
+ // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames
+ // using FrameWindowLength
+
+ // patchId refers to the patch id that has been processed so far according to the step size.
+ // if we want no overlap between different frame windows, then stepSize = frameWindowLength
+ int patchId = 0;
+ while (patchId + frameWindowLength - 1 < similarityVectors.GetLength(0))
+ {
+ List patchGroup = new List();
+ for (int k = 0; k < frameWindowLength; k++)
+ {
+ patchGroup.Add(similarityVectors[k + patchId]);
+ }
+
+ featureTransVectors.Add(DataTools.ConcatenateVectors(patchGroup));
+ patchId = patchId + stepSize;
+ }
+
+ allFeatureTransVectors.Add(featureTransVectors.ToArray());
+ }
+
+ // +++++++++++++++++++++++++++++++++++Feature Transformation
+
+ // +++++++++++++++++++++++++++++++++++Temporal Summarization
+ // Based on the resolution to generate features, the "numFrames" parameter will be set.
+ // Each 24 single-frame patches form 1 second
+ // for each 24 patch, we generate 5 vectors of min, mean, std, and max (plus skewness from Accord.net)
+ // The pre-assumption is that each input recording is 1 minute long
+
+ // store features of different bands in lists
+ List allMinFeatureVectors = new List();
+ List allMeanFeatureVectors = new List();
+ List allMaxFeatureVectors = new List();
+ List allStdFeatureVectors = new List();
+ List allSkewnessFeatureVectors = new List();
+
+ // Each 24 frames form 1 second using WindowOverlap
+ // factors such as stepSize, and maxPoolingFactor should be considered in temporal summarization.
+ int numFrames = 24 / (patchHeight * stepSize * maxPoolingFactor);
+
+ foreach (var freqBandFeature in allFeatureTransVectors)
+ {
+ List minFeatureVectors = new List();
+ List meanFeatureVectors = new List();
+ List maxFeatureVectors = new List();
+ List stdFeatureVectors = new List();
+ List skewnessFeatureVectors = new List();
+
+ int c = 0;
+ while (c + numFrames <= freqBandFeature.GetLength(0))
+ {
+ // First, make a list of patches that would be equal to the needed resolution (1 second, 60 second, etc.)
+ List sequencesOfFramesList = new List();
+ for (int i = c; i < c + numFrames; i++)
+ {
+ sequencesOfFramesList.Add(freqBandFeature[i]);
+ }
+
+ List min = new List();
+ List mean = new List();
+ List std = new List();
+ List max = new List();
+ List skewness = new List();
+
+ double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();
+
+ // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise
+ for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
+ {
+ double[] temp = new double[sequencesOfFrames.GetLength(0)];
+ for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
+ {
+ temp[k] = sequencesOfFrames[k, j];
+ }
+
+ min.Add(temp.GetMinValue());
+ mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
+ std.Add(AutoAndCrossCorrelation.GetStdev(temp));
+ max.Add(temp.GetMaxValue());
+ skewness.Add(temp.Skewness());
+ }
+
+ minFeatureVectors.Add(min.ToArray());
+ meanFeatureVectors.Add(mean.ToArray());
+ maxFeatureVectors.Add(max.ToArray());
+ stdFeatureVectors.Add(std.ToArray());
+ skewnessFeatureVectors.Add(skewness.ToArray());
+ c += numFrames;
+ }
+
+ // When (freqBandFeature.GetLength(0) % numFrames) != 0, a number of frames (< numFrames) at the end of the
+ // target recording (or the whole recording, if it is shorter than numFrames) are left unprocessed by the loop above.
+ // This is a problem when, for example, the resolution used to generate the feature vectors is 1 min
+ // but the target recording is slightly shorter than one minute.
+ if (freqBandFeature.GetLength(0) % numFrames != 0 && freqBandFeature.GetLength(0) % numFrames > 1)
+ {
+ // First, make a list of patches that would be less than the required resolution
+ List sequencesOfFramesList = new List();
+ int unprocessedFrames = freqBandFeature.GetLength(0) % numFrames;
+ for (int i = freqBandFeature.GetLength(0) - unprocessedFrames;
+ i < freqBandFeature.GetLength(0);
+ i++)
+ {
+ sequencesOfFramesList.Add(freqBandFeature[i]);
+ }
+
+ List min = new List();
+ List mean = new List();
+ List std = new List();
+ List max = new List();
+ List skewness = new List();
+
+ double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();
+
+ // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise
+ for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
+ {
+ double[] temp = new double[sequencesOfFrames.GetLength(0)];
+ for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
+ {
+ temp[k] = sequencesOfFrames[k, j];
+ }
+
+ min.Add(temp.GetMinValue());
+ mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
+ std.Add(AutoAndCrossCorrelation.GetStdev(temp));
+ max.Add(temp.GetMaxValue());
+ skewness.Add(temp.Skewness());
+ }
+
+ minFeatureVectors.Add(min.ToArray());
+ meanFeatureVectors.Add(mean.ToArray());
+ maxFeatureVectors.Add(max.ToArray());
+ stdFeatureVectors.Add(std.ToArray());
+ skewnessFeatureVectors.Add(skewness.ToArray());
+ }
+
+ allMinFeatureVectors.Add(minFeatureVectors.ToArray().ToMatrix());
+ allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix());
+ allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix());
+ allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix());
+ allSkewnessFeatureVectors.Add(skewnessFeatureVectors.ToArray().ToMatrix());
+ }
+
+ //*****
+ // the keys of the following dictionaries contain file name
+ // and their values are a list which the list.count is
+ // the number of all subsegments for which features are extracted
+ // the number of freq bands defined as an user-defined parameter.
+ // the 2D-array is the feature vectors.
+ allFilesMinFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMinFeatureVectors);
+ allFilesMeanFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMeanFeatureVectors);
+ allFilesMaxFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMaxFeatureVectors);
+ allFilesStdFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allStdFeatureVectors);
+ allFilesSkewnessFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allSkewnessFeatureVectors);
+
+ // +++++++++++++++++++++++++++++++++++Temporal Summarization
+ }
+ }
+ }
+
+ // ++++++++++++++++++++++++++++++++++Writing features to one file
+ // First, concatenate the min, mean, max, std, and skewness vectors of each summarized time window.
+ // Then, write the features of each pre-defined frequency band into a separate CSV file.
+ var filesName = allFilesMeanFeatureVectors.Keys.ToArray();
+ var minFeatures = allFilesMinFeatureVectors.Values.ToArray();
+ var meanFeatures = allFilesMeanFeatureVectors.Values.ToArray();
+ var maxFeatures = allFilesMaxFeatureVectors.Values.ToArray();
+ var stdFeatures = allFilesStdFeatureVectors.Values.ToArray();
+ var skewnessFeatures = allFilesSkewnessFeatureVectors.Values.ToArray();
+
+ // The number of elements in the list shows the number of freq bands
+ // the size of each element in the list shows the number of files processed to generate feature for.
+ // the dimensions of the matrix shows the number of feature vectors generated for each file and the length of feature vector
+ var allMins = new List();
+ var allMeans = new List();
+ var allMaxs = new List();
+ var allStds = new List();
+ var allSkewness = new List();
+
+ // looping over freq bands
+ for (int i = 0; i < meanFeatures[0].Count; i++)
+ {
+ var mins = new List();
+ var means = new List();
+ var maxs = new List();
+ var stds = new List();
+ var skewnesses = new List();
+
+ // looping over all files
+ for (int k = 0; k < meanFeatures.Length; k++)
+ {
+ mins.Add(minFeatures[k].ToArray()[i]);
+ means.Add(meanFeatures[k].ToArray()[i]);
+ maxs.Add(maxFeatures[k].ToArray()[i]);
+ stds.Add(stdFeatures[k].ToArray()[i]);
+ skewnesses.Add(skewnessFeatures[k].ToArray()[i]);
+ }
+
+ allMins.Add(mins.ToArray());
+ allMeans.Add(means.ToArray());
+ allMaxs.Add(maxs.ToArray());
+ allStds.Add(stds.ToArray());
+ allSkewness.Add(skewnesses.ToArray());
+ }
+
+ // each element of meanFeatures array is a list of features for different frequency bands.
+ // looping over the number of freq bands
+ for (int i = 0; i < allMeans.ToArray().GetLength(0); i++)
+ {
+ // creating output feature file based on the number of freq bands
+ var outputFeatureFile = Path.Combine(outputPath, "FeatureVectors-" + i.ToString() + ".csv");
+
+ // creating the header for CSV file
+ List header = new List();
+ header.Add("file name");
+
+ for (int j = 0; j < allMins.ToArray()[i][0].GetLength(1); j++)
+ {
+ header.Add("min" + j.ToString());
+ }
+
+ for (int j = 0; j < allMeans.ToArray()[i][0].GetLength(1); j++)
+ {
+ header.Add("mean" + j.ToString());
+ }
+
+ for (int j = 0; j < allMaxs.ToArray()[i][0].GetLength(1); j++)
+ {
+ header.Add("max" + j.ToString());
+ }
+
+ for (int j = 0; j < allStds.ToArray()[i][0].GetLength(1); j++)
+ {
+ header.Add("std" + j.ToString());
+ }
+
+ for (int j = 0; j < allSkewness.ToArray()[i][0].GetLength(1); j++)
+ {
+ header.Add("skewness" + j.ToString());
+ }
+
+ var csv = new StringBuilder();
+ string content = string.Empty;
+ foreach (var entry in header.ToArray())
+ {
+ content += entry.ToString() + ",";
+ }
+
+ csv.AppendLine(content);
+
+ var allFilesFeatureVectors = new Dictionary();
+
+ // looping over files
+ for (int j = 0; j < allMeans.ToArray()[i].GetLength(0); j++)
+ {
+ // concatenating the min, mean, max, std, and skewness vectors together for the pre-defined resolution
+ List featureVectors = new List();
+ for (int k = 0; k < allMeans.ToArray()[i][j].ToJagged().GetLength(0); k++)
+ {
+ List featureList = new List
+ {
+ allMins.ToArray()[i][j].ToJagged()[k],
+ allMeans.ToArray()[i][j].ToJagged()[k],
+ allMaxs.ToArray()[i][j].ToJagged()[k],
+ allStds.ToArray()[i][j].ToJagged()[k],
+ allSkewness.ToArray()[i][j].ToJagged()[k],
+ };
+ double[] featureVector = DataTools.ConcatenateVectors(featureList);
+ featureVectors.Add(featureVector);
+ }
+
+ allFilesFeatureVectors.Add(filesName[j], featureVectors.ToArray().ToMatrix());
+ }
+
+ // writing feature vectors to CSV file
+ foreach (var entry in allFilesFeatureVectors)
+ {
+ content = string.Empty;
+ content += entry.Key.ToString() + ",";
+ foreach (var cent in entry.Value)
+ {
+ content += cent.ToString() + ",";
+ }
+
+ csv.AppendLine(content);
+ }
+
+ File.WriteAllText(outputFeatureFile, csv.ToString());
+ }
+ }
+ }
+}
diff --git a/src/AudioAnalysisTools/DSP/FeatureLearning.cs b/src/AudioAnalysisTools/DSP/FeatureLearning.cs
new file mode 100644
index 000000000..5da3d4aee
--- /dev/null
+++ b/src/AudioAnalysisTools/DSP/FeatureLearning.cs
@@ -0,0 +1,485 @@
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
+
+namespace AudioAnalysisTools.DSP
+{
+ using System;
+ using System.Collections.Generic;
+ using System.IO;
+ using System.Linq;
+ using Accord.Math;
+ using StandardSpectrograms;
+ using WavTools;
+
+ ///
+ /// This class is designed to learn bases (cluster centroids) through feature learning process.
+ ///
+ public static class FeatureLearning
+ {
+        /// <summary>
+        /// Apply the feature learning process to a set of randomly sampled spectrogram patches in an unsupervised manner.
+        /// For every non-empty *.wav file directly inside <paramref name="inputPath"/> a decibel spectrogram is built
+        /// (one per sub-segment when <c>config.DoSegmentation</c> is set), split into <c>config.NumFreqBand</c> frequency
+        /// bands, max-pooled, and sampled for random patches. The patches of each band are pooled over all files,
+        /// passed through <c>PcaWhitening.Whitening</c>, and clustered with k-means.
+        /// </summary>
+        /// <param name="config">the feature learning settings (frame size, frequency bands, patch and cluster counts, ...)</param>
+        /// <param name="inputPath">the folder that contains the recordings to learn from</param>
+        /// <returns>the clustering output of each frequency band, in band order</returns>
+        /// <exception cref="ArgumentException">thrown when the input folder (including its subfolders) contains no file</exception>
+        public static List<KmeansClustering.Output> UnsupervisedFeatureLearning(FeatureLearningSettings config, string inputPath)
+        {
+            // check whether there is any file in the folder/subfolders
+            // NOTE(review): this check searches subfolders, but the processing loop below only reads the *.wav files
+            // located directly inside inputPath - confirm which of the two is intended.
+            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
+            {
+                throw new ArgumentException("The folder of recordings is empty...");
+            }
+
+            int frameSize = config.FrameSize;
+            int finalBinCount = config.FinalBinCount;
+            bool useMelScale = config.FrequencyScaleType == FreqScaleType.Mel;
+            var settings = new SpectrogramSettings()
+            {
+                WindowSize = frameSize,
+
+                // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds
+                // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second
+                // The "WindowOverlap" is calculated to answer this question
+                // each 24 single-frames duration is equal to 1 second
+                // note that the "WindowOverlap" value should be recalculated if frame size is changed
+                // this has not yet been considered in the Config file!
+                WindowOverlap = 0.10725204,
+                DoMelScale = useMelScale,
+                MelBinCount = useMelScale ? finalBinCount : frameSize / 2,
+                NoiseReductionType = NoiseReductionType.None,
+                NoiseReductionParameter = 0.0,
+            };
+            double frameStep = frameSize * (1 - settings.WindowOverlap);
+            int minFreqBin = config.MinFreqBin;
+            int maxFreqBin = config.MaxFreqBin;
+            int numFreqBand = config.NumFreqBand;
+            int patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand;
+            int patchHeight = config.PatchHeight;
+            int numRandomPatches = config.NumRandomPatches;
+
+            // One list of random-patch matrices per frequency band, indexed by band number.
+            // An array keeps the band order explicit instead of relying on dictionary enumeration order.
+            var randomPatchLists = new List<double[,]>[numFreqBand];
+            for (int band = 0; band < numFreqBand; band++)
+            {
+                randomPatchLists[band] = new List<double[,]>();
+            }
+
+            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
+            {
+                FileInfo fileInfo = filePath.ToFileInfo();
+
+                // process the wav file only if it is not empty
+                if (fileInfo.Length == 0)
+                {
+                    continue;
+                }
+
+                var recording = new AudioRecording(filePath);
+                settings.SourceFileName = recording.BaseName;
+
+                // The list of recordings is rebuilt for every file. Previously a single list was shared by all files
+                // and, when segmentation was disabled, each new file was appended to it, so all earlier files were
+                // sampled again on every iteration.
+                List<AudioRecording> recordings;
+                if (config.DoSegmentation)
+                {
+                    recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep);
+                }
+                else
+                {
+                    recordings = new List<AudioRecording> { recording };
+                }
+
+                for (int i = 0; i < recordings.Count; i++)
+                {
+                    var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[i].WavReader);
+                    var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);
+
+                    // DO RMS NORMALIZATION
+                    //sonogram.Data = SNR.RmsNormalization(sonogram.Data);
+
+                    if (config.DoNoiseReduction)
+                    {
+                        decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
+                    }
+
+                    // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
+                    double[,] inputMatrix;
+                    if (minFreqBin != 1 || maxFreqBin != finalBinCount)
+                    {
+                        inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
+                    }
+                    else
+                    {
+                        inputMatrix = decibelSpectrogram.Data;
+                    }
+
+                    // creating matrices from different freq bands of the source spectrogram
+                    List<double[,]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
+
+                    // selecting random patches from each freq band matrix and add them to the corresponding patch list
+                    for (int band = 0; band < allSubmatrices.Count; band++)
+                    {
+                        // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
+                        double[,] downsampledMatrix = MaxPooling(allSubmatrices[band], config.MaxPoolingFactor);
+
+                        randomPatchLists[band].Add(
+                            PatchSampling.GetPatches(
+                                downsampledMatrix,
+                                patchWidth,
+                                patchHeight,
+                                numRandomPatches,
+                                PatchSampling.SamplingMethod.Random).ToMatrix());
+                    }
+                }
+            }
+
+            int numClusters = config.NumClusters;
+            var allClusteringOutput = new List<KmeansClustering.Output>();
+
+            foreach (List<double[,]> bandPatches in randomPatchLists)
+            {
+                // convert the list of random patch matrices of this band to one matrix
+                double[,] patchMatrix = PatchSampling.ListOf2DArrayToOne2DArray(bandPatches);
+
+                // Apply PCA Whitening
+                var whitenedSpectrogram = PcaWhitening.Whitening(config.DoWhitening, patchMatrix);
+
+                // Do k-means clustering
+                var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numClusters);
+                allClusteringOutput.Add(clusteringOutput);
+            }
+
+            return allClusteringOutput;
+        }
+
+        /// <summary>
+        /// This method downsamples the input matrix (x,y) by a factor of n on the temporal scale (x) using max pooling.
+        /// Every run of <paramref name="factor"/> consecutive rows is replaced by a single row that holds, for each
+        /// column, the maximum value found in that run. Trailing rows that do not fill a complete run are dropped.
+        /// </summary>
+        /// <param name="matrix">the matrix to downsample; its first dimension is the one being pooled</param>
+        /// <param name="factor">the number of consecutive rows merged into one output row; must be at least 1</param>
+        /// <returns>a matrix with (rows / factor) rows and the same number of columns as the input</returns>
+        /// <exception cref="ArgumentNullException">thrown when <paramref name="matrix"/> is null</exception>
+        /// <exception cref="ArgumentOutOfRangeException">thrown when <paramref name="factor"/> is less than 1</exception>
+        public static double[,] MaxPooling(double[,] matrix, int factor)
+        {
+            if (matrix == null)
+            {
+                throw new ArgumentNullException(nameof(matrix));
+            }
+
+            // a factor of zero would never advance the window (endless loop), a negative one reads empty windows
+            if (factor < 1)
+            {
+                throw new ArgumentOutOfRangeException(nameof(factor), factor, "The max pooling factor must be at least 1.");
+            }
+
+            int rowCount = matrix.GetLength(0);
+            int columnCount = matrix.GetLength(1);
+            var pooledRows = new List<double[]>(rowCount / factor);
+
+            // scratch buffer holding the values of one column within the current window; reused for every cell
+            double[] window = new double[factor];
+
+            for (int start = 0; start + factor <= rowCount; start += factor)
+            {
+                double[] maxValues = new double[columnCount];
+                for (int column = 0; column < columnCount; column++)
+                {
+                    for (int offset = 0; offset < factor; offset++)
+                    {
+                        window[offset] = matrix[start + offset, column];
+                    }
+
+                    maxValues[column] = window.GetMaxValue();
+                }
+
+                pooledRows.Add(maxValues);
+            }
+
+            return pooledRows.ToArray().ToMatrix();
+        }
+
+        /// <summary>
+        /// This method is called semi-supervised feature learning because one of the clusters is formed using
+        /// the positive frames manually selected from 1-min recordings.
+        /// The input to this methods is a group of files that contains the call of interest,
+        /// a 2D-array that contains file name, the second number and the corresponding frame numbers in each file.
+        /// At the moment, this method only handles single-frames as patches (PatchHeight = 1).
+        /// Segments without positive frames contribute random patches, which are clustered into
+        /// <c>config.NumClusters - 1</c> clusters per frequency band; the positive frames of each band form one extra
+        /// cluster that is added to that band's output under the id <c>config.NumClusters - 1</c>.
+        /// </summary>
+        /// <param name="config">the feature learning settings</param>
+        /// <param name="inputPath">the folder that contains the recordings to learn from</param>
+        /// <param name="frameInfo">
+        /// one row per annotated segment: column 0 is the file name, column 1 the segment (second) number,
+        /// columns 2 and 3 the first and last positive frame number (1-based, inclusive)
+        /// </param>
+        /// <returns>the clustering output of each frequency band, in band order</returns>
+        /// <exception cref="ArgumentException">
+        /// thrown when the input folder (including its subfolders) contains no file,
+        /// or when <paramref name="frameInfo"/> lists the same file name and second number twice
+        /// </exception>
+        public static List<KmeansClustering.Output> SemisupervisedFeatureLearning(FeatureLearningSettings config,
+            string inputPath, string[,] frameInfo)
+        {
+            // Each segment is assumed to hold 24 frames, numbered 1..24 (see the WindowOverlap note below).
+            // NOTE(review): this is only true for one-second segments with the default frame size - confirm before
+            // changing SubsegmentDurationInSeconds or FrameSize.
+            const int framesPerSegment = 24;
+
+            // making a dictionary of frame info as file name and second number as key, and start and end frame number as value.
+            var info = new Dictionary<Tuple<string, int>, int[]>();
+            for (int i = 0; i < frameInfo.GetLength(0); i++)
+            {
+                Tuple<string, int> key = Tuple.Create(frameInfo[i, 0], Convert.ToInt32(frameInfo[i, 1]));
+                int[] frameRange = { Convert.ToInt32(frameInfo[i, 2]), Convert.ToInt32(frameInfo[i, 3]) };
+                info.Add(key, frameRange);
+            }
+
+            // processing the recordings within the input path
+            // check whether there is any file in the folder/subfolders
+            // NOTE(review): this check searches subfolders, but the processing loop below only reads the *.wav files
+            // located directly inside inputPath - confirm which of the two is intended.
+            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
+            {
+                throw new ArgumentException("The folder of recordings is empty...");
+            }
+
+            int frameSize = config.FrameSize;
+            int finalBinCount = config.FinalBinCount;
+            bool useMelScale = config.FrequencyScaleType == FreqScaleType.Mel;
+            var settings = new SpectrogramSettings()
+            {
+                WindowSize = frameSize,
+
+                // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds
+                // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second
+                // The "WindowOverlap" is calculated to answer this question
+                // each 24 single-frames duration is equal to 1 second
+                // note that the "WindowOverlap" value should be recalculated if frame size is changed
+                // this has not yet been considered in the Config file!
+                WindowOverlap = 0.10725204,
+                DoMelScale = useMelScale,
+                MelBinCount = useMelScale ? finalBinCount : frameSize / 2,
+                NoiseReductionType = NoiseReductionType.None,
+                NoiseReductionParameter = 0.0,
+            };
+            double frameStep = frameSize * (1 - settings.WindowOverlap);
+            int minFreqBin = config.MinFreqBin;
+            int maxFreqBin = config.MaxFreqBin;
+            int numFreqBand = config.NumFreqBand;
+            int patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand;
+            int patchHeight = config.PatchHeight;
+            int numRandomPatches = config.NumRandomPatches;
+
+            // One list of patch matrices per frequency band, indexed by band number:
+            // random patches come from segments without the call, sequential patches from the positive frames.
+            var randomPatchLists = new List<double[,]>[numFreqBand];
+            var sequentialPatchLists = new List<double[,]>[numFreqBand];
+            for (int band = 0; band < numFreqBand; band++)
+            {
+                randomPatchLists[band] = new List<double[,]>();
+                sequentialPatchLists[band] = new List<double[,]>();
+            }
+
+            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
+            {
+                FileInfo fileInfo = filePath.ToFileInfo();
+
+                // process the wav file only if it is not empty
+                if (fileInfo.Length == 0)
+                {
+                    continue;
+                }
+
+                var recording = new AudioRecording(filePath);
+                settings.SourceFileName = recording.BaseName;
+
+                // The list of recordings is rebuilt for every file. Previously a single list was shared by all files
+                // and, when segmentation was disabled, each new file was appended to it, so earlier files were
+                // processed again and the segment index no longer belonged to the current file name.
+                List<AudioRecording> recordings;
+                if (config.DoSegmentation)
+                {
+                    recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep);
+                }
+                else
+                {
+                    recordings = new List<AudioRecording> { recording };
+                }
+
+                for (int i = 0; i < recordings.Count; i++)
+                {
+                    var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[i].WavReader);
+                    var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);
+
+                    if (config.DoNoiseReduction)
+                    {
+                        decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
+                    }
+
+                    // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
+                    double[,] inputMatrix;
+                    if (minFreqBin != 1 || maxFreqBin != finalBinCount)
+                    {
+                        inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
+                    }
+                    else
+                    {
+                        inputMatrix = decibelSpectrogram.Data;
+                    }
+
+                    // creating matrices from different freq bands of the source spectrogram
+                    List<double[,]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
+
+                    // check whether the file and the current second (i) has positive frames;
+                    // the keys are unique, so a direct lookup replaces scanning every entry
+                    var positiveFrameNumbers = new List<int>();
+                    int[] positiveRange;
+                    if (info.TryGetValue(Tuple.Create(fileInfo.Name, i), out positiveRange))
+                    {
+                        for (int frame = positiveRange[0]; frame <= positiveRange[1]; frame++)
+                        {
+                            positiveFrameNumbers.Add(frame);
+                        }
+                    }
+
+                    // every frame of the segment that is not positive does not contain a part of the call
+                    List<int> negativeFrameNumbers =
+                        Enumerable.Range(1, framesPerSegment).Except(positiveFrameNumbers).ToList();
+
+                    // making two matrices per band, one from positive frames and one from negative frames.
+                    var allPositiveFramesSubmatrices = new List<double[,]>();
+                    List<double[,]> allNegativeFramesSubmatrices;
+
+                    if (positiveFrameNumbers.Count != 0)
+                    {
+                        allNegativeFramesSubmatrices = new List<double[,]>();
+                        foreach (var submatrix in allSubmatrices)
+                        {
+                            // convert once per band instead of once per frame; frame numbers are 1-based
+                            double[][] frames = submatrix.ToJagged();
+
+                            double[][] positiveFrames = positiveFrameNumbers.Select(number => frames[number - 1]).ToArray();
+                            allPositiveFramesSubmatrices.Add(positiveFrames.ToMatrix());
+
+                            // the negative list used to be filled with the positive frames by mistake
+                            double[][] negativeFrames = negativeFrameNumbers.Select(number => frames[number - 1]).ToArray();
+                            allNegativeFramesSubmatrices.Add(negativeFrames.ToMatrix());
+                        }
+                    }
+                    else
+                    {
+                        allNegativeFramesSubmatrices = allSubmatrices;
+                    }
+
+                    // selecting patches from each freq band matrix and add them to the corresponding patch list
+                    for (int band = 0; band < allNegativeFramesSubmatrices.Count; band++)
+                    {
+                        if (allPositiveFramesSubmatrices.Count != 0)
+                        {
+                            // the segment contains the call of interest: take all patches of its positive frames
+                            // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
+                            double[,] downsampledPositiveMatrix = MaxPooling(allPositiveFramesSubmatrices[band], config.MaxPoolingFactor);
+                            int rows = downsampledPositiveMatrix.GetLength(0);
+                            int columns = downsampledPositiveMatrix.GetLength(1);
+                            sequentialPatchLists[band].Add(
+                                PatchSampling.GetPatches(
+                                    downsampledPositiveMatrix,
+                                    patchWidth,
+                                    patchHeight,
+                                    (rows / patchHeight) * (columns / patchWidth),
+                                    PatchSampling.SamplingMethod.Sequential).ToMatrix());
+                        }
+                        else
+                        {
+                            // select random patches from those segments that do not contain the call of interest
+                            // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
+                            double[,] downsampledNegativeMatrix = MaxPooling(allNegativeFramesSubmatrices[band], config.MaxPoolingFactor);
+                            randomPatchLists[band].Add(
+                                PatchSampling.GetPatches(
+                                    downsampledNegativeMatrix,
+                                    patchWidth,
+                                    patchHeight,
+                                    numRandomPatches,
+                                    PatchSampling.SamplingMethod.Random).ToMatrix());
+                        }
+
+                        // Alternative (not enabled): to also learn from the negative frames of segments that contain
+                        // the call of interest, replace the else-branch above by unconditional sampling of
+                        // allNegativeFramesSubmatrices[band]. When the downsampled negative matrix has fewer rows than
+                        // numRandomPatches, take all (rows / patchHeight) * (columns / patchWidth) patches with
+                        // SamplingMethod.Sequential; otherwise take numRandomPatches with SamplingMethod.Random.
+                    }
+                }
+            }
+
+            // k-means on the random patches gets one cluster less, the last cluster is reserved for the positive patches
+            int numClusters = config.NumClusters - 1;
+            var positiveClusterId = config.NumClusters - 1;
+
+            var semisupervisedClusteringOutput = new List<KmeansClustering.Output>();
+
+            for (int band = 0; band < numFreqBand; band++)
+            {
+                // clustering of random patches
+                // convert the list of random patch matrices of this band to one matrix
+                double[,] randomPatchMatrix = PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[band]);
+
+                // Apply PCA Whitening
+                var whitenedRandomPatches = PcaWhitening.Whitening(config.DoWhitening, randomPatchMatrix);
+
+                // Do k-means clustering
+                var unsupervisedOutput = KmeansClustering.Clustering(whitenedRandomPatches.Reversion, numClusters);
+                semisupervisedClusteringOutput.Add(unsupervisedOutput);
+            }
+
+            for (int band = 0; band < numFreqBand; band++)
+            {
+                // build one cluster out of positive frames
+                double[,] positivePatchMatrix = PatchSampling.ListOf2DArrayToOne2DArray(sequentialPatchLists[band]);
+
+                // Apply PCA Whitening
+                var whitenedPositivePatches = PcaWhitening.Whitening(config.DoWhitening, positivePatchMatrix);
+
+                // Do k-means clustering with a single cluster
+                var supervisedOutput = KmeansClustering.Clustering(whitenedPositivePatches.Reversion, 1);
+
+                // merge the output of two clustering obtained from supervised and unsupervised approaches
+                // NOTE(review): only the centroid and size dictionaries are extended; the Clusters collection of the
+                // output still describes the unsupervised clusters only.
+                var merged = semisupervisedClusteringOutput[band];
+                merged.ClusterIdCentroid.Add(positiveClusterId, supervisedOutput.ClusterIdCentroid.Values.First());
+                merged.ClusterIdSize.Add(positiveClusterId, supervisedOutput.ClusterIdSize.Values.First());
+            }
+
+            return semisupervisedClusteringOutput;
+        }
+ }
+}
diff --git a/src/AudioAnalysisTools/DSP/FeatureLearningSettings.cs b/src/AudioAnalysisTools/DSP/FeatureLearningSettings.cs
new file mode 100644
index 000000000..adee42b7e
--- /dev/null
+++ b/src/AudioAnalysisTools/DSP/FeatureLearningSettings.cs
@@ -0,0 +1,83 @@
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
+
+namespace AudioAnalysisTools.DSP
+{
+ using Acoustics.Shared.ConfigFile;
+
+ /// <summary>
+ /// Settings consumed by the feature learning methods in <c>FeatureLearning</c>.
+ /// Every property is initialised from the matching <c>Default*</c> constant declared in this class.
+ /// </summary>
+ public class FeatureLearningSettings : Config
+ {
+ // ---- default values of the properties declared further down ----
+ public const FreqScaleType DefaultFrequencyScaleType = FreqScaleType.Mel;
+
+ //public const int DefaultHertzInterval = 1000;
+
+ public const int DefaultFrameSize = 1024;
+
+ public const int DefaultFinalBinCount = 128;
+
+ public const int DefaultMinFreqBin = 1;
+
+ // by default the highest bin is the last bin, i.e. the full band is used
+ public const int DefaultMaxFreqBin = DefaultFinalBinCount;
+
+ public const int DefaultNumFreqBand = 1;
+
+ //public const int DefaultPatchWidth = (DefaultMaxFreqBin - DefaultMinFreqBin + 1) / DefaultNumFreqBand;
+
+ public const int DefaultPatchHeight = 1;
+
+ public const int DefaultFrameWindowLength = 1;
+
+ public const int DefaultStepSize = 1;
+
+ public const int DefaultNumRandomPatches = 4;
+
+ public const int DefaultNumClusters = 256;
+
+ public const bool DefaultDoNoiseReduction = true;
+
+ public const bool DefaultDoWhitening = true;
+
+ public const int DefaultMaxPoolingFactor = 1;
+
+ public const bool DefaultDoSegmentation = true;
+
+ public const double DefaultSubsegmentDurationInSeconds = 1.0;
+
+ /// <summary>
+ /// Gets or sets the frequency scale of the spectrogram. Feature learning enables the mel scale
+ /// (with <see cref="FinalBinCount"/> bins) only when this is <c>FreqScaleType.Mel</c>.
+ /// </summary>
+ public FreqScaleType FrequencyScaleType { get; set; } = DefaultFrequencyScaleType;
+
+ //public int HertzInterval { get; set; } = DefaultHertzInterval;
+
+ /// <summary>
+ /// Gets or sets the frame size, used as the spectrogram window size.
+ /// </summary>
+ public int FrameSize { get; set; } = DefaultFrameSize;
+
+ /// <summary>
+ /// Gets or sets the number of frequency bins of the spectrogram when the mel scale is used.
+ /// </summary>
+ public int FinalBinCount { get; set; } = DefaultFinalBinCount;
+
+ /// <summary>
+ /// Gets or sets the lowest frequency bin to use. A sub-band of the spectrogram is extracted
+ /// when this is not 1 or <see cref="MaxFreqBin"/> differs from <see cref="FinalBinCount"/>.
+ /// </summary>
+ public int MinFreqBin { get; set; } = DefaultMinFreqBin;
+
+ /// <summary>
+ /// Gets or sets the highest frequency bin to use.
+ /// </summary>
+ public int MaxFreqBin { get; set; } = DefaultMaxFreqBin;
+
+ /// <summary>
+ /// Gets or sets the number of frequency bands the spectrogram is split into. Each band is clustered
+ /// separately, and the patch width is (MaxFreqBin - MinFreqBin + 1) / NumFreqBand.
+ /// </summary>
+ public int NumFreqBand { get; set; } = DefaultNumFreqBand;
+
+ //public int PatchWidth { get; set; } = DefaultPatchWidth;
+
+ /// <summary>
+ /// Gets or sets the height of a patch, passed to patch sampling.
+ /// </summary>
+ public int PatchHeight { get; set; } = DefaultPatchHeight;
+
+ /// <summary>
+ /// Gets or sets the frame window length.
+ /// NOTE(review): presumably the number of consecutive similarity vectors concatenated during
+ /// feature extraction - confirm against the feature extraction code.
+ /// </summary>
+ public int FrameWindowLength { get; set; } = DefaultFrameWindowLength;
+
+ /// <summary>
+ /// Gets or sets the step size.
+ /// NOTE(review): presumably the step between consecutive frame windows during feature extraction
+ /// - confirm against the feature extraction code.
+ /// </summary>
+ public int StepSize { get; set; } = DefaultStepSize;
+
+ /// <summary>
+ /// Gets or sets the number of random patches requested from each (max-pooled) frequency band matrix.
+ /// </summary>
+ public int NumRandomPatches { get; set; } = DefaultNumRandomPatches;
+
+ /// <summary>
+ /// Gets or sets the number of clusters per frequency band. Semi-supervised learning runs k-means with
+ /// NumClusters - 1 clusters and adds one cluster built from the positive frames.
+ /// </summary>
+ public int NumClusters { get; set; } = DefaultNumClusters;
+
+ /// <summary>
+ /// Gets or sets a value indicating whether <c>PcaWhitening.NoiseReduction</c> is applied to the decibel spectrogram.
+ /// </summary>
+ public bool DoNoiseReduction { get; set; } = DefaultDoNoiseReduction;
+
+ /// <summary>
+ /// Gets or sets the flag that is passed to <c>PcaWhitening.Whitening</c> before clustering.
+ /// </summary>
+ public bool DoWhitening { get; set; } = DefaultDoWhitening;
+
+ /// <summary>
+ /// Gets or sets the factor by which <c>FeatureLearning.MaxPooling</c> downsamples each frequency band matrix.
+ /// </summary>
+ public int MaxPoolingFactor { get; set; } = DefaultMaxPoolingFactor;
+
+ /// <summary>
+ /// Gets or sets a value indicating whether each recording is split into sub-segments
+ /// (<c>PatchSampling.GetSubsegmentsSamples</c>) instead of being processed as a whole.
+ /// </summary>
+ public bool DoSegmentation { get; set; } = DefaultDoSegmentation;
+
+ /// <summary>
+ /// Gets or sets the sub-segment duration in seconds, used when <see cref="DoSegmentation"/> is true.
+ /// </summary>
+ public double SubsegmentDurationInSeconds { get; set; } = DefaultSubsegmentDurationInSeconds;
+ }
+}
diff --git a/src/AudioAnalysisTools/DSP/KmeansClustering.cs b/src/AudioAnalysisTools/DSP/KmeansClustering.cs
index 24808d2ba..decd2ac30 100644
--- a/src/AudioAnalysisTools/DSP/KmeansClustering.cs
+++ b/src/AudioAnalysisTools/DSP/KmeansClustering.cs
@@ -6,17 +6,10 @@ namespace AudioAnalysisTools.DSP
{
using System;
using System.Collections.Generic;
- using System.Drawing;
- using System.IO;
using System.Linq;
using Accord.MachineLearning;
using Accord.Math;
using Accord.Math.Distances;
- using Accord.Statistics.Kernels;
- using Acoustics.Shared;
- using Acoustics.Shared.Csv;
- using CsvHelper;
- using Zio;
public static class KmeansClustering
{
@@ -29,7 +22,7 @@ public class Output
public KMeansClusterCollection Clusters { get; set; }
}
- public static Output Clustering(double[,] patches, int numberOfClusters, string pathToCentroidFile)
+ public static Output Clustering(double[,] patches, int numberOfClusters)
{
// "Generator.Seed" sets a random seed for the framework's main internal number generator, which
// gets a reference to the random number generator used internally by the Accord.NET classes and methods.
@@ -55,8 +48,6 @@ public static Output Clustering(double[,] patches, int numberOfClusters, string
clusterIdCentroid.Add(clust.Index, clust.Centroid);
}
- Csv.WriteToCsv(pathToCentroidFile.ToFileInfo(), clusterIdCentroid);
-
var output = new Output()
{
ClusterIdCentroid = clusterIdCentroid,
diff --git a/src/AudioAnalysisTools/DSP/NoiseProfile.cs b/src/AudioAnalysisTools/DSP/NoiseProfile.cs
index 543126262..b6f1b2e7d 100644
--- a/src/AudioAnalysisTools/DSP/NoiseProfile.cs
+++ b/src/AudioAnalysisTools/DSP/NoiseProfile.cs
@@ -116,6 +116,8 @@ public static NoiseProfile CalculateMeanNoiseProfile(double[,] matrix)
/// the spectrogram with origin top-left
public static NoiseProfile CalculateMedianNoiseProfile(double[,] matrix)
{
+ return CalculatePercentileNoiseProfile(matrix, 50);
+ /*
int rowCount = matrix.GetLength(0);
int colCount = matrix.GetLength(1);
double[] noiseMedian = new double[colCount];
@@ -131,6 +133,36 @@ public static NoiseProfile CalculateMedianNoiseProfile(double[,] matrix)
maxsOfBins[col] = freqBin.Max();
}
+ var profile = new NoiseProfile()
+ {
+ NoiseMedian = noiseMedian,
+ NoiseSd = null,
+ NoiseThresholds = noiseMedian,
+ MinDb = minsOfBins,
+ MaxDb = maxsOfBins,
+ };
+ return profile;
+ */
+
+ }
+
+ public static NoiseProfile CalculatePercentileNoiseProfile(double[,] matrix, int percentile)
+ {
+ int rowCount = matrix.GetLength(0);
+ int colCount = matrix.GetLength(1);
+ double[] noiseMedian = new double[colCount];
+ double[] minsOfBins = new double[colCount];
+ double[] maxsOfBins = new double[colCount];
+
+ for (int col = 0; col < colCount; col++)
+ {
+ double[] freqBin = MatrixTools.GetColumn(matrix, col);
+ Array.Sort(freqBin);
+ noiseMedian[col] = freqBin[rowCount * percentile / 100];
+ minsOfBins[col] = freqBin.Min();
+ maxsOfBins[col] = freqBin.Max();
+ }
+
var profile = new NoiseProfile()
{
NoiseMedian = noiseMedian,
diff --git a/src/AudioAnalysisTools/DSP/PatchSampling.cs b/src/AudioAnalysisTools/DSP/PatchSampling.cs
index cfe05b028..5c1eb7d7e 100644
--- a/src/AudioAnalysisTools/DSP/PatchSampling.cs
+++ b/src/AudioAnalysisTools/DSP/PatchSampling.cs
@@ -6,8 +6,10 @@ namespace AudioAnalysisTools.DSP
{
using System;
using System.Collections.Generic;
+ using System.Linq;
using Accord.Math;
using TowseyLibrary;
+ using WavTools;
public static class PatchSampling
{
@@ -189,6 +191,28 @@ public static List GetFreqBandMatrices(double[,] matrix, int numberOf
return allSubmatrices;
}
+ /// <summary>
+ /// Extracts the band of frequency bins [minFreqBin, maxFreqBin] from a matrix.
+ /// Frequency bins are the matrix columns and are numbered from 1, i.e. bin 1 is column 0.
+ /// </summary>
+ /// <param name="matrix">the source matrix; every row is kept.</param>
+ /// <param name="minFreqBin">1-based number of the first frequency bin (column) to copy.</param>
+ /// <param name="maxFreqBin">1-based number of the last frequency bin (column) to copy, inclusive.</param>
+ /// <returns>a new matrix with the same number of rows and (maxFreqBin - minFreqBin + 1) columns.</returns>
+ /// <exception cref="ArgumentNullException">matrix is null.</exception>
+ /// <exception cref="ArgumentOutOfRangeException">the requested band is inverted or lies outside the matrix.</exception>
+ public static double[,] GetArbitraryFreqBandMatrix(double[,] matrix, int minFreqBin, int maxFreqBin)
+ {
+     if (matrix == null)
+     {
+         throw new ArgumentNullException(nameof(matrix));
+     }
+
+     int rowCount = matrix.GetLength(0);
+     int columnCount = matrix.GetLength(1);
+
+     // fail early with a descriptive message instead of an index error in the middle of the copy
+     if (minFreqBin < 1 || minFreqBin > columnCount)
+     {
+         throw new ArgumentOutOfRangeException(nameof(minFreqBin), minFreqBin, "minFreqBin must lie between 1 and the number of matrix columns.");
+     }
+
+     if (maxFreqBin < minFreqBin || maxFreqBin > columnCount)
+     {
+         throw new ArgumentOutOfRangeException(nameof(maxFreqBin), maxFreqBin, "maxFreqBin must lie between minFreqBin and the number of matrix columns.");
+     }
+
+     // bins are numbered from 1 but columns are indexed from 0
+     int firstColumn = minFreqBin - 1;
+     int bandWidth = maxFreqBin - minFreqBin + 1;
+     double[,] outputMatrix = new double[rowCount, bandWidth];
+
+     // copy the selected columns of every row into the new, narrower matrix
+     for (int row = 0; row < rowCount; row++)
+     {
+         for (int offset = 0; offset < bandWidth; offset++)
+         {
+             outputMatrix[row, offset] = matrix[row, firstColumn + offset];
+         }
+     }
+
+     return outputMatrix;
+ }
+
///
/// concatenate submatrices column-wise into one matrix, i.e., the number of rows for the output matrix
/// is equal to the number of rows of each of the frequency band matrices.
@@ -202,7 +226,7 @@ public static List GetFreqBandMatrices(double[,] matrix, int numberOf
int count = 0;
while (count < submatrices.Count)
{
- DoubleSquareArrayExtensions.AddToArray(matrix, submatrices[count], DoubleSquareArrayExtensions.MergingDirection.Column, submatrices[count].GetLength(1) * count);
+ matrix.AddToArray(submatrices[count], DoubleSquareArrayExtensions.MergingDirection.Column, submatrices[count].GetLength(1) * count);
count++;
}
@@ -238,21 +262,25 @@ public static List GetFreqBandMatrices(double[,] matrix, int numberOf
}
///
- /// convert a list of patch matrices to one matrix
+ /// convert a list of patch matrices to one matrix by row
+ /// patch matrices can have different row numbers but must have the same column number
///
public static double[,] ListOf2DArrayToOne2DArray(List listOfPatchMatrices)
{
- int numberOfPatches = listOfPatchMatrices[0].GetLength(0);
- double[,] allPatchesMatrix = new double[listOfPatchMatrices.Count * numberOfPatches, listOfPatchMatrices[0].GetLength(1)];
+ int sumNumberOfPatches = 0;
+ foreach (var matrix in listOfPatchMatrices)
+ {
+ sumNumberOfPatches = matrix.GetLength(0) + sumNumberOfPatches;
+ }
+
+ double[,] allPatchesMatrix = new double[sumNumberOfPatches, listOfPatchMatrices[0].GetLength(1)];
+ int start = 0;
+
for (int i = 0; i < listOfPatchMatrices.Count; i++)
{
var m = listOfPatchMatrices[i];
- if (m.GetLength(0) != numberOfPatches)
- {
- throw new ArgumentException("All arrays must be the same length");
- }
-
- DoubleSquareArrayExtensions.AddToArray(allPatchesMatrix, m, DoubleSquareArrayExtensions.MergingDirection.Row, i * m.GetLength(0));
+ allPatchesMatrix.AddToArray(m, DoubleSquareArrayExtensions.MergingDirection.Row, start);
+ start = m.GetLength(0) + start;
}
return allPatchesMatrix;
@@ -270,7 +298,6 @@ public static List GetFreqBandMatrices(double[,] matrix, int numberOf
int minY = matrix.GetLength(1);
// copying the original matrix to a new matrix (row by row)
-
for (int i = 0; i < minX; ++i)
{
Array.Copy(matrix, i * matrix.GetLength(1), newMatrix, i * matrix.GetLength(1), minY);
@@ -326,21 +353,35 @@ private static List GetSequentialPatches(double[,] matrix, int patchWi
///
private static List GetRandomPatches(double[,] matrix, int patchWidth, int patchHeight, int numberOfPatches)
{
+ // Note: to make the method more flexible in terms of selecting a random patch with any height and width,
+ // first a random number generator is defined for both patchHeight and patchWidth.
+ // However, the possibility of selecting duplicates, especially when selecting too many random numbers from
+ // a range (e.g., 1000 out of 1440), is high with a random generator.
+ // Since we are mostly interested in full-band patches, i.e., patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand,
+ // it is important to select non-duplicate patchHeights. Hence, instead of a random generator for patchHeight,
+ // a better solution is to make a sequence of numbers to be selected, shuffle them, and
+ // finally select the first n (number of required patches) numbers.
+
+ int rows = matrix.GetLength(0);
+ int columns = matrix.GetLength(1);
+
int seed = 100;
Random randomNumber = new Random(seed);
+
+ // not sure whether it is better to use new Guid() instead of randomNumber.Next()
+ var randomRowNumbers = Enumerable.Range(0, rows - patchHeight).OrderBy(x => randomNumber.Next()).Take(numberOfPatches).ToList();
List patches = new List();
- int rows = matrix.GetLength(0);
- int columns = matrix.GetLength(1);
- for (int i = 0; i < numberOfPatches; i++)
+ for (int i = 0; i < randomRowNumbers.Count; i++)
{
// selecting a random number from the height of the matrix
- int rowRandomNumber = randomNumber.Next(0, rows - patchHeight);
+ //int rowRandomNumber = randomNumber.Next(0, rows - patchHeight);
// selecting a random number from the width of the matrix
int columnRandomNumber = randomNumber.Next(0, columns - patchWidth);
- double[,] submatrix = MatrixTools.Submatrix(matrix, rowRandomNumber, columnRandomNumber,
- rowRandomNumber + patchHeight - 1, columnRandomNumber + patchWidth - 1);
+
+ double[,] submatrix = MatrixTools.Submatrix(matrix, randomRowNumbers[i], columnRandomNumber,
+ randomRowNumbers[i] + patchHeight - 1, columnRandomNumber + patchWidth - 1);
// convert a matrix to a vector by concatenating columns and
// store it to the array of vectors
@@ -401,5 +442,33 @@ private static List GetOverlappedRandomPatches(double[,] matrix, int p
return patches;
}
+
+ /// <summary>
+ /// Cuts a recording into consecutive, non-overlapping subsegments of the requested duration.
+ /// Samples left over at the end that do not fill a whole subsegment are dropped.
+ /// </summary>
+ /// <param name="recording">the recording to cut up.</param>
+ /// <param name="subsegmentDurationInSeconds">duration of each subsegment in seconds; must cover at least one sample.</param>
+ /// <param name="frameStep">
+ /// frame step in samples. NOTE(review): this value currently has no effect on the result - see the note in the body.
+ /// </param>
+ /// <returns>
+ /// the subsegments in their original order. Each one wraps a new WavReader built with the arguments
+ /// (samples, 1, 16, source sample rate) - presumably 1 channel and 16 bits per sample; confirm against WavReader.
+ /// </returns>
+ /// <exception cref="ArgumentNullException">recording is null.</exception>
+ /// <exception cref="ArgumentOutOfRangeException">the requested duration is shorter than one sample.</exception>
+ public static List<AudioRecording> GetSubsegmentsSamples(AudioRecording recording, double subsegmentDurationInSeconds, double frameStep)
+ {
+     if (recording == null)
+     {
+         throw new ArgumentNullException(nameof(recording));
+     }
+
+     var wavReader = recording.WavReader;
+     int sampleRate = wavReader.SampleRate;
+     int segmentSampleCount = (int)(wavReader.Time.TotalSeconds * sampleRate);
+     int subsegmentSampleCount = (int)(subsegmentDurationInSeconds * sampleRate);
+
+     // a zero-length subsegment would otherwise surface as a DivideByZeroException in the count below
+     if (subsegmentSampleCount <= 0)
+     {
+         throw new ArgumentOutOfRangeException(nameof(subsegmentDurationInSeconds), subsegmentDurationInSeconds, "The subsegment duration must cover at least one sample.");
+     }
+
+     // NOTE(review): the previous version floored the subsegment length to a whole number of frames and then
+     // took the larger of that value and the unrounded length. The floored value can never exceed the unrounded
+     // one, so the length was always left unchanged. That no-op has been removed; if subsegments are meant to be
+     // aligned to frame boundaries, the rounding still needs to be implemented here using frameStep.
+     int subsegmentCount = segmentSampleCount / subsegmentSampleCount;
+
+     var subsegments = new List<AudioRecording>();
+     for (int i = 0; i < subsegmentCount; i++)
+     {
+         double[] subsamples = DataTools.Subarray(wavReader.Samples, i * subsegmentSampleCount, subsegmentSampleCount);
+         var subsegmentReader = new Acoustics.Tools.Wav.WavReader(subsamples, 1, 16, sampleRate);
+         subsegments.Add(new AudioRecording(subsegmentReader));
+     }
+
+     return subsegments;
+ }
}
}
\ No newline at end of file
diff --git a/src/AudioAnalysisTools/DSP/PcaWhitening.cs b/src/AudioAnalysisTools/DSP/PcaWhitening.cs
index 7732f83a0..488330851 100644
--- a/src/AudioAnalysisTools/DSP/PcaWhitening.cs
+++ b/src/AudioAnalysisTools/DSP/PcaWhitening.cs
@@ -28,7 +28,7 @@ public class Output
public int Components { get; set; }
}
- public static Output Whitening(double[,] matrix)
+ public static Output Whitening(bool doWhitening, double[,] matrix)
{
if (matrix == null)
{
@@ -43,7 +43,7 @@ public static Output Whitening(double[,] matrix)
{
// the "Center" method only subtracts the mean.
Method = PrincipalComponentMethod.Center,
- Whiten = true,
+ Whiten = doWhitening,
};
pca.Learn(jaggedArray);
@@ -165,23 +165,20 @@ public static Output Whitening(double[,] matrix)
}
///
- /// Median Noise Reduction
+ /// 10-percentile Noise Reduction
///
public static double[,] NoiseReduction(double[,] matrix)
{
double[,] nrm = matrix;
- // calculate modal noise profile
- // NoiseProfile profile = NoiseProfile.CalculateModalNoiseProfile(matrix, sdCount: 0.0);
- NoiseProfile profile = NoiseProfile.CalculateMedianNoiseProfile(matrix);
+ // calculate 10-percentile noise profile
+ NoiseProfile profile = NoiseProfile.CalculatePercentileNoiseProfile(matrix, 10);
// smooth the noise profile
double[] smoothedProfile = DataTools.filterMovingAverage(profile.NoiseThresholds, width: 7);
nrm = SNR.TruncateBgNoiseFromSpectrogram(nrm, smoothedProfile);
- // nrm = SNR.NoiseReduce_Standard(nrm, smoothedProfile, nhBackgroundThreshold: 2.0);
-
return nrm;
}
}
diff --git a/src/AudioAnalysisTools/DSP/PowerSpectralDensity.cs b/src/AudioAnalysisTools/DSP/PowerSpectralDensity.cs
new file mode 100644
index 000000000..889391fef
--- /dev/null
+++ b/src/AudioAnalysisTools/DSP/PowerSpectralDensity.cs
@@ -0,0 +1,55 @@
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
+
+namespace AudioAnalysisTools.DSP
+{
+
+ using System.Linq;
+ using TowseyLibrary;
+
+ /// <summary>
+ /// Helpers that turn FFT coefficients into an energy spectrogram and an energy spectrogram into a power spectrum.
+ /// </summary>
+ public static class PowerSpectralDensity
+ {
+     /// <summary>
+     /// Squares every FFT coefficient, which yields an energy spectrogram of the same size.
+     /// Original author's note: MatrixTools.SquareValues is doing the same!
+     /// </summary>
+     /// <param name="fftCoefficients">the matrix of FFT coefficients; it is not modified.</param>
+     /// <returns>a new matrix in which each element is the square of the corresponding input element.</returns>
+     public static double[,] GetEnergyValues(double[,] fftCoefficients)
+     {
+         int rowCount = fftCoefficients.GetLength(0);
+         int columnCount = fftCoefficients.GetLength(1);
+         var squared = new double[rowCount, columnCount];
+
+         for (int row = 0; row < rowCount; row++)
+         {
+             for (int column = 0; column < columnCount; column++)
+             {
+                 double coefficient = fftCoefficients[row, column];
+                 squared[row, column] = coefficient * coefficient;
+             }
+         }
+
+         return squared;
+     }
+
+     /// <summary>
+     /// Averages the energy values of each column (frequency bin) to obtain the power spectrum or PSD.
+     /// Original author's note: SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram is doing the same!
+     /// </summary>
+     /// <param name="energySpectrogram">the energy spectrogram whose columns are averaged.</param>
+     /// <returns>an array with one average per column of the input.</returns>
+     public static double[] GetPowerSpectrum(double[,] energySpectrogram)
+     {
+         int binCount = energySpectrogram.GetLength(1);
+         var spectrum = new double[binCount];
+
+         for (int bin = 0; bin < binCount; bin++)
+         {
+             double[] binValues = MatrixTools.GetColumn(energySpectrogram, bin);
+             spectrum[bin] = binValues.Average();
+         }
+
+         return spectrum;
+     }
+ }
+}
diff --git a/src/AudioAnalysisTools/Indices/IndexCalculate.cs b/src/AudioAnalysisTools/Indices/IndexCalculate.cs
index 6f3d65d61..7b330de07 100644
--- a/src/AudioAnalysisTools/Indices/IndexCalculate.cs
+++ b/src/AudioAnalysisTools/Indices/IndexCalculate.cs
@@ -398,7 +398,7 @@ public static IndexCalculateResult Analysis(
deciBelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(deciBelSpectrogram, nhThreshold: 2.0);
// iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
- spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromSpectrogram(deciBelSpectrogram);
+ spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(deciBelSpectrogram);
// iv: CALCULATE SPECTRAL COVER.
// NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0
diff --git a/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSpectrogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSpectrogram.cs
new file mode 100644
index 000000000..d217f9b15
--- /dev/null
+++ b/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSpectrogram.cs
@@ -0,0 +1,71 @@
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
+
+namespace AudioAnalysisTools.StandardSpectrograms
+{
+ using System;
+ using Acoustics.Tools.Wav;
+ using DSP;
+ using WavTools;
+
+ /// <summary>
+ /// This class is designed to produce a full-bandwidth amplitude spectrogram.
+ /// </summary>
+ public class AmplitudeSpectrogram
+ {
+     /// <summary>
+     /// Initializes a new instance of the <see cref="AmplitudeSpectrogram"/> class.
+     /// If the recording is shorter than one second a message is logged and construction stops early:
+     /// Configuration and an empty Attributes object are set, but Data stays null.
+     /// </summary>
+     /// <param name="config">the spectrogram settings (window size, overlap, window function, mel options).</param>
+     /// <param name="wav">the audio from which the spectrogram is calculated.</param>
+     /// <exception cref="ArgumentNullException">config or wav is null.</exception>
+     public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav)
+     {
+         if (config == null)
+         {
+             throw new ArgumentNullException(nameof(config));
+         }
+
+         if (wav == null)
+         {
+             throw new ArgumentNullException(nameof(wav));
+         }
+
+         this.Configuration = config;
+         this.Attributes = new SpectrogramAttributes();
+
+         double minDuration = 1.0;
+         if (wav.Time.TotalSeconds < minDuration)
+         {
+             // NOTE(review): callers get a half-initialised object here (Data == null) and must check for it.
+             LoggedConsole.WriteLine("Signal must be at least {0} seconds long to produce a sonogram!", minDuration);
+             return;
+         }
+
+         // set attributes for the current recording and spectrogram type
+         this.Attributes.SampleRate = wav.SampleRate;
+         this.Attributes.Duration = wav.Time;
+         this.Attributes.NyquistFrequency = wav.SampleRate / 2;
+         this.Attributes.MaxAmplitude = wav.CalculateMaximumAmplitude();
+         this.Attributes.FrameDuration = TimeSpan.FromSeconds(config.WindowSize / (double)wav.SampleRate);
+
+         var recording = new AudioRecording(wav);
+         var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
+             recording,
+             config.WindowSize,
+             config.WindowOverlap,
+             config.WindowFunction);
+
+         // now recover required data
+         // epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value.
+         this.Attributes.Epsilon = fftdata.Epsilon;
+         this.Attributes.WindowPower = fftdata.WindowPower;
+         this.Attributes.FrameCount = fftdata.FrameCount;
+         this.Data = fftdata.AmplitudeSpectrogram;
+
+         // IF REQUIRED CONVERT TO MEL SCALE
+         if (config.DoMelScale)
+         {
+             // this mel scale conversion uses the "Greg integral" !
+             this.Data = MFCCStuff.MelFilterBank(this.Data, config.MelBinCount, this.Attributes.NyquistFrequency, 0, this.Attributes.NyquistFrequency);
+         }
+     }
+
+     /// <summary>
+     /// Gets or sets the settings that were used to calculate this spectrogram.
+     /// </summary>
+     public SpectrogramSettings Configuration { get; set; }
+
+     /// <summary>
+     /// Gets or sets the attributes of the recording and spectrogram (sample rate, duration, frame count, etc.).
+     /// </summary>
+     public SpectrogramAttributes Attributes { get; set; }
+
+     /// <summary>
+     /// Gets or sets the spectrogram data matrix of doubles.
+     /// It is null when the recording was too short to produce a spectrogram.
+     /// </summary>
+     public double[,] Data { get; set; }
+ }
+}
diff --git a/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs b/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs
index e1c22df78..0fc848440 100644
--- a/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs
+++ b/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs
@@ -72,6 +72,7 @@ public int NPointSmoothFFT
} // Number of points to smooth FFT spectra
public double epsilon { get; set; } //small value to prevent log of zero value
+
public bool DoPreemphasis { get; set; }
public int? MinFreqBand { get; set; }
diff --git a/src/AudioAnalysisTools/StandardSpectrograms/DecibelSpectrogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/DecibelSpectrogram.cs
new file mode 100644
index 000000000..94969aaa8
--- /dev/null
+++ b/src/AudioAnalysisTools/StandardSpectrograms/DecibelSpectrogram.cs
@@ -0,0 +1,191 @@
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
+
+namespace AudioAnalysisTools.StandardSpectrograms
+{
+ using System.Drawing;
+ using System.Drawing.Imaging;
+ using Acoustics.Tools.Wav;
+ using DSP;
+ using TowseyLibrary;
+
+ /// <summary>
+ /// A decibel spectrogram that is derived from an <see cref="AmplitudeSpectrogram"/> and then passed through
+ /// SNR.NoiseReduce using the configured noise reduction type.
+ /// There are two constructors.
+ /// </summary>
+ public class DecibelSpectrogram
+ {
+     /// <summary>
+     /// Initializes a new instance of the <see cref="DecibelSpectrogram"/> class.
+     /// This constructor requires config and audio objects.
+     /// It creates an amplitude spectrogram and hands it to the other constructor.
+     /// </summary>
+     /// <param name="config">the spectrogram settings.</param>
+     /// <param name="wav">the audio to convert.</param>
+     public DecibelSpectrogram(SpectrogramSettings config, WavReader wav)
+         : this(new AmplitudeSpectrogram(config, wav))
+     {
+     }
+
+     /// <summary>
+     /// Initializes a new instance of the <see cref="DecibelSpectrogram"/> class
+     /// from an existing amplitude spectrogram. Configuration and Attributes are shared with (not copied from)
+     /// the amplitude spectrogram.
+     /// </summary>
+     /// <param name="amplitudeSpectrogram">the amplitude spectrogram to convert to decibels.</param>
+     /// <exception cref="System.ArgumentNullException">amplitudeSpectrogram is null.</exception>
+     public DecibelSpectrogram(AmplitudeSpectrogram amplitudeSpectrogram)
+     {
+         if (amplitudeSpectrogram == null)
+         {
+             throw new System.ArgumentNullException(nameof(amplitudeSpectrogram));
+         }
+
+         this.Configuration = amplitudeSpectrogram.Configuration;
+         this.Attributes = amplitudeSpectrogram.Attributes;
+
+         // (ii) CONVERT AMPLITUDES TO DECIBELS
+         // NOTE(review): AmplitudeSpectrogram leaves Data null for recordings shorter than one second;
+         // confirm how MFCCStuff.DecibelSpectra behaves in that case.
+         this.Data = MFCCStuff.DecibelSpectra(amplitudeSpectrogram.Data, this.Attributes.WindowPower, this.Attributes.SampleRate, this.Attributes.Epsilon);
+
+         // (iii) NOISE REDUCTION
+         var tuple = SNR.NoiseReduce(this.Data, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter);
+         this.Data = tuple.Item1; // store data matrix
+
+         // NOTE(review): nothing assigns SnrData before this point, so during construction this test is
+         // always false and the modal noise profile (tuple.Item2) is discarded. Left as is because
+         // how an SNR instance should be created is not visible from here.
+         if (this.SnrData != null)
+         {
+             this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile
+         }
+     }
+
+     // TODO: a third constructor that cuts out the portion of an existing spectrogram between a start and an end time
+     // (copying DecibelsPerFrame, DecibelsNormalised, SigState and the matching rows of Data) was sketched here
+     // against the older SpectrogramStandard class but has not been ported to this class yet.
+
+     /// <summary>
+     /// Gets or sets the settings that were used to calculate this spectrogram.
+     /// </summary>
+     public SpectrogramSettings Configuration { get; set; }
+
+     /// <summary>
+     /// Gets or sets the attributes of the recording and spectrogram.
+     /// </summary>
+     public SpectrogramAttributes Attributes { get; set; }
+
+     /// <summary>
+     /// Gets or sets the spectrogram data matrix of doubles
+     /// </summary>
+     public double[,] Data { get; set; }
+
+     /// <summary>
+     /// Gets or sets instance of class SNR that stores info about signal energy and dB per frame
+     /// </summary>
+     public SNR SnrData { get; set; }
+
+     /// <summary>
+     /// Gets or sets the maximum amplitude. It is not set by the constructors of this class;
+     /// Attributes.MaxAmplitude holds the value calculated from the recording.
+     /// </summary>
+     public double MaxAmplitude { get; set; }
+
+     // TODO
+     // Need to calculate the following for decibel spectrograms only
+     // ##################################################################################################
+     // TODO The following properties need to be calculated within the DecibelSpectrogram class.
+
+     /// <summary>
+     /// Gets or sets decibels per signal frame
+     /// </summary>
+     public double[] DecibelsPerFrame { get; set; }
+
+     /// <summary>
+     /// Gets or sets the normalised decibels per signal frame
+     /// </summary>
+     public double[] DecibelsNormalised { get; set; }
+
+     /// <summary>
+     /// Gets or sets decibel reference with which to NormaliseMatrixValues the dB values for MFCCs
+     /// </summary>
+     public double DecibelReference { get; protected set; }
+
+     /// <summary>
+     /// Gets or sets integer coded signal state ie 0=non-vocalisation, 1=vocalisation, etc.
+     /// </summary>
+     public int[] SigState { get; protected set; }
+
+     // ################################# SPECTROGRAM METHODS BELOW HERE ###############################
+
+     /// <summary>
+     /// Draws the annotated spectrogram image for this instance and saves it as a PNG file.
+     /// </summary>
+     /// <param name="path">path of the image file to write.</param>
+     public void DrawSpectrogram(string path)
+     {
+         var image = DrawSpectrogramAnnotated(this.Data, this.Configuration, this.Attributes);
+         image.Save(path, ImageFormat.Png);
+     }
+
+     // ################################# STATIC METHODS BELOW HERE ###############################
+
+     /// <summary>
+     /// Normalises the spectrogram data between its minimum and its 95th percentile and draws it as an
+     /// image annotated with a title and frequency grid lines.
+     /// </summary>
+     /// <param name="data">the spectrogram data matrix.</param>
+     /// <param name="config">the spectrogram settings; supplies window size, mel-scale flag and source file name.</param>
+     /// <param name="attributes">the spectrogram attributes; supplies Nyquist frequency and duration.</param>
+     /// <returns>the annotated spectrogram image.</returns>
+     public static Image DrawSpectrogramAnnotated(double[,] data, SpectrogramSettings config, SpectrogramAttributes attributes)
+     {
+         // normalise the data between the minimum and the 95th percentile
+         int binCount = 100;
+         double min;
+         double max;
+         DataTools.MinMax(data, out min, out max);
+         double binWidth = (max - min) / binCount;
+         var histogram = Histogram.Histo(data, binCount, min, max, binWidth);
+
+         int percentile = 95;
+         int binId = Histogram.GetPercentileBin(histogram, percentile);
+
+         // convert the histogram bin index back to a data value: each bin spans binWidth.
+         // (the bin index was previously multiplied by the percentile number, which put the bound far above the data range)
+         double upperBound = min + (binId * binWidth);
+         var normedMatrix = MatrixTools.NormaliseInZeroOne(data, min, upperBound);
+
+         // to clip the low end as well, derive a lower bound in the same way from a low percentile (e.g. the 5th)
+         // and pass it to NormaliseInZeroOne in place of min.
+         int nyquist = attributes.NyquistFrequency;
+         int frameSize = config.WindowSize;
+
+         // assuming linear frequency scale
+         int finalBinCount = frameSize / 2;
+         var scaleType = FreqScaleType.Linear;
+
+         // if doing mel scale then
+         if (config.DoMelScale)
+         {
+             finalBinCount = 256; //128; //512; // 256; // 100; // 40; // 200; //
+             scaleType = FreqScaleType.Mel;
+         }
+
+         var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzGridInterval: 1000);
+
+         var image = SpectrogramTools.GetImage(normedMatrix, nyquist, config.DoMelScale);
+         var annotatedImage = SpectrogramTools.GetImageFullyAnnotated(image, config.SourceFileName + ": " + scaleType.ToString(), freqScale.GridLineLocations, attributes.Duration);
+         return annotatedImage;
+     }
+ }
+}
diff --git a/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs
index 7dda91e94..00bcc777b 100644
--- a/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs
+++ b/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs
@@ -1,34 +1,76 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
namespace AudioAnalysisTools.StandardSpectrograms
{
+ using System.Collections.Generic;
+ using System.Drawing.Imaging;
+ using System.IO;
using Acoustics.Tools.Wav;
using TowseyLibrary;
- public class EnergySpectrogram : BaseSonogram
+ ///
+ /// There are two CONSTRUCTORS
+ ///
+ public class EnergySpectrogram
{
- public EnergySpectrogram(SonogramConfig config, double[,] amplitudeSpectrogram)
- : base(config, amplitudeSpectrogram)
+ ///
+ /// Initializes a new instance of the class.
+ /// Use this constructor when you have config and audio objects
+ ///
+ public EnergySpectrogram(SpectrogramSettings config, WavReader wav)
+ : this(new AmplitudeSpectrogram(config, wav))
{
- this.Configuration = config;
- this.FrameCount = amplitudeSpectrogram.GetLength(0);
- this.Data = amplitudeSpectrogram;
- this.Make(this.Data);
}
- public EnergySpectrogram(AmplitudeSonogram sg)
- : base(sg.Configuration)
+ public EnergySpectrogram(AmplitudeSpectrogram amplitudeSpectrogram)
{
- this.Data = MatrixTools.SquareValues(sg.Data);
+ this.Configuration = amplitudeSpectrogram.Configuration;
+ this.Attributes = amplitudeSpectrogram.Attributes;
+
+ // CONVERT AMPLITUDES TO ENERGY
+ this.Data = MatrixTools.SquareValues(amplitudeSpectrogram.Data);
+ }
+
+ public SpectrogramSettings Configuration { get; set; }
+
+ public SpectrogramAttributes Attributes { get; set; }
+
+ ///
+ /// Gets or sets the spectrogram data matrix of doubles
+ /// Note matrix orientation: ROWS = spectra; COLUMNS = frequency bins
+ ///
+ public double[,] Data { get; set; }
+
+ /// <summary>
+ /// Takes the column averages of Data (as returned by MatrixTools.GetColumnAverages), writes them to
+ /// the file "path + .csv" and passes them to GraphsAndCharts.DrawGraph together with the file at path.
+ /// Nothing is returned despite the "Get" in the name.
+ /// </summary>
+ /// <param name="path">path handed to the graph drawing; ".csv" is appended to it for the data file.</param>
+ public void GetPsd(string path)
+ {
+     var columnAverages = MatrixTools.GetColumnAverages(this.Data);
+
+     // write the raw values first, then draw them
+     string csvPath = path + ".csv";
+     FileTools.WriteArray2File(columnAverages, csvPath);
+
+     var graphFile = new FileInfo(path);
+     GraphsAndCharts.DrawGraph(columnAverages, "Title", graphFile);
+ }
- public override void Make(double[,] amplitudeM)
+ public void DrawLogPsd(string path)
{
- this.Data = MatrixTools.SquareValues(amplitudeM);
+ var psd = MatrixTools.GetColumnAverages(this.Data);
+ var logPsd = DataTools.LogValues(psd);
+ FileTools.WriteArray2File(logPsd, path + ".csv");
+ GraphsAndCharts.DrawGraph(logPsd, "log PSD", new FileInfo(path));
+
+ //GraphsAndCharts.DrawGraph("Title", psd, width, height, 4 new FileInfo(path));
+ //image.Save(path, ImageFormat.Png);
+ }
+
+ public double[] GetLogPsd()
+ {
+ var psd = MatrixTools.GetColumnAverages(this.Data);
+ var logPsd = DataTools.LogValues(psd);
+ return logPsd;
+
}
}
}
diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramSettings.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramSettings.cs
new file mode 100644
index 000000000..fd401ce48
--- /dev/null
+++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramSettings.cs
@@ -0,0 +1,83 @@
+//
+// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
+//
+
+namespace AudioAnalysisTools.StandardSpectrograms
+{
+ using System;
+ using DSP;
+ using TowseyLibrary;
+
+ /// <summary>
+ /// The user-adjustable settings from which a spectrogram is calculated. Every setting except
+ /// SourceFileName has a default value.
+ /// </summary>
+ public class SpectrogramSettings
+ {
+ /// <summary>
+ /// Gets or sets SourceFileName
+ /// Although this is not a setting, we need to store it right at the beginning.
+ /// </summary>
+ public string SourceFileName { get; set; }
+
+ /// <summary>
+ /// Gets or sets the window (frame) size in samples. Default is 512.
+ /// </summary>
+ public int WindowSize { get; set; } = 512;
+
+ /// <summary>
+ /// Gets or sets the overlap between consecutive windows. Default is 0.0, i.e. no overlap.
+ /// Presumably a fraction of the window size - confirm against DSP_Frames.FrameStep.
+ /// </summary>
+ public double WindowOverlap { get; set; } = 0.0;
+
+ /// <summary>
+ /// Gets or sets exact frame step in samples - an alternative to overlap
+ /// Note that the default setting should be same as WindowSize i.e. no overlap.
+ /// NOTE(review): AmplitudeSpectrogram passes WindowOverlap, not WindowStep, to the frame extraction.
+ /// </summary>
+ public int WindowStep { get; set; } = 512;
+
+ /// <summary>
+ /// Gets or sets the name of the window function. Default is the name of WindowFunctions.HAMMING.
+ /// </summary>
+ public string WindowFunction { get; set; } = WindowFunctions.HAMMING.ToString();
+
+ /// <summary>
+ /// Gets or sets the smoothing window. Default is 3.
+ /// </summary>
+ public int SmoothingWindow { get; set; } = 3;
+
+ /// <summary>
+ /// Gets or sets a value indicating whether the spectrogram is converted to the mel frequency scale.
+ /// Default is false.
+ /// </summary>
+ public bool DoMelScale { get; set; } = false;
+
+ /// <summary>
+ /// Gets or sets MelBinCount
+ /// This is used only if DoMelScale = true.
+ /// </summary>
+ public int MelBinCount { get; set; } = 256;
+
+ /// <summary>
+ /// Gets or sets the type of noise reduction that DecibelSpectrogram passes to SNR.NoiseReduce.
+ /// Default is NoiseReductionType.None.
+ /// </summary>
+ public NoiseReductionType NoiseReductionType { get; set; } = NoiseReductionType.None;
+
+ /// <summary>
+ /// Gets or sets the parameter that is passed to SNR.NoiseReduce along with NoiseReductionType.
+ /// Default is 0.0.
+ /// </summary>
+ public double NoiseReductionParameter { get; set; } = 0.0;
+ }
+
+ /// <summary>
+ /// Values that describe a recording and the spectrogram calculated from it. They are filled in while the
+ /// spectrogram is being made, as opposed to SpectrogramSettings which are chosen beforehand.
+ /// </summary>
+ public class SpectrogramAttributes
+ {
+ /// <summary>
+ /// Gets or sets the sample rate of the recording.
+ /// </summary>
+ public int SampleRate { get; set; }
+
+ /// <summary>
+ /// Gets or sets the maximum amplitude of the recording.
+ /// </summary>
+ public double MaxAmplitude { get; set; }
+
+ /// <summary>
+ /// Gets or sets the Nyquist frequency. AmplitudeSpectrogram sets it to SampleRate / 2 (integer division).
+ /// </summary>
+ public int NyquistFrequency { get; set; }
+
+ /// <summary>
+ /// Gets or sets the duration of the recording.
+ /// </summary>
+ public TimeSpan Duration { get; set; }
+
+ /// <summary>
+ /// Gets or sets the number of frames in the spectrogram.
+ /// </summary>
+ public int FrameCount { get; set; }
+
+ /// <summary>
+ /// Gets or sets duration of full frame or window in seconds
+ /// </summary>
+ public TimeSpan FrameDuration { get; set; }
+
+ /// <summary>
+ /// Gets or sets the number of frames per second.
+ /// NOTE(review): AmplitudeSpectrogram does not assign this value.
+ /// </summary>
+ public double FramesPerSecond { get; set; } //= 1 / this.FrameStep;
+
+ /// <summary>
+ /// Gets or sets the width of a frequency bin.
+ /// Original author's note on the intended value: NyquistFrequency / (double)FreqBinCount.
+ /// NOTE(review): AmplitudeSpectrogram does not assign this value.
+ /// </summary>
+ public double FBinWidth { get; set; }
+
+ /// <summary>
+ /// Gets or sets epsilon, a signal dependent minimum amplitude value that prevents a subsequent log of zero.
+ /// </summary>
+ public double Epsilon { get; set; }
+
+ /// <summary>
+ /// Gets or sets the power of the window function, as reported by the frame/FFT extraction.
+ /// </summary>
+ public double WindowPower { get; set; }
+
+ /// <summary>
+ /// returns the duration of that part of frame not overlapped with following frame.
+ /// The frame step in samples is divided by the sample rate and returned as a TimeSpan.
+ /// Assumes window size and overlap fraction already known.
+ /// </summary>
+ /// <param name="windowSize">the window (frame) size in samples.</param>
+ /// <param name="windowOverlap">the window overlap, passed unchanged to DSP_Frames.FrameStep.</param>
+ /// <param name="sampleRate">the sample rate of the recording.</param>
+ /// <returns>the frame offset as a TimeSpan.</returns>
+ public static TimeSpan GetFrameOffset(int windowSize, double windowOverlap, int sampleRate)
+ {
+ int step = DSP_Frames.FrameStep(windowSize, windowOverlap);
+ return TimeSpan.FromSeconds(step / (double)sampleRate);
+ }
+ }
+}
diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs
index 5959be7bd..9e6125fa5 100644
--- a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs
+++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs
@@ -12,17 +12,16 @@ namespace AudioAnalysisTools.StandardSpectrograms
using System;
using System.Collections.Generic;
using System.Drawing;
- using System.Drawing.Imaging;
using System.IO;
using Acoustics.Shared;
- using AnalysisBase;
using ColorMine.ColorSpaces;
using DSP;
+ using LongDurationSpectrograms;
using TowseyLibrary;
- using WavTools;
public static class SpectrogramTools
{
+ /*
///
///
///
@@ -54,29 +53,6 @@ public static Image GetImageFromAudioSegment(FileInfo fiAudio, FileInfo fiConfig
}
throw new NotSupportedException("Code intentionally broken because it is out of date and not used");
-
- /*
- Image image = null;
- var settings = new AnalysisSettings
- {
- ConfigDict = config.GetDictionary(),
- SegmentAudioFile = fiAudio,
- ConfigFile = fiConfig,
- SegmentImageFile = fiImage,
- SegmentOutputDirectory = diOutputDir
- };
-
- // want to pass SampleRate of the original file.
- settings.SampleRateOfOriginalAudioFile = int.Parse(settings.ConfigDict[AnalysisKeys.ResampleRate]);
-
- analyser.BeforeAnalyze(settings);
-
- var results = analyser.Analyze(settings, new SegmentSettings(se));
-
- image = results.ImageFile == null ? null : Image.FromFile(results.ImageFile.FullName);
-
- analyser = null;
- return image;*/
}
else
{
@@ -185,7 +161,16 @@ public static BaseSonogram Audio2DecibelSonogram(FileInfo fiAudio, Dictionary
+ /// Used to normalise a spectrogram in 0,1
+ ///
+ /// the spectrogram data
+ /// set all values above to 1.0
+ /// set all values below to zero
+ /// used to de-emphasize the background
+ /// a normalised matrix of spectrogram data
public static double[,] NormaliseSpectrogramMatrix(double[,] matrix, double truncateMin, double truncateMax, double backgroundFilterCoeff)
{
double[,] m = MatrixTools.NormaliseInZeroOne(matrix, truncateMin, truncateMax);
@@ -193,41 +178,14 @@ public static BaseSonogram Audio2DecibelSonogram(FileInfo fiAudio, Dictionary
///
///
- ///
public static Image_MultiTrack Sonogram2MultiTrackImage(BaseSonogram sonogram, Dictionary configDict)
{
bool doHighlightSubband = false;
- //check if doing a reduced sonogram
- //int timeReductionFactor = 1;
- //if (configDict.ContainsKey(Keys.TIME_REDUCTION_FACTOR))
- // timeReductionFactor = ConfigDictionary.GetInt(Keys.TIME_REDUCTION_FACTOR, configDict);
- //int freqReductionFactor = 1;
- //if (configDict.ContainsKey(Keys.FREQ_REDUCTION_FACTOR))
- // freqReductionFactor = ConfigDictionary.GetInt(Keys.FREQ_REDUCTION_FACTOR, configDict);
- //if (!((timeReductionFactor == 1) && (freqReductionFactor == 1)))
- //{
- // sonogram.Data = ReduceDimensionalityOfSpectrogram(sonogram.Data, timeReductionFactor, freqReductionFactor);
- // return sonogram.GetImage(doHighlightSubband, add1kHzLines);
- //}
-
- // (iii) NOISE REDUCTION
- //bool doNoiseReduction = false;
- //if (configDict.ContainsKey(AnalysisKeys.NoiseDoReduction))
- // doNoiseReduction = ConfigDictionary.GetBoolean(AnalysisKeys.NoiseDoReduction, configDict);
- //if (doNoiseReduction)
- //{
- // //LoggedConsole.WriteLine("PERFORMING NOISE REDUCTION");
- // double bgThreshold = 3.0;
- // if (configDict.ContainsKey(AnalysisKeys.NoiseBgThreshold))
- // bgThreshold = ConfigDictionary.GetDouble(AnalysisKeys.NoiseBgThreshold, configDict);
- // var tuple = SNR.NoiseReduce(sonogram.Data, NoiseReductionType.STANDARD, bgThreshold);
- // sonogram.Data = tuple.Item1; // store data matrix
- //}
-
//ADD time and frequency scales
bool addScale = false;
if (configDict.ContainsKey(AnalysisKeys.AddTimeScale))
@@ -261,10 +219,10 @@ public static Image_MultiTrack Sonogram2MultiTrackImage(BaseSonogram sonogram, D
}
return mti;
+ }
+ */
- //mti.AddTrack(ImageTrack.GetWavEnvelopeTrack(sonogram)); //add segmentation track
- }//Sonogram2MultiTrackImage()
-
+ /*
public static Image Sonogram2Image(BaseSonogram sonogram, Dictionary configDict, double[,] hits, List scores, List predictedEvents, double eventThreshold)
{
Image_MultiTrack multiTrackImage = Sonogram2MultiTrackImage(sonogram, configDict);
@@ -289,7 +247,17 @@ public static Image Sonogram2Image(BaseSonogram sonogram, Dictionary
+ /// This is an experimental method to explore colour rendering of standard spectrograms.
+ /// Used to convert a standard decibel spectrogram into a colour version using
+ /// a colour rendering for three separate properties.
+ ///
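+ /// <param name="dbSpectrogramData">the raw decibel spectrogram data - assigned to red channel</param>
+ /// <param name="nrSpectrogramData">the noise reduced decibel spectrogram data - assigned to green channel</param>
+ /// <param name="hits">assigned to ridge colours</param>
+ /// <returns>colour-rendered spectrogram as image</returns>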
public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramData, double[,] nrSpectrogramData, byte[,] hits)
{
double truncateMin = -120.0;
@@ -305,9 +273,11 @@ public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramD
Bitmap image = new Bitmap(width, height);
Color[] ridgeColours = { Color.Red, Color.DarkMagenta, Color.Black, Color.LightPink };
- for (int y = 0; y < height; y++) //over all freq bins
+ // for all freq bins
+ for (int y = 0; y < height; y++)
{
- for (int x = 0; x < width; x++) //for pixels in the line
+ //for pixels in freq bin
+ for (int x = 0; x < width; x++)
{
// NormaliseMatrixValues and bound the value - use min bound, max and 255 image intensity range
double dbValue = dbSpectrogramNorm[x, y];
@@ -350,6 +320,7 @@ public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramD
// get colour for noise reduced portion
// superimpose ridge detection
+ // Have experimented with a bunch of ideas
if (hits[x, y] > 0)
{
//value = 0.60 + (nrSpectrogramNorm[x, y] * 0.40);
@@ -362,7 +333,7 @@ public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramD
image.SetPixel(x, height - y - 1, colour);
}
- }//end over all freq bins
+ } // freq bins
//image.Save(@"C:\SensorNetworks\Output\Sonograms\TEST3.png", ImageFormat.Png);
@@ -375,10 +346,10 @@ public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramD
/// Also uses the spectral "hits" data for highlighting the spectrogram.
/// ### IMPORTANT WARNING!!!! THIS METHOD ASSUMES THAT BOTH SPECTRAL MATRICES HAVE BEEN NORMALISED IN [0,1].
///
- ///
- ///
- ///
- ///
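+ /// <param name="dbSpectrogramNorm">the raw decibel spectrogram data - assigned to red channel</param>
+ /// <param name="nrSpectrogramNorm">the noise reduced decibel spectrogram data - assigned to green channel</param>
+ /// <param name="hits">assigned to ridge colours</param>
+ /// <returns>colour-rendered spectrogram as image</returns>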
public static Image CreateFalseColourDecibelSpectrogramForZooming(double[,] dbSpectrogramNorm, double[,] nrSpectrogramNorm, byte[,] hits)
{
int width = dbSpectrogramNorm.GetLength(0);
@@ -393,57 +364,42 @@ public static Image CreateFalseColourDecibelSpectrogramForZooming(double[,] dbSp
//var csp = new CubeHelix("cyanscale");
- for (int y = 0; y < height; y++) //over all freq bins
+ //over all freq bins
+ for (int y = 0; y < height; y++)
{
- for (int x = 0; x < width; x++) //for pixels in the line
+ //for pixels in the line
+ for (int x = 0; x < width; x++)
+ {
+ var colour = rsp.GetColorFromPallette(dbSpectrogramNorm[x, y]);
+
+ if (nrSpectrogramNorm[x, y] > 0.15)
{
- var colour = rsp.GetColorFromPallette(dbSpectrogramNorm[x, y]);
+ // get colour for noise reduced portion
+ int colourId = cch.GetColorID(nrSpectrogramNorm[x, y]);
- if (nrSpectrogramNorm[x, y] > 0.15)
+ // superimpose ridge detection
+ if (hits[x, y] > 0)
{
- // get colour for noise reduced portion
- int colourId = cch.GetColorID(nrSpectrogramNorm[x, y]);
-
- // superimpose ridge detection
- if (hits[x, y] > 0)
- {
- colourId += 20;
- if (colourId > 255)
- {
- colourId = 255;
- }
- }
-
- colour = cch.GetColorFromPallette(colourId);
+ colourId += 20;
+ if (colourId > 255)
+ {
+ colourId = 255;
}
-
- image.SetPixel(x, height - y - 1, colour);
}
- }//end over all freq bins
-
- return image;
- }
- public static Color[] GetCyanSpectrumPalette()
- {
- int count = 256 - 1;
- var palette = new Color[256];
- for (int i = 0; i <= count; i++)
- {
- double value = i / (double)count;
- int R = (int)Math.Round(value * value * value * count);
-
- //int G = i;
- int B = i;
- int G = (int)Math.Round(Math.Sqrt(value) * count);
+ colour = cch.GetColorFromPallette(colourId);
+ }
- //int B = (int)Math.Round(value * value * count);
- palette[i] = Color.FromArgb(255, R, G, B);
- }
+ image.SetPixel(x, height - y - 1, colour);
+ }
+ } // freq bins
- return palette;
+ return image;
}
+ /// <summary>
+ /// Another experimental method to colour render spectrograms, this time amplitude spectrograms.
+ /// </summary>
public static Image CreateFalseColourAmplitudeSpectrogram(double[,] spectrogramData, double[,] nrSpectrogramData, byte[,] hits)
{
double truncateMin = 0.0;
@@ -454,14 +410,12 @@ public static Image CreateFalseColourAmplitudeSpectrogram(double[,] spectrogramD
int width = spectrogramData.GetLength(0);
int height = spectrogramData.GetLength(1);
Bitmap image = new Bitmap(width, height);
- Color colour;
- Hsv myHsv;
- Rgb myRgb;
Color[] ridgeColours = { Color.Red, Color.Lime, Color.Blue, Color.Lime };
- for (int y = 0; y < height; y++) //over all freq bins
+ //over all freq bins
+ for (int y = 0; y < height; y++)
{
- for (int x = 0; x < width; x++) //for pixels in the line
+ for (int x = 0; x < width; x++)
{
// NormaliseMatrixValues and bound the value - use min bound, max and 255 image intensity range
double dbValue = spectrogramNorm[x, y];
@@ -478,48 +432,26 @@ public static Image CreateFalseColourAmplitudeSpectrogram(double[,] spectrogramD
c1 = 255;
}
- colour = Color.FromArgb(c1, c1, c1);
-
- //if (nrSpectrogramNorm[x, y] > 0)
- //{
- // // use HSV colour space
- // int bottomColour = 30; // to avoid using the reds
- // int topColour = 320; // to avoid using the magentas
- // int hueRange = topColour - bottomColour;
- // int hue = bottomColour + (int)Math.Floor(hueRange * nrSpectrogramNorm[x, y]);
-
- // double saturation = 1.0;
- // //double saturation = 0.75 + (nrSpectrogramNorm[x, y] * 0.25);
- // //double saturation = nrSpectrogramNorm[x, y] * 0.5;
- // //double saturation = (1 - nrSpectrogramNorm[x, y]) * 0.5;
-
- // double value = 1.0;
- // //double value = 0.60 + (nrSpectrogramNorm[x, y] * 0.40);
-
- // myHsv = new Hsv { H = hue, S = saturation, V = value };
- // myRgb = myHsv.To();
- // colour = Color.FromArgb((int)myRgb.R, (int)myRgb.G, (int)myRgb.B);
- //}
+ var colour = Color.FromArgb(c1, c1, c1);
// superimpose ridge detection
if (hits[x, y] > 0)
{
- //value = 0.60 + (nrSpectrogramNorm[x, y] * 0.40);
- //myHsv = new Hsv { H = 260, S = saturation, V = value };
- //myRgb = myHsv.To();
- //colour = Color.FromArgb((int)myRgb.R, (int)myRgb.G, (int)myRgb.B);
colour = ridgeColours[hits[x, y] - 1];
}
image.SetPixel(x, height - y - 1, colour);
}
- }//end over all freq bins
-
- //image.Save(@"C:\SensorNetworks\Output\Sonograms\TEST3.png", ImageFormat.Png);
+ }
return image;
}
+ /// <summary>
+ /// Method to make a spectrogram with SOX.
+ /// But the ConfigDictionary class is now obsolete.
+ /// The method should be deprecated some time.
+ /// </summary>
public static void MakeSonogramWithSox(FileInfo fiAudio, Dictionary configDict, FileInfo output)
{
var soxPath = new FileInfo(AppConfigHelper.SoxExe);
@@ -568,7 +500,7 @@ public static void MakeSonogramWithSox(FileInfo fiAudio, Dictionary
- public static double[] CalculateAvgSpectrumFromSpectrogram(double[,] spectrogram)
+ public static double[] CalculateAvgSpectrumFromEnergySpectrogram(double[,] spectrogram)
{
int frameCount = spectrogram.GetLength(0);
int freqBinCount = spectrogram.GetLength(1);
@@ -623,15 +555,14 @@ public static double[] CalculateAvgSpectrumFromSpectrogram(double[,] spectrogram
///
/// Use this method to average a decibel spectrogram
///
- public static double[] CalculateAvgDecibelSpectrumFromSpectrogram(double[,] spectrogram)
+ public static double[] CalculateAvgDecibelSpectrumFromDecibelSpectrogram(double[,] spectrogram)
{
- int frameCount = spectrogram.GetLength(0);
int freqBinCount = spectrogram.GetLength(1);
double[] avgSpectrum = new double[freqBinCount];
for (int j = 0; j < freqBinCount; j++)
{
var freqBin = MatrixTools.GetColumn(spectrogram, j);
- double av = SpectrogramTools.AverageAnArrayOfDecibelValues(freqBin);
+ double av = AverageAnArrayOfDecibelValues(freqBin);
avgSpectrum[j] = av;
}
@@ -660,6 +591,7 @@ public static double AverageAnArrayOfDecibelValues(double[] array)
return dB;
}
+ /*
public static double[] CalculateSumSpectrumFromSpectrogram(double[,] spectrogram)
{
int frameCount = spectrogram.GetLength(0);
@@ -680,6 +612,7 @@ public static double[] CalculateSumSpectrumFromSpectrogram(double[,] spectrogram
return sumSpectrum;
}
+ */
///
/// Returns AVERAGE POWER SPECTRUM (PSD) and VARIANCE OF POWER SPECTRUM.
@@ -698,7 +631,7 @@ public static double[] CalculateSumSpectrumFromSpectrogram(double[,] spectrogram
/// As well as calculating the av power spectrum, this method also returns a variance spectrum and a spectrum of the Coeff of Variation = var/mean.
///
/// this is an amplitude spectrum. Must square values to get power
- ///
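+ /// <returns>a tuple of three spectra: the average, the variance and the coefficient of variation of the spectral bins</returns>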
public static Tuple CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(double[,] amplitudeSpectrogram)
{
int frameCount = amplitudeSpectrogram.GetLength(0);
@@ -706,7 +639,9 @@ public static Tuple CalculateAvgSpectrumAndVarianc
double[] avgSpectrum = new double[freqBinCount]; // for average of the spectral bins
double[] varSpectrum = new double[freqBinCount]; // for variance of the spectral bins
double[] covSpectrum = new double[freqBinCount]; // for coeff of variance of the spectral bins
- for (int j = 0; j < freqBinCount; j++) // for all frequency bins
+
+ // for all frequency bins
+ for (int j = 0; j < freqBinCount; j++)
{
var freqBin = new double[frameCount]; // set up an array to take all values in a freq bin i.e. column of matrix
for (int r = 0; r < frameCount; r++)
@@ -722,18 +657,19 @@ public static Tuple CalculateAvgSpectrumAndVarianc
}
return Tuple.Create(avgSpectrum, varSpectrum, covSpectrum);
- } // CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram()
+ }
///
- /// This method assumes P.D. Welch's method has been used to calculate a PSD.
+ /// Calculates Stuart Gage's NDSI acoustic index from the Power Spectrum derived from a spectrogram.
+ /// This method assumes P.D. Welch's method has been used to calculate the PSD.
/// See method above: CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram()
///
/// power spectral density
- ///
- ///
- ///
- ///
- ///
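+ /// <param name="samplerate">original sample rate of the recording. Only used to get nyquist</param>
+ /// <param name="lowBound">low ndsi bound</param>
+ /// <param name="midBound">mid ndsi bound</param>
+ /// <param name="topBound">top ndsi bound</param>
+ /// <returns>ndsi</returns>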
public static double CalculateNdsi(double[] psd, int samplerate, int lowBound, int midBound, int topBound)
{
int nyquist = samplerate / 2;
@@ -741,35 +677,35 @@ public static double CalculateNdsi(double[] psd, int samplerate, int lowBound, i
double binWidth = nyquist / (double)binCount;
// skip lower 1kHz bin;
- int countOf1kHbin = (int)Math.Floor(lowBound / binWidth);
- int countOf2kHbin = (int)Math.Floor(midBound / binWidth);
- int countOf8kHbin = (int)Math.Floor(topBound / binWidth);
+ int countOf1KHbin = (int)Math.Floor(lowBound / binWidth);
+ int countOf2KHbin = (int)Math.Floor(midBound / binWidth);
+ int countOf8KHbin = (int)Math.Floor(topBound / binWidth);
// error checking - required for marine recordings where SR=2000.
- // all this is arbitrary hack to something working for marine recordings. Will not affect terrestrial recordings
- if (countOf8kHbin >= binCount)
+ // all this is an arbitrary hack to get something working for marine recordings. Will not affect terrestrial recordings
+ if (countOf8KHbin >= binCount)
{
- countOf8kHbin = binCount - 2;
+ countOf8KHbin = binCount - 2;
}
- if (countOf2kHbin >= countOf8kHbin)
+ if (countOf2KHbin >= countOf8KHbin)
{
- countOf2kHbin = countOf8kHbin - 100;
+ countOf2KHbin = countOf8KHbin - 100;
}
- if (countOf1kHbin >= countOf2kHbin)
+ if (countOf1KHbin >= countOf2KHbin)
{
- countOf1kHbin = countOf2kHbin - 10;
+ countOf1KHbin = countOf2KHbin - 10;
}
double anthropoEnergy = 0.0;
- for (int i = countOf1kHbin; i < countOf2kHbin; i++)
+ for (int i = countOf1KHbin; i < countOf2KHbin; i++)
{
anthropoEnergy += psd[i];
}
double biophonyEnergy = 0.0;
- for (int i = countOf2kHbin; i < countOf8kHbin; i++)
+ for (int i = countOf2KHbin; i < countOf8KHbin; i++)
{
biophonyEnergy += psd[i];
}
@@ -785,7 +721,6 @@ public static Tuple HistogramOfSpectralPeaks(double[,] spectrogram
{
if (spectrogram == null)
{
- return null;
throw new ArgumentNullException(nameof(spectrogram));
}
@@ -824,6 +759,7 @@ public static Tuple HistogramOfSpectralPeaks(double[,] spectrogram
return DataTools.Submatrix(m, 0, c1, m.GetLength(0) - 1, c2);
}
+ /*
///
/// Extracts an acoustic event from a sonogram given the location of a user defined rectangular marquee.
/// NOTE: Nyquist value is used ONLY if using mel scale.
@@ -849,6 +785,7 @@ public static Tuple HistogramOfSpectralPeaks(double[,] spectrogram
AcousticEvent.Freq2BinIDs(doMelscale, minHz, maxHz, nyquist, binWidth, out c1, out c2);
return DataTools.Submatrix(m, r1, c1, r2, c2);
}
+ */
public static double[] ExtractModalNoiseSubband(double[] modalNoise, int minHz, int maxHz, bool doMelScale, int nyquist, double binWidth)
{
@@ -881,7 +818,7 @@ public static void DrawGridLinesOnImage(Bitmap bmp, TimeSpan startOffset, TimeSp
{
FrequencyScale.DrawFrequencyLinesOnImage(bmp, freqScale, includeLabels: true);
- // we have stopped drawing temporal gridlines on these spectrograms. Create unnecessary clutter.
+ // We have stopped drawing temporal gridlines on these spectrograms. They create unnecessary clutter.
//DrawTimeLinesOnImage(bmp, startOffset, fullDuration, xAxisTicInterval);
}
@@ -908,5 +845,33 @@ public static void DrawTimeLinesOnImage(Bitmap bmp, TimeSpan startOffset, TimeSp
// #######################################################################################################################################
// ### ABOVE METHODS DRAW TIME GRID LINES ON SPECTROGRAMS ####################################################################################
// #######################################################################################################################################
+
+ /// <summary>Draws frequency lines via FrequencyScale.DrawFrequencyLinesOnImage, then combines a title bar, a time track, the image and the time track again into one image. Throws ArgumentNullException when image is null.</summary>
+ public static Image GetImageFullyAnnotated(Image image, string title, int[,] gridLineLocations, TimeSpan duration)
+ {
+ if (image == null)
+ {
+ throw new ArgumentNullException(nameof(image));
+ }
+
+ FrequencyScale.DrawFrequencyLinesOnImage((Bitmap)image, gridLineLocations, includeLabels: true); // the cast requires the passed image to be a Bitmap; presumably the lines are drawn onto that same object - confirm
+
+ var titleBar = LDSpectrogramRGB.DrawTitleBarOfGrayScaleSpectrogram(title, image.Width);
+ var timeBmp = ImageTrack.DrawTimeTrack(duration, image.Width);
+ var list = new List { titleBar, timeBmp, image, timeBmp }; // the same time track object is listed both before and after the image
+ var compositeImage = ImageTools.CombineImagesVertically(list); // presumably stacks the list entries top to bottom in list order - confirm
+ return compositeImage;
+ }
+
+ public static Image GetImage(double[,] data, int nyquist, bool DoMel) // Returns the image produced by BaseSonogram.GetSonogramImage for the given data matrix.
+ {
+ int subBandMinHz = 1000; // sub-band bounds are still passed on, although the highlight flag below is false
+ int subBandMaxHz = 9000;
+ bool doHighlightSubband = false;
+
+ int maxFrequency = nyquist; // the maximum frequency passed on is set equal to the nyquist
+ var image = BaseSonogram.GetSonogramImage(data, nyquist, maxFrequency, DoMel, 1, doHighlightSubband, subBandMinHz, subBandMaxHz); // NOTE(review): the meaning of the literal 1 is not visible here - confirm against GetSonogramImage
+ return image;
+ }
}
}
diff --git a/src/TowseyLibrary/Histogram.cs b/src/TowseyLibrary/Histogram.cs
index b38da37ba..5f0b31e22 100644
--- a/src/TowseyLibrary/Histogram.cs
+++ b/src/TowseyLibrary/Histogram.cs
@@ -147,9 +147,6 @@ public static int[] Histo(byte[,] data, out byte min, out byte max)
///
/// HISTOGRAM from a matrix of double
///
- ///
- ///
- ///
public static int[] Histo(double[,] data, int binCount)
{
double min;
@@ -158,7 +155,6 @@ public static int[] Histo(double[,] data, int binCount)
double binWidth = (max - min) / binCount;
//LoggedConsole.WriteLine("data min=" + min + " data max=" + max + " binwidth=" + binWidth);
-
return Histo(data, binCount, min, max, binWidth);
}
@@ -322,10 +318,6 @@ public static int[] Histo(int[] data, int binCount, out double binWidth, out int
///
/// make histogram of integers where each bin has unit width
///
- ///
- ///
- ///
- ///
public static int[] Histo(int[] data, out int min, out int max)
{
int length = data.Length;
@@ -365,9 +357,6 @@ public static void GetHistogramOfWaveAmplitudes(double[] waveform, int window, o
///
/// Returns the bin ID that coincides with the passed percentile
///
- ///
- ///
- ///
public static int GetPercentileBin(int[] histogram, int percentile)
{
if (percentile > 99)
diff --git a/tests/Acoustics.Test/Acoustics.Test.csproj b/tests/Acoustics.Test/Acoustics.Test.csproj
index 65e4d69de..028424605 100644
--- a/tests/Acoustics.Test/Acoustics.Test.csproj
+++ b/tests/Acoustics.Test/Acoustics.Test.csproj
@@ -1,4 +1,4 @@
-
+
diff --git a/tests/Acoustics.Test/AnalysisPrograms/Draw/Zooming/DrawZoomingTests.cs b/tests/Acoustics.Test/AnalysisPrograms/Draw/Zooming/DrawZoomingTests.cs
index 38f353637..a95002e43 100644
--- a/tests/Acoustics.Test/AnalysisPrograms/Draw/Zooming/DrawZoomingTests.cs
+++ b/tests/Acoustics.Test/AnalysisPrograms/Draw/Zooming/DrawZoomingTests.cs
@@ -1,4 +1,4 @@
-//
+//
// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
//
diff --git a/tests/Acoustics.Test/AudioAnalysisTools/DSP/KmeansClusteringTests.cs b/tests/Acoustics.Test/AudioAnalysisTools/DSP/KmeansClusteringTests.cs
index 97adb3f78..91fe7832f 100644
--- a/tests/Acoustics.Test/AudioAnalysisTools/DSP/KmeansClusteringTests.cs
+++ b/tests/Acoustics.Test/AudioAnalysisTools/DSP/KmeansClusteringTests.cs
@@ -26,7 +26,6 @@ public class KmeansClusteringTests
private DirectoryInfo outputDirectory;
[TestInitialize]
-
public void Setup()
{
this.outputDirectory = PathHelper.GetTempDir();
@@ -133,7 +132,7 @@ public void TestKmeansClustering()
// Do k-means clustering
string pathToClusterCsvFile = Path.Combine(outputDir.FullName, "ClusterCentroids" + i.ToString() + ".csv");
- var clusteringOutput = KmeansClustering.Clustering(patchMatrix, numberOfClusters, pathToClusterCsvFile);
+ var clusteringOutput = KmeansClustering.Clustering(patchMatrix, numberOfClusters);
// sorting clusters based on size and output it to a csv file
Dictionary clusterIdSize = clusteringOutput.ClusterIdSize;
@@ -182,7 +181,7 @@ public void TestKmeansClustering()
clusterImage.Save(outputClusteringImage);
}
- //+++++++++++++++++++++++++++++++++++++++++++++++++++++Processing and generating features for the target spectrogram
+ //+++++++++++++++++++++++++++++++++++++++++++Reconstructing a target spectrogram from sequential patches and the cluster centroids
var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
var recording2 = new AudioRecording(recording2Path);
var sonogram2 = new SpectrogramStandard(sonoConfig, recording2.WavReader);
diff --git a/tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTests.cs b/tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTests.cs
index cfa460af7..861ab45e9 100644
--- a/tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTests.cs
+++ b/tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTests.cs
@@ -68,7 +68,7 @@ public void PcaWhiteningDefault()
sonogram.Data = dataMatrix;
// DO PCA WHITENING
- var whitenedSpectrogram = PcaWhitening.Whitening(sonogram.Data);
+ var whitenedSpectrogram = PcaWhitening.Whitening(true, sonogram.Data);
// DO UNIT TESTING
// check if the dimensions of the reverted spectrogram (second output of the pca whitening) is equal to the input matrix
@@ -124,10 +124,10 @@ public void TestPcaWhitening()
double[,] sequentialPatchMatrix = sequentialPatches.ToMatrix();
// DO PCA WHITENING
- var whitenedSpectrogram = PcaWhitening.Whitening(sequentialPatchMatrix);
+ var whitenedSpectrogram = PcaWhitening.Whitening(true, sequentialPatchMatrix);
// reconstructing the spectrogram from sequential patches and the projection matrix obtained from random patches
- var projectionMatrix = whitenedSpectrogram.ProjectionMatrix;//whitenedSpectrogram.projectionMatrix;
+ var projectionMatrix = whitenedSpectrogram.ProjectionMatrix;
var eigenVectors = whitenedSpectrogram.EigenVectors;
var numComponents = whitenedSpectrogram.Components;
double[,] reconstructedSpec = PcaWhitening.ReconstructSpectrogram(projectionMatrix, sequentialPatchMatrix, eigenVectors, numComponents);
diff --git a/tests/Acoustics.Test/AudioAnalysisTools/DSP/UnsupervisedFeatureLearningTest.cs b/tests/Acoustics.Test/AudioAnalysisTools/DSP/UnsupervisedFeatureLearningTest.cs
index 0a406516b..242892789 100644
--- a/tests/Acoustics.Test/AudioAnalysisTools/DSP/UnsupervisedFeatureLearningTest.cs
+++ b/tests/Acoustics.Test/AudioAnalysisTools/DSP/UnsupervisedFeatureLearningTest.cs
@@ -1,4 +1,4 @@
-//
+//
// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
//
@@ -33,13 +33,15 @@ public void TestFeatureLearning()
{
// var outputDir = this.outputDirectory;
var resultDir = PathHelper.ResolveAssetPath("FeatureLearning");
- var folderPath = Path.Combine(resultDir, "random_audio_segments");
+ var folderPath = Path.Combine(resultDir, "random_audio_segments"); // Liz
+
// PathHelper.ResolveAssetPath(@"C:\Users\kholghim\Mahnoosh\PcaWhitening\random_audio_segments\1192_1000");
// var resultDir = PathHelper.ResolveAssetPath(@"C:\Users\kholghim\Mahnoosh\PcaWhitening");
var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png");
+
// var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp");
// +++++++++++++++++++++++++++++++++++++++++++++++++patch sampling from 1000 random 1-min recordings from Gympie
@@ -64,6 +66,7 @@ public void TestFeatureLearning()
var sonoConfig = new SonogramConfig
{
WindowSize = frameSize,
+
// since each 24 frames duration is equal to 1 second
WindowOverlap = 0.1028,
DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false,
@@ -71,13 +74,42 @@ public void TestFeatureLearning()
NoiseReductionType = NoiseReductionType.None,
};
- int numFreqBand = 4;
- int patchWidth = finalBinCount / numFreqBand;
+ /*
+ // testing
+ var recordingPath3 = PathHelper.ResolveAsset(folderPath, "SM304264_0+1_20160421_024539_46-47min.wav");
+ var recording3 = new AudioRecording(recordingPath3);
+ var sonogram3 = new SpectrogramStandard(sonoConfig, recording3.WavReader);
+
+ // DO DRAW SPECTROGRAM
+ var image4 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+ image4.Save(outputMelImagePath, ImageFormat.Png);
+
+ // Do RMS normalization
+ sonogram3.Data = SNR.RmsNormalization(sonogram3.Data);
+ var image5 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+ image5.Save(outputNormMelImagePath, ImageFormat.Png);
+
+ // NOISE REDUCTION
+ sonogram3.Data = PcaWhitening.NoiseReduction(sonogram3.Data);
+ var image6 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+ image6.Save(outputNoiseReducedMelImagePath, ImageFormat.Png);
+
+ //testing
+ */
+
+ // Define the minFreqBin and maxFreqBin to be able to work at arbitrary frequency bin bounds.
+ // The default value is minFreqBin = 1 and maxFreqBin = finalBinCount.
+ // To work with arbitrary frequency bin bounds we need to manually set these two parameters.
+ int minFreqBin = 40; //1
+ int maxFreqBin = 80; //finalBinCount;
+ int numFreqBand = 1; //4;
+ int patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand; // finalBinCount / numFreqBand;
int patchHeight = 1; // 2; // 4; // 16; // 6; // Frame size
- int numRandomPatches = 80; // 40; // 20; // 30; // 100; // 500; //
+ int numRandomPatches = 20; // 40; // 80; // 30; // 100; // 500; //
+
// int fileCount = Directory.GetFiles(folderPath, "*.wav").Length;
- // Define variable number of "randomPatch" lists based on "noOfFreqBand"
+ // Define variable number of "randomPatch" lists based on "numFreqBand"
Dictionary> randomPatchLists = new Dictionary>();
for (int i = 0; i < numFreqBand; i++)
{
@@ -96,13 +128,14 @@ public void TestFeatureLearning()
}
}
*/
+ double[,] inputMatrix;
foreach (string filePath in Directory.GetFiles(folderPath, "*.wav"))
{
- FileInfo f = filePath.ToFileInfo();
+ FileInfo fileInfo = filePath.ToFileInfo();
// process the wav file if it is not empty
- if (f.Length != 0)
+ if (fileInfo.Length != 0)
{
var recording = new AudioRecording(filePath);
sonoConfig.SourceFName = recording.BaseName;
@@ -116,14 +149,26 @@ public void TestFeatureLearning()
// sonogram.Data = SNR.NoiseReduce_Median(sonogram.Data, nhBackgroundThreshold: 2.0);
sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);
+ // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
+ if (minFreqBin != 1 || maxFreqBin != finalBinCount)
+ {
+ inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram.Data, minFreqBin, maxFreqBin);
+ }
+ else
+ {
+ inputMatrix = sonogram.Data;
+ }
+
// creating matrices from different freq bands of the source spectrogram
- List allSubmatrices = PatchSampling.GetFreqBandMatrices(sonogram.Data, numFreqBand);
+ List allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
// Second: selecting random patches from each freq band matrix and add them to the corresponding patch list
int count = 0;
while (count < allSubmatrices.Count)
{
- randomPatchLists[string.Format("randomPatch{0}", count.ToString())].Add(PatchSampling.GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numRandomPatches, PatchSampling.SamplingMethod.Random).ToMatrix());
+ randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
+ .GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numRandomPatches,
+ PatchSampling.SamplingMethod.Random).ToMatrix());
count++;
}
}
@@ -135,7 +180,7 @@ public void TestFeatureLearning()
}
// convert list of random patches matrices to one matrix
- int numberOfClusters = 256; // 128; // 64; // 32; // 10; // 50;
+ int numberOfClusters = 50; //256; // 128; // 64; // 32; // 10; //
List allBandsCentroids = new List();
List allClusteringOutput = new List();
@@ -144,13 +189,23 @@ public void TestFeatureLearning()
double[,] patchMatrix = randomPatches[i];
// Apply PCA Whitening
- var whitenedSpectrogram = PcaWhitening.Whitening(patchMatrix);
+ var whitenedSpectrogram = PcaWhitening.Whitening(true, patchMatrix);
// Do k-means clustering
- string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv");
- var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numberOfClusters, pathToClusterCsvFile);
+ var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numberOfClusters);
+
// var clusteringOutput = KmeansClustering.Clustering(patchMatrix, noOfClusters, pathToClusterCsvFile);
+ // writing centroids to a csv file
+ // note that Csv.WriteToCsv can't write data types like dictionary (problems with arrays)
+ // I converted the dictionary values to a matrix and used the Csv.WriteMatrixToCsv
+ // there might be a better way to do this
+ string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv");
+ var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray();
+ Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix());
+
+ //Csv.WriteToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids);
+
// sorting clusters based on size and output it to a csv file
Dictionary clusterIdSize = clusteringOutput.ClusterIdSize;
int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);
@@ -177,7 +232,8 @@ public void TestFeatureLearning()
// convert each centroid to a matrix in order of cluster ID
// double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight);
// OR: in order of cluster size
- double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight);
+ double[,] cent =
+ MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight);
// normalize each centroid
double[,] normCent = DataTools.normalise(cent);
@@ -197,16 +253,19 @@ public void TestFeatureLearning()
var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);
+
// clusterImage.Save(outputClusterImagePath, ImageFormat.Bmp);
var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp");
+
// Image bmp = ImageTools.ReadImage2Bitmap(filename);
FrequencyScale.DrawFrequencyLinesOnImage((Bitmap)clusterImage, freqScale, includeLabels: false);
clusterImage.Save(outputClusteringImage);
}
- //+++++++++++++++++++++++++++++++++++++++++++++++++++++Processing and generating features for the target spectrogram
+ //+++++++++++++++++++++++++++++++++++++++++++++++++++++Processing and generating features for the target recordings
var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
+
// var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_353972_20160303_055854_60_0.wav"); // folder with 1000 files
// var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_353887_20151230_042625_60_0.wav"); // folder with 1000 files
// var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_354744_20151018_053923_60_0.wav"); // folder with 100 files
@@ -215,28 +274,42 @@ public void TestFeatureLearning()
var sonogram2 = new SpectrogramStandard(sonoConfig, recording2.WavReader);
// DO DRAW SPECTROGRAM
- var image = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+ var image = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "MELSPECTROGRAM: " + fst.ToString(),
+ freqScale.GridLineLocations);
image.Save(outputMelImagePath, ImageFormat.Png);
// Do RMS normalization
sonogram2.Data = SNR.RmsNormalization(sonogram2.Data);
- var image2 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+ var image2 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(),
+ "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
image2.Save(outputNormMelImagePath, ImageFormat.Png);
// NOISE REDUCTION
sonogram2.Data = PcaWhitening.NoiseReduction(sonogram2.Data);
- var image3 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+ var image3 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(),
+ "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
image3.Save(outputNoiseReducedMelImagePath, ImageFormat.Png);
+ // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
+ if (minFreqBin != 1 || maxFreqBin != finalBinCount)
+ {
+ inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram2.Data, minFreqBin, maxFreqBin);
+ }
+ else
+ {
+ inputMatrix = sonogram2.Data;
+ }
+
// extracting sequential patches from the target spectrogram
- List allSubmatrices2 = PatchSampling.GetFreqBandMatrices(sonogram2.Data, numFreqBand);
+ List allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
double[][,] matrices2 = allSubmatrices2.ToArray();
List allSequentialPatchMatrix = new List();
for (int i = 0; i < matrices2.GetLength(0); i++)
{
int rows = matrices2[i].GetLength(0);
int columns = matrices2[i].GetLength(1);
- var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
+ var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight,
+ (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
}
@@ -264,7 +337,9 @@ public void TestFeatureLearning()
double[][] featureTransVectors = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][];
for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++)
{
- var normVector = ART_2A.NormaliseVector(allSequentialPatchMatrix.ToArray()[i].ToJagged()[j]); // normalize each patch to unit length
+ var normVector =
+ ART_2A.NormaliseVector(allSequentialPatchMatrix.ToArray()[i]
+ .ToJagged()[j]); // normalize each patch to unit length
featureTransVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
}
@@ -275,8 +350,8 @@ public void TestFeatureLearning()
// +++++++++++++++++++++++++++++++++++Temporal Summarization
// The resolution to generate features is 1 second
- // Each 6 patches form 1 second, when patches are formed by a sequence of four frames
- // for each 6 patch, we generate 3 vectors of mean, std, and max
+ // Each 24 single-frame patches form 1 second
+ // for each 24 patch, we generate 3 vectors of mean, std, and max
// The pre-assumption is that each input spectrogram is 1 minute
List allMeanFeatureVectors = new List();
@@ -284,10 +359,11 @@ public void TestFeatureLearning()
List allStdFeatureVectors = new List();
// number of frames needs to be concatenated to form 1 second. Each 24 frames make 1 second.
- int numFrames = 24 / patchHeight;
+ int numFrames = (24 / patchHeight) * 60;
foreach (var freqBandFeature in allFeatureTransVectors)
{
+ // store features of different bands in lists
List meanFeatureVectors = new List();
List maxFeatureVectors = new List();
List stdFeatureVectors = new List();
@@ -305,6 +381,7 @@ public void TestFeatureLearning()
List std = new List();
List max = new List();
double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();
+
// int len = sequencesOfFrames.GetLength(1);
// Second, calculate mean, max, and standard deviation of six vectors element-wise
@@ -340,6 +417,7 @@ public void TestFeatureLearning()
for (int j = 0; j < allMeanFeatureVectors.Count; j++)
{
+ // write the features of each pre-defined frequency band into a separate CSV file
var outputFeatureFile = Path.Combine(resultDir, "FeatureVectors" + j.ToString() + ".csv");
// creating the header for CSV file
@@ -349,24 +427,26 @@ public void TestFeatureLearning()
header.Add("mean" + i.ToString());
}
- for (int i = 0; i < allStdFeatureVectors.ToArray()[j].GetLength(1); i++)
+ for (int i = 0; i < allMaxFeatureVectors.ToArray()[j].GetLength(1); i++)
{
- header.Add("std" + i.ToString());
+ header.Add("max" + i.ToString());
}
- for (int i = 0; i < allMaxFeatureVectors.ToArray()[j].GetLength(1); i++)
+ for (int i = 0; i < allStdFeatureVectors.ToArray()[j].GetLength(1); i++)
{
- header.Add("max" + i.ToString());
+ header.Add("std" + i.ToString());
}
// concatenating mean, std, and max vector together for each 1 second
List featureVectors = new List();
for (int i = 0; i < allMeanFeatureVectors.ToArray()[j].ToJagged().GetLength(0); i++)
{
- List featureList = new List();
- featureList.Add(allMeanFeatureVectors.ToArray()[j].ToJagged()[i]);
- featureList.Add(allMaxFeatureVectors.ToArray()[j].ToJagged()[i]);
- featureList.Add(allStdFeatureVectors.ToArray()[j].ToJagged()[i]);
+ List featureList = new List
+ {
+ allMeanFeatureVectors.ToArray()[j].ToJagged()[i],
+ allMaxFeatureVectors.ToArray()[j].ToJagged()[i],
+ allStdFeatureVectors.ToArray()[j].ToJagged()[i],
+ };
double[] featureVector = DataTools.ConcatenateVectors(featureList);
featureVectors.Add(featureVector);
}
@@ -394,6 +474,8 @@ public void TestFeatureLearning()
}
}
+ /*
+ // Reconstructing the target spectrogram based on clusters' centroids
List convertedSpec = new List();
int columnPerFreqBand = sonogram2.Data.GetLength(1) / numFreqBand;
for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
@@ -407,6 +489,267 @@ public void TestFeatureLearning()
// DO DRAW SPECTROGRAM
var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(), freqScale.GridLineLocations);
reconstructedSpecImage.Save(outputReSpecImagePath, ImageFormat.Png);
+ */
+ }
+
+        /// <summary>
+        /// Input a directory of one-minute recordings for one day
+        /// Calculate PSD:
+        /// 1) Apply FFT to produce the amplitude spectrogram at given window width.
+        /// 2) Square the FFT coefficients >> this gives an energy spectrogram (steps 1 and 2 are delegated to EnergySpectrogram).
+        /// 3) RMS normalization and median subtraction per frequency bin are currently disabled (commented out in the loop).
+        /// 4) Take average of each of the energy values in each frequency bin >> this gives power spectrum or PSD.
+        /// Finally draw the spectrogram of the log PSD values for the whole day, before and after noise reduction.
+        /// </summary>
+        [Ignore]
+        [TestMethod]
+        public void PowerSpectrumDensityTest()
+        {
+            var inputPath = @"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\";
+            var resultPsdPath = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD.bmp";
+            var resultNoiseReducedPsdPath = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD_NoiseReduced.bmp";
+
+            //var inputPath =Path.Combine(inputDir, "TrainSet"); // directory of the one-min recordings of one day (21 and 23 Apr - Black Rail Data)
+
+            // check for wav files directly inside the folder: this is exactly the set that is indexed and iterated below
+            string[] wavFiles = Directory.GetFiles(inputPath, "*.wav");
+            if (wavFiles.Length == 0)
+            {
+                throw new ArgumentException("The folder of recordings is empty...");
+            }
+
+            // get the nyquist value from the first wav file in the folder of recordings
+            int nyquist = new AudioRecording(wavFiles[0]).Nyquist; // 11025;
+            int frameSize = 1024;
+            int finalBinCount = 512; //256; //
+            int hertzInterval = 1000;
+            FreqScaleType scaleType = FreqScaleType.Linear;
+            //var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
+            //var fst = freqScale.ScaleType;
+            //var fst = FreqScaleType.Linear;
+            //var freqScale = new FrequencyScale(fst);
+
+            var settings = new SpectrogramSettings()
+            {
+                WindowSize = frameSize,
+                WindowOverlap = 0.1028,
+
+                //DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false,
+                //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
+
+                //DoMelScale = false,
+                MelBinCount = 256,
+                DoMelScale = scaleType == FreqScaleType.Mel,
+                //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
+
+                NoiseReductionType = NoiseReductionType.None,
+                NoiseReductionParameter = 0.0,
+            };
+
+            var attributes = new SpectrogramAttributes()
+            {
+                NyquistFrequency = nyquist,
+                Duration = TimeSpan.FromMinutes(1440), // 1440 minutes = 24 hours, i.e. the whole day
+            };
+
+            List<double[]> psd = new List<double[]>(); // one row of column averages per non-empty recording
+            foreach (string filePath in wavFiles)
+            {
+                FileInfo fileInfo = filePath.ToFileInfo();
+
+                // process the wav file if it is not empty
+                if (fileInfo.Length != 0)
+                {
+                    var recording = new AudioRecording(filePath);
+
+                    //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
+                    //var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
+                    // save the matrix
+                    // skip normalisation
+                    // skip mel
+                    settings.SourceFileName = recording.BaseName;
+
+                    var spectrogram = new EnergySpectrogram(settings, recording.WavReader);
+                    //var sonogram = new AmplitudeSpectrogram(settings, recording.WavReader);
+
+                    //var energySpectrogram = new EnergySpectrogram(sonoConfig, amplitudeSpectrogram.Data);
+                    //var energySpectrogram = new EnergySpectrogram(sonoConfig, recording.WavReader);
+                    //var energySpectrogram = new EnergySpectrogram(settings, recording.WavReader);
+
+                    // square the FFT coefficients to get an energy spectrogram
+                    // double[,] energySpectrogram = PowerSpectrumDensity.GetEnergyValues(amplitudeSpectrogram.Data);
+
+                    // RMS NORMALIZATION
+                    //double[,] normalizedValues = SNR.RmsNormalization(energySpectro.Data);
+                    //energySpectro.Data = SNR.RmsNormalization(energySpectro.Data);
+
+                    // Median Noise Reduction
+                    //spectrogram.Data = PcaWhitening.NoiseReduction(spectrogram.Data);
+                    //spectrogram.Data = SNR.NoiseReduce_Standard(spectrogram.Data);
+
+                    //double[] psd = PowerSpectralDensity.GetPowerSpectrum(noiseReducedValues);
+                    //psd.Add(energySpectro.GetLogPsd());
+                    psd.Add(MatrixTools.GetColumnAverages(spectrogram.Data));
+
+                    //psd.Add(SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(normalizedValues));
+                    //psd.Add(PowerSpectralDensity.GetPowerSpectrum(normalizedValues));
+                }
+            }
+
+            // writing psd matrix to csv file
+            //Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\psd.csv"), psd.ToArray().ToMatrix());
+            //Image imagePsd = DecibelSpectrogram.DrawSpectrogramAnnotated(psd.ToArray().ToMatrix(), settings, attributes);
+            //imagePsd.Save(resultPsdPath, ImageFormat.Bmp);
+            var psdMatrix = psd.ToArray().ToMatrix();
+
+            // calculate the log of matrix
+            var logPsd = MatrixTools.Matrix2LogValues(psdMatrix);
+            Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd.csv"), logPsd);
+
+            // Image is IDisposable: release each bitmap as soon as it has been saved
+            using (Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(logPsd, settings, attributes))
+            {
+                image.Save(resultPsdPath, ImageFormat.Bmp);
+            }
+
+            var noiseReducedLogPsd = PcaWhitening.NoiseReduction(logPsd); //SNR.NoiseReduce_Standard(logPsd); //SNR.NoiseReduce_Mean(logPsd, 0.0);//SNR.NoiseReduce_Median(logPsd, 0.0); //
+            // write the noise-reduced matrix (not logPsd again) so this CSV matches the image saved below
+            Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd_NoiseReduced.csv"), noiseReducedLogPsd);
+
+            using (Image image2 = DecibelSpectrogram.DrawSpectrogramAnnotated(noiseReducedLogPsd, settings, attributes))
+            {
+                image2.Save(resultNoiseReducedPsdPath, ImageFormat.Bmp);
+            }
+        }
+
+        /// <summary>
+        /// Builds an EnergySpectrogram of one fixture recording, passes it through MatrixTools.Matrix2LogValues and
+        /// draws it before and after PcaWhitening.NoiseReduction. The Save calls are commented out and nothing is asserted.
+        /// </summary>
+        [TestMethod]
+        [Ignore]
+        public void TestSpectrograms()
+        {
+            var recordingPath = PathHelper.ResolveAsset("Recordings", "SM304264_0+1_20160421_004539_47-48min.wav"); // "SM304264_0+1_20160421_094539_37-38min.wav"
+            var resultDir = PathHelper.ResolveAssetPath("SpectrogramTestResults");
+            var outputAmpSpecImagePath = Path.Combine(resultDir, "AmplitudeSpectrogram.bmp");
+            var outputDecibelSpecImagePath = Path.Combine(resultDir, "DecibelSpectrogram.bmp");
+            var outputEnergySpecImagePath = Path.Combine(resultDir, "EnergySpectrogram.bmp");
+            var outputLogEnergySpecImagePath = Path.Combine(resultDir, "LogEnergySpectrogram.bmp");
+            var outputLinScaImagePath = Path.Combine(resultDir, "LinearScaleSpectrogram.bmp");
+            var outputMelScaImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.bmp");
+            var outputNormalizedImagePath = Path.Combine(resultDir, "NormalizedSpectrogram.bmp");
+            var outputNoiseReducedImagePath = Path.Combine(resultDir, "NoiseReducedSpectrogram.bmp");
+            var outputLogPsdImagePath = Path.Combine(resultDir, "Psd.bmp");
+
+            var recording = new AudioRecording(recordingPath);
+            int nyquist = recording.Nyquist; // 11025;
+            int frameSize = 1024;
+            int finalBinCount = 512; //256; //128; // 100; // 40; // 200; //
+            int hertzInterval = 1000;
+
+            //FreqScaleType scaleType = FreqScaleType.Linear;
+            var scaleType = FreqScaleType.Mel;
+
+            //var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
+            //var fst = freqScale.ScaleType;
+
+            var settings = new SpectrogramSettings()
+            {
+                WindowSize = frameSize,
+                WindowOverlap = 0.1028,
+                DoMelScale = scaleType == FreqScaleType.Mel,
+                MelBinCount = 256, //(scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
+                NoiseReductionType = NoiseReductionType.None,
+                //NoiseReductionType = NoiseReductionType.Median,
+            };
+            //settings.NoiseReductionParameter = 0.0; // backgroundNeighbourhood noise reduction in dB
+
+            settings.SourceFileName = recording.BaseName;
+            //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
+
+            var sonogram = new EnergySpectrogram(settings, recording.WavReader);
+            sonogram.Data = MatrixTools.Matrix2LogValues(sonogram.Data);
+
+            var attributes = new SpectrogramAttributes()
+            {
+                NyquistFrequency = sonogram.Attributes.NyquistFrequency,
+                Duration = sonogram.Attributes.Duration,
+            };
+
+            // Image is IDisposable, so each drawing is wrapped in a using block even while its Save call is disabled
+            using (Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(sonogram.Data, settings, attributes))
+            {
+                //image.Save(outputLogEnergySpecImagePath, ImageFormat.Bmp);
+            }
+
+            //var logSonogramData = MatrixTools.Matrix2LogValues(sonogram.Data);
+            //var dbSpectrogram = new DecibelSpectrogram(settings, recording.WavReader);
+            //dbSpectrogram.DrawSpectrogram(outputMelScaImagePath);
+
+            //var energySpectro = new EnergySpectrogram(settings, recording.WavReader);
+            //var image = SpectrogramTools.GetImage(logSonogramData, nyquist, settings.DoMelScale);
+            //var specImage = SpectrogramTools.GetImageFullyAnnotated(image, "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration);
+
+            // DO RMS NORMALIZATION
+            //sonogram.Data = SNR.RmsNormalization(sonogram.Data);
+            //energySpectro.Data = SNR.RmsNormalization(energySpectro.Data);
+
+            //dbSpectrogram.DrawSpectrogram(outputNormalizedImagePath);
+            //var image2 = SpectrogramTools.GetImage(dbSpectrogram.Data, nyquist, settings.DoMelScale);
+            //var normImage = SpectrogramTools.GetImageFullyAnnotated(image2, "NORMALIZEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration);
+            //normImage.Save(outputNormalizedImagePath, ImageFormat.Png);
+
+            // DO NOISE REDUCTION
+            sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);
+            //dbSpectrogram.DrawSpectrogram(outputNoiseReducedImagePath);
+            //var image3 = SpectrogramTools.GetImage(dbSpectrogram.Data, nyquist, settings.DoMelScale);
+            //var noiseReducedImage = SpectrogramTools.GetImageFullyAnnotated(image3, "NOISEREDUCEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration);
+            //noiseReducedImage.Save(outputNoiseReducedImagePath, ImageFormat.Png);
+            using (Image image2 = DecibelSpectrogram.DrawSpectrogramAnnotated(sonogram.Data, settings, attributes))
+            {
+                //image2.Save(outputNoiseReducedImagePath, ImageFormat.Bmp);
+            }
+
+            //energySpectro.DrawLogPsd(outputLogPsdImagePath);
+
+            /*
+            var fst = FreqScaleType.Linear;
+            var freqScale = new FrequencyScale(fst);
+            var recording = new AudioRecording(recordingPath);
+
+            var sonoConfig = new SonogramConfig
+            {
+                WindowSize = freqScale.FinalBinCount * 2,
+                WindowOverlap = 0.2,
+                SourceFName = recording.BaseName,
+                NoiseReductionType = NoiseReductionType.None,
+                NoiseReductionParameter = 0.0,
+            };
+
+            // GENERATE AMPLITUDE SPECTROGRAM
+            var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
+            amplitudeSpectrogram.Configuration.WindowSize = freqScale.WindowSize;
+
+            var image = amplitudeSpectrogram.GetImageFullyAnnotated(amplitudeSpectrogram.GetImage(), "AmplitudeSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+            image.Save(outputAmpSpecImagePath, ImageFormat.Png);
+
+            // DO RMS NORMALIZATION
+            amplitudeSpectrogram.Data = SNR.RmsNormalization(amplitudeSpectrogram.Data);
+            var normImage = amplitudeSpectrogram.GetImageFullyAnnotated(amplitudeSpectrogram.GetImage(), "NORMAmplitudeSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+            normImage.Save(outputNormAmpImagePath, ImageFormat.Png);
+
+            // CONVERT NORMALIZED AMPLITUDE SPECTROGRAM TO dB SPECTROGRAM
+            var sonogram = new SpectrogramStandard(amplitudeSpectrogram);
+            var standImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "LinearScaleSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+            standImage.Save(outputLinScaImagePath, ImageFormat.Png);
+
+            // DO NOISE REDUCTION
+            sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);
+            //SNR.NoiseReduce_Standard(sonogram.Data);
+            var noiseReducedImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "NOISEREDUCEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
+            noiseReducedImage.Save(outputNoiseReducedImagePath, ImageFormat.Png);
+            */
         }
}
}
diff --git a/tests/Acoustics.Test/AudioAnalysisTools/SpectralPeakTracking2018Tests.cs b/tests/Acoustics.Test/AudioAnalysisTools/SpectralPeakTracking2018Tests.cs
index 2f9797dac..ad5c23a92 100644
--- a/tests/Acoustics.Test/AudioAnalysisTools/SpectralPeakTracking2018Tests.cs
+++ b/tests/Acoustics.Test/AudioAnalysisTools/SpectralPeakTracking2018Tests.cs
@@ -104,7 +104,17 @@ public void LocalSpectralPeakTest()
var hertzPerFreqBin = nyquist / finalBinCount;
FreqScaleType scaleType = FreqScaleType.Linear;
- var sonoConfig = new SonogramConfig
+ var spectrogramSettings = new SpectrogramSettings()
+ {
+ WindowSize = frameSize,
+ WindowOverlap = frameOverlap,
+ //DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false,
+ //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
+ NoiseReductionType = NoiseReductionType.None,
+ };
+
+
+ var sonoConfig = new SonogramConfig()
{
WindowSize = frameSize,
WindowOverlap = frameOverlap,
@@ -117,7 +127,7 @@ public void LocalSpectralPeakTest()
var secondsPerFrame = frameStep / (nyquist * 2);
//var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
- var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
+ var amplitudeSpectrogram = new AmplitudeSpectrogram(spectrogramSettings, recording.WavReader);
var energySpectrogram = new EnergySpectrogram(amplitudeSpectrogram);
var decibelSpectrogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
diff --git a/tests/Acoustics.Test/TestHelpers/TestHelper.cs b/tests/Acoustics.Test/TestHelpers/TestHelper.cs
index 6c838fac8..06a392d3f 100644
--- a/tests/Acoustics.Test/TestHelpers/TestHelper.cs
+++ b/tests/Acoustics.Test/TestHelpers/TestHelper.cs
@@ -614,7 +614,7 @@ public static void AssertFrequencyInSignal(WavReader wavReader, double[] signal,
{
var fft = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(signal, wavReader.SampleRate, wavReader.Epsilon, 512, 0.0);
- var histogram = SpectrogramTools.CalculateAvgSpectrumFromSpectrogram(fft.AmplitudeSpectrogram);
+ var histogram = SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(fft.AmplitudeSpectrogram);
var max = histogram.Max();
double threshold = max * 0.8;
diff --git a/tests/Fixtures/Recordings/SM304264_0+1_20160421_004539_47-48min.wav b/tests/Fixtures/Recordings/SM304264_0+1_20160421_004539_47-48min.wav
new file mode 100644
index 000000000..642fcf52a
--- /dev/null
+++ b/tests/Fixtures/Recordings/SM304264_0+1_20160421_004539_47-48min.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20c1fa7b85bafcf65c2f1683ba095b6769e77929332a4bb10e41b5bca8d022dc
+size 2646044
diff --git a/tests/Fixtures/Recordings/SM304264_0+1_20160421_094539_37-38min.wav b/tests/Fixtures/Recordings/SM304264_0+1_20160421_094539_37-38min.wav
new file mode 100644
index 000000000..ac664aa03
--- /dev/null
+++ b/tests/Fixtures/Recordings/SM304264_0+1_20160421_094539_37-38min.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e9d17fc9c73c80b11127867b7d74c9766549b6c75514db7393be70e0dcdf97c
+size 2646044
diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp
new file mode 100644
index 000000000..fc521b1c4
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp.csv
new file mode 100644
index 000000000..77bce521d
--- /dev/null
+++ b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94b29f0c3f371aee179d4ee60b10f33af6e691da9aad95146ab3a04cb11e16a6
+size 9715
diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/MelScaleSpectrogram.bmp
new file mode 100644
index 000000000..b2c8ebf3c
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/MelScaleSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp
new file mode 100644
index 000000000..45ed5f85e
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp.csv
new file mode 100644
index 000000000..8e42d8cdf
--- /dev/null
+++ b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ee4f714ebaed91448cc954d98ad430d7288779bf45eb85596508256df11123c
+size 4868
diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/MelScaleSpectrogram.bmp
new file mode 100644
index 000000000..32c13a12b
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/MelScaleSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/Mel/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/Mel/MelScaleSpectrogram.bmp
new file mode 100644
index 000000000..32c13a12b
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/Mel/MelScaleSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/Mel/NoiseReducedSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/Mel/NoiseReducedSpectrogram.bmp
new file mode 100644
index 000000000..2a5269faa
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/Mel/NoiseReducedSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/Mel/NormalizedSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/Mel/NormalizedSpectrogram.bmp
new file mode 100644
index 000000000..32c13a12b
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/Mel/NormalizedSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/NoiseRedudec_LogPsd.bmp b/tests/Fixtures/SpectrogramTestResults/NoiseRedudec_LogPsd.bmp
new file mode 100644
index 000000000..cae9c3587
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/NoiseRedudec_LogPsd.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Linear/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/MelScaleSpectrogram.bmp
new file mode 100644
index 000000000..b2c8ebf3c
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/MelScaleSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp
new file mode 100644
index 000000000..22602ec59
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp.csv
new file mode 100644
index 000000000..e0e977925
--- /dev/null
+++ b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:733c1439b580b4821c6297b84904079d6ad18579cee957f983352ac05e7a5fd0
+size 10584
diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Mel/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/MelScaleSpectrogram.bmp
new file mode 100644
index 000000000..32c13a12b
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/MelScaleSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp
new file mode 100644
index 000000000..ee88d087d
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp.csv
new file mode 100644
index 000000000..95561cb9b
--- /dev/null
+++ b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fb41f8c54cd64a9f47dad7f4c0dd93392ca1f82106f328f2e3a6ddf40b7a3eb
+size 5219
diff --git a/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csv
new file mode 100644
index 000000000..338117379
--- /dev/null
+++ b/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b2443a09e3cbbb16b38e6d5dd247a5e38da11ce22ca9e0872c7d7abf939ea6f
+size 9666
diff --git a/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csvOLD.txt b/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csvOLD.txt
new file mode 100644
index 000000000..a067ae3ef
--- /dev/null
+++ b/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csvOLD.txt
@@ -0,0 +1,512 @@
+5.92342059178663
+6.78603139659209
+2.73763992468852
+0.484650528763745
+0.134196779728749
+0.060613335626598
+0.0449980957102696
+0.0308638089733839
+0.0230549452891953
+0.029574167257121
+0.045592680580702
+0.0619416735406503
+0.072292137960567
+0.0729769758377264
+0.0693316499344553
+0.0617875858058421
+0.0516206839286855
+0.0397318790338038
+0.0313889365981619
+0.0248176117194829
+0.0199693194102845
+0.0153640062104025
+0.0116573762514843
+0.00921718643579001
+0.00736126359426442
+0.00631222689466149
+0.00584066799068913
+0.00530833759647776
+0.00508085619589678
+0.00486726967483501
+0.00451838930388912
+0.00400071355255953
+0.00349263972018566
+0.00312364670092792
+0.00291824269715042
+0.00256370572427487
+0.00238174654968078
+0.00227325553567207
+0.00227983318428067
+0.00205553384102714
+0.00184061259688744
+0.00169351570451881
+0.00169852726616255
+0.00175181815176738
+0.00174476352094424
+0.00168204117785559
+0.00170478778480012
+0.00166432290835369
+0.00156375406850788
+0.00150489308759203
+0.0015041133696475
+0.00163016659576912
+0.0016663055287863
+0.00162467652147422
+0.00159416144388321
+0.00164774829631534
+0.00178213966891182
+0.00191347604320948
+0.00220144741579375
+0.00230618929826558
+0.00218721027774908
+0.00203714989488304
+0.00195714030196791
+0.00186911163160602
+0.00187040700302073
+0.00188247108896847
+0.00183193450033458
+0.00179035045699948
+0.00174445118984544
+0.00187316824761175
+0.00211835805264512
+0.00250011599513084
+0.00250969474544108
+0.00217201976751015
+0.0017933287463902
+0.00175299055356613
+0.00177184780236242
+0.00181133616466615
+0.00162732752887556
+0.00134105231604351
+0.00101199113538078
+0.000754903422774901
+0.000626766485557011
+0.000614244720489763
+0.00058333159413847
+0.000562623542539203
+0.000537908934271761
+0.000539477910630015
+0.000542870095994484
+0.000525911900481924
+0.00047983113654423
+0.000434921490948426
+0.000422073202189695
+0.000425590329881372
+0.000425181833317943
+0.000419817348531986
+0.0004077788980078
+0.000409178884055008
+0.000414411078598465
+0.000412339330241552
+0.000445152486589777
+0.000492832906013084
+0.000526117154039294
+0.000501340958234119
+0.000430540487576878
+0.000381851309566457
+0.00038523109883433
+0.000400144060866762
+0.000395078586324034
+0.000368138960390048
+0.000353795213391507
+0.000365274345585826
+0.000383547427239684
+0.000378323988078999
+0.000380069771820894
+0.000399230295546036
+0.000477348086841093
+0.000720583901922056
+0.00122844154298709
+0.00183874368176035
+0.00208102360369207
+0.00197263391511847
+0.00218310820204097
+0.00298701684311657
+0.00397455237348695
+0.00355282399813533
+0.00211878415700036
+0.000788646249848888
+0.000426370419069255
+0.000335856896845558
+0.000322816998710207
+0.000328507548525789
+0.000338683984258544
+0.000349326555858356
+0.000353057137036315
+0.000323188384510321
+0.000296180781851916
+0.000301398059193545
+0.000325425143195519
+0.0003334189778345
+0.000309482859823427
+0.000308240015600704
+0.0003103487672135
+0.000295331816438542
+0.000272931873567936
+0.000274934997518774
+0.000291109906983677
+0.000302774694364952
+0.000299800393745657
+0.000292052595725419
+0.000284497090845833
+0.000281033046234125
+0.000275223786867186
+0.000302184043767604
+0.000317848400274916
+0.000323366209004018
+0.000331174572267127
+0.000332729599154386
+0.000356703714929623
+0.000400836533699513
+0.00045847563863159
+0.000490069577933363
+0.00055601356906235
+0.000693980444342674
+0.00096794330013668
+0.00128694623088374
+0.00157411324133971
+0.0018505016577456
+0.00209021458495992
+0.00258182316489771
+0.0034100653376744
+0.00483749513368122
+0.00698128020125996
+0.0101961957461181
+0.0125253074188117
+0.015785659388509
+0.0208950868776112
+0.0300600434286705
+0.0470568907580039
+0.0606117340995085
+0.0699653495203272
+0.080999381927959
+0.0992922807732138
+0.121744445574435
+0.143628955187763
+0.163958470185328
+0.175302034057053
+0.168958593551732
+0.153989313861803
+0.134371764470529
+0.111437211665887
+0.10122208453419
+0.106014764127152
+0.111885727508556
+0.118518342549926
+0.127400441766813
+0.132307468885018
+0.115390039742709
+0.0954962157823734
+0.0863297048458831
+0.0819508257629367
+0.0880120055837407
+0.0969243006317774
+0.0736034313184319
+0.03189992791015
+0.0167802851210502
+0.020665190670411
+0.0231218739469633
+0.0113971589119597
+0.00438748628367456
+0.00290267700696139
+0.00327537094934574
+0.00210154306805208
+0.000849167649375279
+0.000486388934432319
+0.00057839538043529
+0.000628082734677565
+0.000644120334030811
+0.000562775924606834
+0.000476782450144881
+0.000369036111392155
+0.000327485262989244
+0.000325517423298084
+0.000331694154473157
+0.000335120538289499
+0.000321263769923705
+0.000285940647038625
+0.000255451268009806
+0.00025070228415632
+0.000258027712383378
+0.000259037627573138
+0.00026664865956847
+0.000264958077546918
+0.00028415983626037
+0.000283747433614669
+0.000270337154528126
+0.000245020375678894
+0.000235755889095187
+0.000229675295460236
+0.000233139808097966
+0.000226385800129463
+0.000215753215013351
+0.000217340167538521
+0.000222449901061098
+0.000220983036658176
+0.000201445267627303
+0.000196371273359384
+0.000209911794863315
+0.000231452163115747
+0.000213174770891635
+0.000197323272756498
+0.000211510284601431
+0.000220169598833855
+0.000213328046158373
+0.000193445965043203
+0.00019893499440385
+0.000215554593891906
+0.000224243213271129
+0.000224827385874794
+0.000225130162742188
+0.000214647035606757
+0.000203084762425824
+0.000199671532627531
+0.000201168714260613
+0.000197111954471548
+0.000195940467958558
+0.000192592146149875
+0.000193147892797519
+0.000198308048953625
+0.000199000112786909
+0.000198235753004126
+0.000191920326015812
+0.000195335414986316
+0.000190114133024043
+0.000197462939674182
+0.000197145363907007
+0.000197867519429887
+0.000194835231508809
+0.000184098539942656
+0.000176216607936817
+0.000176381794214087
+0.000184792529532461
+0.000190583260803435
+0.000195954078278375
+0.000200381533213994
+0.000196977761873929
+0.000195197907732914
+0.000195892740301248
+0.000200979224180509
+0.000196805211987536
+0.000197834907445491
+0.000189747527996663
+0.000187642294330323
+0.000186985443175167
+0.000188872356000939
+0.000197389124845894
+0.000205993231925291
+0.000198978835724956
+0.000189606971131363
+0.000181860905461806
+0.00018791662191321
+0.000190999048710944
+0.000192812176065193
+0.000188222941558719
+0.00019189824048678
+0.00019655305627445
+0.000201339601778108
+0.000194670377949367
+0.000180185320982464
+0.000175505754742858
+0.000175941617414825
+0.00018707551922134
+0.000189681021762388
+0.000189830182513327
+0.000185292548208458
+0.000185614744544628
+0.000191734904753198
+0.000198300522684971
+0.000198653898950781
+0.000198399686392815
+0.000211179851900352
+0.000207762199509266
+0.000210186120275576
+0.000198649079090712
+0.000203473416742617
+0.000198623724397584
+0.000184831094644113
+0.000179934500989128
+0.00018515769631671
+0.000191553305764961
+0.000200057958695553
+0.000200278372886422
+0.000193544245879366
+0.000186568339222366
+0.000190850440417821
+0.000203207368212081
+0.00021595511986196
+0.000218316276673687
+0.000205817110972747
+0.000212139075376423
+0.000219405364231359
+0.000224492180858786
+0.000218853733115119
+0.000210554225246233
+0.00021246062737957
+0.00022594831829876
+0.000232299356661781
+0.000233636889293826
+0.000250412659578138
+0.000266975553973092
+0.000269204410775137
+0.000267171761121167
+0.000272047682703472
+0.000248195290694898
+0.00022150999657165
+0.000254553539782728
+0.000352865364104451
+0.000421526225705925
+0.000402803565166639
+0.000354321177856905
+0.000326347921141917
+0.000352568357659741
+0.000462463307375687
+0.000643774295517939
+0.000784717414662457
+0.000838745677934633
+0.000757122481207381
+0.000678897861485588
+0.000626349496998761
+0.000658354368846629
+0.000749599188852712
+0.00083165582542375
+0.000886336045743687
+0.000768423933879982
+0.000585237174314642
+0.000508239266362428
+0.000567524033046338
+0.000647481898895154
+0.000624266596357504
+0.000560002445654088
+0.00058215979988373
+0.000684174175828965
+0.000727911906328144
+0.000673223088057037
+0.000615838783315468
+0.000652048367120072
+0.000707038145390686
+0.000823034034765323
+0.000956428762354955
+0.00106053724272944
+0.0010285915152128
+0.00100263542493879
+0.00111264621297516
+0.0014191521413006
+0.00172795581094616
+0.00192780129502807
+0.00206303064738915
+0.00215332113142019
+0.0022466954931446
+0.00215983557777723
+0.00223143191131219
+0.00245815927148455
+0.00274847567476636
+0.00273615517198675
+0.00253352585954081
+0.00225570484653126
+0.00188919842674377
+0.00157201023122244
+0.00145834911049567
+0.00148615868030397
+0.0015519412490252
+0.00157466545535257
+0.00144661607487311
+0.00117227370574575
+0.000938484790644089
+0.000732134816769824
+0.000528624015174057
+0.000366569551817972
+0.000278894213194701
+0.000243710316851626
+0.000238590319723254
+0.000241859431619529
+0.000268263483148327
+0.000260841365332497
+0.00023326504567899
+0.000226203929561463
+0.000238078996744937
+0.000249766971476454
+0.000262149083721957
+0.000266892944412421
+0.000267002077303342
+0.000265726614156371
+0.000271371209291191
+0.000281999036537821
+0.000280658229422484
+0.000248897262917992
+0.000219869705715068
+0.000209607206963666
+0.000222720261135429
+0.000230963069436546
+0.000233283489024344
+0.000235314731373244
+0.000232961936790798
+0.000231587693768191
+0.000226262761696977
+0.000225404330338338
+0.000227823690702315
+0.000229667855594692
+0.000224590485077645
+0.000216769165770145
+0.000217094607210963
+0.000218828136872295
+0.000220931041989363
+0.000219573958484272
+0.00021533192085349
+0.000217083181203663
+0.000227878319705463
+0.00022476619682191
+0.000221835867873572
+0.000213381927208331
+0.000213091544975543
+0.000214124965335125
+0.000214627726098925
+0.00021083419381561
+0.000208441202067815
+0.000207161788346288
+0.000214777783289231
+0.000216293426674694
+0.000205552966463787
+0.000189725444043717
+0.000182158187611337
+0.000182049892906279
+0.00017594803058018
+0.00016774870526203
+0.000170757993533594
+0.000174201609213675
+0.000175121687572624
+0.000164595830759266
+0.000158698956861484
+0.000155892765435281
+0.000146863651223553
+0.000147645864658281
+0.00015308948173169
+0.000160437983580839
+0.000158252172934376
+0.00015217085241057
+0.000152558308785518
+0.000153577471025759
+0.000150233555263141
+0.000152099519534582
+0.000143464711533248
+0.000136389269117607
+0.000133614678678209
+0.000138550850737478
+0.000142134193961718
+0.00014358215198744
+0.000133288650354347
+0.000116106840879223
+0.00011152141646913
+0.000114166717118501
+0.000116059394122705
+0.000115309289052816
+0.000109101804765123
+0.000104070228904335
+0.000108154046267337
+0.000107954799659394
+9.9539555835961E-05
+7.59303745994635E-05
+4.39112855419608E-05
+1.84394347997459E-05
+4.08219770974155E-06
+2.26734401784817E-06
diff --git a/tests/Fixtures/SpectrogramTestResults/linear/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/linear/MelScaleSpectrogram.bmp
new file mode 100644
index 000000000..b2c8ebf3c
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/linear/MelScaleSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/linear/NoiseReducedSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/linear/NoiseReducedSpectrogram.bmp
new file mode 100644
index 000000000..f665ec9a7
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/linear/NoiseReducedSpectrogram.bmp differ
diff --git a/tests/Fixtures/SpectrogramTestResults/linear/NormalizedSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/linear/NormalizedSpectrogram.bmp
new file mode 100644
index 000000000..b2c8ebf3c
Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/linear/NormalizedSpectrogram.bmp differ