diff --git a/src/Acoustics.Shared/Extensions/ArrayExtensions.cs b/src/Acoustics.Shared/Extensions/ArrayExtensions.cs index 1ca7d5947..093e2ba7e 100644 --- a/src/Acoustics.Shared/Extensions/ArrayExtensions.cs +++ b/src/Acoustics.Shared/Extensions/ArrayExtensions.cs @@ -165,5 +165,22 @@ public static double GetMaxValue(this double[] data) return max; } + + /// + /// retrieving the min value of a vector + /// + public static double GetMinValue(this double[] data) + { + double min = data[0]; + for (int i = 1; i < data.Length; i++) + { + if (data[i] < min) + { + min = data[i]; + } + } + + return min; + } } } diff --git a/src/Acoustics.Shared/Extensions/DoubleSquareArrayExtensions.cs b/src/Acoustics.Shared/Extensions/DoubleSquareArrayExtensions.cs index 840f89db5..d9abb32cb 100644 --- a/src/Acoustics.Shared/Extensions/DoubleSquareArrayExtensions.cs +++ b/src/Acoustics.Shared/Extensions/DoubleSquareArrayExtensions.cs @@ -221,18 +221,18 @@ public static void MinMax(this double[,] data, out double min, out double max) /// /// returns an empty matrix with the same number of rows and columns of the input matrix. 
/// - public static double[,] EmptyCopy(this double[,] matrix) + public static T[,] EmptyCopy(this T[,] matrix) { - return new double[matrix.GetLength(0), matrix.GetLength(1)]; + return new T[matrix.GetLength(0), matrix.GetLength(1)]; } /// /// retrieving a full column of a matrix /// columnIndex is the column we want to access /// - public static double[] GetColumn(this double[,] matrix, int columnIndex) + public static T[] GetColumn(this T[,] matrix, int columnIndex) { - double[] column = new double[matrix.GetLength(0)]; + T[] column = new T[matrix.GetLength(0)]; for (int row = 0; row < matrix.GetLength(0); row++) { column[row] = matrix[row, columnIndex]; @@ -245,9 +245,9 @@ public static double[] GetColumn(this double[,] matrix, int columnIndex) /// retrieving a full row of a matrix /// rowIndex is the row we want to access /// - public static double[] GetRow(this double[,] matrix, int rowIndex) + public static T[] GetRow(this T[,] matrix, int rowIndex) { - double[] row = new double[matrix.GetLength(1)]; + T[] row = new T[matrix.GetLength(1)]; for (int column = 0; column < matrix.GetLength(1); column++) { row[column] = matrix[rowIndex, column]; @@ -270,7 +270,7 @@ public enum MergingDirection /// adding a 2D-array to another 2D-array either by "column" or by "row" /// - public static void AddToArray(double[,] result, double[,] array, MergingDirection mergingDirection, int start = 0) + public static void AddToArray(this T[,] result, T[,] array, MergingDirection mergingDirection, int start = 0) { for (int i = 0; i < array.GetLength(0); i++) { diff --git a/src/AnalysisConfigFiles/FeatureLearningConfig.yml b/src/AnalysisConfigFiles/FeatureLearningConfig.yml new file mode 100644 index 000000000..9badeae82 --- /dev/null +++ b/src/AnalysisConfigFiles/FeatureLearningConfig.yml @@ -0,0 +1,53 @@ +--- +# Summary: Generates Clustering Features +# +# This analysis outputs: +# 1. an image of the clusters' centroids +# 2. 
a csv file that contains the vectors of the clusters' centroids +# 3. a csv file that contains the clusters' ids and sizes +# 4. a csv file that contains feature vectors +# +# The feature vectors can be used in training the machine learning models. + +# The directory for the output of parallel jobs running on MahnooshSandpit +# OutputDirectory: "D:\Mahnoosh\Liz\ParallelJobs\" + +# The properties used to generate the Mel scale +FrequencyScaleType: Mel +# HertzInterval: 1000 +FrameSize: 1024 +FinalBinCount: 128 + +# The default values for minFreqBin and maxFreqBin are 1 and FinalBinCount +# For any other arbitrary frequency bin bounds, these two parameters need to be manually set. +MinFreqBin: 24 +MaxFreqBin: 82 + +# The number of frequency bands for the feature generation process +numFreqBand: 1 + +# The width and height of the patches to be taken from the patch sampling set +# A default patch is a single full-band frame, for which patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand and patchHeight = 1 +# PatchWidth: 5 +PatchHeight: 1 + +# The number of frames whose feature vectors will be concatenated in order to preserve temporal information. +FrameWindowLength : 1 + +# The step size used to make a window of frames +StepSize : 1 + +# The number of patches to be selected from each recording of the patch sampling set +NumRandomPatches: 1000 + +# The number of clusters to be generated from the selected patch set +NumClusters: 256 + +# Noise reduction and whitening are applied if these options are set to 'true' +DoNoiseReduction: true +DoWhitening: true + +# The factor of data downsampling using max pooling +MaxPoolingFactor: 6 +... 
+ diff --git a/src/AnalysisPrograms/AnalysisPrograms.csproj b/src/AnalysisPrograms/AnalysisPrograms.csproj index 13454ff93..170d3a8a2 100644 --- a/src/AnalysisPrograms/AnalysisPrograms.csproj +++ b/src/AnalysisPrograms/AnalysisPrograms.csproj @@ -52,14 +52,20 @@ latest - - ..\..\packages\Accord.2.12.0.0\lib\Accord.dll + + ..\..\packages\Accord.3.8.0\lib\net462\Accord.dll - - ..\..\packages\Accord.Math.2.12.0.0\lib\Accord.Math.dll + + ..\..\packages\Accord.MachineLearning.3.8.0\lib\net462\Accord.MachineLearning.dll - - ..\..\packages\Accord.Statistics.2.12.0.0\lib\Accord.Statistics.dll + + ..\..\packages\Accord.Math.3.8.0\lib\net462\Accord.Math.dll + + + ..\..\packages\Accord.Math.3.8.0\lib\net462\Accord.Math.Core.dll + + + ..\..\packages\Accord.Statistics.3.8.0\lib\net462\Accord.Statistics.dll ..\..\packages\AForge.2.2.5\lib\AForge.dll @@ -94,8 +100,8 @@ ..\..\packages\MathNet.Numerics.3.20.2\lib\net40\MathNet.Numerics.dll - - ..\..\packages\McMaster.Extensions.CommandLineUtils.2.2.5\lib\net45\McMaster.Extensions.CommandLineUtils.dll + + ..\..\packages\McMaster.Extensions.CommandLineUtils.2.3.0-alpha\lib\net45\McMaster.Extensions.CommandLineUtils.dll @@ -287,6 +293,7 @@ + @@ -488,10 +495,12 @@ + + \ No newline at end of file diff --git a/src/AnalysisPrograms/Audio2Sonogram.cs b/src/AnalysisPrograms/Audio2Sonogram.cs index 04d1d9079..87db6dca2 100644 --- a/src/AnalysisPrograms/Audio2Sonogram.cs +++ b/src/AnalysisPrograms/Audio2Sonogram.cs @@ -171,6 +171,10 @@ private static Dictionary GetConfigDictionary(FileInfo configFil return configDict; } + /// + /// Dictionary string, string> configDict is an obsolete class. + /// Should avoid calls to this method. 
+ /// public static AudioToSonogramResult GenerateFourSpectrogramImages( FileInfo sourceRecording, FileInfo path2SoxSpectrogram, diff --git a/src/AnalysisPrograms/MahnooshSandpit.cs b/src/AnalysisPrograms/MahnooshSandpit.cs new file mode 100644 index 000000000..769e13f7a --- /dev/null +++ b/src/AnalysisPrograms/MahnooshSandpit.cs @@ -0,0 +1,351 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace AnalysisPrograms +{ + using System; + using System.Collections.Generic; + using System.Drawing; + using System.Drawing.Imaging; + using System.IO; + using System.Linq; + using System.Threading.Tasks; + using Accord.Math; + using Acoustics.Shared; + using Acoustics.Shared.ConfigFile; + using Acoustics.Shared.Csv; + using AudioAnalysisTools.DSP; + using AudioAnalysisTools.StandardSpectrograms; + using AudioAnalysisTools.WavTools; + using McMaster.Extensions.CommandLineUtils; + using Production.Arguments; + using TowseyLibrary; + + public class MahnooshSandpit + { + public const string CommandName = "MahnooshSandpit"; + + public void Execute(Arguments arguments) + { + LoggedConsole.WriteLine("feature learning process..."); + + var inputDir = @"D:\Mahnoosh\Liz\Least_Bittern\"; + var inputPath = Path.Combine(inputDir, "TrainSet\\one_min_recordings"); + var trainSetPath = Path.Combine(inputDir, "TrainSet\\train_data"); + // var testSetPath = Path.Combine(inputDir, "TestSet"); + var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml"; + var resultDir = Path.Combine(inputDir, "FeatureLearning"); + Directory.CreateDirectory(resultDir); + + // var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png"); + // var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png"); + // var outputNoiseReducedMelImagePath = Path.Combine(resultDir, 
"NoiseReducedMelSpectrogram.png"); + // var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png"); + // var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp"); + + // +++++++++++++++++++++++++++++++++++++++++++++++++patch sampling from 1-min recordings + + var configFile = configPath.ToFileInfo(); + + if (configFile == null) + { + throw new FileNotFoundException("No config file argument provided"); + } + else if (!configFile.Exists) + { + throw new ArgumentException($"Config file {configFile.FullName} not found"); + } + + var configuration = ConfigFile.Deserialize(configFile); + int patchWidth = + (configuration.MaxFreqBin - configuration.MinFreqBin + 1) / configuration.NumFreqBand; + + var clusteringOutputList = FeatureLearning.UnsupervisedFeatureLearning(configuration, inputPath); + + List allBandsCentroids = new List(); + for (int i = 0; i < clusteringOutputList.Count; i++) + { + var clusteringOutput = clusteringOutputList[i]; + + // writing centroids to a csv file + // note that Csv.WriteToCsv can't write data types like dictionary (problems with arrays) + // I converted the dictionary values to a matrix and used the Csv.WriteMatrixToCsv + // it might be a better way to do this + string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv"); + var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray(); + Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix()); + + // sorting clusters based on size and output it to a csv file + Dictionary clusterIdSize = clusteringOutput.ClusterIdSize; + int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize); + + // Write cluster ID and size to a CSV file + string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv"); + Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize); + + // Draw cluster image directly from clustering output + List> 
list = clusteringOutput.ClusterIdCentroid.ToList(); + double[][] centroids = new double[list.Count][]; + + for (int j = 0; j < list.Count; j++) + { + centroids[j] = list[j].Value; + } + + allBandsCentroids.Add(centroids); + + List allCentroids = new List(); + for (int k = 0; k < centroids.Length; k++) + { + // convert each centroid to a matrix in order of cluster ID + // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight); + // OR: in order of cluster size + double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, configuration.PatchHeight); + + // normalize each centroid + double[,] normCent = DataTools.normalise(cent); + + // add a row of zero to each centroid + double[,] cent2 = PatchSampling.AddRow(normCent); + + allCentroids.Add(cent2); + } + + // concatenate all centroids + double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids); + + // Draw clusters + var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix); + clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone); + var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp"); + clusterImage.Save(outputClusteringImage); + } + + // extracting features + FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, trainSetPath, resultDir); + LoggedConsole.WriteLine("Done..."); + } + + [Command( + CommandName, + Description = "Temporary entry point for unsupervised and semi-supervised feature learning")] + public class Arguments : SubCommandBase + { + public override Task Execute(CommandLineApplication app) + { + //var instance = new MahnooshSandpit(); + //instance.Execute(this); + //GenerateSpectrograms(); + //ExtractClusteringFeatures(); + BuildSemisupervisedClusteringFeatures(); + + return this.Ok(); + } + } + + // output is the cluster centroids that obtained from a semi-supervised feature learning approach. 
+ public static void BuildSemisupervisedClusteringFeatures() + { + LoggedConsole.WriteLine("semi-supervised feature learning process..."); + var inputDir = @"D:\Mahnoosh\Liz\Least_Bittern\"; + var inputPath = Path.Combine(inputDir, "TrainSet\\one_min_recordings"); + + // the infoFile contains the info about the frames of interest for supervised feature learning. + var frameInfoFilePath = @"D:\Mahnoosh\Liz\Least_Bittern\TrainSet\positive_frames.csv"; + var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml"; + var resultDir = Path.Combine(inputDir, "SemisupervisedClusteringFeatures"); + Directory.CreateDirectory(resultDir); + + var configFile = configPath.ToFileInfo(); + + if (configFile == null) + { + throw new FileNotFoundException("No config file argument provided"); + } + else if (!configFile.Exists) + { + throw new ArgumentException($"Config file {configFile.FullName} not found"); + } + + var configuration = ConfigFile.Deserialize(configFile); + + int patchWidth = + (configuration.MaxFreqBin - configuration.MinFreqBin + 1) / configuration.NumFreqBand; + + var frameInfoFile = frameInfoFilePath.ToFileInfo(); + + if (frameInfoFile == null) + { + throw new FileNotFoundException("No information file argument provided"); + } + else if (!frameInfoFile.Exists) + { + throw new ArgumentException($"Info file {frameInfoFile.FullName} not found"); + } + + // frame info contains information about positive frames + string[,] frameInfo = Csv.ReadMatrixFromCsv(frameInfoFile, TwoDimensionalArray.None); + + var clusteringOutputList = FeatureLearning.SemisupervisedFeatureLearning(configuration, inputPath, frameInfo); + + List allBandsCentroids = new List(); + for (int i = 0; i < clusteringOutputList.Count; i++) + { + var clusteringOutput = clusteringOutputList[i]; + + // writing centroids to a csv file + // note that Csv.WriteToCsv can't write data types like dictionary (problems with arrays) + // I converted the dictionary values to a matrix and used the 
Csv.WriteMatrixToCsv + // it might be a better way to do this + string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv"); + var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray(); + Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix()); + + // sorting clusters based on size and output it to a csv file + Dictionary clusterIdSize = clusteringOutput.ClusterIdSize; + int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize); + + // Write cluster ID and size to a CSV file + string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv"); + Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize); + + // Draw cluster image directly from clustering output + List> list = clusteringOutput.ClusterIdCentroid.ToList(); + double[][] centroids = new double[list.Count][]; + + for (int j = 0; j < list.Count; j++) + { + centroids[j] = list[j].Value; + } + + allBandsCentroids.Add(centroids); + + List allCentroids = new List(); + for (int k = 0; k < centroids.Length; k++) + { + // convert each centroid to a matrix in order of cluster ID + // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight); + // OR: in order of cluster size + double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, configuration.PatchHeight); + + // normalize each centroid + double[,] normCent = DataTools.normalise(cent); + + // add a row of zero to each centroid + double[,] cent2 = PatchSampling.AddRow(normCent); + + allCentroids.Add(cent2); + } + + // concatenate all centroids + double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids); + + // Draw clusters + var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix); + clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone); + var outputClusteringImage = Path.Combine(resultDir, 
"ClustersWithGrid" + i.ToString() + ".bmp"); + clusterImage.Save(outputClusteringImage); + } + + // extracting features + FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, inputPath, resultDir); + LoggedConsole.WriteLine("Done..."); + } + + public static void ExtractClusteringFeatures() + { + LoggedConsole.WriteLine("feature extraction process..."); + var inputDir = @"D:\Mahnoosh\Liz\Least_Bittern\"; + var resultDir = Path.Combine(inputDir, "FeatureLearning"); + //var trainSetPath = Path.Combine(inputDir, "TrainSet"); + var testSetPath = Path.Combine(inputDir, "TestSet\\one_min_recordings"); + var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml"; + var centroidsPath = Path.Combine(resultDir, "ClusterCentroids0.csv"); + + var configFile = configPath.ToFileInfo(); + + if (configFile == null) + { + throw new FileNotFoundException("No config file argument provided"); + } + else if (!configFile.Exists) + { + throw new ArgumentException($"Config file {configFile.FullName} not found"); + } + + var configuration = ConfigFile.Deserialize(configFile); + + List centroids = new List(); + centroids.Add(Csv.ReadMatrixFromCsv(centroidsPath.ToFileInfo(), TwoDimensionalArray.None).ToJagged()); + FeatureExtraction.UnsupervisedFeatureExtraction(configuration, centroids, testSetPath, resultDir); + LoggedConsole.WriteLine("Done..."); + } + + public static void GenerateSpectrograms() + { + var recordingDir = @"M:\Liz\SupervisedPatchSamplingSet\Recordings\"; + var resultDir = @"M:\Liz\SupervisedPatchSamplingSet\"; + + // check whether there is any file in the folder/subfolders + if (Directory.GetFiles(recordingDir, "*", SearchOption.AllDirectories).Length == 0) + { + throw new ArgumentException("The folder of recordings is empty..."); + } + + int frameSize = 1024; + int finalBinCount = 256; + FreqScaleType scaleType = FreqScaleType.Mel; + var settings = new SpectrogramSettings() + { + WindowSize = frameSize, + + // the duration of 
each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds + // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second + // The "WindowOverlap" is calculated to answer this question + // each 24 single-frames duration is equal to 1 second + // note that the "WindowOverlap" value should be recalculated if frame size is changed + // this has not yet been considered in the Config file! + WindowOverlap = 0.10725204, + DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, + MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, + NoiseReductionType = NoiseReductionType.None, + NoiseReductionParameter = 0.0, + }; + + foreach (string filePath in Directory.GetFiles(recordingDir, "*.wav")) + { + FileInfo fileInfo = filePath.ToFileInfo(); + + // process the wav file if it is not empty + if (fileInfo.Length != 0) + { + var recording = new AudioRecording(filePath); + settings.SourceFileName = recording.BaseName; + + var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recording.WavReader); + + var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram); + + // DO NOISE REDUCTION + decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data); + + // draw the spectrogram + var attributes = new SpectrogramAttributes() + { + NyquistFrequency = decibelSpectrogram.Attributes.NyquistFrequency, + Duration = decibelSpectrogram.Attributes.Duration, + }; + + Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(decibelSpectrogram.Data, settings, attributes); + string pathToSpectrogramFiles = Path.Combine(resultDir, "Spectrograms", settings.SourceFileName + ".bmp"); + image.Save(pathToSpectrogramFiles, ImageFormat.Bmp); + + // write the matrix to a csv file + string pathToMatrixFiles = Path.Combine(resultDir, "Matrices", settings.SourceFileName + ".csv"); + Csv.WriteMatrixToCsv(pathToMatrixFiles.ToFileInfo(), 
decibelSpectrogram.Data); + } + } + } + } +} diff --git a/src/AnalysisPrograms/Production/Arguments/MainArgs.cs b/src/AnalysisPrograms/Production/Arguments/MainArgs.cs index 4403d8a51..59a2ceb8a 100644 --- a/src/AnalysisPrograms/Production/Arguments/MainArgs.cs +++ b/src/AnalysisPrograms/Production/Arguments/MainArgs.cs @@ -52,6 +52,7 @@ namespace AnalysisPrograms.Production.Arguments [Subcommand(DummyAnalysis.CommandName, typeof(DummyAnalysis.Arguments))] [Subcommand(FileRenamer.CommandName, typeof(FileRenamer.Arguments))] [Subcommand(Sandpit.CommandName, typeof(Sandpit.Arguments))] + [Subcommand(MahnooshSandpit.CommandName, typeof(MahnooshSandpit.Arguments))] public class MainArgs { private async Task OnExecuteAsync(CommandLineApplication app) diff --git a/src/AnalysisPrograms/Sandpit.cs b/src/AnalysisPrograms/Sandpit.cs index 7cc87ac08..31ac351b3 100644 --- a/src/AnalysisPrograms/Sandpit.cs +++ b/src/AnalysisPrograms/Sandpit.cs @@ -15,7 +15,8 @@ namespace AnalysisPrograms using System.Threading.Tasks; using Acoustics.Shared; using Acoustics.Shared.Csv; - using AnalysisPrograms.AnalyseLongRecordings; + using Acoustics.Tools.Wav; + using AnalyseLongRecordings; using AudioAnalysisTools; using AudioAnalysisTools.DSP; using AudioAnalysisTools.Indices; @@ -23,7 +24,7 @@ namespace AnalysisPrograms using AudioAnalysisTools.StandardSpectrograms; using AudioAnalysisTools.WavTools; using McMaster.Extensions.CommandLineUtils; - using AnalysisPrograms.Production.Arguments; + using Production.Arguments; using TowseyLibrary; /// @@ -78,6 +79,7 @@ public override Task Execute(CommandLineApplication app) //CubeHelixDrawTestImage(); //DrawLongDurationSpectrogram(); //DrawClusterSequence(); + DrawStandardSpectrograms(); //ExtractSpectralFeatures(); //HerveGlotinMethods(); @@ -510,6 +512,33 @@ public static void Audio2CsvOverOneFile() AnalyseLongRecording.Execute(arguments); } + /// + /// Draws a standard spectrogram + /// + public static void DrawStandardSpectrograms() + { + var 
audioFile = @"C:\Ecoacoustics\WavFiles\TestRecordings\BAC\BAC2_20071008-085040.wav"; + var recording = new WavReader(audioFile); + + var settings = new SpectrogramSettings() + { + SourceFileName = "BAC2_20071008-085040", + WindowSize = 1024, + WindowOverlap = 0.0, + DoMelScale = false, + MelBinCount = 256, + NoiseReductionType = NoiseReductionType.Median, + NoiseReductionParameter = 0.0, + }; + + //var amplSpectrogram = new AmplitudeSpectrogram(settings, recording); + //var dbSpectrogram = new DecibelSpectrogram(settings, recording); + //dbSpectrogram.DrawSpectrogram(@"C:\Ecoacoustics\WavFiles\TestRecordings\BAC\BAC2_20071008-085040_MelMedian.png"); + + var energySpectro = new EnergySpectrogram(settings, recording); + energySpectro.DrawLogPsd(@"C:\Ecoacoustics\WavFiles\TestRecordings\BAC\BAC2_20071008-085040_LogPSD.png"); + } + public static void DrawLongDurationSpectrogram() { // the default ld fc spectrogram config file diff --git a/src/AnalysisPrograms/SpectralPeakTracking/SpectralPeakTrackingEntry.cs b/src/AnalysisPrograms/SpectralPeakTracking/SpectralPeakTrackingEntry.cs index f9f320689..43dee7b95 100644 --- a/src/AnalysisPrograms/SpectralPeakTracking/SpectralPeakTrackingEntry.cs +++ b/src/AnalysisPrograms/SpectralPeakTracking/SpectralPeakTrackingEntry.cs @@ -82,7 +82,8 @@ public static void Execute(Arguments arguments) //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - var energySpectrogram = new EnergySpectrogram(amplitudeSpectrogram); + // Broken in merge b7e03070a9cd72ab0632789a3412967a6cc54cd6 + //var energySpectrogram = new EnergySpectrogram(amplitudeSpectrogram); var decibelSpectrogram = new SpectrogramStandard(sonoConfig, recording.WavReader); double frameStepSize = sonoConfig.GetFrameOffset(); diff --git a/src/AnalysisPrograms/packages.config b/src/AnalysisPrograms/packages.config index 4718d0024..d9918a803 100644 --- 
a/src/AnalysisPrograms/packages.config +++ b/src/AnalysisPrograms/packages.config @@ -1,8 +1,9 @@  - - - + + + + @@ -15,7 +16,7 @@ - + diff --git a/src/AudioAnalysisTools/AudioAnalysisTools.csproj b/src/AudioAnalysisTools/AudioAnalysisTools.csproj index 89ca2f5ee..9b50bd6b0 100644 --- a/src/AudioAnalysisTools/AudioAnalysisTools.csproj +++ b/src/AudioAnalysisTools/AudioAnalysisTools.csproj @@ -241,6 +241,9 @@ + + + @@ -251,6 +254,7 @@ + @@ -294,9 +298,12 @@ - + + + + diff --git a/src/AudioAnalysisTools/DSP/FeatureExtraction.cs b/src/AudioAnalysisTools/DSP/FeatureExtraction.cs new file mode 100644 index 000000000..c9f385ab3 --- /dev/null +++ b/src/AudioAnalysisTools/DSP/FeatureExtraction.cs @@ -0,0 +1,482 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace AudioAnalysisTools.DSP +{ + using System; + using System.Collections.Generic; + using System.IO; + using System.Linq; + using System.Text; + using Accord.Math; + using Accord.Statistics; + using Acoustics.Shared.Csv; + using NeuralNets; + using StandardSpectrograms; + using TowseyLibrary; + using WavTools; + + /// + /// This class is designed to extract clustering features for target input recordings. + /// + public class FeatureExtraction + { + /// + /// Apply feature learning process on a set of target (1-minute) recordings (inputPath) + /// according to the a set of centroids learned using feature learning process. 
+ /// Output feature vectors (outputPath) + /// + public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config, List allCentroids, + string inputPath, string outputPath) + { + var simVecDir = Directory.CreateDirectory(Path.Combine(outputPath, "SimilarityVectors")); + int frameSize = config.FrameSize; + int finalBinCount = config.FinalBinCount; + FreqScaleType scaleType = config.FrequencyScaleType; + var settings = new SpectrogramSettings() + { + WindowSize = frameSize, + + // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds + // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second + // The "WindowOverlap" is calculated to answer this question + // each 24 single-frames duration is equal to 1 second + // note that the "WindowOverlap" value should be recalculated if frame size is changed + // this has not yet been considered in the Config file! + WindowOverlap = 0.10725204, + DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, + MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, + NoiseReductionType = NoiseReductionType.None, + NoiseReductionParameter = 0.0, + }; + double frameStep = frameSize * (1 - settings.WindowOverlap); + int minFreqBin = config.MinFreqBin; + int maxFreqBin = config.MaxFreqBin; + int numFreqBand = config.NumFreqBand; + int patchWidth = + (maxFreqBin - minFreqBin + 1) / numFreqBand; + int patchHeight = config.PatchHeight; + + // the number of frames that their feature vectors will be concatenated in order to preserve temporal information. 
+ int frameWindowLength = config.FrameWindowLength; + + // the step size to make a window of frames + int stepSize = config.StepSize; + + // the factor of downsampling + int maxPoolingFactor = config.MaxPoolingFactor; + + // check whether there is any file in the folder/subfolders + if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0) + { + throw new ArgumentException("The folder of recordings is empty..."); + } + + //***** + // lists of features for all processing files + // the key is the file name, and the value is the features for different bands + Dictionary> allFilesMinFeatureVectors = new Dictionary>(); + Dictionary> allFilesMeanFeatureVectors = new Dictionary>(); + Dictionary> allFilesMaxFeatureVectors = new Dictionary>(); + Dictionary> allFilesStdFeatureVectors = new Dictionary>(); + Dictionary> allFilesSkewnessFeatureVectors = new Dictionary>(); + + double[,] inputMatrix; + List recordings = new List(); + + foreach (string filePath in Directory.GetFiles(inputPath, "*.wav")) + { + FileInfo fileInfo = filePath.ToFileInfo(); + + // process the wav file if it is not empty + if (fileInfo.Length != 0) + { + var recording = new AudioRecording(filePath); + settings.SourceFileName = recording.BaseName; + + if (config.DoSegmentation) + { + recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep); + } + else + { + recordings.Add(recording); + } + + for (int s = 0; s < recordings.Count; s++) + { + string pathToSimilarityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + "-" + s.ToString() + ".csv"); + var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[s].WavReader); + var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram); + + // DO RMS NORMALIZATION + //sonogram.Data = SNR.RmsNormalization(sonogram.Data); + + // DO NOISE REDUCTION + if (config.DoNoiseReduction) + { + decibelSpectrogram.Data = 
PcaWhitening.NoiseReduction(decibelSpectrogram.Data); + } + + // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins + if (minFreqBin != 1 || maxFreqBin != finalBinCount) + { + inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin); + } + else + { + inputMatrix = decibelSpectrogram.Data; + } + + // creating matrices from different freq bands of the source spectrogram + List allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); + double[][,] matrices2 = allSubmatrices2.ToArray(); + List allSequentialPatchMatrix = new List(); + for (int i = 0; i < matrices2.GetLength(0); i++) + { + // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling + double[,] downsampledMatrix = FeatureLearning.MaxPooling(matrices2[i], config.MaxPoolingFactor); + + int rows = downsampledMatrix.GetLength(0); + int columns = downsampledMatrix.GetLength(1); + var sequentialPatches = PatchSampling.GetPatches(downsampledMatrix, patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential); + allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix()); + } + + // +++++++++++++++++++++++++++++++++++Feature Transformation + // to do the feature transformation, we normalize centroids and + // sequential patches from the input spectrogram to unit length + // Then, we calculate the dot product of each patch with the centroids' matrix + + List allNormCentroids = new List(); + for (int i = 0; i < allCentroids.Count; i++) + { + // double check the index of the list + double[][] normCentroids = new double[allCentroids.ToArray()[i].GetLength(0)][]; + for (int j = 0; j < allCentroids.ToArray()[i].GetLength(0); j++) + { + normCentroids[j] = ART_2A.NormaliseVector(allCentroids.ToArray()[i][j]); + } + + allNormCentroids.Add(normCentroids); + } + + List allFeatureTransVectors = new List(); + + // processing the sequential patch 
matrix for each band + for (int i = 0; i < allSequentialPatchMatrix.Count; i++) + { + List featureTransVectors = new List(); + double[][] similarityVectors = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][]; + + for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++) + { + // normalize each patch to unit length + var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j]; + var normVector = inputVector; + + // to avoid vectors with NaN values, only normalize those that their norm is not equal to zero. + if (inputVector.Euclidean() != 0) + { + normVector = ART_2A.NormaliseVector(inputVector); + } + + similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector); + } + + Csv.WriteMatrixToCsv(pathToSimilarityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix()); + + // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames + // using FrameWindowLength + + // patchId refers to the patch id that has been processed so far according to the step size. + // if we want no overlap between different frame windows, then stepSize = frameWindowLength + int patchId = 0; + while (patchId + frameWindowLength - 1 < similarityVectors.GetLength(0)) + { + List patchGroup = new List(); + for (int k = 0; k < frameWindowLength; k++) + { + patchGroup.Add(similarityVectors[k + patchId]); + } + + featureTransVectors.Add(DataTools.ConcatenateVectors(patchGroup)); + patchId = patchId + stepSize; + } + + allFeatureTransVectors.Add(featureTransVectors.ToArray()); + } + + // +++++++++++++++++++++++++++++++++++Feature Transformation + + // +++++++++++++++++++++++++++++++++++Temporal Summarization + // Based on the resolution to generate features, the "numFrames" parameter will be set. 
+ // Each 24 single-frame patches form 1 second + // for each 24 patch, we generate 5 vectors of min, mean, std, and max (plus skewness from Accord.net) + // The pre-assumption is that each input recording is 1 minute long + + // store features of different bands in lists + List allMinFeatureVectors = new List(); + List allMeanFeatureVectors = new List(); + List allMaxFeatureVectors = new List(); + List allStdFeatureVectors = new List(); + List allSkewnessFeatureVectors = new List(); + + // Each 24 frames form 1 second using WindowOverlap + // factors such as stepSize, and maxPoolingFactor should be considered in temporal summarization. + int numFrames = 24 / (patchHeight * stepSize * maxPoolingFactor); + + foreach (var freqBandFeature in allFeatureTransVectors) + { + List minFeatureVectors = new List(); + List meanFeatureVectors = new List(); + List maxFeatureVectors = new List(); + List stdFeatureVectors = new List(); + List skewnessFeatureVectors = new List(); + + int c = 0; + while (c + numFrames <= freqBandFeature.GetLength(0)) + { + // First, make a list of patches that would be equal to the needed resolution (1 second, 60 second, etc.) 
+ List sequencesOfFramesList = new List(); + for (int i = c; i < c + numFrames; i++) + { + sequencesOfFramesList.Add(freqBandFeature[i]); + } + + List min = new List(); + List mean = new List(); + List std = new List(); + List max = new List(); + List skewness = new List(); + + double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix(); + + // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise + for (int j = 0; j < sequencesOfFrames.GetLength(1); j++) + { + double[] temp = new double[sequencesOfFrames.GetLength(0)]; + for (int k = 0; k < sequencesOfFrames.GetLength(0); k++) + { + temp[k] = sequencesOfFrames[k, j]; + } + + min.Add(temp.GetMinValue()); + mean.Add(AutoAndCrossCorrelation.GetAverage(temp)); + std.Add(AutoAndCrossCorrelation.GetStdev(temp)); + max.Add(temp.GetMaxValue()); + skewness.Add(temp.Skewness()); + } + + minFeatureVectors.Add(min.ToArray()); + meanFeatureVectors.Add(mean.ToArray()); + maxFeatureVectors.Add(max.ToArray()); + stdFeatureVectors.Add(std.ToArray()); + skewnessFeatureVectors.Add(skewness.ToArray()); + c += numFrames; + } + + // when (freqBandFeature.GetLength(0) % numFrames) != 0, it means there are a number of frames (< numFrames) + // (or the whole) at the end of the target recording , left unprocessed. + // this would be problematic when an the resolution to generate the feature vector is 1 min, + // but the the length of the target recording is a bit less than one min. 
+ if (freqBandFeature.GetLength(0) % numFrames != 0 && freqBandFeature.GetLength(0) % numFrames > 1) + { + // First, make a list of patches that would be less than the required resolution + List sequencesOfFramesList = new List(); + int unprocessedFrames = freqBandFeature.GetLength(0) % numFrames; + for (int i = freqBandFeature.GetLength(0) - unprocessedFrames; + i < freqBandFeature.GetLength(0); + i++) + { + sequencesOfFramesList.Add(freqBandFeature[i]); + } + + List min = new List(); + List mean = new List(); + List std = new List(); + List max = new List(); + List skewness = new List(); + + double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix(); + + // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise + for (int j = 0; j < sequencesOfFrames.GetLength(1); j++) + { + double[] temp = new double[sequencesOfFrames.GetLength(0)]; + for (int k = 0; k < sequencesOfFrames.GetLength(0); k++) + { + temp[k] = sequencesOfFrames[k, j]; + } + + min.Add(temp.GetMinValue()); + mean.Add(AutoAndCrossCorrelation.GetAverage(temp)); + std.Add(AutoAndCrossCorrelation.GetStdev(temp)); + max.Add(temp.GetMaxValue()); + skewness.Add(temp.Skewness()); + } + + minFeatureVectors.Add(min.ToArray()); + meanFeatureVectors.Add(mean.ToArray()); + maxFeatureVectors.Add(max.ToArray()); + stdFeatureVectors.Add(std.ToArray()); + skewnessFeatureVectors.Add(skewness.ToArray()); + } + + allMinFeatureVectors.Add(minFeatureVectors.ToArray().ToMatrix()); + allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix()); + allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix()); + allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix()); + allSkewnessFeatureVectors.Add(skewnessFeatureVectors.ToArray().ToMatrix()); + } + + //***** + // the keys of the following dictionaries contain file name + // and their values are a list which the list.count is + // the number of all subsegments for which features are extracted + // the number 
of freq bands defined as an user-defined parameter. + // the 2D-array is the feature vectors. + allFilesMinFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMinFeatureVectors); + allFilesMeanFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMeanFeatureVectors); + allFilesMaxFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMaxFeatureVectors); + allFilesStdFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allStdFeatureVectors); + allFilesSkewnessFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allSkewnessFeatureVectors); + + // +++++++++++++++++++++++++++++++++++Temporal Summarization + } + } + } + + // ++++++++++++++++++++++++++++++++++Writing features to one file + // First, concatenate mean, max, std for each second. + // Then, write the features of each pre-defined frequency band into a separate CSV file. + var filesName = allFilesMeanFeatureVectors.Keys.ToArray(); + var minFeatures = allFilesMinFeatureVectors.Values.ToArray(); + var meanFeatures = allFilesMeanFeatureVectors.Values.ToArray(); + var maxFeatures = allFilesMaxFeatureVectors.Values.ToArray(); + var stdFeatures = allFilesStdFeatureVectors.Values.ToArray(); + var skewnessFeatures = allFilesSkewnessFeatureVectors.Values.ToArray(); + + // The number of elements in the list shows the number of freq bands + // the size of each element in the list shows the number of files processed to generate feature for. 
+ // the dimensions of the matrix shows the number of feature vectors generated for each file and the length of feature vector + var allMins = new List(); + var allMeans = new List(); + var allMaxs = new List(); + var allStds = new List(); + var allSkewness = new List(); + + // looping over freq bands + for (int i = 0; i < meanFeatures[0].Count; i++) + { + var mins = new List(); + var means = new List(); + var maxs = new List(); + var stds = new List(); + var skewnesses = new List(); + + // looping over all files + for (int k = 0; k < meanFeatures.Length; k++) + { + mins.Add(minFeatures[k].ToArray()[i]); + means.Add(meanFeatures[k].ToArray()[i]); + maxs.Add(maxFeatures[k].ToArray()[i]); + stds.Add(stdFeatures[k].ToArray()[i]); + skewnesses.Add(skewnessFeatures[k].ToArray()[i]); + } + + allMins.Add(mins.ToArray()); + allMeans.Add(means.ToArray()); + allMaxs.Add(maxs.ToArray()); + allStds.Add(stds.ToArray()); + allSkewness.Add(skewnesses.ToArray()); + } + + // each element of meanFeatures array is a list of features for different frequency bands. 
+ // looping over the number of freq bands + for (int i = 0; i < allMeans.ToArray().GetLength(0); i++) + { + // creating output feature file based on the number of freq bands + var outputFeatureFile = Path.Combine(outputPath, "FeatureVectors-" + i.ToString() + ".csv"); + + // creating the header for CSV file + List header = new List(); + header.Add("file name"); + + for (int j = 0; j < allMins.ToArray()[i][0].GetLength(1); j++) + { + header.Add("min" + j.ToString()); + } + + for (int j = 0; j < allMeans.ToArray()[i][0].GetLength(1); j++) + { + header.Add("mean" + j.ToString()); + } + + for (int j = 0; j < allMaxs.ToArray()[i][0].GetLength(1); j++) + { + header.Add("max" + j.ToString()); + } + + for (int j = 0; j < allStds.ToArray()[i][0].GetLength(1); j++) + { + header.Add("std" + j.ToString()); + } + + for (int j = 0; j < allSkewness.ToArray()[i][0].GetLength(1); j++) + { + header.Add("skewness" + j.ToString()); + } + + var csv = new StringBuilder(); + string content = string.Empty; + foreach (var entry in header.ToArray()) + { + content += entry.ToString() + ","; + } + + csv.AppendLine(content); + + var allFilesFeatureVectors = new Dictionary(); + + // looping over files + for (int j = 0; j < allMeans.ToArray()[i].GetLength(0); j++) + { + // concatenating mean, std, and max vector together for the pre-defined resolution + List featureVectors = new List(); + for (int k = 0; k < allMeans.ToArray()[i][j].ToJagged().GetLength(0); k++) + { + List featureList = new List + { + allMins.ToArray()[i][j].ToJagged()[k], + allMeans.ToArray()[i][j].ToJagged()[k], + allMaxs.ToArray()[i][j].ToJagged()[k], + allStds.ToArray()[i][j].ToJagged()[k], + allSkewness.ToArray()[i][j].ToJagged()[k], + }; + double[] featureVector = DataTools.ConcatenateVectors(featureList); + featureVectors.Add(featureVector); + } + + allFilesFeatureVectors.Add(filesName[j], featureVectors.ToArray().ToMatrix()); + } + + // writing feature vectors to CSV file + foreach (var entry in allFilesFeatureVectors) 
+ { + content = string.Empty; + content += entry.Key.ToString() + ","; + foreach (var cent in entry.Value) + { + content += cent.ToString() + ","; + } + + csv.AppendLine(content); + } + + File.WriteAllText(outputFeatureFile, csv.ToString()); + } + } + } +} diff --git a/src/AudioAnalysisTools/DSP/FeatureLearning.cs b/src/AudioAnalysisTools/DSP/FeatureLearning.cs new file mode 100644 index 000000000..5da3d4aee --- /dev/null +++ b/src/AudioAnalysisTools/DSP/FeatureLearning.cs @@ -0,0 +1,485 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace AudioAnalysisTools.DSP +{ + using System; + using System.Collections.Generic; + using System.IO; + using System.Linq; + using Accord.Math; + using StandardSpectrograms; + using WavTools; + + /// + /// This class is designed to learn bases (cluster centroids) through feature learning process. + /// + public static class FeatureLearning + { + /// + /// Apply feature learning process on a set of patch sampling set in an unsupervised manner + /// Output clusters + /// + public static List UnsupervisedFeatureLearning(FeatureLearningSettings config, string inputPath) + { + // check whether there is any file in the folder/subfolders + if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0) + { + throw new ArgumentException("The folder of recordings is empty..."); + } + + int frameSize = config.FrameSize; + int finalBinCount = config.FinalBinCount; + FreqScaleType scaleType = config.FrequencyScaleType; + var settings = new SpectrogramSettings() + { + WindowSize = frameSize, + + // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds + // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second + // The "WindowOverlap" is calculated to answer this 
question + // each 24 single-frames duration is equal to 1 second + // note that the "WindowOverlap" value should be recalculated if frame size is changed + // this has not yet been considered in the Config file! + WindowOverlap = 0.10725204, + DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, + MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, + NoiseReductionType = NoiseReductionType.None, + NoiseReductionParameter = 0.0, + }; + double frameStep = frameSize * (1 - settings.WindowOverlap); + int minFreqBin = config.MinFreqBin; + int maxFreqBin = config.MaxFreqBin; + int numFreqBand = config.NumFreqBand; + int patchWidth = + (maxFreqBin - minFreqBin + 1) / numFreqBand; + int patchHeight = config.PatchHeight; + int numRandomPatches = config.NumRandomPatches; + + // Define variable number of "randomPatch" lists based on "numFreqBand" + Dictionary> randomPatchLists = new Dictionary>(); + for (int i = 0; i < numFreqBand; i++) + { + randomPatchLists.Add(string.Format("randomPatch{0}", i.ToString()), new List()); + } + + List randomPatches = new List(); + double[,] inputMatrix; + List recordings = new List(); + + foreach (string filePath in Directory.GetFiles(inputPath, "*.wav")) + { + FileInfo fileInfo = filePath.ToFileInfo(); + + // process the wav file if it is not empty + if (fileInfo.Length != 0) + { + var recording = new AudioRecording(filePath); + settings.SourceFileName = recording.BaseName; + + if (config.DoSegmentation) + { + recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep); + } + else + { + recordings = new List<AudioRecording> { recording }; // fresh list per file, so recordings from earlier files are not sampled again + } + + for (int i = 0; i < recordings.Count; i++) + { + var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[i].WavReader); + var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram); + + // DO RMS NORMALIZATION + //sonogram.Data = SNR.RmsNormalization(sonogram.Data); + + if (config.DoNoiseReduction) + { +
decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data); + } + + // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins + if (minFreqBin != 1 || maxFreqBin != finalBinCount) + { + inputMatrix = + PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin); + } + else + { + inputMatrix = decibelSpectrogram.Data; + } + + // creating matrices from different freq bands of the source spectrogram + List allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); + + // selecting random patches from each freq band matrix and add them to the corresponding patch list + int count = 0; + + while (count < allSubmatrices.Count) + { + // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling + double[,] downsampledMatrix = MaxPooling(allSubmatrices.ToArray()[count], config.MaxPoolingFactor); + + randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling + .GetPatches(downsampledMatrix, patchWidth, patchHeight, numRandomPatches, + PatchSampling.SamplingMethod.Random).ToMatrix()); + count++; + } + } + } + } + + foreach (string key in randomPatchLists.Keys) + { + randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key])); + } + + // convert list of random patches matrices to one matrix + int numClusters = + config.NumClusters; + + List allClusteringOutput = new List(); + for (int i = 0; i < randomPatches.Count; i++) + { + double[,] patchMatrix = randomPatches[i]; + + // Apply PCA Whitening + var whitenedSpectrogram = PcaWhitening.Whitening(config.DoWhitening, patchMatrix); + + // Do k-means clustering + var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numClusters); + allClusteringOutput.Add(clusteringOutput); + } + + return allClusteringOutput; + } + + /// + /// This method downsamples the input matrix (x,y) by a factor of n on the temporal scale (x) using max pooling + /// + public 
static double[,] MaxPooling(double[,] matrix, int factor) + { + int count = 0; + List downsampledMatrix = new List(); + while (count + factor <= matrix.GetLength(0)) + { + List maxValues = new List(); + for (int j = 0; j < matrix.GetLength(1); j++) + { + List temp = new List(); + for (int i = count; i < count + factor; i++) + { + temp.Add(matrix[i, j]); + } + + maxValues.Add(temp.ToArray().GetMaxValue()); + } + + downsampledMatrix.Add(maxValues.ToArray()); + count = count + factor; + } + + return downsampledMatrix.ToArray().ToMatrix(); + } + + /// + /// This method is called semi-supervised feature learning because one of the clusters is formed using + /// the positive frames manually selected from 1-min recordings. + /// The input to this methods is a group of files that contains the call of interest, + /// a 2D-array that contains file name, the second number and the corresponding frame numbers in each file. + /// At the moment, this method only handles single-frames as patches (PatchHeight = 1). + /// + public static List SemisupervisedFeatureLearning(FeatureLearningSettings config, + string inputPath, string[,] frameInfo) + { + // making a dictionary of frame info as file name and second number as key, and start and end frame number as value. 
+ Dictionary, int[]> info = new Dictionary, int[]>(); + for (int i = 0; i < frameInfo.GetLength(0); i++) + { + Tuple keys = new Tuple(frameInfo[i, 0], Convert.ToInt32(frameInfo[i, 1])); + int[] values = new int[2] { Convert.ToInt32(frameInfo[i, 2]), Convert.ToInt32(frameInfo[i, 3]) }; + info.Add(keys, values); + } + + // processing the recordings within the input path + // check whether there is any file in the folder/subfolders + if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0) + { + throw new ArgumentException("The folder of recordings is empty..."); + } + + int frameSize = config.FrameSize; + int finalBinCount = config.FinalBinCount; + FreqScaleType scaleType = config.FrequencyScaleType; + var settings = new SpectrogramSettings() + { + WindowSize = frameSize, + + // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds + // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second + // The "WindowOverlap" is calculated to answer this question + // each 24 single-frames duration is equal to 1 second + // note that the "WindowOverlap" value should be recalculated if frame size is changed + // this has not yet been considered in the Config file! + WindowOverlap = 0.10725204, + DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, + MelBinCount = (scaleType == FreqScaleType.Mel) ? 
finalBinCount : frameSize / 2, + NoiseReductionType = NoiseReductionType.None, + NoiseReductionParameter = 0.0, + }; + double frameStep = frameSize * (1 - settings.WindowOverlap); + int minFreqBin = config.MinFreqBin; + int maxFreqBin = config.MaxFreqBin; + int numFreqBand = config.NumFreqBand; + int patchWidth = + (maxFreqBin - minFreqBin + 1) / numFreqBand; + int patchHeight = config.PatchHeight; + int numRandomPatches = config.NumRandomPatches; + + // Define variable number of "randomPatch" lists based on "numFreqBand" + Dictionary> randomPatchLists = new Dictionary>(); + Dictionary> sequentialPatchLists = new Dictionary>(); + for (int i = 0; i < numFreqBand; i++) + { + randomPatchLists.Add(string.Format("randomPatch{0}", i.ToString()), new List()); + sequentialPatchLists.Add(string.Format("sequentialPatch{0}", i.ToString()), new List()); + } + + List randomPatches = new List(); + List positivePatches = new List(); + double[,] inputMatrix; + List recordings = new List(); + + foreach (string filePath in Directory.GetFiles(inputPath, "*.wav")) + { + FileInfo fileInfo = filePath.ToFileInfo(); + + // process the wav file if it is not empty + if (fileInfo.Length != 0) + { + var recording = new AudioRecording(filePath); + settings.SourceFileName = recording.BaseName; + + if (config.DoSegmentation) + { + recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep); + } + else + { + recordings = new List<AudioRecording> { recording }; // fresh list per file, so index i below only refers to the current file + } + + for (int i = 0; i < recordings.Count; i++) + { + var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[i].WavReader); + + var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram); + + if (config.DoNoiseReduction) + { + decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data); + } + + // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins + if (minFreqBin != 1 || maxFreqBin != finalBinCount) + { + inputMatrix = +
PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin); + } + else + { + inputMatrix = decibelSpectrogram.Data; + } + + // creating matrices from different freq bands of the source spectrogram + List allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); + + // check whether the file has any positive frame + List positiveFrameNumbers = new List(); + foreach (var entry in info) + { + // check whether the file and the current second (i) has positive frame + if ((fileInfo.Name == entry.Key.Item1) && (i == entry.Key.Item2)) + { + // make a list of frame numbers + for (int j = entry.Value[0]; j <= entry.Value[1]; j++) + { + positiveFrameNumbers.Add(j); + } + } + } + + // making two matrices, one from positive frames and one from negative frames. + List allPositiveFramesSubmatrices = new List(); + List allNegativeFramesSubmatrices = new List(); + List negativeFrameNumbers = new List(); + + for (int j = 1; j <= 24; j++) + { + bool flag = false; + foreach (var number in positiveFrameNumbers) + { + if (j == number) + { + flag = true; + break; + } + } + + // if flag is false, it means that the frame does not contain a part of bird call and should be added + // to the negativeFrameNumbers list. 
+ if (!flag) + { + negativeFrameNumbers.Add(j); + } + } + + if (positiveFrameNumbers.ToArray().Length != 0) + { + foreach (var submatrix in allSubmatrices) + { + List positiveFrames = new List(); + foreach (var number in positiveFrameNumbers) + { + positiveFrames.Add(submatrix.ToJagged()[number - 1]); + } + + allPositiveFramesSubmatrices.Add(positiveFrames.ToArray().ToMatrix()); + + List negativeFrames = new List(); + foreach (var number in negativeFrameNumbers) + { + negativeFrames.Add(submatrix.ToJagged()[number - 1]); + } + + allNegativeFramesSubmatrices.Add(negativeFrames.ToArray().ToMatrix()); + } + } + else + { + allNegativeFramesSubmatrices = allSubmatrices; + } + + // selecting random patches from each freq band matrix and add them to the corresponding patch list + int count = 0; + + while (count < allNegativeFramesSubmatrices.Count) + { + // select random patches from those segments that do not contain the call of interest + if (allPositiveFramesSubmatrices.Count != 0) + { + // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling + double[,] downsampledPositiveMatrix = MaxPooling(allPositiveFramesSubmatrices.ToArray()[count], config.MaxPoolingFactor); + int rows = downsampledPositiveMatrix.GetLength(0); + int columns = downsampledPositiveMatrix.GetLength(1); + sequentialPatchLists[$"sequentialPatch{count.ToString()}"].Add( + PatchSampling.GetPatches(downsampledPositiveMatrix, patchWidth, patchHeight, + (rows / patchHeight) * (columns / patchWidth), + PatchSampling.SamplingMethod.Sequential).ToMatrix()); + } + else + { + // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling + double[,] downsampledNegativeMatrix = MaxPooling(allNegativeFramesSubmatrices.ToArray()[count], config.MaxPoolingFactor); + randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling + .GetPatches(downsampledNegativeMatrix, patchWidth, patchHeight, numRandomPatches, +
PatchSampling.SamplingMethod.Random).ToMatrix()); + } + + /* + We can use this block of code instead of line 384 to 389, if we want to select random patches from negative frames of the segments with call of interest + // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling + double[,] downsampledNegativeMatrix = MaxPooling(allNegativeFramesSubmatrices.ToArray()[count], config.MaxPoolingFactor); + if (downsampledNegativeMatrix.GetLength(0) < numRandomPatches) + { + int numR = downsampledNegativeMatrix.GetLength(0); + int numC = downsampledNegativeMatrix.GetLength(1); + randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling + .GetPatches(downsampledNegativeMatrix, patchWidth, patchHeight, + (numR / patchHeight) * (numC / patchWidth), + PatchSampling.SamplingMethod.Sequential).ToMatrix()); + } + else + { + randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling + .GetPatches(downsampledNegativeMatrix, patchWidth, patchHeight, numRandomPatches, + PatchSampling.SamplingMethod.Random).ToMatrix()); + } + */ + + count++; + } + } + } + } + + foreach (string key in sequentialPatchLists.Keys) + { + positivePatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(sequentialPatchLists[key])); + } + + foreach (string key in randomPatchLists.Keys) + { + randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key])); + } + + // convert list of random patches matrices to one matrix + int numClusters = + config.NumClusters - 1; + + List semisupervisedClusteringOutput = new List(); + List unsupervisedClusteringOutput = new List(); + List supervisedClusteringOutput = new List(); + + // clustering of random patches + for (int i = 0; i < randomPatches.Count; i++) + { + double[,] patchMatrix = randomPatches[i]; + + // Apply PCA Whitening + var whitenedSpectrogram = PcaWhitening.Whitening(config.DoWhitening, patchMatrix); + + // Do k-means clustering + var clusteringOutput = 
KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numClusters); + unsupervisedClusteringOutput.Add(clusteringOutput); + } + + // build one cluster out of positive frames + for (int i = 0; i < positivePatches.Count; i++) + { + double[,] patchMatrix = positivePatches[i]; + + // Apply PCA Whitening + var whitenedSpectrogram = PcaWhitening.Whitening(config.DoWhitening, patchMatrix); + + // Do k-means clustering + // build one cluster from positive patches + var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, 1); + supervisedClusteringOutput.Add(clusteringOutput); + } + + // merge the output of two clustering obtained from supervised and unsupervised approaches + var positiveClusterId = config.NumClusters - 1; + List positiveCentroids = new List(); + List positiveClusterSize = new List(); + + foreach (var output in supervisedClusteringOutput) + { + positiveCentroids.Add(output.ClusterIdCentroid.Values.ToArray()); + positiveClusterSize.Add(output.ClusterIdSize.Values.ToArray()); + } + + semisupervisedClusteringOutput = unsupervisedClusteringOutput; + + for (int i = 0; i < semisupervisedClusteringOutput.Count; i++) + { + semisupervisedClusteringOutput[i].ClusterIdCentroid.Add(positiveClusterId, positiveCentroids[i][0]); + semisupervisedClusteringOutput[i].ClusterIdSize.Add(positiveClusterId, positiveClusterSize[i][0]); + } + + return semisupervisedClusteringOutput; + } + } +} diff --git a/src/AudioAnalysisTools/DSP/FeatureLearningSettings.cs b/src/AudioAnalysisTools/DSP/FeatureLearningSettings.cs new file mode 100644 index 000000000..adee42b7e --- /dev/null +++ b/src/AudioAnalysisTools/DSP/FeatureLearningSettings.cs @@ -0,0 +1,83 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). 
+// + +namespace AudioAnalysisTools.DSP +{ + using Acoustics.Shared.ConfigFile; + + public class FeatureLearningSettings : Config + { + public const FreqScaleType DefaultFrequencyScaleType = FreqScaleType.Mel; + + //public const int DefaultHertzInterval = 1000; + + public const int DefaultFrameSize = 1024; + + public const int DefaultFinalBinCount = 128; + + public const int DefaultMinFreqBin = 1; + + public const int DefaultMaxFreqBin = DefaultFinalBinCount; + + public const int DefaultNumFreqBand = 1; + + //public const int DefaultPatchWidth = (DefaultMaxFreqBin - DefaultMinFreqBin + 1) / DefaultNumFreqBand; + + public const int DefaultPatchHeight = 1; + + public const int DefaultFrameWindowLength = 1; + + public const int DefaultStepSize = 1; + + public const int DefaultNumRandomPatches = 4; + + public const int DefaultNumClusters = 256; + + public const bool DefaultDoNoiseReduction = true; + + public const bool DefaultDoWhitening = true; + + public const int DefaultMaxPoolingFactor = 1; + + public const bool DefaultDoSegmentation = true; + + public const double DefaultSubsegmentDurationInSeconds = 1.0; + + public FreqScaleType FrequencyScaleType { get; set; } = DefaultFrequencyScaleType; + + //public int HertzInterval { get; set; } = DefaultHertzInterval; + + public int FrameSize { get; set; } = DefaultFrameSize; + + public int FinalBinCount { get; set; } = DefaultFinalBinCount; + + public int MinFreqBin { get; set; } = DefaultMinFreqBin; + + public int MaxFreqBin { get; set; } = DefaultMaxFreqBin; + + public int NumFreqBand { get; set; } = DefaultNumFreqBand; + + //public int PatchWidth { get; set; } = DefaultPatchWidth; + + public int PatchHeight { get; set; } = DefaultPatchHeight; + + public int FrameWindowLength { get; set; } = DefaultFrameWindowLength; + + public int StepSize { get; set; } = DefaultStepSize; + + public int NumRandomPatches { get; set; } = DefaultNumRandomPatches; + + public int NumClusters { get; set; } = DefaultNumClusters; + + public 
bool DoNoiseReduction { get; set; } = DefaultDoNoiseReduction; + + public bool DoWhitening { get; set; } = DefaultDoWhitening; + + public int MaxPoolingFactor { get; set; } = DefaultMaxPoolingFactor; + + public bool DoSegmentation { get; set; } = DefaultDoSegmentation; + + public double SubsegmentDurationInSeconds { get; set; } = DefaultSubsegmentDurationInSeconds; + } +} diff --git a/src/AudioAnalysisTools/DSP/KmeansClustering.cs b/src/AudioAnalysisTools/DSP/KmeansClustering.cs index 24808d2ba..decd2ac30 100644 --- a/src/AudioAnalysisTools/DSP/KmeansClustering.cs +++ b/src/AudioAnalysisTools/DSP/KmeansClustering.cs @@ -6,17 +6,10 @@ namespace AudioAnalysisTools.DSP { using System; using System.Collections.Generic; - using System.Drawing; - using System.IO; using System.Linq; using Accord.MachineLearning; using Accord.Math; using Accord.Math.Distances; - using Accord.Statistics.Kernels; - using Acoustics.Shared; - using Acoustics.Shared.Csv; - using CsvHelper; - using Zio; public static class KmeansClustering { @@ -29,7 +22,7 @@ public class Output public KMeansClusterCollection Clusters { get; set; } } - public static Output Clustering(double[,] patches, int numberOfClusters, string pathToCentroidFile) + public static Output Clustering(double[,] patches, int numberOfClusters) { // "Generator.Seed" sets a random seed for the framework's main internal number generator, which // gets a reference to the random number generator used internally by the Accord.NET classes and methods. 
@@ -55,8 +48,6 @@ public static Output Clustering(double[,] patches, int numberOfClusters, string clusterIdCentroid.Add(clust.Index, clust.Centroid); } - Csv.WriteToCsv(pathToCentroidFile.ToFileInfo(), clusterIdCentroid); - var output = new Output() { ClusterIdCentroid = clusterIdCentroid, diff --git a/src/AudioAnalysisTools/DSP/NoiseProfile.cs b/src/AudioAnalysisTools/DSP/NoiseProfile.cs index 543126262..b6f1b2e7d 100644 --- a/src/AudioAnalysisTools/DSP/NoiseProfile.cs +++ b/src/AudioAnalysisTools/DSP/NoiseProfile.cs @@ -116,6 +116,8 @@ public static NoiseProfile CalculateMeanNoiseProfile(double[,] matrix) /// the spectrogram with origin top-left public static NoiseProfile CalculateMedianNoiseProfile(double[,] matrix) { + return CalculatePercentileNoiseProfile(matrix, 50); + /* int rowCount = matrix.GetLength(0); int colCount = matrix.GetLength(1); double[] noiseMedian = new double[colCount]; @@ -131,6 +133,36 @@ public static NoiseProfile CalculateMedianNoiseProfile(double[,] matrix) maxsOfBins[col] = freqBin.Max(); } + var profile = new NoiseProfile() + { + NoiseMedian = noiseMedian, + NoiseSd = null, + NoiseThresholds = noiseMedian, + MinDb = minsOfBins, + MaxDb = maxsOfBins, + }; + return profile; + */ + + } + + public static NoiseProfile CalculatePercentileNoiseProfile(double[,] matrix, int percentile) + { + int rowCount = matrix.GetLength(0); + int colCount = matrix.GetLength(1); + double[] noiseMedian = new double[colCount]; + double[] minsOfBins = new double[colCount]; + double[] maxsOfBins = new double[colCount]; + + for (int col = 0; col < colCount; col++) + { + double[] freqBin = MatrixTools.GetColumn(matrix, col); + Array.Sort(freqBin); + noiseMedian[col] = freqBin[Math.Min(rowCount * percentile / 100, rowCount - 1)]; // clamp: percentile = 100 must select the last (largest) sorted value, not index past the end + minsOfBins[col] = freqBin.Min(); + maxsOfBins[col] = freqBin.Max(); + } + var profile = new NoiseProfile() { NoiseMedian = noiseMedian, diff --git a/src/AudioAnalysisTools/DSP/PatchSampling.cs b/src/AudioAnalysisTools/DSP/PatchSampling.cs index 
cfe05b028..5c1eb7d7e 100644 --- a/src/AudioAnalysisTools/DSP/PatchSampling.cs +++ b/src/AudioAnalysisTools/DSP/PatchSampling.cs @@ -6,8 +6,10 @@ namespace AudioAnalysisTools.DSP { using System; using System.Collections.Generic; + using System.Linq; using Accord.Math; using TowseyLibrary; + using WavTools; public static class PatchSampling { @@ -189,6 +191,28 @@ public static List GetFreqBandMatrices(double[,] matrix, int numberOf return allSubmatrices; } + /// + /// outputs a matrix with arbitrary minimum and maximum frequency bins. + /// + public static double[,] GetArbitraryFreqBandMatrix(double[,] matrix, int minFreqBin, int maxFreqBin) + { + double[,] outputMatrix = new double[matrix.GetLength(0), maxFreqBin - minFreqBin + 1]; + + int minColumnIndex = minFreqBin - 1; + int maxColumnIndex = maxFreqBin - 1; + + // copying a part of the original matrix with pre-defined boundaries to Y axis (freq bins) to a new matrix + for (int col = minColumnIndex; col <= maxColumnIndex; col++) + { + for (int row = 0; row < matrix.GetLength(0); row++) + { + outputMatrix[row, col - minColumnIndex] = matrix[row, col]; + } + } + + return outputMatrix; + } + /// /// concatenate submatrices column-wise into one matrix, i.e., the number of rows for the output matrix /// is equal to the number of rows of each of the frequency band matrices. 
@@ -202,7 +226,7 @@ public static List GetFreqBandMatrices(double[,] matrix, int numberOf int count = 0; while (count < submatrices.Count) { - DoubleSquareArrayExtensions.AddToArray(matrix, submatrices[count], DoubleSquareArrayExtensions.MergingDirection.Column, submatrices[count].GetLength(1) * count); + matrix.AddToArray(submatrices[count], DoubleSquareArrayExtensions.MergingDirection.Column, submatrices[count].GetLength(1) * count); count++; } @@ -238,21 +262,25 @@ public static List GetFreqBandMatrices(double[,] matrix, int numberOf } /// - /// convert a list of patch matrices to one matrix + /// convert a list of patch matrices to one matrix by row + /// patch matrices can have different row numbers but must have the same column number /// public static double[,] ListOf2DArrayToOne2DArray(List listOfPatchMatrices) { - int numberOfPatches = listOfPatchMatrices[0].GetLength(0); - double[,] allPatchesMatrix = new double[listOfPatchMatrices.Count * numberOfPatches, listOfPatchMatrices[0].GetLength(1)]; + int sumNumberOfPatches = 0; + foreach (var matrix in listOfPatchMatrices) + { + sumNumberOfPatches = matrix.GetLength(0) + sumNumberOfPatches; + } + + double[,] allPatchesMatrix = new double[sumNumberOfPatches, listOfPatchMatrices[0].GetLength(1)]; + int start = 0; + for (int i = 0; i < listOfPatchMatrices.Count; i++) { var m = listOfPatchMatrices[i]; - if (m.GetLength(0) != numberOfPatches) - { - throw new ArgumentException("All arrays must be the same length"); - } - - DoubleSquareArrayExtensions.AddToArray(allPatchesMatrix, m, DoubleSquareArrayExtensions.MergingDirection.Row, i * m.GetLength(0)); + allPatchesMatrix.AddToArray(m, DoubleSquareArrayExtensions.MergingDirection.Row, start); + start = m.GetLength(0) + start; } return allPatchesMatrix; @@ -270,7 +298,6 @@ public static List GetFreqBandMatrices(double[,] matrix, int numberOf int minY = matrix.GetLength(1); // copying the original matrix to a new matrix (row by row) - for (int i = 0; i < minX; ++i) { 
Array.Copy(matrix, i * matrix.GetLength(1), newMatrix, i * matrix.GetLength(1), minY); @@ -326,21 +353,35 @@ private static List GetSequentialPatches(double[,] matrix, int patchWi /// private static List GetRandomPatches(double[,] matrix, int patchWidth, int patchHeight, int numberOfPatches) { + // Note: to make the method more flexible in terms of selecting a random patch with any height and width, + // first a random number generator is defined for both patchHeight and patchWidth. + // However, the possibility of selecting duplicates especially when selecting too many random numbers from + // a range (e.g., 1000 out of 1440) is high with a random generator. + // Since we are mostly interested in full-band patches, i.e., patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand, + // it is important to select non-duplicate patchHeights. Hence, instead of a random generator for patchHeight, + // a better solution is to make a sequence of numbers to be selected, shuffle them, and + // finally select the first n (number of required patches) numbers. 
+ + int rows = matrix.GetLength(0); + int columns = matrix.GetLength(1); + int seed = 100; Random randomNumber = new Random(seed); + + // not sure whether it is better to use new Guid() instead of randomNumber.Next() + var randomRowNumbers = Enumerable.Range(0, rows - patchHeight).OrderBy(x => randomNumber.Next()).Take(numberOfPatches).ToList(); List patches = new List(); - int rows = matrix.GetLength(0); - int columns = matrix.GetLength(1); - for (int i = 0; i < numberOfPatches; i++) + for (int i = 0; i < randomRowNumbers.Count; i++) { // selecting a random number from the height of the matrix - int rowRandomNumber = randomNumber.Next(0, rows - patchHeight); + //int rowRandomNumber = randomNumber.Next(0, rows - patchHeight); // selecting a random number from the width of the matrix int columnRandomNumber = randomNumber.Next(0, columns - patchWidth); - double[,] submatrix = MatrixTools.Submatrix(matrix, rowRandomNumber, columnRandomNumber, - rowRandomNumber + patchHeight - 1, columnRandomNumber + patchWidth - 1); + + double[,] submatrix = MatrixTools.Submatrix(matrix, randomRowNumbers[i], columnRandomNumber, + randomRowNumbers[i] + patchHeight - 1, columnRandomNumber + patchWidth - 1); // convert a matrix to a vector by concatenating columns and // store it to the array of vectors @@ -401,5 +442,33 @@ private static List GetOverlappedRandomPatches(double[,] matrix, int p return patches; } + + /// + /// cut audio to subsegments of desired length. 
+ /// return list of subsegments + /// + public static List GetSubsegmentsSamples(AudioRecording recording, double subsegmentDurationInSeconds, double frameStep) + { + List subsegments = new List(); + + int sampleRate = recording.WavReader.SampleRate; + var segmentDuration = recording.WavReader.Time.TotalSeconds; + int segmentSampleCount = (int)(segmentDuration * sampleRate); + int subsegmentSampleCount = (int)(subsegmentDurationInSeconds * sampleRate); + double subsegmentFrameCount = subsegmentSampleCount / (double)frameStep; + subsegmentFrameCount = (int)subsegmentFrameCount; + subsegmentSampleCount = ((int)(subsegmentFrameCount * frameStep) < subsegmentSampleCount) ? subsegmentSampleCount : (int)(subsegmentFrameCount * frameStep); + + for (int i = 0; i < (int)(segmentSampleCount / subsegmentSampleCount); i++) + { + AudioRecording subsegmentRecording = recording; + double[] subsamples = DataTools.Subarray(recording.WavReader.Samples, i * subsegmentSampleCount, subsegmentSampleCount); + var wr = new Acoustics.Tools.Wav.WavReader(subsamples, 1, 16, sampleRate); + subsegmentRecording = new AudioRecording(wr); + subsegments.Add(subsegmentRecording); + } + + return subsegments; + } } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/DSP/PcaWhitening.cs b/src/AudioAnalysisTools/DSP/PcaWhitening.cs index 7732f83a0..488330851 100644 --- a/src/AudioAnalysisTools/DSP/PcaWhitening.cs +++ b/src/AudioAnalysisTools/DSP/PcaWhitening.cs @@ -28,7 +28,7 @@ public class Output public int Components { get; set; } } - public static Output Whitening(double[,] matrix) + public static Output Whitening(bool doWhitening, double[,] matrix) { if (matrix == null) { @@ -43,7 +43,7 @@ public static Output Whitening(double[,] matrix) { // the "Center" method only subtracts the mean. 
Method = PrincipalComponentMethod.Center, - Whiten = true, + Whiten = doWhitening, }; pca.Learn(jaggedArray); @@ -165,23 +165,20 @@ public static Output Whitening(double[,] matrix) } /// - /// Median Noise Reduction + /// 10-percentile Noise Reduction /// public static double[,] NoiseReduction(double[,] matrix) { double[,] nrm = matrix; - // calculate modal noise profile - // NoiseProfile profile = NoiseProfile.CalculateModalNoiseProfile(matrix, sdCount: 0.0); - NoiseProfile profile = NoiseProfile.CalculateMedianNoiseProfile(matrix); + // calculate 10-percentile noise profile + NoiseProfile profile = NoiseProfile.CalculatePercentileNoiseProfile(matrix, 10); // smooth the noise profile double[] smoothedProfile = DataTools.filterMovingAverage(profile.NoiseThresholds, width: 7); nrm = SNR.TruncateBgNoiseFromSpectrogram(nrm, smoothedProfile); - // nrm = SNR.NoiseReduce_Standard(nrm, smoothedProfile, nhBackgroundThreshold: 2.0); - return nrm; } } diff --git a/src/AudioAnalysisTools/DSP/PowerSpectralDensity.cs b/src/AudioAnalysisTools/DSP/PowerSpectralDensity.cs new file mode 100644 index 000000000..889391fef --- /dev/null +++ b/src/AudioAnalysisTools/DSP/PowerSpectralDensity.cs @@ -0,0 +1,55 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace AudioAnalysisTools.DSP +{ + + using System.Linq; + using TowseyLibrary; + + public static class PowerSpectralDensity + { + /// + /// Square the FFT coefficients >> this gives an energy spectrogram. + /// MatrixTools.SquareValues is doing the same! 
+ /// + public static double[,] GetEnergyValues(double[,] fftCoefficients) + { + double[,] energySpectrogram = new double[fftCoefficients.GetLength(0), fftCoefficients.GetLength(1)]; + for (int i = 0; i < fftCoefficients.GetLength(0); i++) + { + for (int j = 0; j < fftCoefficients.GetLength(1); j++) + { + energySpectrogram[i, j] += fftCoefficients[i, j] * fftCoefficients[i, j]; + } + } + + return energySpectrogram; + } + + /// + /// Take average of the energy values in each frequency bin to obtain power spectrum or PSD. + /// SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram is doing the same! + /// + public static double[] GetPowerSpectrum(double[,] energySpectrogram) + { + double[] powerSpectrum = new double[energySpectrogram.GetLength(1)]; + for (int j = 0; j < energySpectrogram.GetLength(1); j++) + { + /* + double sum = 0; + for (int i = 0; i < energySpectrogram.GetLength(0); i++) + { + sum += energySpectrogram[i, j]; + } + powerSpectrum[j] = sum / energySpectrogram.GetLength(0); + */ + + powerSpectrum[j] = MatrixTools.GetColumn(energySpectrogram, j).Average(); + } + + return powerSpectrum; + } + } +} diff --git a/src/AudioAnalysisTools/Indices/IndexCalculate.cs b/src/AudioAnalysisTools/Indices/IndexCalculate.cs index 6f3d65d61..7b330de07 100644 --- a/src/AudioAnalysisTools/Indices/IndexCalculate.cs +++ b/src/AudioAnalysisTools/Indices/IndexCalculate.cs @@ -398,7 +398,7 @@ public static IndexCalculateResult Analysis( deciBelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(deciBelSpectrogram, nhThreshold: 2.0); // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM - spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromSpectrogram(deciBelSpectrogram); + spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(deciBelSpectrogram); // iv: CALCULATE SPECTRAL COVER. // NOTE: at this point, decibelSpectrogram is noise reduced. 
All values >= 0.0 diff --git a/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSpectrogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSpectrogram.cs new file mode 100644 index 000000000..d217f9b15 --- /dev/null +++ b/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSpectrogram.cs @@ -0,0 +1,70 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace AudioAnalysisTools.StandardSpectrograms +{ + using System; + using Acoustics.Tools.Wav; + using DSP; + using WavTools; + + /// + /// This class is designed to produce a full-bandwidth amplitude spectrogram + /// + public class AmplitudeSpectrogram + { + /// + /// Initializes a new instance of the class. + /// + public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav) + { + this.Configuration = config; + this.Attributes = new SpectrogramAttributes(); + + double minDuration = 1.0; + if (wav.Time.TotalSeconds < minDuration) + { + LoggedConsole.WriteLine("Signal must be at least {0} seconds long to produce a sonogram!", minDuration); + return; // NOTE: for too-short signals the attributes and the Data matrix assigned below are left unset + } + + //set attributes for the current recording and spectrogram type + this.Attributes.SampleRate = wav.SampleRate; + this.Attributes.Duration = wav.Time; + this.Attributes.NyquistFrequency = wav.SampleRate / 2; + this.Attributes.MaxAmplitude = wav.CalculateMaximumAmplitude(); + this.Attributes.FrameDuration = TimeSpan.FromSeconds(this.Configuration.WindowSize / (double)wav.SampleRate); + + var recording = new AudioRecording(wav); + var fftdata = DSP_Frames.ExtractEnvelopeAndFfts( + recording, + config.WindowSize, + config.WindowOverlap, + this.Configuration.WindowFunction); + + // now recover required data + //epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value. 
+ this.Attributes.Epsilon = fftdata.Epsilon; + this.Attributes.WindowPower = fftdata.WindowPower; + this.Attributes.FrameCount = fftdata.FrameCount; + this.Data = fftdata.AmplitudeSpectrogram; + + // IF REQUIRED CONVERT TO MEL SCALE + if (this.Configuration.DoMelScale) + { + // this mel scale conversion uses the "Greg integral" ! + this.Data = MFCCStuff.MelFilterBank(this.Data, this.Configuration.MelBinCount, this.Attributes.NyquistFrequency, 0, this.Attributes.NyquistFrequency); + } + } + + public SpectrogramSettings Configuration { get; set; } + + public SpectrogramAttributes Attributes { get; set; } + + /// + /// Gets or sets the spectrogram data matrix of doubles + /// + public double[,] Data { get; set; } + } +} diff --git a/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs b/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs index e1c22df78..0fc848440 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs @@ -72,6 +72,7 @@ public int NPointSmoothFFT } // Number of points to smooth FFT spectra public double epsilon { get; set; } //small value to prevent log of zero value + public bool DoPreemphasis { get; set; } public int? MinFreqBand { get; set; } diff --git a/src/AudioAnalysisTools/StandardSpectrograms/DecibelSpectrogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/DecibelSpectrogram.cs new file mode 100644 index 000000000..94969aaa8 --- /dev/null +++ b/src/AudioAnalysisTools/StandardSpectrograms/DecibelSpectrogram.cs @@ -0,0 +1,191 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). 
+// + +namespace AudioAnalysisTools.StandardSpectrograms +{ + using System.Drawing; + using System.Drawing.Imaging; + using Acoustics.Tools.Wav; + using DSP; + using TowseyLibrary; + + /// + /// There are two constructors + /// + public class DecibelSpectrogram + { + /// + /// Initializes a new instance of the class. + /// This constructor requires config and audio objects + /// It creates an amplitude spectrogram + /// + public DecibelSpectrogram(SpectrogramSettings config, WavReader wav) + : this(new AmplitudeSpectrogram(config, wav)) + { + } + + /// + /// Initializes a new instance of the class. + /// + public DecibelSpectrogram(AmplitudeSpectrogram amplitudeSpectrogram) + { + this.Configuration = amplitudeSpectrogram.Configuration; + this.Attributes = amplitudeSpectrogram.Attributes; + + // (ii) CONVERT AMPLITUDES TO DECIBELS + this.Data = MFCCStuff.DecibelSpectra(amplitudeSpectrogram.Data, this.Attributes.WindowPower, this.Attributes.SampleRate, this.Attributes.Epsilon); + + // (iii) NOISE REDUCTION + var tuple = SNR.NoiseReduce(this.Data, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter); + this.Data = tuple.Item1; // store data matrix + + if (this.SnrData != null) + { + this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile + } + } + + /* + /// + /// Initializes a new instance of the class. + /// use this constructor to cut out a portion of a spectrum from start to end time. 
+ /// + public DecibelSpectrogram(SpectrogramStandard sg, double startTime, double endTime) + { + int startFrame = (int)Math.Round(startTime * sg.FramesPerSecond); + int endFrame = (int)Math.Round(endTime * sg.FramesPerSecond); + int frameCount = endFrame - startFrame + 1; + + //sg.MaxAmplitude { get; private set; } + this.SampleRate = sg.SampleRate; + this.Duration = TimeSpan.FromSeconds(endTime - startTime); + this.FrameCount = frameCount; + + ////energy and dB per frame + this.DecibelsPerFrame = new double[frameCount]; // Normalised decibels per signal frame + for (int i = 0; i < frameCount; i++) + { + this.DecibelsPerFrame[i] = sg.DecibelsPerFrame[startFrame + i]; + } + + this.DecibelReference = sg.DecibelReference; // Used to NormaliseMatrixValues the dB values for MFCCs + this.DecibelsNormalised = new double[frameCount]; + for (int i = 0; i < frameCount; i++) + { + this.DecibelsNormalised[i] = sg.DecibelsNormalised[startFrame + i]; + } + + this.SigState = new int[frameCount]; //Integer coded signal state ie 0=non-vocalisation, 1=vocalisation, etc. 
+ for (int i = 0; i < frameCount; i++) + { + this.SigState[i] = sg.SigState[startFrame + i]; + } + + //the spectrogram data matrix + int featureCount = sg.Data.GetLength(1); + this.Data = new double[frameCount, featureCount]; + for (int i = 0; i < frameCount; i++) //each row of matrix is a frame + { + for (int j = 0; j < featureCount; j++) //each col of matrix is a feature + { + this.Data[i, j] = sg.Data[startFrame + i, j]; + } + } + }//end CONSTRUCTOR + */ + + public SpectrogramSettings Configuration { get; set; } + + public SpectrogramAttributes Attributes { get; set; } + + /// + /// Gets or sets the spectrogram data matrix of doubles + /// + public double[,] Data { get; set; } + + /// + /// Gets or sets instance of class SNR that stores info about signal energy and dB per frame + /// + public SNR SnrData { get; set; } + + public double MaxAmplitude { get; set; } + + // TODO + // Need to calculate the following for decibel spectrograms only + // ################################################################################################## + // TODO The following properties need to be calculated within the DecibelSpectrogram class. + + /// + /// Gets or sets decibels per signal frame + /// + public double[] DecibelsPerFrame { get; set; } + + public double[] DecibelsNormalised { get; set; } + + /// + /// Gets or sets decibel reference with which to NormaliseMatrixValues the dB values for MFCCs + /// + public double DecibelReference { get; protected set; } + + /// + /// Gets or sets integer coded signal state ie 0=non-vocalisation, 1=vocalisation, etc. 
+ /// + public int[] SigState { get; protected set; } + + // ################################# SPECTROGRAM METHODS BELOW HERE ############################### + + public void DrawSpectrogram(string path) + { + var image = DrawSpectrogramAnnotated(this.Data, this.Configuration, this.Attributes); + image.Save(path, ImageFormat.Png); + } + + // ################################# STATIC METHODS BELOW HERE ############################### + + public static Image DrawSpectrogramAnnotated(double[,] data, SpectrogramSettings config, SpectrogramAttributes attributes) + { + // normalise the data between 0 and 95th percentiles + int binCount = 100; + double min; + double max; + DataTools.MinMax(data, out min, out max); + double binWidth = (max - min) / binCount; + var histogram = Histogram.Histo(data, binCount, min, max, binWidth); + + int percentile = 95; + int binId = Histogram.GetPercentileBin(histogram, percentile); + double upperBound = min + (binId * binWidth); // convert the bin index back to a data value: bin i starts at min + (i * binWidth) + var normedMatrix = MatrixTools.NormaliseInZeroOne(data, min, upperBound); + + /* + int minPercentile = 5; + int minBinId = Histogram.GetPercentileBin(histogram, minPercentile); + double lowerBound = min + (minBinId * binWidth); + int maxPercentile = 95; + int maxBinId = Histogram.GetPercentileBin(histogram, maxPercentile); + double upperBound = min + (maxBinId * binWidth); + var normedMatrix = MatrixTools.NormaliseInZeroOne(data, lowerBound, upperBound); + */ + int nyquist = attributes.NyquistFrequency; + int frameSize = config.WindowSize; + + // assuming linear frequency scale + int finalBinCount = frameSize / 2; + var scaleType = FreqScaleType.Linear; + + // if doing mel scale then + if (config.DoMelScale) + { + finalBinCount = 256; //128; //512; // 256; // 100; // 40; // 200; // + scaleType = FreqScaleType.Mel; + } + + var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzGridInterval: 1000); + + var image = SpectrogramTools.GetImage(normedMatrix, nyquist, 
config.DoMelScale); + var annotatedImage = SpectrogramTools.GetImageFullyAnnotated(image, config.SourceFileName + ": " + scaleType.ToString(), freqScale.GridLineLocations, attributes.Duration); + return annotatedImage; + } + } +} diff --git a/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs index 7dda91e94..00bcc777b 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs @@ -1,34 +1,76 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// namespace AudioAnalysisTools.StandardSpectrograms { + using System.Collections.Generic; + using System.Drawing.Imaging; + using System.IO; using Acoustics.Tools.Wav; using TowseyLibrary; - public class EnergySpectrogram : BaseSonogram + /// + /// There are two CONSTRUCTORS + /// + public class EnergySpectrogram { - public EnergySpectrogram(SonogramConfig config, double[,] amplitudeSpectrogram) - : base(config, amplitudeSpectrogram) + /// + /// Initializes a new instance of the class. 
+ /// Use this constructor when you have config and audio objects + /// + public EnergySpectrogram(SpectrogramSettings config, WavReader wav) + : this(new AmplitudeSpectrogram(config, wav)) { - this.Configuration = config; - this.FrameCount = amplitudeSpectrogram.GetLength(0); - this.Data = amplitudeSpectrogram; - this.Make(this.Data); } - public EnergySpectrogram(AmplitudeSonogram sg) - : base(sg.Configuration) + public EnergySpectrogram(AmplitudeSpectrogram amplitudeSpectrogram) { - this.Data = MatrixTools.SquareValues(sg.Data); + this.Configuration = amplitudeSpectrogram.Configuration; + this.Attributes = amplitudeSpectrogram.Attributes; + + // CONVERT AMPLITUDES TO ENERGY + this.Data = MatrixTools.SquareValues(amplitudeSpectrogram.Data); + } + + public SpectrogramSettings Configuration { get; set; } + + public SpectrogramAttributes Attributes { get; set; } + + /// + /// Gets or sets the spectrogram data matrix of doubles + /// Note matrix orientation: ROWS = spectra; COLUMNS = frequency bins + /// + public double[,] Data { get; set; } + + public void GetPsd(string path) + { + var psd = MatrixTools.GetColumnAverages(this.Data); + + FileTools.WriteArray2File(psd, path + ".csv"); + GraphsAndCharts.DrawGraph(psd, "Title", new FileInfo(path)); + + //GraphsAndCharts.DrawGraph("Title", psd, width, height, 4 new FileInfo(path)); + //image.Save(path, ImageFormat.Png); } - public override void Make(double[,] amplitudeM) + public void DrawLogPsd(string path) { - this.Data = MatrixTools.SquareValues(amplitudeM); + var psd = MatrixTools.GetColumnAverages(this.Data); + var logPsd = DataTools.LogValues(psd); + FileTools.WriteArray2File(logPsd, path + ".csv"); + GraphsAndCharts.DrawGraph(logPsd, "log PSD", new FileInfo(path)); + + //GraphsAndCharts.DrawGraph("Title", psd, width, height, 4 new FileInfo(path)); + //image.Save(path, ImageFormat.Png); + } + + public double[] GetLogPsd() + { + var psd = MatrixTools.GetColumnAverages(this.Data); + var logPsd = 
DataTools.LogValues(psd); + return logPsd; + } } } diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramSettings.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramSettings.cs new file mode 100644 index 000000000..fd401ce48 --- /dev/null +++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramSettings.cs @@ -0,0 +1,83 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace AudioAnalysisTools.StandardSpectrograms +{ + using System; + using DSP; + using TowseyLibrary; + + public class SpectrogramSettings + { + /// + /// Gets or sets SourceFileName + /// Although this is not a setting, we need to store it right at the beginning. + /// + public string SourceFileName { get; set; } + + public int WindowSize { get; set; } = 512; + + public double WindowOverlap { get; set; } = 0.0; + + /// + /// Gets or sets exact frame step in samples - an alternative to overlap + /// Note that the default setting should be same as WindowSize i.e. no overlap. + /// + public int WindowStep { get; set; } = 512; + + public string WindowFunction { get; set; } = WindowFunctions.HAMMING.ToString(); + + public int SmoothingWindow { get; set; } = 3; + + public bool DoMelScale { get; set; } = false; + + /// + /// Gets or sets MelBinCount + /// This is used only if DoMelScale = true. 
+ /// + public int MelBinCount { get; set; } = 256; + + public NoiseReductionType NoiseReductionType { get; set; } = NoiseReductionType.None; + + public double NoiseReductionParameter { get; set; } = 0.0; + } + + public class SpectrogramAttributes + { + public int SampleRate { get; set; } + + public double MaxAmplitude { get; set; } + + public int NyquistFrequency { get; set; } + + public TimeSpan Duration { get; set; } + + public int FrameCount { get; set; } + + /// + /// Gets or sets duration of full frame or window in seconds + /// + public TimeSpan FrameDuration { get; set; } + + public double FramesPerSecond { get; set; } //= 1 / this.FrameStep; + + public double FBinWidth { get; set; } + + //this.FBinWidth = this.NyquistFrequency / (double) this.FreqBinCount; + public double Epsilon { get; set; } + + public double WindowPower { get; set; } + + /// + /// returns the duration of that part of frame not overlapped with following frame. + /// Duration is given in seconds. + /// Assumes window size and overlap fraction already known. 
+ /// + public static TimeSpan GetFrameOffset(int windowSize, double windowOverlap, int sampleRate) + { + int step = DSP_Frames.FrameStep(windowSize, windowOverlap); + return TimeSpan.FromSeconds(step / (double)sampleRate); + } + } +} diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs index 5959be7bd..9e6125fa5 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs @@ -12,17 +12,16 @@ namespace AudioAnalysisTools.StandardSpectrograms using System; using System.Collections.Generic; using System.Drawing; - using System.Drawing.Imaging; using System.IO; using Acoustics.Shared; - using AnalysisBase; using ColorMine.ColorSpaces; using DSP; + using LongDurationSpectrograms; using TowseyLibrary; - using WavTools; public static class SpectrogramTools { + /* /// /// /// @@ -54,29 +53,6 @@ public static Image GetImageFromAudioSegment(FileInfo fiAudio, FileInfo fiConfig } throw new NotSupportedException("Code intentionally broken because it is out of date and not used"); - - /* - Image image = null; - var settings = new AnalysisSettings - { - ConfigDict = config.GetDictionary(), - SegmentAudioFile = fiAudio, - ConfigFile = fiConfig, - SegmentImageFile = fiImage, - SegmentOutputDirectory = diOutputDir - }; - - // want to pass SampleRate of the original file. - settings.SampleRateOfOriginalAudioFile = int.Parse(settings.ConfigDict[AnalysisKeys.ResampleRate]); - - analyser.BeforeAnalyze(settings); - - var results = analyser.Analyze(settings, new SegmentSettings(se)); - - image = results.ImageFile == null ? 
null : Image.FromFile(results.ImageFile.FullName); - - analyser = null; - return image;*/ } else { @@ -185,7 +161,16 @@ public static BaseSonogram Audio2DecibelSonogram(FileInfo fiAudio, Dictionary + /// Used to normalise a spectrogram in 0,1 + /// + /// the spectrogram data + /// set all values above to 1.0 + /// set all values below to zero + /// used to de-emphisize the background + /// a normalised matrix of spectrogram data public static double[,] NormaliseSpectrogramMatrix(double[,] matrix, double truncateMin, double truncateMax, double backgroundFilterCoeff) { double[,] m = MatrixTools.NormaliseInZeroOne(matrix, truncateMin, truncateMax); @@ -193,41 +178,14 @@ public static BaseSonogram Audio2DecibelSonogram(FileInfo fiAudio, Dictionary /// /// - /// public static Image_MultiTrack Sonogram2MultiTrackImage(BaseSonogram sonogram, Dictionary configDict) { bool doHighlightSubband = false; - //check if doing a reduced sonogram - //int timeReductionFactor = 1; - //if (configDict.ContainsKey(Keys.TIME_REDUCTION_FACTOR)) - // timeReductionFactor = ConfigDictionary.GetInt(Keys.TIME_REDUCTION_FACTOR, configDict); - //int freqReductionFactor = 1; - //if (configDict.ContainsKey(Keys.FREQ_REDUCTION_FACTOR)) - // freqReductionFactor = ConfigDictionary.GetInt(Keys.FREQ_REDUCTION_FACTOR, configDict); - //if (!((timeReductionFactor == 1) && (freqReductionFactor == 1))) - //{ - // sonogram.Data = ReduceDimensionalityOfSpectrogram(sonogram.Data, timeReductionFactor, freqReductionFactor); - // return sonogram.GetImage(doHighlightSubband, add1kHzLines); - //} - - // (iii) NOISE REDUCTION - //bool doNoiseReduction = false; - //if (configDict.ContainsKey(AnalysisKeys.NoiseDoReduction)) - // doNoiseReduction = ConfigDictionary.GetBoolean(AnalysisKeys.NoiseDoReduction, configDict); - //if (doNoiseReduction) - //{ - // //LoggedConsole.WriteLine("PERFORMING NOISE REDUCTION"); - // double bgThreshold = 3.0; - // if (configDict.ContainsKey(AnalysisKeys.NoiseBgThreshold)) - // 
bgThreshold = ConfigDictionary.GetDouble(AnalysisKeys.NoiseBgThreshold, configDict); - // var tuple = SNR.NoiseReduce(sonogram.Data, NoiseReductionType.STANDARD, bgThreshold); - // sonogram.Data = tuple.Item1; // store data matrix - //} - //ADD time and frequency scales bool addScale = false; if (configDict.ContainsKey(AnalysisKeys.AddTimeScale)) @@ -261,10 +219,10 @@ public static Image_MultiTrack Sonogram2MultiTrackImage(BaseSonogram sonogram, D } return mti; + } + */ - //mti.AddTrack(ImageTrack.GetWavEnvelopeTrack(sonogram)); //add segmentation track - }//Sonogram2MultiTrackImage() - + /* public static Image Sonogram2Image(BaseSonogram sonogram, Dictionary configDict, double[,] hits, List scores, List predictedEvents, double eventThreshold) { Image_MultiTrack multiTrackImage = Sonogram2MultiTrackImage(sonogram, configDict); @@ -289,7 +247,17 @@ public static Image Sonogram2Image(BaseSonogram sonogram, Dictionary + /// This is experimental method to explore colour rendering of standard spectrograms + /// Used to convert a standard decibel spectrogram into a colour version using + /// a colour rendering for three separate properties. 
+ /// + /// the raw decibel spectrogram data - assigned to red channel + /// the noise reduced decibel spectrogram data - assigned to green channel + /// assigned to ridge colours + /// coloured-rendered spectrogram as image public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramData, double[,] nrSpectrogramData, byte[,] hits) { double truncateMin = -120.0; @@ -305,9 +273,11 @@ public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramD Bitmap image = new Bitmap(width, height); Color[] ridgeColours = { Color.Red, Color.DarkMagenta, Color.Black, Color.LightPink }; - for (int y = 0; y < height; y++) //over all freq bins + // for all freq bins + for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) //for pixels in the line + //for pixels in freq bin + for (int x = 0; x < width; x++) { // NormaliseMatrixValues and bound the value - use min bound, max and 255 image intensity range double dbValue = dbSpectrogramNorm[x, y]; @@ -350,6 +320,7 @@ public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramD // get colour for noise reduced portion // superimpose ridge detection + // Have experimented with a bunch of ideas if (hits[x, y] > 0) { //value = 0.60 + (nrSpectrogramNorm[x, y] * 0.40); @@ -362,7 +333,7 @@ public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramD image.SetPixel(x, height - y - 1, colour); } - }//end over all freq bins + } // freq bins //image.Save(@"C:\SensorNetworks\Output\Sonograms\TEST3.png", ImageFormat.Png); @@ -375,10 +346,10 @@ public static Image CreateFalseColourDecibelSpectrogram(double[,] dbSpectrogramD /// Also uses the spectral "hits" data for highlighting the spectrogram. /// ### IMPORTANT WARNING!!!! THIS METHOD ASSUMES THAT BOTH SPECTRAL MATRICES HAVE BEEN NORMALISED IN [0,1]. 
/// - /// - /// - /// - /// + /// the raw decibel spectrogram data - assigned to red channel + /// the noise reduced decibel spectrogram data - assigned to green channel + /// assigned to ridge colours + /// coloured-rendered spectrogram as image public static Image CreateFalseColourDecibelSpectrogramForZooming(double[,] dbSpectrogramNorm, double[,] nrSpectrogramNorm, byte[,] hits) { int width = dbSpectrogramNorm.GetLength(0); @@ -393,57 +364,42 @@ public static Image CreateFalseColourDecibelSpectrogramForZooming(double[,] dbSp //var csp = new CubeHelix("cyanscale"); - for (int y = 0; y < height; y++) //over all freq bins + //over all freq bins + for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) //for pixels in the line + //for pixels in the line + for (int x = 0; x < width; x++) + { + var colour = rsp.GetColorFromPallette(dbSpectrogramNorm[x, y]); + + if (nrSpectrogramNorm[x, y] > 0.15) { - var colour = rsp.GetColorFromPallette(dbSpectrogramNorm[x, y]); + // get colour for noise reduced portion + int colourId = cch.GetColorID(nrSpectrogramNorm[x, y]); - if (nrSpectrogramNorm[x, y] > 0.15) + // superimpose ridge detection + if (hits[x, y] > 0) { - // get colour for noise reduced portion - int colourId = cch.GetColorID(nrSpectrogramNorm[x, y]); - - // superimpose ridge detection - if (hits[x, y] > 0) - { - colourId += 20; - if (colourId > 255) - { - colourId = 255; - } - } - - colour = cch.GetColorFromPallette(colourId); + colourId += 20; + if (colourId > 255) + { + colourId = 255; } - - image.SetPixel(x, height - y - 1, colour); } - }//end over all freq bins - - return image; - } - public static Color[] GetCyanSpectrumPalette() - { - int count = 256 - 1; - var palette = new Color[256]; - for (int i = 0; i <= count; i++) - { - double value = i / (double)count; - int R = (int)Math.Round(value * value * value * count); - - //int G = i; - int B = i; - int G = (int)Math.Round(Math.Sqrt(value) * count); + colour = cch.GetColorFromPallette(colourId); + 
} - //int B = (int)Math.Round(value * value * count); - palette[i] = Color.FromArgb(255, R, G, B); - } + image.SetPixel(x, height - y - 1, colour); + } + } // freq bins - return palette; + return image; } + /// + /// Another experimental method to colour render spectrograms, this time amplitude spectrograms. + /// public static Image CreateFalseColourAmplitudeSpectrogram(double[,] spectrogramData, double[,] nrSpectrogramData, byte[,] hits) { double truncateMin = 0.0; @@ -454,14 +410,12 @@ public static Image CreateFalseColourAmplitudeSpectrogram(double[,] spectrogramD int width = spectrogramData.GetLength(0); int height = spectrogramData.GetLength(1); Bitmap image = new Bitmap(width, height); - Color colour; - Hsv myHsv; - Rgb myRgb; Color[] ridgeColours = { Color.Red, Color.Lime, Color.Blue, Color.Lime }; - for (int y = 0; y < height; y++) //over all freq bins + //over all freq bins + for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) //for pixels in the line + for (int x = 0; x < width; x++) { // NormaliseMatrixValues and bound the value - use min bound, max and 255 image intensity range double dbValue = spectrogramNorm[x, y]; @@ -478,48 +432,26 @@ public static Image CreateFalseColourAmplitudeSpectrogram(double[,] spectrogramD c1 = 255; } - colour = Color.FromArgb(c1, c1, c1); - - //if (nrSpectrogramNorm[x, y] > 0) - //{ - // // use HSV colour space - // int bottomColour = 30; // to avoid using the reds - // int topColour = 320; // to avoid using the magentas - // int hueRange = topColour - bottomColour; - // int hue = bottomColour + (int)Math.Floor(hueRange * nrSpectrogramNorm[x, y]); - - // double saturation = 1.0; - // //double saturation = 0.75 + (nrSpectrogramNorm[x, y] * 0.25); - // //double saturation = nrSpectrogramNorm[x, y] * 0.5; - // //double saturation = (1 - nrSpectrogramNorm[x, y]) * 0.5; - - // double value = 1.0; - // //double value = 0.60 + (nrSpectrogramNorm[x, y] * 0.40); - - // myHsv = new Hsv { H = hue, S = saturation, V = 
value }; - // myRgb = myHsv.To(); - // colour = Color.FromArgb((int)myRgb.R, (int)myRgb.G, (int)myRgb.B); - //} + var colour = Color.FromArgb(c1, c1, c1); // superimpose ridge detection if (hits[x, y] > 0) { - //value = 0.60 + (nrSpectrogramNorm[x, y] * 0.40); - //myHsv = new Hsv { H = 260, S = saturation, V = value }; - //myRgb = myHsv.To(); - //colour = Color.FromArgb((int)myRgb.R, (int)myRgb.G, (int)myRgb.B); colour = ridgeColours[hits[x, y] - 1]; } image.SetPixel(x, height - y - 1, colour); } - }//end over all freq bins - - //image.Save(@"C:\SensorNetworks\Output\Sonograms\TEST3.png", ImageFormat.Png); + } return image; } + /// + /// Method to make spectrogram with SOX + /// But the ConfigDictionary class is now obsolete. + /// The method should be deprecated at some point. + /// public static void MakeSonogramWithSox(FileInfo fiAudio, Dictionary configDict, FileInfo output) { var soxPath = new FileInfo(AppConfigHelper.SoxExe); @@ -568,7 +500,7 @@ public static void MakeSonogramWithSox(FileInfo fiAudio, Dictionary - public static double[] CalculateAvgSpectrumFromSpectrogram(double[,] spectrogram) + public static double[] CalculateAvgSpectrumFromEnergySpectrogram(double[,] spectrogram) { int frameCount = spectrogram.GetLength(0); int freqBinCount = spectrogram.GetLength(1); @@ -623,15 +555,14 @@ public static double[] CalculateAvgSpectrumFromSpectrogram(double[,] spectrogram /// /// Use this method to average a decibel spectrogram /// - public static double[] CalculateAvgDecibelSpectrumFromSpectrogram(double[,] spectrogram) + public static double[] CalculateAvgDecibelSpectrumFromDecibelSpectrogram(double[,] spectrogram) { - int frameCount = spectrogram.GetLength(0); int freqBinCount = spectrogram.GetLength(1); double[] avgSpectrum = new double[freqBinCount]; for (int j = 0; j < freqBinCount; j++) { var freqBin = MatrixTools.GetColumn(spectrogram, j); - double av = SpectrogramTools.AverageAnArrayOfDecibelValues(freqBin); + double av = 
AverageAnArrayOfDecibelValues(freqBin); avgSpectrum[j] = av; } @@ -660,6 +591,7 @@ public static double AverageAnArrayOfDecibelValues(double[] array) return dB; } + /* public static double[] CalculateSumSpectrumFromSpectrogram(double[,] spectrogram) { int frameCount = spectrogram.GetLength(0); @@ -680,6 +612,7 @@ public static double[] CalculateSumSpectrumFromSpectrogram(double[,] spectrogram return sumSpectrum; } + */ /// /// Returns AVERAGE POWER SPECTRUM (PSD) and VARIANCE OF POWER SPECTRUM. @@ -698,7 +631,7 @@ public static double[] CalculateSumSpectrumFromSpectrogram(double[,] spectrogram /// As well as calculating the av power spectrum, this method also returns a variance spectrum and a spectrum of the Coeff of Variation = var/mean. /// /// this is an amplitude spectrum. Must square values to get power - /// + /// three spectral indices public static Tuple CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(double[,] amplitudeSpectrogram) { int frameCount = amplitudeSpectrogram.GetLength(0); @@ -706,7 +639,9 @@ public static Tuple CalculateAvgSpectrumAndVarianc double[] avgSpectrum = new double[freqBinCount]; // for average of the spectral bins double[] varSpectrum = new double[freqBinCount]; // for variance of the spectral bins double[] covSpectrum = new double[freqBinCount]; // for coeff of variance of the spectral bins - for (int j = 0; j < freqBinCount; j++) // for all frequency bins + + // for all frequency bins + for (int j = 0; j < freqBinCount; j++) { var freqBin = new double[frameCount]; // set up an array to take all values in a freq bin i.e. column of matrix for (int r = 0; r < frameCount; r++) @@ -722,18 +657,19 @@ public static Tuple CalculateAvgSpectrumAndVarianc } return Tuple.Create(avgSpectrum, varSpectrum, covSpectrum); - } // CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram() + } /// - /// This method assumes P.D. Welch's method has been used to calculate a PSD. 
+ /// Calculates Stuart Gage's NDSI acoustic index from the Power Spectrum derived from a spectrogram. + /// This method assumes P.D. Welch's method has been used to calculate the PSD. /// See method above: CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram() /// /// power spectral density - /// - /// - /// - /// - /// + /// original sample rate of the recording. Only used to get nyquist + /// low ndsi bound + /// mid ndsi bound + /// top ndsi bound + /// ndsi public static double CalculateNdsi(double[] psd, int samplerate, int lowBound, int midBound, int topBound) { int nyquist = samplerate / 2; @@ -741,35 +677,35 @@ public static double CalculateNdsi(double[] psd, int samplerate, int lowBound, i double binWidth = nyquist / (double)binCount; // skip lower 1kHz bin; - int countOf1kHbin = (int)Math.Floor(lowBound / binWidth); - int countOf2kHbin = (int)Math.Floor(midBound / binWidth); - int countOf8kHbin = (int)Math.Floor(topBound / binWidth); + int countOf1KHbin = (int)Math.Floor(lowBound / binWidth); + int countOf2KHbin = (int)Math.Floor(midBound / binWidth); + int countOf8KHbin = (int)Math.Floor(topBound / binWidth); // error checking - required for marine recordings where SR=2000. - // all this is arbitrary hack to something working for marine recordings. Will not affect terrestrial recordings + // all this is arbitrary hack to get something working for marine recordings. 
Will not affect terrestrial recordings + if (countOf8KHbin >= binCount) { - countOf8kHbin = binCount - 2; + countOf8KHbin = binCount - 2; } - if (countOf2kHbin >= countOf8kHbin) + if (countOf2KHbin >= countOf8KHbin) { - countOf2kHbin = countOf8kHbin - 100; + countOf2KHbin = countOf8KHbin - 100; } - if (countOf1kHbin >= countOf2kHbin) + if (countOf1KHbin >= countOf2KHbin) { - countOf1kHbin = countOf2kHbin - 10; + countOf1KHbin = countOf2KHbin - 10; } double anthropoEnergy = 0.0; - for (int i = countOf1kHbin; i < countOf2kHbin; i++) + for (int i = countOf1KHbin; i < countOf2KHbin; i++) { anthropoEnergy += psd[i]; } double biophonyEnergy = 0.0; - for (int i = countOf2kHbin; i < countOf8kHbin; i++) + for (int i = countOf2KHbin; i < countOf8KHbin; i++) { biophonyEnergy += psd[i]; } @@ -785,7 +721,6 @@ public static Tuple HistogramOfSpectralPeaks(double[,] spectrogram { if (spectrogram == null) { - return null; throw new ArgumentNullException(nameof(spectrogram)); } @@ -824,6 +759,7 @@ public static Tuple HistogramOfSpectralPeaks(double[,] spectrogram return DataTools.Submatrix(m, 0, c1, m.GetLength(0) - 1, c2); } + /* /// /// Extracts an acoustic event from a sonogram given the location of a user defined rectangular marquee. /// NOTE: Nyquist value is used ONLY if using mel scale. @@ -849,6 +785,7 @@ public static Tuple HistogramOfSpectralPeaks(double[,] spectrogram AcousticEvent.Freq2BinIDs(doMelscale, minHz, maxHz, nyquist, binWidth, out c1, out c2); return DataTools.Submatrix(m, r1, c1, r2, c2); } + */ public static double[] ExtractModalNoiseSubband(double[] modalNoise, int minHz, int maxHz, bool doMelScale, int nyquist, double binWidth) { @@ -881,7 +818,7 @@ public static void DrawGridLinesOnImage(Bitmap bmp, TimeSpan startOffset, TimeSp { FrequencyScale.DrawFrequencyLinesOnImage(bmp, freqScale, includeLabels: true); - // we have stopped drawing temporal gridlines on these spectrograms. Create unnecessary clutter. 
+ // We have stopped drawing temporal gridlines on these spectrograms. Create unnecessary clutter. //DrawTimeLinesOnImage(bmp, startOffset, fullDuration, xAxisTicInterval); } @@ -908,5 +845,33 @@ public static void DrawTimeLinesOnImage(Bitmap bmp, TimeSpan startOffset, TimeSp // ####################################################################################################################################### // ### ABOVE METHODS DRAW TIME GRID LINES ON SPECTROGRAMS #################################################################################### // ####################################################################################################################################### + + + public static Image GetImageFullyAnnotated(Image image, string title, int[,] gridLineLocations, TimeSpan duration) + { + if (image == null) + { + throw new ArgumentNullException(nameof(image)); + } + + FrequencyScale.DrawFrequencyLinesOnImage((Bitmap)image, gridLineLocations, includeLabels: true); + + var titleBar = LDSpectrogramRGB.DrawTitleBarOfGrayScaleSpectrogram(title, image.Width); + var timeBmp = ImageTrack.DrawTimeTrack(duration, image.Width); + var list = new List { titleBar, timeBmp, image, timeBmp }; + var compositeImage = ImageTools.CombineImagesVertically(list); + return compositeImage; + } + + public static Image GetImage(double[,] data, int nyquist, bool DoMel) + { + int subBandMinHz = 1000; + int subBandMaxHz = 9000; + bool doHighlightSubband = false; + + int maxFrequency = nyquist; + var image = BaseSonogram.GetSonogramImage(data, nyquist, maxFrequency, DoMel, 1, doHighlightSubband, subBandMinHz, subBandMaxHz); + return image; + } } } diff --git a/src/TowseyLibrary/Histogram.cs b/src/TowseyLibrary/Histogram.cs index b38da37ba..5f0b31e22 100644 --- a/src/TowseyLibrary/Histogram.cs +++ b/src/TowseyLibrary/Histogram.cs @@ -147,9 +147,6 @@ public static int[] Histo(byte[,] data, out byte min, out byte max) /// /// HISTOGRAM from a matrix of double /// - /// - 
/// - /// public static int[] Histo(double[,] data, int binCount) { double min; @@ -158,7 +155,6 @@ public static int[] Histo(double[,] data, int binCount) double binWidth = (max - min) / binCount; //LoggedConsole.WriteLine("data min=" + min + " data max=" + max + " binwidth=" + binWidth); - return Histo(data, binCount, min, max, binWidth); } @@ -322,10 +318,6 @@ public static int[] Histo(int[] data, int binCount, out double binWidth, out int /// /// make histogram of integers where each bin has unit width /// - /// - /// - /// - /// public static int[] Histo(int[] data, out int min, out int max) { int length = data.Length; @@ -365,9 +357,6 @@ public static void GetHistogramOfWaveAmplitudes(double[] waveform, int window, o /// /// Returns the bin ID that coincides with the passed percentile /// - /// - /// - /// public static int GetPercentileBin(int[] histogram, int percentile) { if (percentile > 99) diff --git a/tests/Acoustics.Test/Acoustics.Test.csproj b/tests/Acoustics.Test/Acoustics.Test.csproj index 65e4d69de..028424605 100644 --- a/tests/Acoustics.Test/Acoustics.Test.csproj +++ b/tests/Acoustics.Test/Acoustics.Test.csproj @@ -1,4 +1,4 @@ - + diff --git a/tests/Acoustics.Test/AnalysisPrograms/Draw/Zooming/DrawZoomingTests.cs b/tests/Acoustics.Test/AnalysisPrograms/Draw/Zooming/DrawZoomingTests.cs index 38f353637..a95002e43 100644 --- a/tests/Acoustics.Test/AnalysisPrograms/Draw/Zooming/DrawZoomingTests.cs +++ b/tests/Acoustics.Test/AnalysisPrograms/Draw/Zooming/DrawZoomingTests.cs @@ -1,4 +1,4 @@ -// +// // All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). 
// diff --git a/tests/Acoustics.Test/AudioAnalysisTools/DSP/KmeansClusteringTests.cs b/tests/Acoustics.Test/AudioAnalysisTools/DSP/KmeansClusteringTests.cs index 97adb3f78..91fe7832f 100644 --- a/tests/Acoustics.Test/AudioAnalysisTools/DSP/KmeansClusteringTests.cs +++ b/tests/Acoustics.Test/AudioAnalysisTools/DSP/KmeansClusteringTests.cs @@ -26,7 +26,6 @@ public class KmeansClusteringTests private DirectoryInfo outputDirectory; [TestInitialize] - public void Setup() { this.outputDirectory = PathHelper.GetTempDir(); @@ -133,7 +132,7 @@ public void TestKmeansClustering() // Do k-means clustering string pathToClusterCsvFile = Path.Combine(outputDir.FullName, "ClusterCentroids" + i.ToString() + ".csv"); - var clusteringOutput = KmeansClustering.Clustering(patchMatrix, numberOfClusters, pathToClusterCsvFile); + var clusteringOutput = KmeansClustering.Clustering(patchMatrix, numberOfClusters); // sorting clusters based on size and output it to a csv file Dictionary clusterIdSize = clusteringOutput.ClusterIdSize; @@ -182,7 +181,7 @@ public void TestKmeansClustering() clusterImage.Save(outputClusteringImage); } - //+++++++++++++++++++++++++++++++++++++++++++++++++++++Processing and generating features for the target spectrogram + //+++++++++++++++++++++++++++++++++++++++++++Reconstructing a target spectrogram from sequential patches and the cluster centroids var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"); var recording2 = new AudioRecording(recording2Path); var sonogram2 = new SpectrogramStandard(sonoConfig, recording2.WavReader); diff --git a/tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTests.cs b/tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTests.cs index cfa460af7..861ab45e9 100644 --- a/tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTests.cs +++ b/tests/Acoustics.Test/AudioAnalysisTools/DSP/PcaWhiteningTests.cs @@ -68,7 +68,7 @@ public void PcaWhiteningDefault() sonogram.Data = dataMatrix; // DO PCA 
WHITENING - var whitenedSpectrogram = PcaWhitening.Whitening(sonogram.Data); + var whitenedSpectrogram = PcaWhitening.Whitening(true, sonogram.Data); // DO UNIT TESTING // check if the dimensions of the reverted spectrogram (second output of the pca whitening) is equal to the input matrix @@ -124,10 +124,10 @@ public void TestPcaWhitening() double[,] sequentialPatchMatrix = sequentialPatches.ToMatrix(); // DO PCA WHITENING - var whitenedSpectrogram = PcaWhitening.Whitening(sequentialPatchMatrix); + var whitenedSpectrogram = PcaWhitening.Whitening(true, sequentialPatchMatrix); // reconstructing the spectrogram from sequential patches and the projection matrix obtained from random patches - var projectionMatrix = whitenedSpectrogram.ProjectionMatrix;//whitenedSpectrogram.projectionMatrix; + var projectionMatrix = whitenedSpectrogram.ProjectionMatrix; var eigenVectors = whitenedSpectrogram.EigenVectors; var numComponents = whitenedSpectrogram.Components; double[,] reconstructedSpec = PcaWhitening.ReconstructSpectrogram(projectionMatrix, sequentialPatchMatrix, eigenVectors, numComponents); diff --git a/tests/Acoustics.Test/AudioAnalysisTools/DSP/UnsupervisedFeatureLearningTest.cs b/tests/Acoustics.Test/AudioAnalysisTools/DSP/UnsupervisedFeatureLearningTest.cs index 0a406516b..242892789 100644 --- a/tests/Acoustics.Test/AudioAnalysisTools/DSP/UnsupervisedFeatureLearningTest.cs +++ b/tests/Acoustics.Test/AudioAnalysisTools/DSP/UnsupervisedFeatureLearningTest.cs @@ -1,4 +1,4 @@ -// +// // All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). 
// @@ -33,13 +33,15 @@ public void TestFeatureLearning() { // var outputDir = this.outputDirectory; var resultDir = PathHelper.ResolveAssetPath("FeatureLearning"); - var folderPath = Path.Combine(resultDir, "random_audio_segments"); + var folderPath = Path.Combine(resultDir, "random_audio_segments"); // Liz + // PathHelper.ResolveAssetPath(@"C:\Users\kholghim\Mahnoosh\PcaWhitening\random_audio_segments\1192_1000"); // var resultDir = PathHelper.ResolveAssetPath(@"C:\Users\kholghim\Mahnoosh\PcaWhitening"); var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png"); var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png"); var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png"); var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png"); + // var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp"); // +++++++++++++++++++++++++++++++++++++++++++++++++patch sampling from 1000 random 1-min recordings from Gympie @@ -64,6 +66,7 @@ public void TestFeatureLearning() var sonoConfig = new SonogramConfig { WindowSize = frameSize, + // since each 24 frames duration is equal to 1 second WindowOverlap = 0.1028, DoMelScale = (scaleType == FreqScaleType.Mel) ? 
true : false, @@ -71,13 +74,42 @@ public void TestFeatureLearning() NoiseReductionType = NoiseReductionType.None, }; - int numFreqBand = 4; - int patchWidth = finalBinCount / numFreqBand; + /* + // testing + var recordingPath3 = PathHelper.ResolveAsset(folderPath, "SM304264_0+1_20160421_024539_46-47min.wav"); + var recording3 = new AudioRecording(recordingPath3); + var sonogram3 = new SpectrogramStandard(sonoConfig, recording3.WavReader); + + // DO DRAW SPECTROGRAM + var image4 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + image4.Save(outputMelImagePath, ImageFormat.Png); + + // Do RMS normalization + sonogram3.Data = SNR.RmsNormalization(sonogram3.Data); + var image5 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + image5.Save(outputNormMelImagePath, ImageFormat.Png); + + // NOISE REDUCTION + sonogram3.Data = PcaWhitening.NoiseReduction(sonogram3.Data); + var image6 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + image6.Save(outputNoiseReducedMelImagePath, ImageFormat.Png); + + //testing + */ + + // Define the minFreBin and MaxFreqBin to be able to work at arbitrary frequency bin bounds. + // The default value is minFreqBin = 1 and maxFreqBin = finalBinCount. + // To work with arbitrary frequency bin bounds we need to manually set these two parameters. 
+ int minFreqBin = 40; //1 + int maxFreqBin = 80; //finalBinCount; + int numFreqBand = 1; //4; + int patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand; // finalBinCount / numFreqBand; int patchHeight = 1; // 2; // 4; // 16; // 6; // Frame size - int numRandomPatches = 80; // 40; // 20; // 30; // 100; // 500; // + int numRandomPatches = 20; // 40; // 80; // 30; // 100; // 500; // + // int fileCount = Directory.GetFiles(folderPath, "*.wav").Length; - // Define variable number of "randomPatch" lists based on "noOfFreqBand" + // Define variable number of "randomPatch" lists based on "numFreqBand" Dictionary> randomPatchLists = new Dictionary>(); for (int i = 0; i < numFreqBand; i++) { @@ -96,13 +128,14 @@ public void TestFeatureLearning() } } */ + double[,] inputMatrix; foreach (string filePath in Directory.GetFiles(folderPath, "*.wav")) { - FileInfo f = filePath.ToFileInfo(); + FileInfo fileInfo = filePath.ToFileInfo(); // process the wav file if it is not empty - if (f.Length != 0) + if (fileInfo.Length != 0) { var recording = new AudioRecording(filePath); sonoConfig.SourceFName = recording.BaseName; @@ -116,14 +149,26 @@ public void TestFeatureLearning() // sonogram.Data = SNR.NoiseReduce_Median(sonogram.Data, nhBackgroundThreshold: 2.0); sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data); + // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins + if (minFreqBin != 1 || maxFreqBin != finalBinCount) + { + inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram.Data, minFreqBin, maxFreqBin); + } + else + { + inputMatrix = sonogram.Data; + } + // creating matrices from different freq bands of the source spectrogram - List allSubmatrices = PatchSampling.GetFreqBandMatrices(sonogram.Data, numFreqBand); + List allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); // Second: selecting random patches from each freq band matrix and add them to the corresponding patch list int count = 0; while 
(count < allSubmatrices.Count) { - randomPatchLists[string.Format("randomPatch{0}", count.ToString())].Add(PatchSampling.GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numRandomPatches, PatchSampling.SamplingMethod.Random).ToMatrix()); + randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling + .GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numRandomPatches, + PatchSampling.SamplingMethod.Random).ToMatrix()); count++; } } @@ -135,7 +180,7 @@ public void TestFeatureLearning() } // convert list of random patches matrices to one matrix - int numberOfClusters = 256; // 128; // 64; // 32; // 10; // 50; + int numberOfClusters = 50; //256; // 128; // 64; // 32; // 10; // List allBandsCentroids = new List(); List allClusteringOutput = new List(); @@ -144,13 +189,23 @@ public void TestFeatureLearning() double[,] patchMatrix = randomPatches[i]; // Apply PCA Whitening - var whitenedSpectrogram = PcaWhitening.Whitening(patchMatrix); + var whitenedSpectrogram = PcaWhitening.Whitening(true, patchMatrix); // Do k-means clustering - string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv"); - var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numberOfClusters, pathToClusterCsvFile); + var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numberOfClusters); + // var clusteringOutput = KmeansClustering.Clustering(patchMatrix, noOfClusters, pathToClusterCsvFile); + // writing centroids to a csv file + // note that Csv.WriteToCsv can't write data types like dictionary (problems with arrays) + // I converted the dictionary values to a matrix and used the Csv.WriteMatrixToCsv + // it might be a better way to do this + string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv"); + var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray(); + 
Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix()); + + //Csv.WriteToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids); + // sorting clusters based on size and output it to a csv file Dictionary clusterIdSize = clusteringOutput.ClusterIdSize; int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize); @@ -177,7 +232,8 @@ public void TestFeatureLearning() // convert each centroid to a matrix in order of cluster ID // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight); // OR: in order of cluster size - double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight); + double[,] cent = + MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight); // normalize each centroid double[,] normCent = DataTools.normalise(cent); @@ -197,16 +253,19 @@ public void TestFeatureLearning() var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix); clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone); + // clusterImage.Save(outputClusterImagePath, ImageFormat.Bmp); var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp"); + // Image bmp = ImageTools.ReadImage2Bitmap(filename); FrequencyScale.DrawFrequencyLinesOnImage((Bitmap)clusterImage, freqScale, includeLabels: false); clusterImage.Save(outputClusteringImage); } - //+++++++++++++++++++++++++++++++++++++++++++++++++++++Processing and generating features for the target spectrogram + //+++++++++++++++++++++++++++++++++++++++++++++++++++++Processing and generating features for the target recordings var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"); + // var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_353972_20160303_055854_60_0.wav"); // folder with 1000 files // var recording2Path = PathHelper.ResolveAsset(folderPath, 
"gympie_np_1192_353887_20151230_042625_60_0.wav"); // folder with 1000 files // var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_354744_20151018_053923_60_0.wav"); // folder with 100 files @@ -215,28 +274,42 @@ public void TestFeatureLearning() var sonogram2 = new SpectrogramStandard(sonoConfig, recording2.WavReader); // DO DRAW SPECTROGRAM - var image = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + var image = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), + freqScale.GridLineLocations); image.Save(outputMelImagePath, ImageFormat.Png); // Do RMS normalization sonogram2.Data = SNR.RmsNormalization(sonogram2.Data); - var image2 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + var image2 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), + "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); image2.Save(outputNormMelImagePath, ImageFormat.Png); // NOISE REDUCTION sonogram2.Data = PcaWhitening.NoiseReduction(sonogram2.Data); - var image3 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + var image3 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), + "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); image3.Save(outputNoiseReducedMelImagePath, ImageFormat.Png); + // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins + if (minFreqBin != 1 || maxFreqBin != finalBinCount) + { + inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram2.Data, minFreqBin, maxFreqBin); + } + else + { + inputMatrix = sonogram2.Data; + } + // extracting sequential patches from the target spectrogram - List allSubmatrices2 = PatchSampling.GetFreqBandMatrices(sonogram2.Data, 
numFreqBand); + List allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); double[][,] matrices2 = allSubmatrices2.ToArray(); List allSequentialPatchMatrix = new List(); for (int i = 0; i < matrices2.GetLength(0); i++) { int rows = matrices2[i].GetLength(0); int columns = matrices2[i].GetLength(1); - var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential); + var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, + (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential); allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix()); } @@ -264,7 +337,9 @@ public void TestFeatureLearning() double[][] featureTransVectors = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][]; for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++) { - var normVector = ART_2A.NormaliseVector(allSequentialPatchMatrix.ToArray()[i].ToJagged()[j]); // normalize each patch to unit length + var normVector = + ART_2A.NormaliseVector(allSequentialPatchMatrix.ToArray()[i] + .ToJagged()[j]); // normalize each patch to unit length featureTransVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector); } @@ -275,8 +350,8 @@ public void TestFeatureLearning() // +++++++++++++++++++++++++++++++++++Temporal Summarization // The resolution to generate features is 1 second - // Each 6 patches form 1 second, when patches are formed by a sequence of four frames - // for each 6 patch, we generate 3 vectors of mean, std, and max + // Each 24 single-frame patches form 1 second + // for each 24 patch, we generate 3 vectors of mean, std, and max // The pre-assumption is that each input spectrogram is 1 minute List allMeanFeatureVectors = new List(); @@ -284,10 +359,11 @@ public void TestFeatureLearning() List allStdFeatureVectors = new List(); // number of frames 
needs to be concatenated to form 1 second. Each 24 frames make 1 second. - int numFrames = 24 / patchHeight; + int numFrames = (24 / patchHeight) * 60; foreach (var freqBandFeature in allFeatureTransVectors) { + // store features of different bands in lists List meanFeatureVectors = new List(); List maxFeatureVectors = new List(); List stdFeatureVectors = new List(); @@ -305,6 +381,7 @@ public void TestFeatureLearning() List std = new List(); List max = new List(); double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix(); + // int len = sequencesOfFrames.GetLength(1); // Second, calculate mean, max, and standard deviation of six vectors element-wise @@ -340,6 +417,7 @@ public void TestFeatureLearning() for (int j = 0; j < allMeanFeatureVectors.Count; j++) { + // write the features of each pre-defined frequency band into a separate CSV file var outputFeatureFile = Path.Combine(resultDir, "FeatureVectors" + j.ToString() + ".csv"); // creating the header for CSV file @@ -349,24 +427,26 @@ public void TestFeatureLearning() header.Add("mean" + i.ToString()); } - for (int i = 0; i < allStdFeatureVectors.ToArray()[j].GetLength(1); i++) + for (int i = 0; i < allMaxFeatureVectors.ToArray()[j].GetLength(1); i++) { - header.Add("std" + i.ToString()); + header.Add("max" + i.ToString()); } - for (int i = 0; i < allMaxFeatureVectors.ToArray()[j].GetLength(1); i++) + for (int i = 0; i < allStdFeatureVectors.ToArray()[j].GetLength(1); i++) { - header.Add("max" + i.ToString()); + header.Add("std" + i.ToString()); } // concatenating mean, std, and max vector together for each 1 second List featureVectors = new List(); for (int i = 0; i < allMeanFeatureVectors.ToArray()[j].ToJagged().GetLength(0); i++) { - List featureList = new List(); - featureList.Add(allMeanFeatureVectors.ToArray()[j].ToJagged()[i]); - featureList.Add(allMaxFeatureVectors.ToArray()[j].ToJagged()[i]); - featureList.Add(allStdFeatureVectors.ToArray()[j].ToJagged()[i]); + List featureList = new List 
+ { + allMeanFeatureVectors.ToArray()[j].ToJagged()[i], + allMaxFeatureVectors.ToArray()[j].ToJagged()[i], + allStdFeatureVectors.ToArray()[j].ToJagged()[i], + }; double[] featureVector = DataTools.ConcatenateVectors(featureList); featureVectors.Add(featureVector); } @@ -394,6 +474,8 @@ public void TestFeatureLearning() } } + /* + // Reconstructing the target spectrogram based on clusters' centroids List convertedSpec = new List(); int columnPerFreqBand = sonogram2.Data.GetLength(1) / numFreqBand; for (int i = 0; i < allSequentialPatchMatrix.Count; i++) @@ -407,6 +489,267 @@ public void TestFeatureLearning() // DO DRAW SPECTROGRAM var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(), freqScale.GridLineLocations); reconstructedSpecImage.Save(outputReSpecImagePath, ImageFormat.Png); + */ + } + + /// + /// Input a directory of one-minute recordings for one day + /// Calculate PSD: + /// 1) Apply FFT to produce the amplitude spectrogram at given window width. + /// 2) Square the FFT coefficients >> this gives an energy spectrogram. + /// 3) Do RMS normalization and Subtract the median energy value from each frequency bin. + /// 4) Take average of each of the energy values in each frequency bin >> this gives power spectrum or PSD. + /// Finally draw the spectrogram of PSD values for the whole day.
+ /// + [Ignore] + [TestMethod] + public void PowerSpectrumDensityTest() + { + var inputPath = @"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\"; + var resultPsdPath = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD.bmp"; + var resultNoiseReducedPsdPath = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD_NoiseReduced.bmp"; + + //var inputPath =Path.Combine(inputDir, "TrainSet"); // directory of the one-min recordings of one day (21 and 23 Apr - Black Rail Data) + + // check whether there is any wav file in the folder of recordings (same query as used below) + if (Directory.GetFiles(inputPath, "*.wav").Length == 0) + { + throw new ArgumentException("The folder of recordings is empty..."); + } + + // get the nyquist value from the first wav file in the folder of recordings + int nq = new AudioRecording(Directory.GetFiles(inputPath, "*.wav")[0]).Nyquist; + int nyquist = nq; // 11025; + int frameSize = 1024; + int finalBinCount = 512; //256; // + int hertzInterval = 1000; + FreqScaleType scaleType = FreqScaleType.Linear; + //var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval); + //var fst = freqScale.ScaleType; + //var fst = FreqScaleType.Linear; + //var freqScale = new FrequencyScale(fst); + + var settings = new SpectrogramSettings() + { + WindowSize = frameSize, + WindowOverlap = 0.1028, + + //DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, + //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, + + //DoMelScale = false, + MelBinCount = 256, + DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, + //MelBinCount = (scaleType == FreqScaleType.Mel) ?
finalBinCount : frameSize / 2, + + NoiseReductionType = NoiseReductionType.None, + NoiseReductionParameter = 0.0, + }; + + var attributes = new SpectrogramAttributes() + { + NyquistFrequency = nyquist, + Duration = TimeSpan.FromMinutes(1440), + }; + + List psd = new List(); + foreach (string filePath in Directory.GetFiles(inputPath, "*.wav")) + { + FileInfo fileInfo = filePath.ToFileInfo(); + + // process the wav file if it is not empty + if (fileInfo.Length != 0) + { + var recording = new AudioRecording(filePath); + + //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); + //var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); + // save the matrix + // skip normalisation + // skip mel + settings.SourceFileName = recording.BaseName; + + var spectrogram = new EnergySpectrogram(settings, recording.WavReader); + //var sonogram = new AmplitudeSpectrogram(settings, recording.WavReader); + + //var energySpectrogram = new EnergySpectrogram(sonoConfig, amplitudeSpectrogram.Data); + //var energySpectrogram = new EnergySpectrogram(sonoConfig, recording.WavReader); + //var energySpectrogram = new EnergySpectrogram(settings, recording.WavReader); + + // square the FFT coefficients to get an energy spectrogram + // double[,] energySpectrogram = PowerSpectrumDensity.GetEnergyValues(amplitudeSpectrogram.Data); + + // RMS NORMALIZATION + //double[,] normalizedValues = SNR.RmsNormalization(energySpectro.Data); + //energySpectro.Data = SNR.RmsNormalization(energySpectro.Data); + + // Median Noise Reduction + //spectrogram.Data = PcaWhitening.NoiseReduction(spectrogram.Data); + //spectrogram.Data = SNR.NoiseReduce_Standard(spectrogram.Data); + + //double[] psd = PowerSpectralDensity.GetPowerSpectrum(noiseReducedValues); + //psd.Add(energySpectro.GetLogPsd()); + psd.Add(MatrixTools.GetColumnAverages(spectrogram.Data)); + + //psd.Add(SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(normalizedValues)); +
//psd.Add(PowerSpectralDensity.GetPowerSpectrum(normalizedValues)); + } + } + + // writing psd matrix to csv file + //Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\psd.csv"), psd.ToArray().ToMatrix()); + //Image imagePsd = DecibelSpectrogram.DrawSpectrogramAnnotated(psd.ToArray().ToMatrix(), settings, attributes); + //imagePsd.Save(resultPsdPath, ImageFormat.Bmp); + var psdMatrix = psd.ToArray().ToMatrix(); + + // calculate the log of matrix + var logPsd = MatrixTools.Matrix2LogValues(psdMatrix); + Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd.csv"), logPsd); + + Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(logPsd, settings, attributes); + image.Save(resultPsdPath, ImageFormat.Bmp); + + var noiseReducedLogPsd = PcaWhitening.NoiseReduction(logPsd); //SNR.NoiseReduce_Standard(logPsd); //SNR.NoiseReduce_Mean(logPsd, 0.0);//SNR.NoiseReduce_Median(logPsd, 0.0); // + Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd_NoiseReduced.csv"), noiseReducedLogPsd); + + Image image2 = DecibelSpectrogram.DrawSpectrogramAnnotated(noiseReducedLogPsd, settings, attributes); + image2.Save(resultNoiseReducedPsdPath, ImageFormat.Bmp); + + //ImageTools.DrawMatrix(psd.ToArray().ToMatrix(), resultPath); + //ImageTools.DrawReversedMatrix(psd.ToArray().ToMatrix(), resultPath); + //var data = MatrixTools.Matrix2LogValues(psd.ToArray().ToMatrix()); + //Image image = ImageTools.DrawReversedMatrixWithoutNormalisation(data); + //Image image = ImageTools.DrawReversedMatrixWithoutNormalisation(logPsd); + } + + [TestMethod] + [Ignore] + public void TestSpectrograms() + { + var recordingPath = PathHelper.ResolveAsset("Recordings", "SM304264_0+1_20160421_004539_47-48min.wav"); // "SM304264_0+1_20160421_094539_37-38min.wav" + var resultDir = PathHelper.ResolveAssetPath("SpectrogramTestResults"); + var outputAmpSpecImagePath = Path.Combine(resultDir,
"AmplitudeSpectrogram.bmp"); + var outputDecibelSpecImagePath = Path.Combine(resultDir, "DecibelSpectrogram.bmp"); + var outputEnergySpecImagePath = Path.Combine(resultDir, "EnergySpectrogram.bmp"); + var outputLogEnergySpecImagePath = Path.Combine(resultDir, "LogEnergySpectrogram.bmp"); + var outputLinScaImagePath = Path.Combine(resultDir, "LinearScaleSpectrogram.bmp"); + var outputMelScaImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.bmp"); + var outputNormalizedImagePath = Path.Combine(resultDir, "NormalizedSpectrogram.bmp"); + var outputNoiseReducedImagePath = Path.Combine(resultDir, "NoiseReducedSpectrogram.bmp"); + var outputLogPsdImagePath = Path.Combine(resultDir, "Psd.bmp"); + + var recording = new AudioRecording(recordingPath); + int nyquist = recording.Nyquist; // 11025; + int frameSize = 1024; + int finalBinCount = 512; //256; //128; // 100; // 40; // 200; // + int hertzInterval = 1000; + + //FreqScaleType scaleType = FreqScaleType.Linear; + var scaleType = FreqScaleType.Mel; + + //var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval); + //var fst = freqScale.ScaleType; + + var settings = new SpectrogramSettings() + { + WindowSize = frameSize, + WindowOverlap = 0.1028, + DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, + MelBinCount = 256, //(scaleType == FreqScaleType.Mel) ? 
finalBinCount : frameSize / 2, + NoiseReductionType = NoiseReductionType.None, + //NoiseReductionType = NoiseReductionType.Median, + }; + //settings.NoiseReductionParameter = 0.0; // backgroundNeighbourhood noise reduction in dB + + settings.SourceFileName = recording.BaseName; + //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); + + var sonogram = new EnergySpectrogram(settings, recording.WavReader); + sonogram.Data = MatrixTools.Matrix2LogValues(sonogram.Data); + + var attributes = new SpectrogramAttributes() + { + NyquistFrequency = sonogram.Attributes.NyquistFrequency, + Duration = sonogram.Attributes.Duration, + }; + + Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(sonogram.Data, settings, attributes); + //image.Save(outputLogEnergySpecImagePath, ImageFormat.Bmp); + + //var logSonogramData = MatrixTools.Matrix2LogValues(sonogram.Data); + //var dbSpectrogram = new DecibelSpectrogram(settings, recording.WavReader); + //dbSpectrogram.DrawSpectrogram(outputMelScaImagePath); + + //var energySpectro = new EnergySpectrogram(settings, recording.WavReader); + + //var image = SpectrogramTools.GetImage(sonogram.Data, nyquist, settings.DoMelScale); + //var specImage = SpectrogramTools.GetImageFullyAnnotated(image, "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, settings.Duration); + + //var logSonogramData = MatrixTools.Matrix2LogValues(sonogram.Data); + + //var image = SpectrogramTools.GetImage(logSonogramData, nyquist, settings.DoMelScale); + //var specImage = SpectrogramTools.GetImageFullyAnnotated(image, "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration); + + //specImage.Save(outputMelScaImagePath, ImageFormat.Png); + //specImage.Save(outputAmpSpecImagePath, ImageFormat.Png); + + // DO RMS NORMALIZATION + //sonogram.Data = SNR.RmsNormalization(sonogram.Data); + //energySpectro.Data = SNR.RmsNormalization(energySpectro.Data); + + 
//dbSpectrogram.DrawSpectrogram(outputNormalizedImagePath); + //var image2 = SpectrogramTools.GetImage(dbSpectrogram.Data, nyquist, settings.DoMelScale); + //var normImage = SpectrogramTools.GetImageFullyAnnotated(image2, "NORMALIZEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration); + //normImage.Save(outputNormalizedImagePath, ImageFormat.Png); + + // DO NOISE REDUCTION + sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data); + //dbSpectrogram.DrawSpectrogram(outputNoiseReducedImagePath); + //var image3 = SpectrogramTools.GetImage(dbSpectrogram.Data, nyquist, settings.DoMelScale); + //var noiseReducedImage = SpectrogramTools.GetImageFullyAnnotated(image3, "NOISEREDUCEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations, sonogram.Attributes.Duration); + //noiseReducedImage.Save(outputNoiseReducedImagePath, ImageFormat.Png); + Image image2 = DecibelSpectrogram.DrawSpectrogramAnnotated(sonogram.Data, settings, attributes); + //image2.Save(outputNoiseReducedImagePath, ImageFormat.Bmp); + + //energySpectro.DrawLogPsd(outputLogPsdImagePath); + + /* + var fst = FreqScaleType.Linear; + var freqScale = new FrequencyScale(fst); + var recording = new AudioRecording(recordingPath); + + var sonoConfig = new SonogramConfig + { + WindowSize = freqScale.FinalBinCount * 2, + WindowOverlap = 0.2, + SourceFName = recording.BaseName, + NoiseReductionType = NoiseReductionType.None, + NoiseReductionParameter = 0.0, + }; + + // GENERATE AMPLITUDE SPECTROGRAM + var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); + amplitudeSpectrogram.Configuration.WindowSize = freqScale.WindowSize; + + var image = amplitudeSpectrogram.GetImageFullyAnnotated(amplitudeSpectrogram.GetImage(), "AmplitudeSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + image.Save(outputAmpSpecImagePath, ImageFormat.Png); + + // DO RMS NORMALIZATION + amplitudeSpectrogram.Data = 
SNR.RmsNormalization(amplitudeSpectrogram.Data); + var normImage = amplitudeSpectrogram.GetImageFullyAnnotated(amplitudeSpectrogram.GetImage(), "NORMAmplitudeSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + normImage.Save(outputNormAmpImagePath, ImageFormat.Png); + + // CONVERT NORMALIZED AMPLITUDE SPECTROGRAM TO dB SPECTROGRAM + var sonogram = new SpectrogramStandard(amplitudeSpectrogram); + var standImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "LinearScaleSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + standImage.Save(outputLinScaImagePath, ImageFormat.Png); + + // DO NOISE REDUCTION + sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data); + //SNR.NoiseReduce_Standard(sonogram.Data); + var noiseReducedImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "NOISEREDUCEDSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + noiseReducedImage.Save(outputNoiseReducedImagePath, ImageFormat.Png); + */ } } } diff --git a/tests/Acoustics.Test/AudioAnalysisTools/SpectralPeakTracking2018Tests.cs b/tests/Acoustics.Test/AudioAnalysisTools/SpectralPeakTracking2018Tests.cs index 2f9797dac..ad5c23a92 100644 --- a/tests/Acoustics.Test/AudioAnalysisTools/SpectralPeakTracking2018Tests.cs +++ b/tests/Acoustics.Test/AudioAnalysisTools/SpectralPeakTracking2018Tests.cs @@ -104,7 +104,17 @@ public void LocalSpectralPeakTest() var hertzPerFreqBin = nyquist / finalBinCount; FreqScaleType scaleType = FreqScaleType.Linear; - var sonoConfig = new SonogramConfig + var spectrogramSettings = new SpectrogramSettings() + { + WindowSize = frameSize, + WindowOverlap = frameOverlap, + //DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, + //MelBinCount = (scaleType == FreqScaleType.Mel) ? 
finalBinCount : frameSize / 2, + NoiseReductionType = NoiseReductionType.None, + }; + + + var sonoConfig = new SonogramConfig() { WindowSize = frameSize, WindowOverlap = frameOverlap, @@ -117,7 +127,7 @@ public void LocalSpectralPeakTest() var secondsPerFrame = frameStep / (nyquist * 2); //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); - var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); + var amplitudeSpectrogram = new AmplitudeSpectrogram(spectrogramSettings, recording.WavReader); var energySpectrogram = new EnergySpectrogram(amplitudeSpectrogram); var decibelSpectrogram = new SpectrogramStandard(sonoConfig, recording.WavReader); diff --git a/tests/Acoustics.Test/TestHelpers/TestHelper.cs b/tests/Acoustics.Test/TestHelpers/TestHelper.cs index 6c838fac8..06a392d3f 100644 --- a/tests/Acoustics.Test/TestHelpers/TestHelper.cs +++ b/tests/Acoustics.Test/TestHelpers/TestHelper.cs @@ -614,7 +614,7 @@ public static void AssertFrequencyInSignal(WavReader wavReader, double[] signal, { var fft = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(signal, wavReader.SampleRate, wavReader.Epsilon, 512, 0.0); - var histogram = SpectrogramTools.CalculateAvgSpectrumFromSpectrogram(fft.AmplitudeSpectrogram); + var histogram = SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(fft.AmplitudeSpectrogram); var max = histogram.Max(); double threshold = max * 0.8; diff --git a/tests/Fixtures/Recordings/SM304264_0+1_20160421_004539_47-48min.wav b/tests/Fixtures/Recordings/SM304264_0+1_20160421_004539_47-48min.wav new file mode 100644 index 000000000..642fcf52a --- /dev/null +++ b/tests/Fixtures/Recordings/SM304264_0+1_20160421_004539_47-48min.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20c1fa7b85bafcf65c2f1683ba095b6769e77929332a4bb10e41b5bca8d022dc +size 2646044 diff --git a/tests/Fixtures/Recordings/SM304264_0+1_20160421_094539_37-38min.wav 
b/tests/Fixtures/Recordings/SM304264_0+1_20160421_094539_37-38min.wav new file mode 100644 index 000000000..ac664aa03 --- /dev/null +++ b/tests/Fixtures/Recordings/SM304264_0+1_20160421_094539_37-38min.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9d17fc9c73c80b11127867b7d74c9766549b6c75514db7393be70e0dcdf97c +size 2646044 diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp new file mode 100644 index 000000000..fc521b1c4 Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp.csv new file mode 100644 index 000000000..77bce521d --- /dev/null +++ b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/LogPsd.bmp.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b29f0c3f371aee179d4ee60b10f33af6e691da9aad95146ab3a04cb11e16a6 +size 9715 diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/MelScaleSpectrogram.bmp new file mode 100644 index 000000000..b2c8ebf3c Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/LogPSD_Linear/MelScaleSpectrogram.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp new file mode 100644 index 000000000..45ed5f85e Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp.csv new file mode 100644 index 000000000..8e42d8cdf --- /dev/null +++ b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/LogPsd.bmp.csv @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:2ee4f714ebaed91448cc954d98ad430d7288779bf45eb85596508256df11123c +size 4868 diff --git a/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/MelScaleSpectrogram.bmp new file mode 100644 index 000000000..32c13a12b Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/LogPSD_Mel/MelScaleSpectrogram.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/Mel/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/Mel/MelScaleSpectrogram.bmp new file mode 100644 index 000000000..32c13a12b Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/Mel/MelScaleSpectrogram.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/Mel/NoiseReducedSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/Mel/NoiseReducedSpectrogram.bmp new file mode 100644 index 000000000..2a5269faa Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/Mel/NoiseReducedSpectrogram.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/Mel/NormalizedSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/Mel/NormalizedSpectrogram.bmp new file mode 100644 index 000000000..32c13a12b Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/Mel/NormalizedSpectrogram.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/NoiseRedudec_LogPsd.bmp b/tests/Fixtures/SpectrogramTestResults/NoiseRedudec_LogPsd.bmp new file mode 100644 index 000000000..cae9c3587 Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/NoiseRedudec_LogPsd.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Linear/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/MelScaleSpectrogram.bmp new file mode 100644 index 000000000..b2c8ebf3c Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/MelScaleSpectrogram.bmp differ diff --git 
a/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp new file mode 100644 index 000000000..22602ec59 Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp.csv new file mode 100644 index 000000000..e0e977925 --- /dev/null +++ b/tests/Fixtures/SpectrogramTestResults/PSD_Linear/Psd.bmp.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:733c1439b580b4821c6297b84904079d6ad18579cee957f983352ac05e7a5fd0 +size 10584 diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Mel/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/MelScaleSpectrogram.bmp new file mode 100644 index 000000000..32c13a12b Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/MelScaleSpectrogram.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp new file mode 100644 index 000000000..ee88d087d Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp.csv new file mode 100644 index 000000000..95561cb9b --- /dev/null +++ b/tests/Fixtures/SpectrogramTestResults/PSD_Mel/Psd.bmp.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb41f8c54cd64a9f47dad7f4c0dd93392ca1f82106f328f2e3a6ddf40b7a3eb +size 5219 diff --git a/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csv b/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csv new file mode 100644 index 000000000..338117379 --- /dev/null +++ b/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3b2443a09e3cbbb16b38e6d5dd247a5e38da11ce22ca9e0872c7d7abf939ea6f +size 9666 diff --git a/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csvOLD.txt b/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csvOLD.txt new file mode 100644 index 000000000..a067ae3ef --- /dev/null +++ b/tests/Fixtures/SpectrogramTestResults/Psd.bmp.csvOLD.txt @@ -0,0 +1,512 @@ +5.92342059178663 +6.78603139659209 +2.73763992468852 +0.484650528763745 +0.134196779728749 +0.060613335626598 +0.0449980957102696 +0.0308638089733839 +0.0230549452891953 +0.029574167257121 +0.045592680580702 +0.0619416735406503 +0.072292137960567 +0.0729769758377264 +0.0693316499344553 +0.0617875858058421 +0.0516206839286855 +0.0397318790338038 +0.0313889365981619 +0.0248176117194829 +0.0199693194102845 +0.0153640062104025 +0.0116573762514843 +0.00921718643579001 +0.00736126359426442 +0.00631222689466149 +0.00584066799068913 +0.00530833759647776 +0.00508085619589678 +0.00486726967483501 +0.00451838930388912 +0.00400071355255953 +0.00349263972018566 +0.00312364670092792 +0.00291824269715042 +0.00256370572427487 +0.00238174654968078 +0.00227325553567207 +0.00227983318428067 +0.00205553384102714 +0.00184061259688744 +0.00169351570451881 +0.00169852726616255 +0.00175181815176738 +0.00174476352094424 +0.00168204117785559 +0.00170478778480012 +0.00166432290835369 +0.00156375406850788 +0.00150489308759203 +0.0015041133696475 +0.00163016659576912 +0.0016663055287863 +0.00162467652147422 +0.00159416144388321 +0.00164774829631534 +0.00178213966891182 +0.00191347604320948 +0.00220144741579375 +0.00230618929826558 +0.00218721027774908 +0.00203714989488304 +0.00195714030196791 +0.00186911163160602 +0.00187040700302073 +0.00188247108896847 +0.00183193450033458 +0.00179035045699948 +0.00174445118984544 +0.00187316824761175 +0.00211835805264512 +0.00250011599513084 +0.00250969474544108 +0.00217201976751015 +0.0017933287463902 +0.00175299055356613 +0.00177184780236242 +0.00181133616466615 +0.00162732752887556 +0.00134105231604351 
+0.00101199113538078 +0.000754903422774901 +0.000626766485557011 +0.000614244720489763 +0.00058333159413847 +0.000562623542539203 +0.000537908934271761 +0.000539477910630015 +0.000542870095994484 +0.000525911900481924 +0.00047983113654423 +0.000434921490948426 +0.000422073202189695 +0.000425590329881372 +0.000425181833317943 +0.000419817348531986 +0.0004077788980078 +0.000409178884055008 +0.000414411078598465 +0.000412339330241552 +0.000445152486589777 +0.000492832906013084 +0.000526117154039294 +0.000501340958234119 +0.000430540487576878 +0.000381851309566457 +0.00038523109883433 +0.000400144060866762 +0.000395078586324034 +0.000368138960390048 +0.000353795213391507 +0.000365274345585826 +0.000383547427239684 +0.000378323988078999 +0.000380069771820894 +0.000399230295546036 +0.000477348086841093 +0.000720583901922056 +0.00122844154298709 +0.00183874368176035 +0.00208102360369207 +0.00197263391511847 +0.00218310820204097 +0.00298701684311657 +0.00397455237348695 +0.00355282399813533 +0.00211878415700036 +0.000788646249848888 +0.000426370419069255 +0.000335856896845558 +0.000322816998710207 +0.000328507548525789 +0.000338683984258544 +0.000349326555858356 +0.000353057137036315 +0.000323188384510321 +0.000296180781851916 +0.000301398059193545 +0.000325425143195519 +0.0003334189778345 +0.000309482859823427 +0.000308240015600704 +0.0003103487672135 +0.000295331816438542 +0.000272931873567936 +0.000274934997518774 +0.000291109906983677 +0.000302774694364952 +0.000299800393745657 +0.000292052595725419 +0.000284497090845833 +0.000281033046234125 +0.000275223786867186 +0.000302184043767604 +0.000317848400274916 +0.000323366209004018 +0.000331174572267127 +0.000332729599154386 +0.000356703714929623 +0.000400836533699513 +0.00045847563863159 +0.000490069577933363 +0.00055601356906235 +0.000693980444342674 +0.00096794330013668 +0.00128694623088374 +0.00157411324133971 +0.0018505016577456 +0.00209021458495992 +0.00258182316489771 +0.0034100653376744 +0.00483749513368122 
+0.00698128020125996 +0.0101961957461181 +0.0125253074188117 +0.015785659388509 +0.0208950868776112 +0.0300600434286705 +0.0470568907580039 +0.0606117340995085 +0.0699653495203272 +0.080999381927959 +0.0992922807732138 +0.121744445574435 +0.143628955187763 +0.163958470185328 +0.175302034057053 +0.168958593551732 +0.153989313861803 +0.134371764470529 +0.111437211665887 +0.10122208453419 +0.106014764127152 +0.111885727508556 +0.118518342549926 +0.127400441766813 +0.132307468885018 +0.115390039742709 +0.0954962157823734 +0.0863297048458831 +0.0819508257629367 +0.0880120055837407 +0.0969243006317774 +0.0736034313184319 +0.03189992791015 +0.0167802851210502 +0.020665190670411 +0.0231218739469633 +0.0113971589119597 +0.00438748628367456 +0.00290267700696139 +0.00327537094934574 +0.00210154306805208 +0.000849167649375279 +0.000486388934432319 +0.00057839538043529 +0.000628082734677565 +0.000644120334030811 +0.000562775924606834 +0.000476782450144881 +0.000369036111392155 +0.000327485262989244 +0.000325517423298084 +0.000331694154473157 +0.000335120538289499 +0.000321263769923705 +0.000285940647038625 +0.000255451268009806 +0.00025070228415632 +0.000258027712383378 +0.000259037627573138 +0.00026664865956847 +0.000264958077546918 +0.00028415983626037 +0.000283747433614669 +0.000270337154528126 +0.000245020375678894 +0.000235755889095187 +0.000229675295460236 +0.000233139808097966 +0.000226385800129463 +0.000215753215013351 +0.000217340167538521 +0.000222449901061098 +0.000220983036658176 +0.000201445267627303 +0.000196371273359384 +0.000209911794863315 +0.000231452163115747 +0.000213174770891635 +0.000197323272756498 +0.000211510284601431 +0.000220169598833855 +0.000213328046158373 +0.000193445965043203 +0.00019893499440385 +0.000215554593891906 +0.000224243213271129 +0.000224827385874794 +0.000225130162742188 +0.000214647035606757 +0.000203084762425824 +0.000199671532627531 +0.000201168714260613 +0.000197111954471548 +0.000195940467958558 +0.000192592146149875 
+0.000193147892797519 +0.000198308048953625 +0.000199000112786909 +0.000198235753004126 +0.000191920326015812 +0.000195335414986316 +0.000190114133024043 +0.000197462939674182 +0.000197145363907007 +0.000197867519429887 +0.000194835231508809 +0.000184098539942656 +0.000176216607936817 +0.000176381794214087 +0.000184792529532461 +0.000190583260803435 +0.000195954078278375 +0.000200381533213994 +0.000196977761873929 +0.000195197907732914 +0.000195892740301248 +0.000200979224180509 +0.000196805211987536 +0.000197834907445491 +0.000189747527996663 +0.000187642294330323 +0.000186985443175167 +0.000188872356000939 +0.000197389124845894 +0.000205993231925291 +0.000198978835724956 +0.000189606971131363 +0.000181860905461806 +0.00018791662191321 +0.000190999048710944 +0.000192812176065193 +0.000188222941558719 +0.00019189824048678 +0.00019655305627445 +0.000201339601778108 +0.000194670377949367 +0.000180185320982464 +0.000175505754742858 +0.000175941617414825 +0.00018707551922134 +0.000189681021762388 +0.000189830182513327 +0.000185292548208458 +0.000185614744544628 +0.000191734904753198 +0.000198300522684971 +0.000198653898950781 +0.000198399686392815 +0.000211179851900352 +0.000207762199509266 +0.000210186120275576 +0.000198649079090712 +0.000203473416742617 +0.000198623724397584 +0.000184831094644113 +0.000179934500989128 +0.00018515769631671 +0.000191553305764961 +0.000200057958695553 +0.000200278372886422 +0.000193544245879366 +0.000186568339222366 +0.000190850440417821 +0.000203207368212081 +0.00021595511986196 +0.000218316276673687 +0.000205817110972747 +0.000212139075376423 +0.000219405364231359 +0.000224492180858786 +0.000218853733115119 +0.000210554225246233 +0.00021246062737957 +0.00022594831829876 +0.000232299356661781 +0.000233636889293826 +0.000250412659578138 +0.000266975553973092 +0.000269204410775137 +0.000267171761121167 +0.000272047682703472 +0.000248195290694898 +0.00022150999657165 +0.000254553539782728 +0.000352865364104451 +0.000421526225705925 
+0.000402803565166639 +0.000354321177856905 +0.000326347921141917 +0.000352568357659741 +0.000462463307375687 +0.000643774295517939 +0.000784717414662457 +0.000838745677934633 +0.000757122481207381 +0.000678897861485588 +0.000626349496998761 +0.000658354368846629 +0.000749599188852712 +0.00083165582542375 +0.000886336045743687 +0.000768423933879982 +0.000585237174314642 +0.000508239266362428 +0.000567524033046338 +0.000647481898895154 +0.000624266596357504 +0.000560002445654088 +0.00058215979988373 +0.000684174175828965 +0.000727911906328144 +0.000673223088057037 +0.000615838783315468 +0.000652048367120072 +0.000707038145390686 +0.000823034034765323 +0.000956428762354955 +0.00106053724272944 +0.0010285915152128 +0.00100263542493879 +0.00111264621297516 +0.0014191521413006 +0.00172795581094616 +0.00192780129502807 +0.00206303064738915 +0.00215332113142019 +0.0022466954931446 +0.00215983557777723 +0.00223143191131219 +0.00245815927148455 +0.00274847567476636 +0.00273615517198675 +0.00253352585954081 +0.00225570484653126 +0.00188919842674377 +0.00157201023122244 +0.00145834911049567 +0.00148615868030397 +0.0015519412490252 +0.00157466545535257 +0.00144661607487311 +0.00117227370574575 +0.000938484790644089 +0.000732134816769824 +0.000528624015174057 +0.000366569551817972 +0.000278894213194701 +0.000243710316851626 +0.000238590319723254 +0.000241859431619529 +0.000268263483148327 +0.000260841365332497 +0.00023326504567899 +0.000226203929561463 +0.000238078996744937 +0.000249766971476454 +0.000262149083721957 +0.000266892944412421 +0.000267002077303342 +0.000265726614156371 +0.000271371209291191 +0.000281999036537821 +0.000280658229422484 +0.000248897262917992 +0.000219869705715068 +0.000209607206963666 +0.000222720261135429 +0.000230963069436546 +0.000233283489024344 +0.000235314731373244 +0.000232961936790798 +0.000231587693768191 +0.000226262761696977 +0.000225404330338338 +0.000227823690702315 +0.000229667855594692 +0.000224590485077645 +0.000216769165770145 
+0.000217094607210963 +0.000218828136872295 +0.000220931041989363 +0.000219573958484272 +0.00021533192085349 +0.000217083181203663 +0.000227878319705463 +0.00022476619682191 +0.000221835867873572 +0.000213381927208331 +0.000213091544975543 +0.000214124965335125 +0.000214627726098925 +0.00021083419381561 +0.000208441202067815 +0.000207161788346288 +0.000214777783289231 +0.000216293426674694 +0.000205552966463787 +0.000189725444043717 +0.000182158187611337 +0.000182049892906279 +0.00017594803058018 +0.00016774870526203 +0.000170757993533594 +0.000174201609213675 +0.000175121687572624 +0.000164595830759266 +0.000158698956861484 +0.000155892765435281 +0.000146863651223553 +0.000147645864658281 +0.00015308948173169 +0.000160437983580839 +0.000158252172934376 +0.00015217085241057 +0.000152558308785518 +0.000153577471025759 +0.000150233555263141 +0.000152099519534582 +0.000143464711533248 +0.000136389269117607 +0.000133614678678209 +0.000138550850737478 +0.000142134193961718 +0.00014358215198744 +0.000133288650354347 +0.000116106840879223 +0.00011152141646913 +0.000114166717118501 +0.000116059394122705 +0.000115309289052816 +0.000109101804765123 +0.000104070228904335 +0.000108154046267337 +0.000107954799659394 +9.9539555835961E-05 +7.59303745994635E-05 +4.39112855419608E-05 +1.84394347997459E-05 +4.08219770974155E-06 +2.26734401784817E-06 diff --git a/tests/Fixtures/SpectrogramTestResults/linear/MelScaleSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/linear/MelScaleSpectrogram.bmp new file mode 100644 index 000000000..b2c8ebf3c Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/linear/MelScaleSpectrogram.bmp differ diff --git a/tests/Fixtures/SpectrogramTestResults/linear/NoiseReducedSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/linear/NoiseReducedSpectrogram.bmp new file mode 100644 index 000000000..f665ec9a7 Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/linear/NoiseReducedSpectrogram.bmp differ diff --git 
a/tests/Fixtures/SpectrogramTestResults/linear/NormalizedSpectrogram.bmp b/tests/Fixtures/SpectrogramTestResults/linear/NormalizedSpectrogram.bmp new file mode 100644 index 000000000..b2c8ebf3c Binary files /dev/null and b/tests/Fixtures/SpectrogramTestResults/linear/NormalizedSpectrogram.bmp differ