From e63e9d58a66b813732bdf3e27b5a71454d52d676 Mon Sep 17 00:00:00 2001
From: Michael Towsey
Date: Thu, 31 May 2018 15:36:40 +1000
Subject: [PATCH] WEKA EXPERIMENTS

Working with Weka on Liz Znidersic data.
Refactoring a method name caused changes to SpeciesAccumulationCurve.cs.
---
 src/AnalysisPrograms/Sandpit.cs              | 191 ++++++++++++++++--
 .../SpeciesAccumulationCurve.cs              |  14 +-
 src/TowseyLibrary/CsvTools.cs                |  24 ++-
 src/TowseyLibrary/Plot.cs                    |   2 +-
 4 files changed, 196 insertions(+), 35 deletions(-)

diff --git a/src/AnalysisPrograms/Sandpit.cs b/src/AnalysisPrograms/Sandpit.cs
index 640ee287b..d0ee919f9 100644
--- a/src/AnalysisPrograms/Sandpit.cs
+++ b/src/AnalysisPrograms/Sandpit.cs
@@ -23,6 +23,7 @@ namespace AnalysisPrograms
     using AudioAnalysisTools.LongDurationSpectrograms;
     using AudioAnalysisTools.StandardSpectrograms;
     using AudioAnalysisTools.WavTools;
+    using log4net.Util;
     using McMaster.Extensions.CommandLineUtils;
     using Production;
     using Production.Arguments;
@@ -65,7 +66,13 @@ public override Task Execute(CommandLineApplication app)
             //AnalyseFrogDataSet();
             //Audio2CsvOverOneFile();
             //Audio2CsvOverMultipleFiles();
+
+            // used to get files from availae for Black Rail and Least Bittern papers.
             CodeToExtractFeatureVectorOfIndices();
+            //CodeToGetLdfcSpectrogramsFromAvailae();
+            //CodeToPlaceScoreTracksUnderLdfcSpectrograms();
+            //CodeToPlaceScoreTracksUnderSingleImage();
+
             //ConcatenateIndexFilesAndSpectrograms();
             //ConcatenateMarineImages();
             //ConcatenateImages();
@@ -2102,24 +2109,164 @@ public static void CodeToDrawClusterImage()
         }
         */

         /// <summary>
-        /// This code used to extract acoustic indices for recognisers.
+        /// This code is used to get LDFC spectrograms from availae.
         /// It cycles through all the subdirecotries in a dir.
         /// All depends on the consistency of file naming.
         /// Check the call method for index names and bounds.
         /// </summary>
-        public static void CodeToExtractFeatureVectorOfIndices()
+        public static void CodeToGetLdfcSpectrogramsFromAvailae()
         {
             //var sourceDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\Original concatenated index files ARU10");
-            //string superDir = @"Y:\Results\2017Jun05-113313 - Liz, Towsey.Indices, ICD=60.0, #154\ConcatResults";
             //var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_Tasmania_ARU10\ARU 10 27.12.2016 Data");
+
             string superDir = @"Y:\Results\2017Apr13-135831 - Liz, Towsey.Indices, ICD=60.0, #154\ConcatResults";
-            var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_USA - South Carolina_ARU UNIT 7");
-            //var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_USA - South Carolina_ARU UNIT 10");
+            //var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_USA - South Carolina_ARU UNIT 7");
+            var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_USA - South Carolina_ARU UNIT 10");
+            string searchPattern = "2016*";
+
+            var outputDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU10_spectrograms");
+            //var outputDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU7_spectrograms");
+            if (!outputDir.Exists)
+            {
+                outputDir.Create();
+            }
+
+            DirectoryInfo[] dirs = sourceDir.GetDirectories(searchPattern, SearchOption.AllDirectories);
+            Console.WriteLine("Dir Count = " + dirs.Length);
+            foreach (DirectoryInfo dir in dirs)
+            {
+                // assume this file exists
+                var fileinfo = dir.GetFiles("*__ACI-ENT-EVN.png");
+                string site = fileinfo[0].Name.Split('_')[0];
+                //string site = sourceDir.Name;
+                string date = dir.Name;
+                string siteAndDate = site + "_" + date;
+                string opFileName = siteAndDate + "_ACI-ENT-EVN.png";
+
+                foreach (var file in fileinfo)
+                {
+                    Console.WriteLine("Copying file:: " + file.Name);
+                    var opFileInfo = new FileInfo(Path.Combine(outputDir.FullName, opFileName));
+                    file.CopyTo(opFileInfo.FullName);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Places score tracks under a single image, assumed to be a spectrogram.
+        /// </summary>
+        public static void CodeToPlaceScoreTracksUnderSingleImage()
+        {
+            // construct paths to files
+            string dirName = @"C:\SensorNetworks\Collaborations\LizZnidersic\LeastBittern";
+            //string imageName = "ORNL ARU 2 6.6.2017 Powerhouse trail Data_20170601__2Maps.png";
+            //var scoreFile = new FileInfo(Path.Combine(dirName, "ARU2_ORNL_20170601_LEBI_LZ_TrainingLabels.csv"));
+            string imageName = "ORNL ARU 2 6.6.2017 Powerhouse trail Data_20170604__2Maps.png";
+            var scoreFile = new FileInfo(Path.Combine(dirName, "ARU2_ORNL_20170604_LEBI_LZ_ValidationLabels.csv"));
+
+            var imageFile = new FileInfo(Path.Combine(dirName, imageName));
+            var opFile = Path.Combine(dirName, imageName + "WithScores.png");
+
+            double threshold = 3.5;
+            double maxScore = 15.0;
+            CodeToPlaceScoreTrackUnderImageFile(imageFile, scoreFile, opFile, maxScore, threshold);
+        }
+
+        /// <summary>
+        /// This code places score tracks under LDFC spectrograms obtained from availae.
+        /// It cycles through all the LDFC spectrograms in a dir.
+        /// All depends on the consistency of file naming.
+        /// Check the call method for index names and bounds.
+        /// </summary>
+        public static void CodeToPlaceScoreTracksUnderLdfcSpectrograms()
+        {
+            var scoreDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU7_predictions");
+            var imageDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU7_spectrograms");
+
+            var outputDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU7_spectrogramsWithScores");
+            if (!outputDir.Exists)
+            {
+                outputDir.Create();
+            }
+
+            var imageFiles = imageDir.GetFiles("*ACI-ENT-EVN.png");
+            double threshold = 3.5;
+            double maxScore = 15.0;
+
+            Console.WriteLine("File Count = " + imageFiles.Length);
+            foreach (var spImageFile in imageFiles)
+            {
+                // assume this file exists
+                string site = spImageFile.Name.Split('_')[0];
+                string date = spImageFile.Name.Split('_')[1];
+                string scoreFileName = site + "_" + date + "_FeatureSet.csv";
+
+                // get the corresponding csv file of scores
+                var scoreFile = new FileInfo(Path.Combine(scoreDir.FullName, scoreFileName));
+
+                // construct path to output file
+                var opFile = Path.Combine(outputDir.FullName, spImageFile.Name);
+                CodeToPlaceScoreTrackUnderImageFile(spImageFile, scoreFile, opFile, maxScore, threshold);
+            }
+        }
+
+        public static void CodeToPlaceScoreTrackUnderImageFile(FileInfo imageFile, FileInfo scoreFile, string opFile, double maxScore, double threshold)
+        {
+            // read in the image file - assumed to be spectrogram
+            Image spectrogram = ImageTools.ReadImage2Bitmap(imageFile.FullName);
+
+            // Cannot get the following line to work, so use the deprecated method
+            //var data1 = Csv.ReadMatrixFromCsv(scoreFile);
+            var data = CsvTools.ReadColumnOfCsvFile(scoreFile.FullName, 1, out string header);
+
+            // create a score track
+            var scoreTrack = ImageTrack.GetNamedScoreTrack(data, scoreMin: 0.0, scoreMax: maxScore, scoreThreshold: threshold, name: "Predictions");
+
+            // attach score track to the LDFC spectrogram
+            var scoreImage = new Bitmap(spectrogram.Width, 40);
+            scoreTrack.DrawTrack(scoreImage);
+            Image[] images = { spectrogram, scoreImage };
+            var combinedImage = ImageTools.CombineImagesVertically(images);
+
+            // write image to file
+            //Console.WriteLine("Copying file:: " + fileinfo.Name);
+            combinedImage.Save(opFile);
+        }
+
+        /// <summary>
+        /// This code can be used to extract acoustic indices for recognisers.
+        /// It cycles through all the subdirectories in a dir.
+        /// All depends on the consistency of file naming.
+        /// Check the call method for index names and bounds.
+        /// </summary>
+        public static void CodeToExtractFeatureVectorOfIndices()
+        {
+            /*
+            // THESE ARE PATHS FOR LEWINS RAIL PROJECT
+            //var sourceDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\Original concatenated index files ARU10");
+            //var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_Tasmania_ARU10\ARU 10 27.12.2016 Data");
+            */
+
+            /*
+            // THESE ARE PATHS FOR BLACK RAIL PROJECT
+            //string superDir = @"Y:\Results\2017Jun05-113313 - Liz, Towsey.Indices, ICD=60.0, #154\ConcatResults";
+            string superDir = @"Y:\Results\2017Apr13-135831 - Liz, Towsey.Indices, ICD=60.0, #154\ConcatResults";
+            //var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_USA - South Carolina_ARU UNIT 7");
+            var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_USA - South Carolina_ARU UNIT 10");
             string searchPattern = "2016*";
-            //var outputDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU10");
-            var outputDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU7");
+            var outputDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU10");
+            //var outputDir = new DirectoryInfo(@"C:\SensorNetworks\Collaborations\LizZnidersic\BlackRail\UnlabelledDataSets\Job154_2017Apr13_135831 SouthCarolina\ARU7");
+            */
+
+            // THESE ARE PATHS FOR LEAST BITTERN PROJECT
+            string superDir = @"Y:\Results\2017Jun26-111643- Liz, Towsey.Indices, ICD=60.0, #160\ConcatResults";
+            var sourceDir = new DirectoryInfo(superDir + @"\David Watson_Liz_Oak Ridge\ORNL ARU 2 6.6.2017 Powerhouse trail Data");
+            string searchPattern = "201706*";
+
+            var outputDir = new DirectoryInfo(@"G:\SensorNetworks\Collaborations\LizZnidersic\LeastBittern\UnlabelledDataSets");
+
             if (!outputDir.Exists)
             {
                 outputDir.Create();
             }
@@ -2137,7 +2284,7 @@ public static void CodeToExtractFeatureVectorOfIndices()
                 string siteAndDate = site + "_" + date;
                 string filePrefix = siteAndDate + "__Towsey.Acoustic.";
-                string opFileName = siteAndDate + "_FeatureSet.csv";
+                string opFileName = siteAndDate + "_FeatureSet3.csv";
                 var opFileInfo = new FileInfo(Path.Combine(outputDir.FullName, opFileName));
                 Console.WriteLine("Extracting dir " + dir.Name);
                 ExtractFeatureVectorOfIndices(dir, filePrefix, siteAndDate, opFileInfo);
@@ -2149,12 +2296,21 @@ public static void CodeToExtractFeatureVectorOfIndices()
         /// </summary>
         public static void ExtractFeatureVectorOfIndices(DirectoryInfo sourceDir, string filePrefix, string siteAndDate, FileInfo opFileInfo)
         {
-            // source directory
+            // acoustic indices/features to select and the start and end frequency bins
+            /*
+            // BLACK RAIL
             string[] indexCodes = { "ACI", "ENT", "EVN" };
+            int startBin = 22;
+            int endBin = 74;
+            */
+
+            // LEAST BITTERN
+            string[] indexCodes = { "ACI", "ENT", "EVN", "R3D" };
+            int startBin = 12;
+            int endbin = 21;
+
             int indexCount = indexCodes.Length;
-            int startIndex = 22;
-            int endIndex = 74;
-            int length = endIndex - startIndex + 1;
+            int length = endbin - startBin + 1;

             // matrix of string
             var extractedLines = new List<List<string>>();
@@ -2169,10 +2325,11 @@ public static void ExtractFeatureVectorOfIndices(DirectoryInfo sourceDir, string
                 for (int i = 0; i < length; i++)
                 {
-                    int id = i + startIndex;
+                    int id = i + startBin;
                     newHeader.Append(indexKey + id.ToString("D4") + ",");
                 }
             }

+            newHeader.Append("Target");

             for (int keyId = 0; keyId < indexCount; keyId++)
@@ -2195,7 +2352,7 @@ public static void ExtractFeatureVectorOfIndices(DirectoryInfo sourceDir, string
                     var words = line.Split(',');

                     // take subarray. +1 because the first column containing ID is ignored.
-                    var subArray = DataTools.Subarray(words, startIndex + 1, length);
+                    var subArray = DataTools.Subarray(words, startBin + 1, length);
                     var newLine = DataTools.Array2String(subArray);
                     lines.Add(newLine);
                 }
@@ -2221,8 +2378,10 @@ public static void ExtractFeatureVectorOfIndices(DirectoryInfo sourceDir, string
                     line += extractedLines[j][i];
                 }

-                // add '?' as place holder for the unknown to be predicted
-                line += "?";
+                // add '?' as place holder for an unknown category to be predicted
+                // line += "?";
+                // add '0' as place holder for a numeric value to be predicted
+                line += "0";
                 ssw.WriteLine(line);
             }
         }
diff --git a/src/AnalysisPrograms/SpeciesAccumulationCurve.cs b/src/AnalysisPrograms/SpeciesAccumulationCurve.cs
index 3b909e4ed..b3a0a237f 100644
--- a/src/AnalysisPrograms/SpeciesAccumulationCurve.cs
+++ b/src/AnalysisPrograms/SpeciesAccumulationCurve.cs
@@ -618,7 +618,7 @@ public static void Execute(Arguments arguments)
         public static int[] GetRankOrder(string fileName, int colNumber)
         {
             string header1;
-            double[] array = CsvTools.ReadColumnOfCSVFile(fileName, colNumber, out header1);
+            double[] array = CsvTools.ReadColumnOfCsvFile(fileName, colNumber, out header1);
             var results2 = DataTools.SortArray(array);
             return results2.Item1;
         }
@@ -632,28 +632,28 @@ public static int[] GetRankOrder1(string fileName)
             string header1, header2, header3, header4, header5, header6;

             int colNumber1 = offset + 1; //background noise
-            double[] array1 = CsvTools.ReadColumnOfCSVFile(fileName, colNumber1, out header1);
+            double[] array1 = CsvTools.ReadColumnOfCsvFile(fileName, colNumber1, out header1);
             //array1 = DataTools.NormaliseArea(array1);

             int colNumber2 = offset + 3; //SegmentCount
-            double[] array2 = CsvTools.ReadColumnOfCSVFile(fileName, colNumber2, out header2);
+            double[] array2 = CsvTools.ReadColumnOfCsvFile(fileName, colNumber2, out header2);
             array2 = DataTools.NormaliseArea(array2);

             int colNumber3 = offset + 8; //H[avSpectrum]
-            double[] array3 = CsvTools.ReadColumnOfCSVFile(fileName, colNumber3, out header3);
+            double[] array3 = CsvTools.ReadColumnOfCsvFile(fileName, colNumber3, out header3);
             array3 = DataTools.NormaliseArea(array3);

             int colNumber4 = offset + 9; //H[varSpectrum]
-            double[] array4 = CsvTools.ReadColumnOfCSVFile(fileName, colNumber4, out header4);
+            double[] array4 = CsvTools.ReadColumnOfCsvFile(fileName, colNumber4, out header4);
             array4 = DataTools.NormaliseArea(array4);

             int colNumber5 = offset + 10; //number of clusters
-            double[] array5 = CsvTools.ReadColumnOfCSVFile(fileName, colNumber5, out header5);
+            double[] array5 = CsvTools.ReadColumnOfCsvFile(fileName, colNumber5, out header5);
             array5 = DataTools.NormaliseArea(array5);

             int colNumber6 = offset + 11; //av cluster duration
-            double[] array6 = CsvTools.ReadColumnOfCSVFile(fileName, colNumber6, out header6);
+            double[] array6 = CsvTools.ReadColumnOfCsvFile(fileName, colNumber6, out header6);
             array6 = DataTools.NormaliseArea(array6);

             //create sampling bias array - ie bias towards the dawn chorus
diff --git a/src/TowseyLibrary/CsvTools.cs b/src/TowseyLibrary/CsvTools.cs
index c0ade9c5d..41606d689 100644
--- a/src/TowseyLibrary/CsvTools.cs
+++ b/src/TowseyLibrary/CsvTools.cs
@@ -483,28 +483,30 @@ public static Dictionary ReadCSVFile2Dictionary(string csvFile

         /// <summary>
         /// Returns the requested column of data from a CSV file and also returns the column header
         /// </summary>
-        ///
-        ///
-        ///
-        public static double[] ReadColumnOfCSVFile(string fileName, int colNumber, out string header)
+        public static double[] ReadColumnOfCsvFile(string fileName, int colNumber, out string header)
         {
             List<string> lines = FileTools.ReadTextFile(fileName);
             string[] words = lines[0].Split(',');
             header = words[colNumber];
-            double[] array = new double[lines.Count - 1]; //-1 because ignore header
+            // -1 because ignore header
+            double[] array = new double[lines.Count - 1];

-            //read csv data into arrays.
-            for (int i = 1; i < lines.Count; i++) //ignore first line = header.
+            // read csv data into arrays. Ignore first line = header.
+            for (int i = 1; i < lines.Count; i++)
             {
                 words = lines[i].Split(',');
-                array[i - 1] = double.Parse(words[colNumber]);
-                if (double.IsNaN(array[i - 1]))
+                if (words.Length <= colNumber)
                 {
                     array[i - 1] = 0.0;
+                    LoggedConsole.WriteErrorLine("WARNING: Error while reading line " + i + " of CSV file.");
                 }
-            }//end
+                else
+                {
+                    double value;
+                    array[i - 1] = double.TryParse(words[colNumber], out value) ? value : 0.0;
+                }
+            }

             return array;
         }
diff --git a/src/TowseyLibrary/Plot.cs b/src/TowseyLibrary/Plot.cs
index 19e532749..e842be891 100644
--- a/src/TowseyLibrary/Plot.cs
+++ b/src/TowseyLibrary/Plot.cs
@@ -8,7 +8,7 @@ namespace TowseyLibrary
     using System.Drawing;

     /// <summary>
-    /// Represents a single array of data with Xand Y scales and other info useful for pltting a graph.
+    /// Represents a single array of data with X and Y scales and other info useful for plotting a graph.
     /// Was first used to represent a track of scores at the bottom of a sonogram image.
     /// </summary>
     public class Plot
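
The CsvTools change above replaces a bare double.Parse with a guarded read, so that a short row or an unparsable cell becomes 0.0 instead of throwing. The following is a minimal standalone sketch of that same defensive pattern; the class and method names (CsvColumnSketch, ReadColumn) are illustrative only and are not part of the repository, and only standard .NET calls are used.

using System.IO;

public static class CsvColumnSketch
{
    // Reads one numeric column from a CSV file and returns its header text.
    public static double[] ReadColumn(string path, int colNumber, out string header)
    {
        string[] lines = File.ReadAllLines(path);
        header = lines[0].Split(',')[colNumber];

        // -1 because the first line is the header.
        var column = new double[lines.Length - 1];
        for (int i = 1; i < lines.Length; i++)
        {
            string[] words = lines[i].Split(',');

            // Default to 0.0 when the row is too short or the cell does not parse,
            // mirroring the behaviour of the patched ReadColumnOfCsvFile.
            double value = 0.0;
            if (words.Length > colNumber)
            {
                double.TryParse(words[colNumber], out value);
            }

            column[i - 1] = value;
        }

        return column;
    }
}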
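
The reworked ExtractFeatureVectorOfIndices builds a header of per-bin feature names (e.g. ACI0012), appends a final "Target" column, and writes "0" as the placeholder for the numeric value to be predicted in Weka. The sketch below shows one way such a feature file could be assembled under those assumptions; the class, method and parameter names here (WekaFeatureFileSketch, Write, rows) are hypothetical and the row values are supplied by the caller.

using System.Collections.Generic;
using System.IO;
using System.Linq;

public static class WekaFeatureFileSketch
{
    public static void Write(string path, string[] indexCodes, int startBin, int endBin, IList<double[]> rows)
    {
        int length = endBin - startBin + 1;

        // Build the header: one column per index code per frequency bin, then "Target".
        var header = new List<string>();
        foreach (var code in indexCodes)
        {
            for (int i = 0; i < length; i++)
            {
                header.Add(code + (startBin + i).ToString("D4"));
            }
        }

        header.Add("Target");

        using (var writer = new StreamWriter(path))
        {
            writer.WriteLine(string.Join(",", header));
            foreach (var row in rows)
            {
                // "0" is the placeholder for the numeric value to be predicted.
                writer.WriteLine(string.Join(",", row.Select(v => v.ToString())) + ",0");
            }
        }
    }
}

Writing "0" rather than "?" mirrors the patch's own comment about predicting a numeric value rather than an unknown category.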