diff --git a/casr/src/bin/casr-cluster.rs b/casr/src/bin/casr-cluster.rs index 97ad3e89..672846d1 100644 --- a/casr/src/bin/casr-cluster.rs +++ b/casr/src/bin/casr-cluster.rs @@ -503,7 +503,7 @@ fn merge_clusters( /// # Return value /// /// Silhouette coefficient -fn avg_sil(dir: &Path, jobs: usize) -> Result { +fn calc_avg_sil(dir: &Path, jobs: usize) -> Result { // Get cluster dirs let mut dirs: Vec = fs::read_dir(dir) .unwrap() @@ -538,16 +538,8 @@ fn avg_sil(dir: &Path, jobs: usize) -> Result { if size == 0 { bail!("{} valid reports, nothing to calculate...", size); } - // Init sil sum - let mut sum = 0f64; - // Calculate silhouette coefficient for each casrep - for i in 0..clusters.len() { - for num in 0..clusters[i].len() { - let sil = sil_coef(num, i, &clusters); - sum += sil; - } - } - Ok(sum / size as f64) + let avg_sil = avg_sil_ceof(&clusters, size); + Ok(avg_sil) } fn main() -> Result<()> { @@ -781,11 +773,11 @@ fn main() -> Result<()> { if before != after { println!("Number of reports after crashline deduplication in new clusters: {after}"); } - let sil = avg_sil(paths[1], jobs)?; + let sil = calc_avg_sil(paths[1], jobs)?; println!("Cluster silhouette score: {sil}"); } else if matches.contains_id("estimate") { let path: &PathBuf = matches.get_one::("estimate").unwrap(); - let sil = avg_sil(path, jobs)?; + let sil = calc_avg_sil(path, jobs)?; println!("Cluster silhouette score: {sil}"); } diff --git a/libcasr/src/stacktrace.rs b/libcasr/src/stacktrace.rs index 119434ca..9cfd866d 100644 --- a/libcasr/src/stacktrace.rs +++ b/libcasr/src/stacktrace.rs @@ -642,7 +642,7 @@ fn sil_subcoef_b(num: usize, i: usize, clusters: &[Vec]) -> f64 { /// # Return value /// /// Silhouette coefficient -pub fn sil_coef(num: usize, i: usize, clusters: &[Vec]) -> f64 { +fn sil_coef(num: usize, i: usize, clusters: &[Vec]) -> f64 { if clusters[i].len() != 1 { let a = sil_subcoef_a(num, &clusters[i]); let b = sil_subcoef_b(num, i, clusters); @@ -652,6 +652,31 @@ pub fn sil_coef(num: usize, i: usize, clusters: &[Vec]) -> f64 { } } +/// Get average silhouette coefficient calculating for given stacktraces +/// Read more: https://en.wikipedia.org/wiki/Silhouette_(clustering)#Definition +/// +/// # Arguments +/// +/// * `clusters` - a vector of clusters represented as slice of `Stacktrace` structures +/// +/// * `size` - total amount of elements in clusters +/// +/// # Return value +/// +/// Average silhouette coefficient +pub fn avg_sil_ceof(clusters: &[Vec], size: usize) -> f64 { + // Init sil sum + let mut sum = 0f64; + // Calculate silhouette coefficient for each casrep + for i in 0..clusters.len() { + for num in 0..clusters[i].len() { + let sil = sil_coef(num, i, clusters); + sum += sil; + } + } + sum / size as f64 +} + /// Stack trace filtering trait. pub trait Filter { /// Filter frames from the stack trace that are not related to analyzed code containing crash.