use crate::error::*;
use crate::stacktrace::*;
use std::collections::HashMap;
use std::path::PathBuf;
/// Data describing a single report: a path paired with a stack trace and a string.
///
/// `Cluster::cluster_reports` unpacks the three components as the report path,
/// its stack trace and its crash line, in that order.
pub type ReportInfo = (PathBuf, (Stacktrace, String));
/// Result of comparing a stack trace with a cluster (see `Cluster::relation`).
pub enum Relation {
/// The stack trace is at distance `0.0` from some stack trace of the cluster.
Dup,
/// The stack trace does not exceed the cluster diameter; the payload is
/// that diameter.
Inner(f64),
/// The stack trace is farther than `THRESHOLD` from some member, or its
/// largest distance to a member exceeds the cluster diameter.
Outer,
}
/// A numbered group of reports together with their stack traces.
pub struct Cluster {
/// Cluster number.
pub number: usize,
/// Paths of the reports stored in the cluster.
paths: Vec<PathBuf>,
/// Stack traces of the stored reports; pushed together with `paths`, so
/// both vectors grow in lockstep on `insert`.
stacktraces: Vec<Stacktrace>,
/// Cached diameter; `None` until computed and reset to `None` by `insert`.
diam: Option<f64>,
/// Known crash lines mapped to a report index.
crashlines: HashMap<String, usize>,
}
impl Cluster {
    /// Creates a cluster from already collected report data.
    ///
    /// # Arguments
    ///
    /// * `number` - cluster number
    /// * `paths` - paths of the reports in the cluster
    /// * `stacktraces` - stack traces of the reports in the cluster
    /// * `crashlines` - crash lines of the reports in the cluster
    ///
    /// Every distinct crash line is stored with the index of its first
    /// occurrence in `crashlines`. The cached diameter starts out empty.
    pub fn new(
        number: usize,
        paths: Vec<PathBuf>,
        stacktraces: Vec<Stacktrace>,
        crashlines: Vec<String>,
    ) -> Self {
        let mut unique_crashlines: HashMap<String, usize> = HashMap::new();
        for (i, crashline) in crashlines.into_iter().enumerate() {
            // `or_insert` keeps the first index seen for a repeated crash line.
            unique_crashlines.entry(crashline).or_insert(i);
        }
        Cluster {
            number,
            paths,
            stacktraces,
            diam: None,
            crashlines: unique_crashlines,
        }
    }

    /// Returns the paths of the reports stored in the cluster.
    pub fn paths(&self) -> &Vec<PathBuf> {
        &self.paths
    }

    /// Returns the stack traces of the reports stored in the cluster.
    pub fn stacktraces(&self) -> &Vec<Stacktrace> {
        &self.stacktraces
    }

    /// Clusters `reports` and builds a `Cluster` for every resulting group.
    ///
    /// # Arguments
    ///
    /// * `reports` - reports to cluster
    /// * `offset` - value added to every cluster number produced by
    ///   `cluster_stacktraces`
    /// * `dedup` - when `true`, `dedup_crashlines` is applied to the cluster
    ///   numbers and crash line deduplication is enabled on insertion
    ///
    /// # Returns
    ///
    /// A tuple of:
    /// * the clusters keyed by their (offset) number,
    /// * the number of input reports,
    /// * the value returned by `dedup_crashlines` when `dedup` is set
    ///   (presumably the number of reports left after deduplication - TODO
    ///   confirm), otherwise the number of input reports.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `cluster_stacktraces`.
    pub fn cluster_reports(
        reports: &[ReportInfo],
        offset: usize,
        dedup: bool,
    ) -> Result<(HashMap<usize, Cluster>, usize, usize)> {
        // Owned copies are needed because the helpers below take whole slices
        // of stack traces and crash lines.
        let (casreps, (stacktraces, crashlines)): (Vec<_>, (Vec<_>, Vec<_>)) =
            reports.iter().cloned().unzip();
        let len = casreps.len();

        let mut numbers = cluster_stacktraces(&stacktraces)?;
        let after = if dedup {
            dedup_crashlines(&crashlines, &mut numbers)
        } else {
            len
        };

        let mut clusters: HashMap<usize, Cluster> = HashMap::new();
        // The owned copies are moved into the clusters, so nothing is cloned
        // a second time.
        let owned = casreps.into_iter().zip(stacktraces).zip(crashlines);
        for (i, ((path, stacktrace), crashline)) in owned.enumerate() {
            let assigned = numbers[i];
            // Reports numbered 0 are not placed into any cluster.
            if assigned == 0 {
                continue;
            }
            let number = assigned + offset;
            clusters
                .entry(number)
                .or_insert_with(|| Cluster::new(number, Vec::new(), Vec::new(), Vec::new()))
                .insert(path, stacktrace, crashline, dedup);
        }
        Ok((clusters, len, after))
    }

    /// Adds a report to the cluster.
    ///
    /// # Arguments
    ///
    /// * `path` - path of the report
    /// * `stacktrace` - stack trace of the report
    /// * `crashline` - crash line of the report
    /// * `dedup` - reject the report when its non-empty crash line is already
    ///   known to the cluster
    ///
    /// # Returns
    ///
    /// `false` when the report was rejected as a crash line duplicate, `true`
    /// when it was stored. Storing a report invalidates the cached diameter.
    pub fn insert(
        &mut self,
        path: PathBuf,
        stacktrace: Stacktrace,
        crashline: String,
        dedup: bool,
    ) -> bool {
        if dedup && !crashline.is_empty() && self.crashlines.contains_key(&crashline) {
            return false;
        }
        // Index the new report will occupy in `paths`.
        let index = self.paths.len();
        self.paths.push(path);
        self.stacktraces.push(stacktrace);
        self.diam = None;
        self.crashlines.insert(crashline, index);
        true
    }

    /// Returns the cluster diameter, computing and caching it on first use.
    pub fn diam(&mut self) -> f64 {
        // Borrow the field separately so the closure does not capture `self`.
        let stacktraces = &self.stacktraces;
        *self.diam.get_or_insert_with(|| diam(stacktraces))
    }

    /// Determines how the stack trace `new` relates to the cluster.
    ///
    /// The distance to a member is `1.0 - similarity(new, member)`.
    ///
    /// # Returns
    ///
    /// * `Relation::Dup` as soon as a member at distance `0.0` is found,
    /// * `Relation::Outer` as soon as a member farther than `THRESHOLD` is
    ///   found, or when the largest distance exceeds the cluster diameter,
    /// * `Relation::Inner(diam)` otherwise, carrying the cluster diameter.
    pub fn relation(&mut self, new: &Stacktrace) -> Relation {
        let diam = self.diam();
        let mut max = 0f64;
        for stacktrace in &self.stacktraces {
            let dist = 1.0 - similarity(new, stacktrace);
            if dist == 0.0 {
                return Relation::Dup;
            }
            if dist > THRESHOLD {
                return Relation::Outer;
            }
            if dist > max {
                max = dist;
            }
        }
        if diam >= max {
            Relation::Inner(diam)
        } else {
            Relation::Outer
        }
    }

    /// Returns the distance between two clusters, defined as the diameter of
    /// the union of their stack traces.
    pub fn dist(cluster1: &Cluster, cluster2: &Cluster) -> f64 {
        let mut merged = cluster1.stacktraces.clone();
        merged.extend_from_slice(&cluster2.stacktraces);
        diam(&merged)
    }

    /// Returns the largest distance between the stack trace of `report` and
    /// the stack traces of `cluster`, or `0.0` for an empty cluster.
    pub fn dist_rep(cluster: &Cluster, report: &ReportInfo) -> f64 {
        let (_, (trace, _)) = report;
        cluster
            .stacktraces
            .iter()
            .map(|s| 1.0 - similarity(s, trace))
            .max_by(|a, b| a.total_cmp(b))
            .unwrap_or(0f64)
    }
}
/// Returns the diameter of a set of stack traces: the largest pairwise
/// distance, where the distance is `1.0 - similarity(a, b)`.
///
/// Yields `0.0` when there are fewer than two stack traces.
fn diam(stacktraces: &[Stacktrace]) -> f64 {
    let mut widest = 0f64;
    for (idx, first) in stacktraces.iter().enumerate() {
        // Only pairs with a later element are visited, so each unordered
        // pair is compared once.
        for second in &stacktraces[idx + 1..] {
            let gap = 1.0 - similarity(first, second);
            if gap > widest {
                widest = gap;
            }
        }
    }
    widest
}
/// Returns the average distance from the stack trace at index `num` to every
/// other stack trace in `stacktraces`.
///
/// The sum is divided by `stacktraces.len() - 1`, so the slice is expected to
/// hold at least two elements.
fn sil_subcoef_a(num: usize, stacktraces: &[Stacktrace]) -> f64 {
    let total = stacktraces
        .iter()
        .enumerate()
        .filter(|(idx, _)| *idx != num)
        .fold(0f64, |acc, (_, other)| {
            acc + (1.0 - similarity(&stacktraces[num], other))
        });
    total / (stacktraces.len() - 1) as f64
}
/// Returns the smallest average distance from stack trace `num` of cluster
/// `i` to the stack traces of any other cluster.
///
/// `f64::MAX` is returned when no other cluster yields a smaller average
/// (for instance when `clusters` holds only cluster `i`).
fn sil_subcoef_b(num: usize, i: usize, clusters: &[Vec<Stacktrace>]) -> f64 {
    let mut nearest = f64::MAX;
    let others = clusters
        .iter()
        .enumerate()
        .filter(|(idx, _)| *idx != i)
        .map(|(_, members)| members);
    for members in others {
        let total = members.iter().fold(0f64, |acc, other| {
            acc + (1.0 - similarity(&clusters[i][num], other))
        });
        let mean = total / members.len() as f64;
        if mean < nearest {
            nearest = mean;
        }
    }
    nearest
}
/// Returns the silhouette-style coefficient `(b - a) / max(a, b)` for stack
/// trace `num` of cluster `i`.
///
/// `a` is the average distance to the other members of the same cluster
/// (`sil_subcoef_a`) and `b` is the smallest average distance to another
/// cluster (`sil_subcoef_b`).
///
/// # Returns
///
/// `0.0` for a cluster holding exactly one stack trace, and also when both
/// averages are zero: the ratio would otherwise be `0.0 / 0.0 = NaN`, which
/// would poison any sum the result is added to.
fn sil_coef(num: usize, i: usize, clusters: &[Vec<Stacktrace>]) -> f64 {
    if clusters[i].len() == 1 {
        return 0f64;
    }
    let a = sil_subcoef_a(num, &clusters[i]);
    let b = sil_subcoef_b(num, i, clusters);
    let denom = a.max(b);
    if denom == 0.0 {
        // Both averages are zero: treat the undefined ratio as neutral.
        0f64
    } else {
        (b - a) / denom
    }
}
/// Returns the sum of `sil_coef` over every stack trace of every cluster,
/// divided by `size`.
///
/// # Arguments
///
/// * `clusters` - stack traces grouped by cluster
/// * `size` - divisor for the sum; presumably the total number of stack
///   traces across all clusters (TODO confirm against callers)
pub fn avg_sil_coef(clusters: &[Vec<Stacktrace>], size: usize) -> f64 {
    let total = clusters
        .iter()
        .enumerate()
        .flat_map(|(i, members)| (0..members.len()).map(move |num| (num, i)))
        .fold(0f64, |acc, (num, i)| acc + sil_coef(num, i, clusters));
    total / size as f64
}