gzip_cmp/
lib.rs

1mod clusters;
2mod zip_distance;
3
4use std::vec;
5
6use clusters::*;
7use zip_distance::*;
8
9use rayon::prelude::*;
10
11//pub struct Builder {
12//    max_dist: f64
13//}
14//
15//impl Builder {
16//    pub fn new() -> Self {
17//        Builder { max_dist: 0.45 }
18//    }
19//
20//    pub fn distance(mut self, d: f64) -> Self {
21//        self.max_dist = d;
22//        self
23//    }
24//
25//    pub fn build_groups(self, data: &[&[u8]]) -> Vec<usize> {
26//        vec![]
27//    }
28//}
29
30fn build_distance_table(data: &[&[u8]]) -> Vec<Vec<f64>> {
31    data.par_iter()
32        .map(|a| data.par_iter().map(|b| distance(a, b)).collect())
33        .collect()
34}
35
36pub fn build(data: &[&[u8]], max_dist: f64) -> Vec<usize> {
37    println!("Building distance table...");
38    let d = build_distance_table(data);
39    println!("Building mst...");
40    let (_mst, graph) = build_mst(&d, data.len());
41    println!("Building clusters...");
42    return clusters(&graph, max_dist);
43}