pub fn cluster_strings<'a>(
inputs: &'a Vec<&'a str>,
max_edit_frac: f32,
n_threads: usize,
) -> Result<Vec<Vec<&'a str>>, ValueError>
Expand description
Group similar input strings into clusters.
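Strings will be grouped into a cluster if the Levenshtein distance between them is at most `max_edit_frac` times the shorter string’s length. For example, with `max_edit_frac = 0.25`, "aaaa" and "aaax" (distance 1, length 4) fall into the same cluster.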
§Examples
Basic usage:
let inputs = vec!["aaaa", "aaax", "bbbb", "bbbz"];
let expected = vec![vec!["aaaa", "aaax"], vec!["bbbb", "bbbz"]];
let clusters = clustr::cluster_strings(&inputs, 0.25, 1)?;
assert_eq!(clusters, expected);
§Multiple threads:
let inputs = vec!["aa", "bb", "aa", "bb"];
let expected = vec![vec!["aa", "aa"], vec!["bb", "bb"]];
let results = clustr::cluster_strings(&inputs, 0.0, 4)?;
// The order of the returned clusters is nondeterministic, so check membership rather than equality.
for e in expected {
assert!(results.contains(&e));
}