// oxits 0.1.0
//
// Time series classification and transformation library for Rust
// Documentation
use std::collections::HashMap;

/// One-sided BOSS distance from the query histogram `a` to the histogram `b`.
///
/// Only the words that occur in `a` contribute. For each such word the count
/// in `b` is looked up (a missing word counts as 0) and the squared difference
/// of the two counts is accumulated. Words that occur only in `b` are ignored,
/// so in general `boss_distance(a, b) != boss_distance(b, a)`.
///
/// # Returns
///
/// The square root of the accumulated sum of squared count differences.
pub fn boss_distance(a: &HashMap<String, usize>, b: &HashMap<String, usize>) -> f64 {
    let squared_gaps = a.iter().map(|(word, &in_a)| {
        // A word absent from `b` is treated as having a count of zero.
        let in_b = b.get(word).map_or(0, |&n| n);
        let gap = in_a as f64 - in_b as f64;
        gap * gap
    });

    squared_gaps.sum::<f64>().sqrt()
}

/// Symmetric BOSS distance: considers words in both histograms.
///
/// Every word that occurs in `a`, in `b`, or in both contributes the squared
/// difference of its two counts, where a word missing from one histogram is
/// given a count of 0 there. Swapping the arguments therefore yields the same
/// value.
///
/// # Returns
///
/// The square root of the sum of squared count differences over the union of
/// the words of `a` and `b`.
pub fn boss_distance_symmetric(a: &HashMap<String, usize>, b: &HashMap<String, usize>) -> f64 {
    // First pass: every word of `a`, compared with its (possibly absent)
    // count in `b`.
    let from_a = a.iter().map(|(word, &count_a)| {
        let count_b = b.get(word).copied().unwrap_or(0);
        let diff = count_a as f64 - count_b as f64;
        diff * diff
    });

    // Second pass: words that occur only in `b`. Shared words were already
    // handled above, and the count on the `a` side is zero here, so the
    // squared difference is simply the squared count.
    let only_in_b = b
        .iter()
        .filter(|(word, _)| !a.contains_key(*word))
        .map(|(_, &count_b)| {
            let count = count_b as f64;
            count * count
        });

    // Chaining the two lazy passes avoids materialising a temporary map of
    // the union of words.
    from_a.chain(only_in_b).sum::<f64>().sqrt()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing floating-point distances.
    const EPS: f64 = 1e-10;

    /// Builds a word histogram from `(word, count)` pairs.
    fn hist(entries: &[(&str, usize)]) -> HashMap<String, usize> {
        entries
            .iter()
            .map(|&(word, count)| (word.to_string(), count))
            .collect()
    }

    #[test]
    fn test_boss_identical() {
        // A histogram compared with itself has no count differences.
        let a = hist(&[("abc", 3), ("bcd", 2)]);
        assert!(boss_distance(&a, &a).abs() < EPS);
    }

    #[test]
    fn test_boss_different() {
        let a = hist(&[("abc", 3)]);
        let b = hist(&[("abc", 1)]);

        // sqrt((3 - 1)^2) = 2
        assert!((boss_distance(&a, &b) - 2.0).abs() < EPS);
    }

    #[test]
    fn test_boss_missing_word() {
        let a = hist(&[("abc", 3)]);
        let b = hist(&[]);

        // A word missing from `b` counts as 0 there: sqrt(3^2) = 3
        assert!((boss_distance(&a, &b) - 3.0).abs() < EPS);
    }

    #[test]
    fn test_boss_asymmetric() {
        let a = hist(&[("abc", 1)]);
        let b = hist(&[("xyz", 100)]);

        // BOSS distance only looks at words in `a`, so "xyz" is ignored.
        assert!((boss_distance(&a, &b) - 1.0).abs() < EPS);
    }

    #[test]
    fn test_boss_symmetric() {
        let a = hist(&[("abc", 1)]);
        let b = hist(&[("xyz", 2)]);

        // sqrt(1^2 + 2^2) = sqrt(5)
        let expected = 5.0_f64.sqrt();
        assert!((boss_distance_symmetric(&a, &b) - expected).abs() < EPS);
    }
}