// Hamming distance for generic element vectors and for strings compared
// grapheme cluster by grapheme cluster.
use unicode_segmentation::UnicodeSegmentation;

/// Computes a length-tolerant Hamming distance between two sequences.
///
/// Elements are compared position by position over the length of the shorter
/// input, and every mismatch adds one to the result. Each element the longer
/// input has beyond the end of the shorter one also adds one, so inputs of
/// different lengths are accepted.
///
/// Two empty inputs yield `0`.
pub fn vec_hamming_distance<T: PartialEq>(s1: &Vec<T>, s2: &Vec<T>) -> usize {
    // Order the inputs so the length subtraction below can never underflow.
    let (short, long) = if s2.len() < s1.len() {
        (s2, s1)
    } else {
        (s1, s2)
    };

    // `zip` stops at the end of the shorter input, so only overlapping
    // positions are compared here.
    let mismatches = short
        .iter()
        .zip(long.iter())
        .filter(|(a, b)| a != b)
        .count();

    // The unmatched tail of the longer input counts in full.
    (long.len() - short.len()) + mismatches
}

pub fn hamming_distance(s1: &str, s2: &str) -> usize {
    let us1 = UnicodeSegmentation::graphemes(s1, true).collect::<Vec<&str>>();
    let us2 = UnicodeSegmentation::graphemes(s2, true).collect::<Vec<&str>>();

    vec_hamming_distance(&us1, &us2)
}

// Unit tests for this module; compiled only for test builds.
#[cfg(test)]
mod test {
    use super::*;
    use crate::testutils::testutils;
    /// Passes `hamming_distance` and the fixture path to the shared
    /// `test_distance_func` helper.
    // NOTE(review): presumably the helper reads input pairs and expected
    // distances from the CSV and asserts on each row; confirm in `testutils`.
    #[test]
    fn test_hamming() {
        testutils::test_distance_func("testdata/hamming.csv", hamming_distance);
    }
}