hipparchus_metrics/
text.rs

1use crate::metrics::Metrics;
2
/// Selects which string distance is computed for a pair of texts.
///
/// The discriminants are explicit and the representation is `i32`, so a variant
/// can be cast with `as i32` and always yields the value written here.
///
/// The type is `Copy`: it carries no data, and the metric is consumed by value
/// when measuring, so callers can reuse one selector for many measurements
/// without cloning it.
#[repr(i32)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum TextDistance
{
    /// Number of positions at which the two texts hold different characters.
    Hamming = 1,
    /// Edit distance counted in single-character insertions, deletions and
    /// substitutions.
    Levenshtein = 2,
}
10
11impl Metrics<&str, f32> for TextDistance
12{
13    fn measure(self, t1:&str, t2:&str) -> f32
14    {
15        match self
16        {
17            TextDistance::Hamming => TextDistance::hamming(t1, t2) as f32,
18            TextDistance::Levenshtein => TextDistance::levenshtein(t1, t2) as f32,
19        }
20    }
21}
22
23impl TextDistance
24{
25    pub fn hamming(x: &str, y: &str) -> usize
26    {
27        let mut total = 0;
28        let mut xchars = x.chars();
29        let mut ychars = y.chars();
30        loop
31        {
32            let xchar = xchars.next();
33            let ychar = ychars.next();
34            if xchar == None || ychar == None
35            {
36                let xdelta = if xchar != None { xchars.count() + 1 } else { 0 };
37                let ydelta = if ychar != None { ychars.count() + 1 } else { 0 };
38                total += xdelta + ydelta;
39                break;
40            }
41            if xchar.unwrap() != ychar.unwrap()
42            {
43                total += 1
44            }
45        }
46        total
47    }
48
49    pub fn levenshtein(x: &str, y: &str) -> usize
50    {
51        if x.is_empty()
52        {
53            return y.chars().count();
54        }
55        if y.is_empty()
56        {
57            return x.chars().count();
58        }
59        if x == y
60        {
61            return 0;
62        }
63    
64        let mut cache: Vec<usize> = (1..).take(x.chars().count()).collect();
65        let mut distance_a;
66        let mut distance_b;
67        let mut result = 0;
68        for (idxb, b) in y.chars().enumerate()
69        {
70            result = idxb;
71            distance_a = idxb;
72    
73            for (idxa, a) in x.chars().enumerate()
74            {
75                distance_b = if a == b { distance_a } else { distance_a + 1 };
76                distance_a = cache[idxa];
77                result = 
78                    if distance_a > result
79                    {
80                        if distance_b > result
81                        {
82                            result + 1
83                        }
84                        else
85                        {
86                            distance_b
87                        }
88                    }
89                    else if distance_b > distance_a
90                    {
91                        distance_a + 1
92                    }
93                    else
94                    {
95                        distance_b
96                    };
97    
98                cache[idxa] = result;
99            }
100        }
101    
102        result
103    }
104}
105
#[cfg(test)]
mod tests
{
    use super::*;
    use rstest::*;
    use float_cmp::assert_approx_eq;

    // Distances between two different texts, checked for both metrics.
    #[rstest]
    #[case("ABCD", "ABCE", TextDistance::Hamming, 1.0)]
    #[case("ABCD", "AB", TextDistance::Hamming, 2.0)]
    #[case("ABCD", "CD", TextDistance::Hamming, 4.0)]
    #[case("Hello, world!", "hello, world", TextDistance::Hamming, 2.0)]
    #[case("ABCD", "ABCE", TextDistance::Levenshtein, 1.0)]
    #[case("ABCD", "AB", TextDistance::Levenshtein, 2.0)]
    #[case("ABCD", "CD", TextDistance::Levenshtein, 2.0)]
    #[case("Hello, world!", "hello, world", TextDistance::Levenshtein, 2.0)]
    fn test_text_distance(#[case] lhs: &str, #[case] rhs: &str, #[case] metric: TextDistance, #[case] expected: f32)
    {
        let actual = metric.measure(lhs, rhs);
        assert_approx_eq!(f32, expected, actual);
    }

    // A text measured against itself is at distance zero, including the empty text.
    #[rstest]
    #[case("", TextDistance::Hamming)]
    #[case("", TextDistance::Levenshtein)]
    #[case("Hello, world!", TextDistance::Hamming)]
    #[case("Hello, world!", TextDistance::Levenshtein)]
    fn test_text_distance_eq(#[case] text: &str, #[case] metric: TextDistance)
    {
        let actual = metric.measure(text, text);
        assert_approx_eq!(f32, 0.0, actual);
    }

    // Against the empty text the distance equals the number of characters.
    #[rstest]
    #[case("Hello, world!", TextDistance::Hamming, 13.0)]
    #[case("ABCD", TextDistance::Levenshtein, 4.0)]
    fn test_text_distance_empty(#[case] text: &str, #[case] metric: TextDistance, #[case] expected: f32)
    {
        let actual = metric.measure(text, "");
        assert_approx_eq!(f32, expected, actual);
    }
}