// hipparchus_metrics/text.rs
use crate::metrics::Metrics;
2
/// Selects which string distance to compute.
///
/// `#[repr(i32)]` together with the explicit discriminants fixes the integer
/// value of each variant (`Hamming` = 1, `Levenshtein` = 2).
///
/// The enum is fieldless, so it is `Copy`: a selector can be reused after being
/// passed by value without an explicit `clone()`.
#[repr(i32)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum TextDistance
{
    /// Position-by-position character mismatch count; characters of the longer
    /// string that have no counterpart each count as one mismatch.
    Hamming = 1,
    /// Levenshtein edit distance.
    Levenshtein = 2,
}
10
11impl Metrics<&str, f32> for TextDistance
12{
13 fn measure(self, t1:&str, t2:&str) -> f32
14 {
15 match self
16 {
17 TextDistance::Hamming => TextDistance::hamming(t1, t2) as f32,
18 TextDistance::Levenshtein => TextDistance::levenshtein(t1, t2) as f32,
19 }
20 }
21}
22
23impl TextDistance
24{
25 pub fn hamming(x: &str, y: &str) -> usize
26 {
27 let mut total = 0;
28 let mut xchars = x.chars();
29 let mut ychars = y.chars();
30 loop
31 {
32 let xchar = xchars.next();
33 let ychar = ychars.next();
34 if xchar == None || ychar == None
35 {
36 let xdelta = if xchar != None { xchars.count() + 1 } else { 0 };
37 let ydelta = if ychar != None { ychars.count() + 1 } else { 0 };
38 total += xdelta + ydelta;
39 break;
40 }
41 if xchar.unwrap() != ychar.unwrap()
42 {
43 total += 1
44 }
45 }
46 total
47 }
48
49 pub fn levenshtein(x: &str, y: &str) -> usize
50 {
51 if x.is_empty()
52 {
53 return y.chars().count();
54 }
55 if y.is_empty()
56 {
57 return x.chars().count();
58 }
59 if x == y
60 {
61 return 0;
62 }
63
64 let mut cache: Vec<usize> = (1..).take(x.chars().count()).collect();
65 let mut distance_a;
66 let mut distance_b;
67 let mut result = 0;
68 for (idxb, b) in y.chars().enumerate()
69 {
70 result = idxb;
71 distance_a = idxb;
72
73 for (idxa, a) in x.chars().enumerate()
74 {
75 distance_b = if a == b { distance_a } else { distance_a + 1 };
76 distance_a = cache[idxa];
77 result =
78 if distance_a > result
79 {
80 if distance_b > result
81 {
82 result + 1
83 }
84 else
85 {
86 distance_b
87 }
88 }
89 else if distance_b > distance_a
90 {
91 distance_a + 1
92 }
93 else
94 {
95 distance_b
96 };
97
98 cache[idxa] = result;
99 }
100 }
101
102 result
103 }
104}
105
#[cfg(test)]
mod tests
{
    use super::*;
    use rstest::*;
    use float_cmp::assert_approx_eq;

    // Known distances for pairs of different strings, including a non-ASCII
    // pair to pin the per-`char` (not per-byte) comparison.
    #[rstest]
    #[case("ABCD", "ABCE", TextDistance::Hamming, 1.0)]
    #[case("ABCD", "AB", TextDistance::Hamming, 2.0)]
    #[case("ABCD", "CD", TextDistance::Hamming, 4.0)]
    #[case("Hello, world!", "hello, world", TextDistance::Hamming, 2.0)]
    #[case("héllo", "hello", TextDistance::Hamming, 1.0)]
    #[case("ABCD", "ABCE", TextDistance::Levenshtein, 1.0)]
    #[case("ABCD", "AB", TextDistance::Levenshtein, 2.0)]
    #[case("ABCD", "CD", TextDistance::Levenshtein, 2.0)]
    #[case("Hello, world!", "hello, world", TextDistance::Levenshtein, 2.0)]
    #[case("héllo", "hello", TextDistance::Levenshtein, 1.0)]
    fn test_text_distance(#[case] t1: &str, #[case] t2: &str, #[case] metrics: TextDistance, #[case] distance: f32)
    {
        assert_approx_eq!(f32, distance, metrics.measure(t1, t2));
    }

    // A string is at distance zero from itself.
    #[rstest]
    #[case("", TextDistance::Hamming)]
    #[case("", TextDistance::Levenshtein)]
    #[case("Hello, world!", TextDistance::Hamming)]
    #[case("Hello, world!", TextDistance::Levenshtein)]
    fn test_text_distance_eq(#[case] t: &str, #[case] metrics: TextDistance)
    {
        assert_approx_eq!(f32, 0.0, metrics.measure(t, t));
    }

    // Against the empty string the distance is the character count of the
    // other string, whichever argument position the empty string is in.
    #[rstest]
    #[case("Hello, world!", TextDistance::Hamming, 13.0)]
    #[case("ABCD", TextDistance::Hamming, 4.0)]
    #[case("Hello, world!", TextDistance::Levenshtein, 13.0)]
    #[case("ABCD", TextDistance::Levenshtein, 4.0)]
    fn test_text_distance_empty(#[case] t: &str, #[case] metrics: TextDistance, #[case] distance: f32)
    {
        // `measure` takes the selector by value, so clone it for the first call.
        assert_approx_eq!(f32, distance, metrics.clone().measure(t, ""));
        assert_approx_eq!(f32, distance, metrics.measure("", t));
    }
}
144}