1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
use std::cmp::min;
use crate::utils::TextSlice;
pub fn hamming(alpha: TextSlice<'_>, beta: TextSlice<'_>) -> u64 {
assert_eq!(
alpha.len(),
beta.len(),
"hamming distance cannot be calculated for texts of different length ({}!={})",
alpha.len(),
beta.len()
);
let mut dist = 0;
for (a, b) in alpha.iter().zip(beta) {
if a != b {
dist += 1;
}
}
dist
}
#[allow(unused_assignments)]
pub fn levenshtein(alpha: TextSlice<'_>, beta: TextSlice<'_>) -> u32 {
let mut columns = [vec![0u32; alpha.len() + 1], vec![0u32; alpha.len() + 1]];
let mut i_prev = 0;
let mut i_cur = 1;
for i in 0..columns[0].len() {
columns[0][i] = i as u32;
}
for (j, item) in beta.iter().enumerate() {
i_cur %= 2;
i_prev = 1 - i_cur;
columns[i_cur][0] = 1 + j as u32;
for i in 1..columns[0].len() {
columns[i_cur][i] = min(
columns[i_prev][i - 1] + if alpha[i - 1] == *item { 0 } else { 1 },
min(columns[i_cur][i - 1] + 1, columns[i_prev][i] + 1),
);
}
i_cur += 1;
}
columns[i_cur - 1][columns[0].len() - 1]
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_hamming_dist_good() {
let x = b"GTCTGCATGCG";
let y = b"TTTAGCTAGCG";
assert_eq!(hamming(x, y), 5);
}
#[test]
#[should_panic(
expected = "hamming distance cannot be calculated for texts of different length (11!=8)"
)]
fn test_hamming_dist_bad() {
let x = b"GACTATATCGA";
let y = b"TTTAGCTC";
hamming(x, y);
}
#[test]
fn test_levenshtein_dist() {
let x = b"ACCGTGGAT";
let y = b"AAAAACCGTTGAT";
assert_eq!(levenshtein(x, y), 5);
assert_eq!(levenshtein(x, y), levenshtein(y, x));
assert_eq!(levenshtein(b"AAA", b"TTTT"), 4);
assert_eq!(levenshtein(b"TTTT", b"AAA"), 4);
}
}