Skip to main content

luaur_common/functions/
edit_distance.rs

1#[allow(non_snake_case)]
2pub fn editDistance(mut a: &[u8], mut b: &[u8]) -> usize {
3    while !a.is_empty() && !b.is_empty() && a[0] == b[0] {
4        a = &a[1..];
5        b = &b[1..];
6    }
7
8    while !a.is_empty() && !b.is_empty() && a[a.len() - 1] == b[b.len() - 1] {
9        a = &a[..a.len() - 1];
10        b = &b[..b.len() - 1];
11    }
12
13    if a.is_empty() {
14        return b.len();
15    }
16    if b.is_empty() {
17        return a.len();
18    }
19
20    let max_distance = a.len() + b.len();
21    let b_stride = b.len() + 2;
22    let mut distances = alloc::vec![0; (a.len() + 2) * b_stride];
23
24    let get_pos = |x: usize, y: usize| -> usize { (x * b_stride) + y };
25
26    distances[0] = max_distance;
27
28    for x in 0..=a.len() {
29        distances[get_pos(x + 1, 0)] = max_distance;
30        distances[get_pos(x + 1, 1)] = x;
31    }
32
33    for y in 0..=b.len() {
34        distances[get_pos(0, y + 1)] = max_distance;
35        distances[get_pos(1, y + 1)] = y;
36    }
37
38    let mut seen_char_to_row = [0usize; 256];
39
40    for x in 1..=a.len() {
41        let mut last_matched_y = 0;
42
43        for y in 1..=b.len() {
44            let b_seen_char_index = b[y - 1] as usize;
45            let x1 = seen_char_to_row[b_seen_char_index];
46            let y1 = last_matched_y;
47
48            let mut cost = 1;
49            if a[x - 1] == b[y - 1] {
50                cost = 0;
51                last_matched_y = y;
52            }
53
54            let transposition = distances[get_pos(x1, y1)] + (x - x1 - 1) + 1 + (y - y1 - 1);
55            let substitution = distances[get_pos(x, y)] + cost;
56            let insertion = distances[get_pos(x, y + 1)] + 1;
57            let deletion = distances[get_pos(x + 1, y)] + 1;
58
59            distances[get_pos(x + 1, y + 1)] = core::cmp::min(
60                core::cmp::min(insertion, deletion),
61                core::cmp::min(substitution, transposition),
62            );
63        }
64
65        let a_seen_char_index = a[x - 1] as usize;
66        seen_char_to_row[a_seen_char_index] = x;
67    }
68
69    distances[get_pos(a.len() + 1, b.len() + 1)]
70}