aiken_lang/
levenshtein.rs

1use std::cmp;
2
3/// Calculate Levenshtein distance for two UTF-8 encoded strings.
4///
5/// Returns a minimum number of edits to transform from source to target string.
6///
7/// Levenshtein distance accepts three edit operations: insertion, deletion,
8/// and substitution.
9///
10/// References:
11///
12/// - [Levenshtein distance in Cargo][1]
13/// - [Ilia Schelokov: Optimizing loop heavy Rust code][2]
14///
15/// [1]: https://github.com/rust-lang/cargo/blob/7d7fe6797ad07f313706380d251796702272b150/src/cargo/util/lev_distance.rs
16/// [2]: https://thaumant.me/optimizing-loop-heavy-rust/
17pub fn distance(source: &str, target: &str) -> usize {
18    if source.is_empty() {
19        return target.len();
20    }
21    if target.is_empty() {
22        return source.len();
23    }
24
25    let mut distances = (0..=target.chars().count()).collect::<Vec<_>>();
26
27    for (i, ch1) in source.chars().enumerate() {
28        let mut sub = i;
29        distances[0] = sub + 1;
30        for (j, ch2) in target.chars().enumerate() {
31            let dist = cmp::min(
32                cmp::min(distances[j], distances[j + 1]) + 1,
33                sub + (ch1 != ch2) as usize,
34            );
35
36            sub = distances[j + 1];
37            distances[j + 1] = dist;
38        }
39    }
40
41    *distances.last().unwrap()
42}