aiken_lang/levenshtein.rs
1use std::cmp;
2
3/// Calculate Levenshtein distance for two UTF-8 encoded strings.
4///
5/// Returns a minimum number of edits to transform from source to target string.
6///
7/// Levenshtein distance accepts three edit operations: insertion, deletion,
8/// and substitution.
9///
10/// References:
11///
12/// - [Levenshtein distance in Cargo][1]
13/// - [Ilia Schelokov: Optimizing loop heavy Rust code][2]
14///
15/// [1]: https://github.com/rust-lang/cargo/blob/7d7fe6797ad07f313706380d251796702272b150/src/cargo/util/lev_distance.rs
16/// [2]: https://thaumant.me/optimizing-loop-heavy-rust/
17pub fn distance(source: &str, target: &str) -> usize {
18 if source.is_empty() {
19 return target.len();
20 }
21 if target.is_empty() {
22 return source.len();
23 }
24
25 let mut distances = (0..=target.chars().count()).collect::<Vec<_>>();
26
27 for (i, ch1) in source.chars().enumerate() {
28 let mut sub = i;
29 distances[0] = sub + 1;
30 for (j, ch2) in target.chars().enumerate() {
31 let dist = cmp::min(
32 cmp::min(distances[j], distances[j + 1]) + 1,
33 sub + (ch1 != ch2) as usize,
34 );
35
36 sub = distances[j + 1];
37 distances[j + 1] = dist;
38 }
39 }
40
41 *distances.last().unwrap()
42}