Skip to main content

laminar_sql/error/
suggest.rs

1//! Edit-distance based suggestions for typo correction.
2//!
3//! Provides Levenshtein distance computation and closest-match lookup
4//! for generating "Did you mean ...?" hints in error messages.
5
6/// Computes the Levenshtein edit distance between two byte slices.
7fn levenshtein(a: &[u8], b: &[u8]) -> usize {
8    let m = a.len();
9    let n = b.len();
10    let mut prev: Vec<usize> = (0..=n).collect();
11    let mut curr = vec![0; n + 1];
12
13    for i in 1..=m {
14        curr[0] = i;
15        for j in 1..=n {
16            let cost = usize::from(a[i - 1] != b[j - 1]);
17            curr[j] = (prev[j] + 1).min(curr[j - 1] + 1).min(prev[j - 1] + cost);
18        }
19        std::mem::swap(&mut prev, &mut curr);
20    }
21
22    prev[n]
23}
24
25/// Returns the closest match to `input` from `candidates`, if within
26/// `max_distance` edit distance.
27///
28/// Comparison is case-insensitive. Returns `None` if no candidate is
29/// close enough or if `candidates` is empty. Exact matches (distance 0)
30/// are excluded since they are not typos.
31#[must_use]
32pub fn closest_match<'a>(
33    input: &str,
34    candidates: &[&'a str],
35    max_distance: usize,
36) -> Option<&'a str> {
37    let input_lower = input.to_ascii_lowercase();
38    let mut best: Option<(&'a str, usize)> = None;
39
40    for &candidate in candidates {
41        let cand_lower = candidate.to_ascii_lowercase();
42        let dist = levenshtein(input_lower.as_bytes(), cand_lower.as_bytes());
43        if dist > 0 && dist <= max_distance && best.is_none_or(|(_, d)| dist < d) {
44            best = Some((candidate, dist));
45        }
46    }
47
48    best.map(|(s, _)| s)
49}
50
51#[cfg(test)]
52mod tests {
53    use super::*;
54
55    #[test]
56    fn test_levenshtein_identical() {
57        assert_eq!(levenshtein(b"abc", b"abc"), 0);
58    }
59
60    #[test]
61    fn test_levenshtein_empty() {
62        assert_eq!(levenshtein(b"", b"abc"), 3);
63        assert_eq!(levenshtein(b"abc", b""), 3);
64    }
65
66    #[test]
67    fn test_levenshtein_single_edit() {
68        assert_eq!(levenshtein(b"cat", b"car"), 1); // substitution
69        assert_eq!(levenshtein(b"cat", b"cats"), 1); // insertion
70        assert_eq!(levenshtein(b"cats", b"cat"), 1); // deletion
71    }
72
73    #[test]
74    fn test_levenshtein_multiple_edits() {
75        assert_eq!(levenshtein(b"kitten", b"sitting"), 3);
76    }
77
78    #[test]
79    fn test_closest_match_found() {
80        let candidates = &["user_id", "user_name", "email"];
81        assert_eq!(closest_match("user_ie", candidates, 2), Some("user_id"));
82    }
83
84    #[test]
85    fn test_closest_match_case_insensitive() {
86        let candidates = &["UserName", "Email"];
87        assert_eq!(closest_match("username", candidates, 2), None); // exact (case-insensitive) = distance 0
88        assert_eq!(closest_match("usrname", candidates, 2), Some("UserName"));
89    }
90
91    #[test]
92    fn test_closest_match_none_too_far() {
93        let candidates = &["user_id", "email"];
94        assert_eq!(closest_match("completely_different", candidates, 2), None);
95    }
96
97    #[test]
98    fn test_closest_match_empty_candidates() {
99        let candidates: &[&str] = &[];
100        assert_eq!(closest_match("foo", candidates, 2), None);
101    }
102
103    #[test]
104    fn test_closest_match_picks_best() {
105        let candidates = &["price", "pride", "prime"];
106        // "pric" is distance 1 from "price", distance 2 from "pride"
107        assert_eq!(closest_match("pric", candidates, 2), Some("price"));
108    }
109}