Skip to main content

laminar_sql/error/
suggest.rs

1//! Edit-distance based suggestions and case-insensitive column resolution.
2//!
3//! Provides Levenshtein distance computation, closest-match lookup for
4//! generating "Did you mean ...?" hints, and case-insensitive column
5//! name resolution for mixed-case Arrow schemas.
6
7/// Computes the Levenshtein edit distance between two byte slices.
8fn levenshtein(a: &[u8], b: &[u8]) -> usize {
9    let m = a.len();
10    let n = b.len();
11    let mut prev: Vec<usize> = (0..=n).collect();
12    let mut curr = vec![0; n + 1];
13
14    for i in 1..=m {
15        curr[0] = i;
16        for j in 1..=n {
17            let cost = usize::from(a[i - 1] != b[j - 1]);
18            curr[j] = (prev[j] + 1).min(curr[j - 1] + 1).min(prev[j - 1] + cost);
19        }
20        std::mem::swap(&mut prev, &mut curr);
21    }
22
23    prev[n]
24}
25
26/// Returns the closest match to `input` from `candidates`, if within
27/// `max_distance` edit distance.
28///
29/// Comparison is case-insensitive. Returns `None` if no candidate is
30/// close enough or if `candidates` is empty. Exact matches (distance 0)
31/// are excluded since they are not typos.
32#[must_use]
33pub fn closest_match<'a>(
34    input: &str,
35    candidates: &[&'a str],
36    max_distance: usize,
37) -> Option<&'a str> {
38    let input_lower = input.to_ascii_lowercase();
39    let mut best: Option<(&'a str, usize)> = None;
40
41    for &candidate in candidates {
42        let cand_lower = candidate.to_ascii_lowercase();
43        let dist = levenshtein(input_lower.as_bytes(), cand_lower.as_bytes());
44        if dist > 0 && dist <= max_distance && best.is_none_or(|(_, d)| dist < d) {
45            best = Some((candidate, dist));
46        }
47    }
48
49    best.map(|(s, _)| s)
50}
51
52/// Resolves a column name against a list of available columns with
53/// case-insensitive fallback.
54///
55/// 1. **Exact match** — returns the name unchanged.
56/// 2. **Unique case-insensitive match** — returns the actual column name
57///    from `available`.
58/// 3. **Ambiguous** (multiple case-insensitive matches) — returns `Err`
59///    listing all matches.
60/// 4. **No match** — returns `Err` with an edit-distance suggestion if one
61///    is close enough.
62///
63/// # Errors
64///
65/// Returns [`ColumnResolveError::NotFound`] when no column matches, or
66/// [`ColumnResolveError::Ambiguous`] when multiple columns match
67/// case-insensitively.
68pub fn resolve_column_name<'a>(
69    input: &str,
70    available: &[&'a str],
71) -> Result<&'a str, ColumnResolveError> {
72    // 1. Exact match
73    for &col in available {
74        if col == input {
75            return Ok(col);
76        }
77    }
78
79    // 2. Case-insensitive match
80    let matches: Vec<&'a str> = available
81        .iter()
82        .copied()
83        .filter(|c| c.eq_ignore_ascii_case(input))
84        .collect();
85
86    match matches.len() {
87        1 => Ok(matches[0]),
88        n if n > 1 => Err(ColumnResolveError::Ambiguous {
89            input: input.to_string(),
90            matches: matches.iter().map(|s| (*s).to_string()).collect(),
91        }),
92        _ => {
93            // 3. No match — provide edit-distance hint
94            let hint = closest_match(input, available, 2).map(ToString::to_string);
95            Err(ColumnResolveError::NotFound {
96                input: input.to_string(),
97                suggestion: hint,
98            })
99        }
100    }
101}
102
103/// Error from [`resolve_column_name`].
104#[derive(Debug, Clone, PartialEq, Eq)]
105pub enum ColumnResolveError {
106    /// No column matched (exact or case-insensitive).
107    NotFound {
108        /// The column name the user wrote.
109        input: String,
110        /// An edit-distance suggestion, if available.
111        suggestion: Option<String>,
112    },
113    /// Multiple columns matched case-insensitively.
114    Ambiguous {
115        /// The column name the user wrote.
116        input: String,
117        /// All columns that matched case-insensitively.
118        matches: Vec<String>,
119    },
120}
121
122impl std::fmt::Display for ColumnResolveError {
123    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124        match self {
125            Self::NotFound { input, suggestion } => {
126                write!(f, "Column '{input}' not found")?;
127                if let Some(s) = suggestion {
128                    write!(f, ". Did you mean '{s}'?")?;
129                }
130                Ok(())
131            }
132            Self::Ambiguous { input, matches } => {
133                write!(
134                    f,
135                    "Column '{input}' is ambiguous — matches: {}. \
136                     Use double quotes for exact match.",
137                    matches.join(", ")
138                )
139            }
140        }
141    }
142}
143
144#[cfg(test)]
145mod tests {
146    use super::*;
147
148    #[test]
149    fn test_levenshtein_identical() {
150        assert_eq!(levenshtein(b"abc", b"abc"), 0);
151    }
152
153    #[test]
154    fn test_levenshtein_empty() {
155        assert_eq!(levenshtein(b"", b"abc"), 3);
156        assert_eq!(levenshtein(b"abc", b""), 3);
157    }
158
159    #[test]
160    fn test_levenshtein_single_edit() {
161        assert_eq!(levenshtein(b"cat", b"car"), 1); // substitution
162        assert_eq!(levenshtein(b"cat", b"cats"), 1); // insertion
163        assert_eq!(levenshtein(b"cats", b"cat"), 1); // deletion
164    }
165
166    #[test]
167    fn test_levenshtein_multiple_edits() {
168        assert_eq!(levenshtein(b"kitten", b"sitting"), 3);
169    }
170
171    #[test]
172    fn test_closest_match_found() {
173        let candidates = &["user_id", "user_name", "email"];
174        assert_eq!(closest_match("user_ie", candidates, 2), Some("user_id"));
175    }
176
177    #[test]
178    fn test_closest_match_case_insensitive() {
179        let candidates = &["UserName", "Email"];
180        assert_eq!(closest_match("username", candidates, 2), None); // exact (case-insensitive) = distance 0
181        assert_eq!(closest_match("usrname", candidates, 2), Some("UserName"));
182    }
183
184    #[test]
185    fn test_closest_match_none_too_far() {
186        let candidates = &["user_id", "email"];
187        assert_eq!(closest_match("completely_different", candidates, 2), None);
188    }
189
190    #[test]
191    fn test_closest_match_empty_candidates() {
192        let candidates: &[&str] = &[];
193        assert_eq!(closest_match("foo", candidates, 2), None);
194    }
195
196    #[test]
197    fn test_closest_match_picks_best() {
198        let candidates = &["price", "pride", "prime"];
199        // "pric" is distance 1 from "price", distance 2 from "pride"
200        assert_eq!(closest_match("pric", candidates, 2), Some("price"));
201    }
202
203    // -- resolve_column_name tests --
204
205    #[test]
206    fn test_resolve_exact_match() {
207        let cols = &["tradeId", "symbol", "lastPrice"];
208        assert_eq!(resolve_column_name("tradeId", cols), Ok("tradeId"));
209    }
210
211    #[test]
212    fn test_resolve_case_insensitive_fallback() {
213        let cols = &["tradeId", "symbol", "lastPrice"];
214        assert_eq!(resolve_column_name("tradeid", cols), Ok("tradeId"));
215        assert_eq!(resolve_column_name("TRADEID", cols), Ok("tradeId"));
216        assert_eq!(resolve_column_name("SYMBOL", cols), Ok("symbol"));
217        assert_eq!(resolve_column_name("lastprice", cols), Ok("lastPrice"));
218    }
219
220    #[test]
221    fn test_resolve_ambiguous() {
222        let cols = &["price", "Price", "PRICE"];
223        let err = resolve_column_name("pRiCe", cols).unwrap_err();
224        match err {
225            ColumnResolveError::Ambiguous { input, matches } => {
226                assert_eq!(input, "pRiCe");
227                assert_eq!(matches.len(), 3);
228            }
229            other @ ColumnResolveError::NotFound { .. } => {
230                panic!("expected Ambiguous, got {other}")
231            }
232        }
233    }
234
235    #[test]
236    fn test_resolve_not_found_with_suggestion() {
237        let cols = &["tradeId", "symbol"];
238        let err = resolve_column_name("tadeId", cols).unwrap_err();
239        match err {
240            ColumnResolveError::NotFound { suggestion, .. } => {
241                assert_eq!(suggestion, Some("tradeId".to_string()));
242            }
243            other @ ColumnResolveError::Ambiguous { .. } => {
244                panic!("expected NotFound, got {other}")
245            }
246        }
247    }
248
249    #[test]
250    fn test_resolve_not_found_no_suggestion() {
251        let cols = &["tradeId", "symbol"];
252        let err = resolve_column_name("completely_different", cols).unwrap_err();
253        match err {
254            ColumnResolveError::NotFound { suggestion, .. } => {
255                assert!(suggestion.is_none());
256            }
257            other @ ColumnResolveError::Ambiguous { .. } => {
258                panic!("expected NotFound, got {other}")
259            }
260        }
261    }
262}