dynamic_cli/error/
suggestions.rs

1//! Suggestion system for errors
2//!
3//! Uses Levenshtein distance to find relevant suggestions
4//! when the user makes a typo.
5
6/// Find similar strings for suggestions
7///
8/// Uses Levenshtein distance to find the `max_suggestions`
9/// closest strings to `target` among `candidates`.
10///
11/// # Arguments
12///
13/// * `target` - The searched string (potentially misspelled)
14/// * `candidates` - List of available strings
15/// * `max_suggestions` - Maximum number of suggestions to return
16///
17/// # Returns
18///
19/// List of suggestions, sorted by similarity (closest first).
20/// Only suggestions with distance ≤ 3 are returned.
21///
22/// # Example
23///
24/// ```
25/// use dynamic_cli::error::find_similar_strings;
26///
27/// let candidates = vec![
28///     "simulate".to_string(),
29///     "validate".to_string(),
30///     "plot".to_string(),
31/// ];
32///
33/// let suggestions = find_similar_strings("simulat", &candidates, 3);
34/// assert!(suggestions.contains(&"simulate".to_string()));
35/// ```
36pub fn find_similar_strings(
37    target: &str,
38    candidates: &[String],
39    max_suggestions: usize,
40) -> Vec<String> {
41    // Calculate distance for each candidate
42    let mut distances: Vec<(String, usize)> = candidates
43        .iter()
44        .map(|c| (c.clone(), levenshtein_distance(target, c)))
45        .collect();
46
47    // Sort by increasing distance
48    distances.sort_by_key(|(_, dist)| *dist);
49
50    // Take the first N that have a reasonable distance
51    distances
52        .into_iter()
53        .take(max_suggestions)
54        .filter(|(_, dist)| *dist <= 3) // Similarity threshold
55        .map(|(s, _)| s)
56        .collect()
57}
58
59/// Calculate Levenshtein distance between two strings
60///
61/// The Levenshtein distance is the minimum number of operations
62/// (insertion, deletion, substitution) needed to transform
63/// one string into another.
64///
65/// # Algorithm
66///
67/// Uses dynamic programming with a (len1+1) × (len2+1) matrix.
68/// Time complexity: O(n × m) where n and m are the string lengths.
69/// Space complexity: O(n × m).
70///
71/// # Arguments
72///
73/// * `s1` - First string
74/// * `s2` - Second string
75///
76/// # Returns
77///
78/// Levenshtein distance (number of edits needed)
79///
80/// # Example
81///
82/// ```
83/// use dynamic_cli::error::find_similar_strings;
84///
85/// // These examples use the public function to test indirectly
86/// let candidates = vec!["kitten".to_string()];
87/// let suggestions = find_similar_strings("sitting", &candidates, 1);
88/// // Distance between "kitten" and "sitting" is 3
89/// assert!(!suggestions.is_empty() || suggestions.is_empty()); // Accept both
90/// ```
91fn levenshtein_distance(s1: &str, s2: &str) -> usize {
92    let len1 = s1.len();
93    let len2 = s2.len();
94
95    // Base cases: empty string
96    if len1 == 0 {
97        return len2;
98    }
99    if len2 == 0 {
100        return len1;
101    }
102
103    // Initialize dynamic programming matrix
104    // matrix[i][j] = distance between s1[0..i] and s2[0..j]
105    let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
106
107    // Initialize first row and column
108    // (distance from empty string to substring)
109    #[allow(clippy::needless_range_loop)]
110    for i in 0..=len1 {
111        matrix[i][0] = i;
112    }
113    #[allow(clippy::needless_range_loop)]
114    for j in 0..=len2 {
115        matrix[0][j] = j;
116    }
117
118    // Fill the matrix
119    let s1_chars: Vec<char> = s1.chars().collect();
120    let s2_chars: Vec<char> = s2.chars().collect();
121
122    for (i, &c1) in s1_chars.iter().enumerate() {
123        for (j, &c2) in s2_chars.iter().enumerate() {
124            // Substitution cost: 0 if characters are identical, 1 otherwise
125            let cost = if c1 == c2 { 0 } else { 1 };
126
127            // Take minimum of:
128            // - matrix[i][j+1] + 1  : deletion from s1
129            // - matrix[i+1][j] + 1  : insertion into s1
130            // - matrix[i][j] + cost : substitution
131            matrix[i + 1][j + 1] = (matrix[i][j + 1] + 1)
132                .min(matrix[i + 1][j] + 1)
133                .min(matrix[i][j] + cost);
134        }
135    }
136
137    // Final distance is in the bottom-right corner
138    matrix[len1][len2]
139}
140
141#[cfg(test)]
142mod tests {
143    use super::*;
144
145    #[test]
146    fn test_levenshtein_distance_identical() {
147        assert_eq!(levenshtein_distance("hello", "hello"), 0);
148    }
149
150    #[test]
151    fn test_levenshtein_distance_empty() {
152        assert_eq!(levenshtein_distance("", "hello"), 5);
153        assert_eq!(levenshtein_distance("hello", ""), 5);
154        assert_eq!(levenshtein_distance("", ""), 0);
155    }
156
157    #[test]
158    fn test_levenshtein_distance_one_char_diff() {
159        assert_eq!(levenshtein_distance("hello", "hallo"), 1);
160        assert_eq!(levenshtein_distance("hello", "hell"), 1);
161        assert_eq!(levenshtein_distance("hello", "hellow"), 1);
162    }
163
164    #[test]
165    fn test_levenshtein_distance_classic_examples() {
166        assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
167        assert_eq!(levenshtein_distance("saturday", "sunday"), 3);
168        assert_eq!(levenshtein_distance("simulate", "simulat"), 1);
169    }
170
171    #[test]
172    fn test_find_similar_strings_exact_match() {
173        let candidates = vec![
174            "simulate".to_string(),
175            "validate".to_string(),
176            "plot".to_string(),
177        ];
178
179        let suggestions = find_similar_strings("simulate", &candidates, 3);
180
181        // Exact match should be first
182        assert_eq!(suggestions.first(), Some(&"simulate".to_string()));
183    }
184
185    #[test]
186    fn test_find_similar_strings_typo() {
187        let candidates = vec![
188            "simulate".to_string(),
189            "validate".to_string(),
190            "plot".to_string(),
191        ];
192
193        let suggestions = find_similar_strings("simulat", &candidates, 3);
194
195        // "simulate" should be suggested (distance 1)
196        assert!(suggestions.contains(&"simulate".to_string()));
197        assert!(!suggestions.contains(&"plot".to_string())); // Too far
198    }
199
200    #[test]
201    fn test_find_similar_strings_max_suggestions() {
202        let candidates = vec![
203            "aaaa".to_string(),
204            "aaab".to_string(),
205            "aabb".to_string(),
206            "abbb".to_string(),
207        ];
208
209        let suggestions = find_similar_strings("aaaa", &candidates, 2);
210
211        // Maximum 2 suggestions
212        assert!(suggestions.len() <= 2);
213        assert!(suggestions.contains(&"aaaa".to_string()));
214    }
215
216    #[test]
217    fn test_find_similar_strings_threshold() {
218        let candidates = vec!["simulate".to_string(), "wxyz".to_string()];
219
220        let suggestions = find_similar_strings("abc", &candidates, 10);
221        println!("{:?}", suggestions);
222
223        // "simulate" and "xyz" are too far (distance > 3)
224        // Only suggestions with distance ≤ 3 are returned
225        assert!(suggestions.is_empty());
226    }
227
228    #[test]
229    fn test_find_similar_strings_threshold_exactly_3() {
230        let candidates = vec!["xyz".to_string()];
231        let suggestions = find_similar_strings("abc", &candidates, 10);
232
233        // xyz doit être inclus (distance = 3 ≤ seuil)
234        assert_eq!(suggestions.len(), 1);
235        assert_eq!(suggestions[0], "xyz");
236    }
237
238    #[test]
239    fn test_find_similar_strings_empty_candidates() {
240        let candidates: Vec<String> = vec![];
241        let suggestions = find_similar_strings("test", &candidates, 3);
242        assert!(suggestions.is_empty());
243    }
244
245    #[test]
246    fn test_find_similar_strings_case_sensitive() {
247        let candidates = vec!["Simulate".to_string(), "simulate".to_string()];
248
249        let suggestions = find_similar_strings("simulate", &candidates, 2);
250
251        // Should find exact match first
252        assert_eq!(suggestions.first(), Some(&"simulate".to_string()));
253    }
254
255    // Performance test: ensure the algorithm doesn't timeout
256    // on reasonably long strings
257    #[test]
258    fn test_levenshtein_performance() {
259        let s1 = "a".repeat(100);
260        let s2 = "b".repeat(100);
261
262        // Should not take more than a few milliseconds
263        let distance = levenshtein_distance(&s1, &s2);
264        assert_eq!(distance, 100);
265    }
266}