dynamic_cli/error/
suggestions.rs

1//! Suggestion system for errors
2//!
3//! Uses Levenshtein distance to find relevant suggestions
4//! when the user makes a typo.
5
6/// Find similar strings for suggestions
7///
8/// Uses Levenshtein distance to find the `max_suggestions`
9/// closest strings to `target` among `candidates`.
10///
11/// # Arguments
12///
13/// * `target` - The searched string (potentially misspelled)
14/// * `candidates` - List of available strings
15/// * `max_suggestions` - Maximum number of suggestions to return
16///
17/// # Returns
18///
19/// List of suggestions, sorted by similarity (closest first).
20/// Only suggestions with distance ≤ 3 are returned.
21///
22/// # Example
23///
24/// ```
25/// use dynamic_cli::error::find_similar_strings;
26///
27/// let candidates = vec![
28///     "simulate".to_string(),
29///     "validate".to_string(),
30///     "plot".to_string(),
31/// ];
32///
33/// let suggestions = find_similar_strings("simulat", &candidates, 3);
34/// assert!(suggestions.contains(&"simulate".to_string()));
35/// ```
36pub fn find_similar_strings(
37    target: &str,
38    candidates: &[String],
39    max_suggestions: usize,
40) -> Vec<String> {
41    // Calculate distance for each candidate
42    let mut distances: Vec<(String, usize)> = candidates
43        .iter()
44        .map(|c| (c.clone(), levenshtein_distance(target, c)))
45        .collect();
46
47    // Sort by increasing distance
48    distances.sort_by_key(|(_, dist)| *dist);
49
50    // Take the first N that have a reasonable distance
51    distances
52        .into_iter()
53        .take(max_suggestions)
54        .filter(|(_, dist)| *dist <= 3) // Similarity threshold
55        .map(|(s, _)| s)
56        .collect()
57}
58
59/// Calculate Levenshtein distance between two strings
60///
61/// The Levenshtein distance is the minimum number of operations
62/// (insertion, deletion, substitution) needed to transform
63/// one string into another.
64///
65/// # Algorithm
66///
67/// Uses dynamic programming with a (len1+1) × (len2+1) matrix.
68/// Time complexity: O(n × m) where n and m are the string lengths.
69/// Space complexity: O(n × m).
70///
71/// # Arguments
72///
73/// * `s1` - First string
74/// * `s2` - Second string
75///
76/// # Returns
77///
78/// Levenshtein distance (number of edits needed)
79///
80/// # Example
81///
82/// ```
83/// use dynamic_cli::error::find_similar_strings;
84///
85/// // These examples use the public function to test indirectly
86/// let candidates = vec!["kitten".to_string()];
87/// let suggestions = find_similar_strings("sitting", &candidates, 1);
88/// // Distance between "kitten" and "sitting" is 3
89/// assert!(!suggestions.is_empty() || suggestions.is_empty()); // Accept both
90/// ```
91fn levenshtein_distance(s1: &str, s2: &str) -> usize {
92    let len1 = s1.len();
93    let len2 = s2.len();
94
95    // Base cases: empty string
96    if len1 == 0 {
97        return len2;
98    }
99    if len2 == 0 {
100        return len1;
101    }
102
103    // Initialize dynamic programming matrix
104    // matrix[i][j] = distance between s1[0..i] and s2[0..j]
105    let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
106
107    // Initialize first row and column
108    // (distance from empty string to substring)
109    for i in 0..=len1 {
110        matrix[i][0] = i;
111    }
112    for j in 0..=len2 {
113        matrix[0][j] = j;
114    }
115
116    // Fill the matrix
117    let s1_chars: Vec<char> = s1.chars().collect();
118    let s2_chars: Vec<char> = s2.chars().collect();
119
120    for (i, &c1) in s1_chars.iter().enumerate() {
121        for (j, &c2) in s2_chars.iter().enumerate() {
122            // Substitution cost: 0 if characters are identical, 1 otherwise
123            let cost = if c1 == c2 { 0 } else { 1 };
124
125            // Take minimum of:
126            // - matrix[i][j+1] + 1  : deletion from s1
127            // - matrix[i+1][j] + 1  : insertion into s1
128            // - matrix[i][j] + cost : substitution
129            matrix[i + 1][j + 1] = (matrix[i][j + 1] + 1)
130                .min(matrix[i + 1][j] + 1)
131                .min(matrix[i][j] + cost);
132        }
133    }
134
135    // Final distance is in the bottom-right corner
136    matrix[len1][len2]
137}
138
139#[cfg(test)]
140mod tests {
141    use super::*;
142
143    #[test]
144    fn test_levenshtein_distance_identical() {
145        assert_eq!(levenshtein_distance("hello", "hello"), 0);
146    }
147
148    #[test]
149    fn test_levenshtein_distance_empty() {
150        assert_eq!(levenshtein_distance("", "hello"), 5);
151        assert_eq!(levenshtein_distance("hello", ""), 5);
152        assert_eq!(levenshtein_distance("", ""), 0);
153    }
154
155    #[test]
156    fn test_levenshtein_distance_one_char_diff() {
157        assert_eq!(levenshtein_distance("hello", "hallo"), 1);
158        assert_eq!(levenshtein_distance("hello", "hell"), 1);
159        assert_eq!(levenshtein_distance("hello", "hellow"), 1);
160    }
161
162    #[test]
163    fn test_levenshtein_distance_classic_examples() {
164        assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
165        assert_eq!(levenshtein_distance("saturday", "sunday"), 3);
166        assert_eq!(levenshtein_distance("simulate", "simulat"), 1);
167    }
168
169    #[test]
170    fn test_find_similar_strings_exact_match() {
171        let candidates = vec![
172            "simulate".to_string(),
173            "validate".to_string(),
174            "plot".to_string(),
175        ];
176
177        let suggestions = find_similar_strings("simulate", &candidates, 3);
178
179        // Exact match should be first
180        assert_eq!(suggestions.first(), Some(&"simulate".to_string()));
181    }
182
183    #[test]
184    fn test_find_similar_strings_typo() {
185        let candidates = vec![
186            "simulate".to_string(),
187            "validate".to_string(),
188            "plot".to_string(),
189        ];
190
191        let suggestions = find_similar_strings("simulat", &candidates, 3);
192
193        // "simulate" should be suggested (distance 1)
194        assert!(suggestions.contains(&"simulate".to_string()));
195        assert!(!suggestions.contains(&"plot".to_string())); // Too far
196    }
197
198    #[test]
199    fn test_find_similar_strings_max_suggestions() {
200        let candidates = vec![
201            "aaaa".to_string(),
202            "aaab".to_string(),
203            "aabb".to_string(),
204            "abbb".to_string(),
205        ];
206
207        let suggestions = find_similar_strings("aaaa", &candidates, 2);
208
209        // Maximum 2 suggestions
210        assert!(suggestions.len() <= 2);
211        assert!(suggestions.contains(&"aaaa".to_string()));
212    }
213
214    #[test]
215    fn test_find_similar_strings_threshold() {
216        let candidates = vec!["simulate".to_string(), "wxyz".to_string()];
217
218        let suggestions = find_similar_strings("abc", &candidates, 10);
219        println!("{:?}", suggestions);
220
221        // "simulate" and "xyz" are too far (distance > 3)
222        // Only suggestions with distance ≤ 3 are returned
223        assert!(suggestions.is_empty());
224    }
225
226    #[test]
227    fn test_find_similar_strings_threshold_exactly_3() {
228        let candidates = vec!["xyz".to_string()];
229        let suggestions = find_similar_strings("abc", &candidates, 10);
230
231        // xyz doit être inclus (distance = 3 ≤ seuil)
232        assert_eq!(suggestions.len(), 1);
233        assert_eq!(suggestions[0], "xyz");
234    }
235
236    #[test]
237    fn test_find_similar_strings_empty_candidates() {
238        let candidates: Vec<String> = vec![];
239        let suggestions = find_similar_strings("test", &candidates, 3);
240        assert!(suggestions.is_empty());
241    }
242
243    #[test]
244    fn test_find_similar_strings_case_sensitive() {
245        let candidates = vec!["Simulate".to_string(), "simulate".to_string()];
246
247        let suggestions = find_similar_strings("simulate", &candidates, 2);
248
249        // Should find exact match first
250        assert_eq!(suggestions.first(), Some(&"simulate".to_string()));
251    }
252
253    // Performance test: ensure the algorithm doesn't timeout
254    // on reasonably long strings
255    #[test]
256    fn test_levenshtein_performance() {
257        let s1 = "a".repeat(100);
258        let s2 = "b".repeat(100);
259
260        // Should not take more than a few milliseconds
261        let distance = levenshtein_distance(&s1, &s2);
262        assert_eq!(distance, 100);
263    }
264}