Skip to main content

rec/session/
fuzzy.rs

1//! Fuzzy matching for session name suggestions.
2//!
3//! Provides Levenshtein distance-based fuzzy matching, substring matching,
4//! and UUID prefix matching to suggest similar session names when a user
5//! mistypes a session identifier.
6
7use strsim::levenshtein;
8
9/// A suggestion for a similar session name with its edit distance.
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct Suggestion {
12    /// The suggested session name.
13    pub name: String,
14    /// The Levenshtein edit distance from the query (0 for substring/UUID matches).
15    pub distance: usize,
16}
17
18/// Compute the scaled Levenshtein threshold based on **query** length.
19///
20/// - `query.len() <= 3`: threshold 1
21/// - `query.len() <= 6`: threshold 2
22/// - `query.len() >  6`: `max(3, query.len() / 3)`
23fn threshold_for(query_len: usize) -> usize {
24    if query_len <= 3 {
25        1
26    } else if query_len <= 6 {
27        2
28    } else {
29        std::cmp::max(3, query_len / 3)
30    }
31}
32
33/// Suggest sessions similar to the given query.
34///
35/// Searches through `all_names` (a slice of `(session_name, uuid)` tuples)
36/// using three strategies:
37/// 1. Levenshtein distance (case-insensitive) with scaled threshold
38/// 2. Substring containment (case-insensitive)
39/// 3. UUID prefix match (case-sensitive)
40///
41/// Results are sorted by distance ascending, then alphabetically for ties,
42/// and truncated to `max_suggestions`.
43#[must_use]
44pub fn suggest_sessions(
45    query: &str,
46    all_names: &[(String, String)],
47    max_suggestions: usize,
48) -> Vec<Suggestion> {
49    let query_lower = query.to_lowercase();
50    let max_dist = threshold_for(query.len());
51
52    let mut suggestions: Vec<Suggestion> = Vec::new();
53
54    for (name, uuid) in all_names {
55        let name_lower = name.to_lowercase();
56
57        // Strategy 1: Levenshtein distance (case-insensitive)
58        let dist = levenshtein(&query_lower, &name_lower);
59        let lev_match = dist <= max_dist;
60
61        // Strategy 2: Substring containment (case-insensitive)
62        let substr_match = name_lower.contains(&query_lower);
63
64        // Strategy 3: UUID prefix match (case-sensitive)
65        let uuid_match = uuid.starts_with(query);
66
67        if lev_match || substr_match || uuid_match {
68            // Pick the best (lowest) distance for this candidate.
69            // Substring and UUID matches get distance 0 to sort first.
70            let effective_dist = if substr_match || uuid_match { 0 } else { dist };
71
72            // Deduplicate: only add if not already present
73            if !suggestions.iter().any(|s| s.name == *name) {
74                suggestions.push(Suggestion {
75                    name: name.clone(),
76                    distance: effective_dist,
77                });
78            }
79        }
80    }
81
82    // Sort by distance ascending, then alphabetically for ties
83    suggestions.sort_by(|a, b| a.distance.cmp(&b.distance).then(a.name.cmp(&b.name)));
84
85    suggestions.truncate(max_suggestions);
86    suggestions
87}
88
89/// Format suggestions into a user-friendly help message.
90///
91/// - 0 suggestions: "help: No similar names found. List all sessions with: rec list"
92/// - 1 suggestion: "help: Did you mean 'name'?"
93/// - 2 suggestions: "help: Did you mean 'name1' or 'name2'?"
94/// - 3+ suggestions: "help: Did you mean 'name1', 'name2', or 'name3'?"
95///
96/// # Panics
97///
98/// Panics if the suggestions slice is unexpectedly empty in the 3+ branch
99/// (cannot happen due to the match guard).
100#[must_use]
101pub fn format_suggestions(suggestions: &[Suggestion]) -> String {
102    match suggestions.len() {
103        0 => "help: No similar names found. List all sessions with: rec list".to_string(),
104        1 => format!("help: Did you mean '{}'?", suggestions[0].name),
105        2 => format!(
106            "help: Did you mean '{}' or '{}'?",
107            suggestions[0].name, suggestions[1].name
108        ),
109        _ => {
110            let all_but_last: Vec<String> = suggestions[..suggestions.len() - 1]
111                .iter()
112                .map(|s| format!("'{}'", s.name))
113                .collect();
114            format!(
115                "help: Did you mean {}, or '{}'?",
116                all_but_last.join(", "),
117                suggestions.last().unwrap().name
118            )
119        }
120    }
121}
122
123#[cfg(test)]
124mod tests {
125    use super::*;
126
127    // ── Helper ──────────────────────────────────────────────────────
128    fn names(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
129        pairs
130            .iter()
131            .map(|(n, u)| (n.to_string(), u.to_string()))
132            .collect()
133    }
134
135    // ── suggest_sessions tests ──────────────────────────────────────
136
137    #[test]
138    fn levenshtein_finds_close_match() {
139        let all = names(&[("deploy-v1", "aaa")]);
140        let results = suggest_sessions("deply-v1", &all, 5);
141        assert_eq!(results.len(), 1);
142        assert_eq!(results[0].name, "deploy-v1");
143        assert_eq!(results[0].distance, 1);
144    }
145
146    #[test]
147    fn substring_match_finds_containing_names() {
148        let all = names(&[("deploy-v1", "aaa"), ("deploy-v2", "bbb")]);
149        let results = suggest_sessions("ploy", &all, 5);
150        assert_eq!(results.len(), 2);
151        // Both should be found via substring
152        let found_names: Vec<&str> = results.iter().map(|s| s.name.as_str()).collect();
153        assert!(found_names.contains(&"deploy-v1"));
154        assert!(found_names.contains(&"deploy-v2"));
155    }
156
157    #[test]
158    fn uuid_prefix_match() {
159        let all = names(&[("session-1", "a3f7b2c4-1234-5678-9abc-def012345678")]);
160        let results = suggest_sessions("a3f", &all, 5);
161        assert_eq!(results.len(), 1);
162        assert_eq!(results[0].name, "session-1");
163    }
164
165    #[test]
166    fn short_name_threshold_rejects_distant_matches() {
167        // query "a" (len 1, threshold 1): "ab" distance 1 ✓, "xyz" distance 3 ✗, "abc" distance 2 ✗
168        let all = names(&[("ab", "aaa"), ("xyz", "bbb"), ("abc", "ccc")]);
169        let results = suggest_sessions("a", &all, 5);
170        let found_names: Vec<&str> = results.iter().map(|s| s.name.as_str()).collect();
171        assert!(
172            found_names.contains(&"ab"),
173            "should match 'ab' (distance 1)"
174        );
175        assert!(
176            found_names.contains(&"abc"),
177            "should match 'abc' (substring)"
178        );
179        assert!(
180            !found_names.contains(&"xyz"),
181            "should NOT match 'xyz' (distance 3)"
182        );
183    }
184
185    #[test]
186    fn max_suggestions_limits_results() {
187        let all = names(&[
188            ("deploy-v1", "aaa"),
189            ("deploy-v2", "bbb"),
190            ("deploy-latest", "ccc"),
191            ("unrelated", "ddd"),
192        ]);
193        let results = suggest_sessions("deploy", &all, 3);
194        assert!(results.len() <= 3);
195        // "unrelated" should NOT be in the results (not a match)
196        let found_names: Vec<&str> = results.iter().map(|s| s.name.as_str()).collect();
197        assert!(!found_names.contains(&"unrelated"));
198    }
199
200    #[test]
201    fn sorted_by_distance_then_alphabetically() {
202        let all = names(&[
203            ("deploy-v2", "aaa"),
204            ("deploy-v1", "bbb"),
205            ("deploy-latest", "ccc"),
206        ]);
207        let results = suggest_sessions("deploy", &all, 5);
208        assert!(results.len() >= 2);
209        // All are substring matches (distance 0), so sorted alphabetically
210        if results.len() >= 2 {
211            for i in 0..results.len() - 1 {
212                if results[i].distance == results[i + 1].distance {
213                    assert!(
214                        results[i].name <= results[i + 1].name,
215                        "Expected '{}' <= '{}' at same distance",
216                        results[i].name,
217                        results[i + 1].name
218                    );
219                } else {
220                    assert!(
221                        results[i].distance <= results[i + 1].distance,
222                        "Expected distance {} <= {}",
223                        results[i].distance,
224                        results[i + 1].distance
225                    );
226                }
227            }
228        }
229    }
230
231    #[test]
232    fn no_matches_returns_empty() {
233        let all = names(&[("deploy-v1", "aaa")]);
234        let results = suggest_sessions("zzzzzzz", &all, 5);
235        assert!(results.is_empty());
236    }
237
238    #[test]
239    fn case_insensitive_levenshtein() {
240        let all = names(&[("Deploy-V1", "aaa")]);
241        let results = suggest_sessions("deploy-v1", &all, 5);
242        assert_eq!(results.len(), 1);
243        assert_eq!(results[0].name, "Deploy-V1");
244    }
245
246    #[test]
247    fn case_insensitive_substring() {
248        let all = names(&[("MyDeploy", "aaa")]);
249        let results = suggest_sessions("mydep", &all, 5);
250        assert_eq!(results.len(), 1);
251        assert_eq!(results[0].name, "MyDeploy");
252    }
253
254    #[test]
255    fn uuid_prefix_is_case_sensitive() {
256        let all = names(&[("session-1", "a3F7b2c4-xxxx")]);
257        // "a3f" should NOT match "a3F..." (case-sensitive UUID)
258        let results = suggest_sessions("a3f", &all, 5);
259        // It may match via substring of the name though — "a3f" is not in "session-1"
260        // and UUID prefix is case-sensitive, so no match expected
261        assert!(
262            results.is_empty(),
263            "UUID prefix should be case-sensitive: {results:?}"
264        );
265    }
266
267    #[test]
268    fn medium_name_threshold() {
269        // query "test" (len 4, threshold 2): "tset" distance 2 ✓, "abcd" distance 4 ✗
270        let all = names(&[("tset", "aaa"), ("abcd", "bbb")]);
271        let results = suggest_sessions("test", &all, 5);
272        let found_names: Vec<&str> = results.iter().map(|s| s.name.as_str()).collect();
273        assert!(
274            found_names.contains(&"tset"),
275            "should match 'tset' (distance 2)"
276        );
277        assert!(
278            !found_names.contains(&"abcd"),
279            "should NOT match 'abcd' (distance 4)"
280        );
281    }
282
283    #[test]
284    fn long_name_threshold() {
285        // query "deployment-v1" (len 13, threshold max(3, 13/3)=4):
286        // "deployment-v2" distance 1 ✓, "xxxxxxxxxxx" distance large ✗
287        let all = names(&[("deployment-v2", "aaa"), ("xxxxxxxxxxx", "bbb")]);
288        let results = suggest_sessions("deployment-v1", &all, 5);
289        let found_names: Vec<&str> = results.iter().map(|s| s.name.as_str()).collect();
290        assert!(
291            found_names.contains(&"deployment-v2"),
292            "should match 'deployment-v2'"
293        );
294        assert!(
295            !found_names.contains(&"xxxxxxxxxxx"),
296            "should NOT match 'xxxxxxxxxxx'"
297        );
298    }
299
300    #[test]
301    fn deduplicates_matches() {
302        // A name that matches via both Levenshtein AND substring should appear only once
303        let all = names(&[("deploy", "aaa")]);
304        let results = suggest_sessions("deploy", &all, 5);
305        assert_eq!(results.len(), 1, "should not have duplicates");
306    }
307
308    // ── format_suggestions tests ────────────────────────────────────
309
310    #[test]
311    fn format_zero_suggestions() {
312        let result = format_suggestions(&[]);
313        assert_eq!(
314            result,
315            "help: No similar names found. List all sessions with: rec list"
316        );
317    }
318
319    #[test]
320    fn format_one_suggestion() {
321        let suggestions = vec![Suggestion {
322            name: "deploy-v1".to_string(),
323            distance: 1,
324        }];
325        assert_eq!(
326            format_suggestions(&suggestions),
327            "help: Did you mean 'deploy-v1'?"
328        );
329    }
330
331    #[test]
332    fn format_two_suggestions() {
333        let suggestions = vec![
334            Suggestion {
335                name: "deploy-v1".to_string(),
336                distance: 1,
337            },
338            Suggestion {
339                name: "deploy-v2".to_string(),
340                distance: 2,
341            },
342        ];
343        assert_eq!(
344            format_suggestions(&suggestions),
345            "help: Did you mean 'deploy-v1' or 'deploy-v2'?"
346        );
347    }
348
349    #[test]
350    fn format_three_suggestions() {
351        let suggestions = vec![
352            Suggestion {
353                name: "deploy-v1".to_string(),
354                distance: 1,
355            },
356            Suggestion {
357                name: "deploy-v2".to_string(),
358                distance: 2,
359            },
360            Suggestion {
361                name: "deploy-latest".to_string(),
362                distance: 3,
363            },
364        ];
365        assert_eq!(
366            format_suggestions(&suggestions),
367            "help: Did you mean 'deploy-v1', 'deploy-v2', or 'deploy-latest'?"
368        );
369    }
370}