Skip to main content

grafeo_common/utils/
strings.rs

1//! String utilities for error messages and suggestions.
2//!
3//! This module provides functions for generating helpful suggestions
4//! in error messages, such as "did you mean X?" when a user typos a label.
5
6/// Computes the Levenshtein edit distance between two strings.
7///
8/// This is the minimum number of single-character edits (insertions,
9/// deletions, or substitutions) required to change one string into the other.
10fn levenshtein_distance(a: &str, b: &str) -> usize {
11    let a_chars: Vec<char> = a.chars().collect();
12    let b_chars: Vec<char> = b.chars().collect();
13    let m = a_chars.len();
14    let n = b_chars.len();
15
16    if m == 0 {
17        return n;
18    }
19    if n == 0 {
20        return m;
21    }
22
23    // Use two rows for space efficiency
24    let mut prev = vec![0; n + 1];
25    let mut curr = vec![0; n + 1];
26
27    // Initialize first row
28    for j in 0..=n {
29        prev[j] = j;
30    }
31
32    for i in 1..=m {
33        curr[0] = i;
34        for j in 1..=n {
35            let cost = usize::from(a_chars[i - 1] != b_chars[j - 1]);
36            curr[j] = (prev[j] + 1) // deletion
37                .min(curr[j - 1] + 1) // insertion
38                .min(prev[j - 1] + cost); // substitution
39        }
40        std::mem::swap(&mut prev, &mut curr);
41    }
42
43    prev[n]
44}
45
46/// Finds the most similar string from a list of candidates.
47///
48/// Returns `Some((best_match, distance))` if a close match is found,
49/// `None` if no candidates are sufficiently similar.
50///
51/// A match is considered "close enough" if the edit distance is at most
52/// 2 for short strings (<=5 chars) or at most 3 for longer strings.
53///
54/// # Examples
55///
56/// ```
57/// use grafeo_common::utils::strings::find_similar;
58///
59/// let candidates = ["Person", "Place", "Organization"];
60/// assert_eq!(find_similar("Peron", &candidates), Some("Person"));
61/// assert_eq!(find_similar("XYZ", &candidates), None);
62/// ```
63pub fn find_similar<'a, S: AsRef<str>>(query: &str, candidates: &'a [S]) -> Option<&'a str> {
64    if candidates.is_empty() {
65        return None;
66    }
67
68    // Case-insensitive comparison
69    let query_lower = query.to_lowercase();
70
71    let mut best_match: Option<&str> = None;
72    let mut best_distance = usize::MAX;
73
74    for candidate in candidates {
75        let candidate_str = candidate.as_ref();
76        let candidate_lower = candidate_str.to_lowercase();
77
78        // Check for case-insensitive exact match first
79        if query_lower == candidate_lower {
80            return Some(candidate_str);
81        }
82
83        let distance = levenshtein_distance(&query_lower, &candidate_lower);
84
85        if distance < best_distance {
86            best_distance = distance;
87            best_match = Some(candidate_str);
88        }
89    }
90
91    // Determine maximum acceptable distance based on query length
92    let max_distance = if query.len() <= 3 {
93        1 // Very short strings: only 1 edit allowed
94    } else if query.len() <= 5 {
95        2 // Short strings: 2 edits allowed
96    } else {
97        3 // Longer strings: 3 edits allowed
98    };
99
100    if best_distance <= max_distance {
101        best_match
102    } else {
103        None
104    }
105}
106
107/// Formats a suggestion hint for error messages.
108///
109/// # Examples
110///
111/// ```
112/// use grafeo_common::utils::strings::format_suggestion;
113///
114/// assert_eq!(format_suggestion("Person"), "Did you mean 'Person'?");
115/// ```
116pub fn format_suggestion(suggestion: &str) -> String {
117    format!("Did you mean '{suggestion}'?")
118}
119
120/// Formats a list of suggestions as a hint.
121///
122/// # Examples
123///
124/// ```
125/// use grafeo_common::utils::strings::format_suggestions;
126///
127/// assert_eq!(
128///     format_suggestions(&["Person", "Place"]),
129///     "Did you mean one of: 'Person', 'Place'?"
130/// );
131/// ```
132pub fn format_suggestions(suggestions: &[&str]) -> String {
133    match suggestions.len() {
134        0 => String::new(),
135        1 => format_suggestion(suggestions[0]),
136        _ => {
137            let quoted: Vec<String> = suggestions.iter().map(|s| format!("'{s}'")).collect();
138            format!("Did you mean one of: {}?", quoted.join(", "))
139        }
140    }
141}
142
143#[cfg(test)]
144mod tests {
145    use super::*;
146
147    #[test]
148    fn test_levenshtein_distance() {
149        assert_eq!(levenshtein_distance("", ""), 0);
150        assert_eq!(levenshtein_distance("a", ""), 1);
151        assert_eq!(levenshtein_distance("", "a"), 1);
152        assert_eq!(levenshtein_distance("abc", "abc"), 0);
153        assert_eq!(levenshtein_distance("abc", "abd"), 1);
154        assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
155        assert_eq!(levenshtein_distance("Person", "Peron"), 1);
156        assert_eq!(levenshtein_distance("Person", "person"), 1); // case difference
157    }
158
159    #[test]
160    fn test_find_similar() {
161        let labels = ["Person", "Place", "Organization", "Event"];
162
163        // Typo: missing 's'
164        assert_eq!(find_similar("Peron", &labels), Some("Person"));
165
166        // Case insensitive
167        assert_eq!(find_similar("person", &labels), Some("Person"));
168        assert_eq!(find_similar("PERSON", &labels), Some("Person"));
169
170        // Small typo
171        assert_eq!(find_similar("Plase", &labels), Some("Place"));
172
173        // Too different - no suggestion
174        assert_eq!(find_similar("XYZ", &labels), None);
175        assert_eq!(find_similar("FooBar", &labels), None);
176
177        // Empty candidates
178        let empty: Vec<&str> = vec![];
179        assert_eq!(find_similar("Person", &empty), None);
180    }
181
182    #[test]
183    fn test_format_suggestion() {
184        assert_eq!(format_suggestion("Person"), "Did you mean 'Person'?");
185    }
186
187    #[test]
188    fn test_format_suggestions() {
189        assert_eq!(format_suggestions(&[]), "");
190        assert_eq!(format_suggestions(&["Person"]), "Did you mean 'Person'?");
191        assert_eq!(
192            format_suggestions(&["Person", "Place"]),
193            "Did you mean one of: 'Person', 'Place'?"
194        );
195    }
196}