//! Input validation and intent detection for agent ergonomics.
//!
//! Provides O(1) validation sets and synonym maps so agents can use
//! natural language for statuses, types, and priorities. Three-tier
//! resolution: exact match → synonym lookup → error with suggestion.

use std::collections::{HashMap, HashSet};
use std::sync::LazyLock;

// ── Valid value sets (O(1) lookups) ──────────────────────────
/// Canonical status values, accepted verbatim (O(1) membership test).
pub static VALID_STATUSES: LazyLock<HashSet<&str>> = LazyLock::new(|| {
    HashSet::from([
        "backlog",
        "open",
        "in_progress",
        "blocked",
        "closed",
        "deferred",
    ])
});

/// Canonical issue-type values, accepted verbatim (O(1) membership test).
pub static VALID_TYPES: LazyLock<HashSet<&str>> =
    LazyLock::new(|| HashSet::from(["task", "bug", "feature", "epic", "chore"]));

// ── Synonym maps (agent typo recovery) ───────────────────────

/// Common agent phrasings mapped to canonical statuses.
pub static STATUS_SYNONYMS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    HashMap::from([
        // "closed" family
        ("done", "closed"),
        ("complete", "closed"),
        ("completed", "closed"),
        ("finished", "closed"),
        ("resolved", "closed"),
        ("wontfix", "closed"),
        // "in_progress" family
        ("wip", "in_progress"),
        ("working", "in_progress"),
        ("active", "in_progress"),
        ("started", "in_progress"),
        // "open" family
        ("new", "open"),
        ("todo", "open"),
        ("pending", "open"),
        // "blocked" / "deferred" family
        ("waiting", "blocked"),
        ("hold", "deferred"),
        ("later", "deferred"),
        ("postponed", "deferred"),
    ])
});

/// Common agent phrasings mapped to canonical issue types.
pub static TYPE_SYNONYMS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    HashMap::from([
        // "feature" family
        ("story", "feature"),
        ("enhancement", "feature"),
        ("improvement", "feature"),
        // "bug" family
        ("issue", "bug"),
        ("defect", "bug"),
        ("problem", "bug"),
        // "task" family
        ("ticket", "task"),
        ("item", "task"),
        ("work", "task"),
        // "chore" family
        ("cleanup", "chore"),
        ("refactor", "chore"),
        ("maintenance", "chore"),
        // "epic" family
        ("parent", "epic"),
        ("initiative", "epic"),
    ])
});

/// Priority synonyms map to string digits.
/// SaveContext: 0=lowest, 4=critical.
pub static PRIORITY_SYNONYMS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    HashMap::from([
        // 4 = critical
        ("critical", "4"),
        ("crit", "4"),
        ("urgent", "4"),
        ("highest", "4"),
        // 3 = high
        ("high", "3"),
        ("important", "3"),
        // 2 = medium (default)
        ("medium", "2"),
        ("normal", "2"),
        ("default", "2"),
        // 1 = low
        ("low", "1"),
        ("minor", "1"),
        // 0 = lowest
        ("backlog", "0"),
        ("lowest", "0"),
        ("trivial", "0"),
    ])
});

94/// Normalize a status string via exact match or synonym lookup.
95///
96/// Returns the canonical status, or an error with the original input
97/// and an optional suggestion.
98pub fn normalize_status(input: &str) -> Result<String, (String, Option<String>)> {
99    let lower = input.to_lowercase();
100
101    // Tier 1: exact match
102    if VALID_STATUSES.contains(lower.as_str()) {
103        return Ok(lower);
104    }
105
106    // Tier 2: synonym lookup
107    if let Some(&canonical) = STATUS_SYNONYMS.get(lower.as_str()) {
108        return Ok(canonical.to_string());
109    }
110
111    // Tier 3: find closest suggestion
112    let suggestion = find_closest_match(&lower, &VALID_STATUSES, &STATUS_SYNONYMS);
113    Err((input.to_string(), suggestion))
114}
115
116/// Normalize an issue type string via exact match or synonym lookup.
117pub fn normalize_type(input: &str) -> Result<String, (String, Option<String>)> {
118    let lower = input.to_lowercase();
119
120    if VALID_TYPES.contains(lower.as_str()) {
121        return Ok(lower);
122    }
123
124    if let Some(&canonical) = TYPE_SYNONYMS.get(lower.as_str()) {
125        return Ok(canonical.to_string());
126    }
127
128    let suggestion = find_closest_match(&lower, &VALID_TYPES, &TYPE_SYNONYMS);
129    Err((input.to_string(), suggestion))
130}
131
132/// Normalize a priority value from string, integer, synonym, or P-notation.
133///
134/// Accepts: "0"-"4", "P0"-"P4", "high", "critical", etc.
135pub fn normalize_priority(input: &str) -> Result<i32, (String, Option<String>)> {
136    let lower = input.to_lowercase();
137
138    // Tier 1: direct integer
139    if let Ok(n) = lower.parse::<i32>() {
140        if (0..=4).contains(&n) {
141            return Ok(n);
142        }
143        return Err((input.to_string(), Some("Priority must be 0-4 (0=lowest, 4=critical)".to_string())));
144    }
145
146    // Tier 2: P-notation (P0, P1, ..., P4)
147    if let Some(stripped) = lower.strip_prefix('p') {
148        if let Ok(n) = stripped.parse::<i32>() {
149            if (0..=4).contains(&n) {
150                return Ok(n);
151            }
152        }
153    }
154
155    // Tier 3: synonym lookup
156    if let Some(&digit) = PRIORITY_SYNONYMS.get(lower.as_str()) {
157        return Ok(digit.parse().unwrap());
158    }
159
160    Err((
161        input.to_string(),
162        Some("Use 0-4, P0-P4, or: critical, high, medium, low, backlog".to_string()),
163    ))
164}
165
166/// Find the closest matching value across valid set and synonyms.
167fn find_closest_match(
168    input: &str,
169    valid: &HashSet<&str>,
170    synonyms: &HashMap<&str, &str>,
171) -> Option<String> {
172    let mut best: Option<(&str, usize)> = None;
173
174    for &v in valid.iter().chain(synonyms.keys()) {
175        let dist = levenshtein_distance(input, v);
176        if dist <= 3 {
177            if best.is_none() || dist < best.unwrap().1 {
178                // For synonyms, show what it maps to
179                if let Some(&canonical) = synonyms.get(v) {
180                    best = Some((canonical, dist));
181                } else {
182                    best = Some((v, dist));
183                }
184            }
185        }
186    }
187
188    best.map(|(v, _)| v.to_string())
189}
190
// ── Levenshtein distance ─────────────────────────────────────

/// Compute the Levenshtein edit distance between two strings.
///
/// Single-row dynamic program: `row[j]` holds the distance between the
/// prefix of `a` processed so far and the first `j` chars of `b`, while
/// `diag` carries the previous row's value needed for substitution.
/// Space is O(|b|); time is O(|a|·|b|).
pub fn levenshtein_distance(a: &str, b: &str) -> usize {
    let s: Vec<char> = a.chars().collect();
    let t: Vec<char> = b.chars().collect();

    // Trivial cases: distance to the empty string is the other's length.
    if s.is_empty() {
        return t.len();
    }
    if t.is_empty() {
        return s.len();
    }

    // row starts as the distances from "" to each prefix of t.
    let mut row: Vec<usize> = (0..=t.len()).collect();

    for (i, &sc) in s.iter().enumerate() {
        let mut diag = row[0]; // previous row's row[j] for the current j
        row[0] = i + 1;
        for (j, &tc) in t.iter().enumerate() {
            let substitution = diag + usize::from(sc != tc);
            diag = row[j + 1]; // save before overwriting
            row[j + 1] = substitution
                .min(diag + 1) // deletion from `a`
                .min(row[j] + 1); // insertion into `a`
        }
    }

    row[t.len()]
}

225/// Find existing IDs similar to the searched ID.
226///
227/// Returns up to `max` suggestions with edit distance ≤ 3,
228/// sorted by distance then alphabetically.
229pub fn find_similar_ids(searched: &str, existing: &[String], max: usize) -> Vec<String> {
230    let mut candidates: Vec<(usize, &str)> = existing
231        .iter()
232        .map(|id| (levenshtein_distance(searched, id), id.as_str()))
233        .filter(|(dist, _)| *dist <= 3)
234        .collect();
235
236    candidates.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(b.1)));
237
238    candidates
239        .into_iter()
240        .take(max)
241        .map(|(_, id)| id.to_string())
242        .collect()
243}
244
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize_status() {
        // Exact, synonym, and case-insensitive inputs all resolve.
        for (input, want) in [
            ("open", "open"),
            ("done", "closed"),
            ("wip", "in_progress"),
            ("OPEN", "open"),
        ] {
            assert_eq!(normalize_status(input), Ok(want.to_string()));
        }
        assert!(normalize_status("nonsense").is_err());
    }

    #[test]
    fn test_normalize_type() {
        for (input, want) in [("bug", "bug"), ("defect", "bug"), ("story", "feature")] {
            assert_eq!(normalize_type(input), Ok(want.to_string()));
        }
        assert!(normalize_type("nonsense").is_err());
    }

    #[test]
    fn test_normalize_priority() {
        // Digit, P-notation, and word synonyms.
        for (input, want) in [("2", 2), ("P3", 3), ("high", 3), ("critical", 4)] {
            assert_eq!(normalize_priority(input), Ok(want));
        }
        assert!(normalize_priority("nonsense").is_err());
    }

    #[test]
    fn test_levenshtein() {
        for (a, b, want) in [
            ("", "", 0),
            ("abc", "abc", 0),
            ("abc", "abd", 1),
            ("kitten", "sitting", 3),
        ] {
            assert_eq!(levenshtein_distance(a, b), want);
        }
    }

    #[test]
    fn test_find_similar_ids() {
        let ids: Vec<String> = ["SC-a1b2", "SC-a1b3", "SC-xxxx"]
            .into_iter()
            .map(String::from)
            .collect();
        let result = find_similar_ids("SC-a1b1", &ids, 3);
        assert!(!result.is_empty());
        assert!(result.contains(&"SC-a1b2".to_string()));
    }
}