aprender_shell/
quality.rs

1//! Suggestion quality validation and enhancement for aprender-shell
2//!
3//! Follows Toyota Way principle *Kaizen* (Continuous improvement):
4//! Small, incremental improvements in suggestion quality.
5
/// Table of known misspellings, stored as `(typo, replacement)` pairs.
///
/// Notes on individual entries:
/// - `"carg "` ends with a space, so it cannot match inside the correctly
///   spelled word `cargo`.
/// - `"git pul"` is a prefix of its own replacement `"git pull"`; code that
///   applies this table must take care not to rewrite text that is already
///   spelled correctly.
const CORRECTIONS: &[(&str, &str)] = &[
    // Misspelled git subcommands
    ("git stauts", "git status"),
    ("git comit", "git commit"),
    ("git psuh", "git push"),
    ("git pul", "git pull"),
    // Misspelled tool names
    ("dokcer", "docker"),
    ("kubeclt", "kubectl"),
    ("carg ", "cargo "),
    ("pytohn", "python"),
    ("ndoe", "node"),
    ("yran", "yarn"),
];
19
/// Command names that quality scoring treats as recognized tools.
///
/// Matching is exact and case-sensitive against the first word of a
/// suggestion.
const KNOWN_TOOLS: &[&str] = &[
    // Version control, build, container and package tooling
    "git",
    "cargo",
    "docker",
    "kubectl",
    "npm",
    "yarn",
    "make",
    // Cloud provider CLIs
    "aws",
    "gcloud",
    "az",
    // Infrastructure and deployment tooling
    "terraform",
    "ansible",
    "helm",
    // Language runtimes and compilers
    "python",
    "node",
    "go",
    "rustc",
    "gcc",
    "clang",
];
42
/// Validate and clean a suggestion before returning it to the user.
///
/// The suggestion must start with `prefix` exactly as given (case-sensitive).
/// It is then whitespace-normalized: every run of whitespace becomes a single
/// space and leading/trailing whitespace is dropped. The normalized text is
/// rejected when it
///
/// - is not longer than the equally normalized prefix (nothing is suggested),
/// - ends with `\`, `|` or `&` (an unfinished command line), or
/// - contains a control character.
///
/// Tabs, newlines and other whitespace control characters are collapsed into
/// single spaces by normalization, so only non-whitespace control characters
/// cause a rejection.
///
/// # Arguments
/// * `suggestion` - The raw suggestion from the model
/// * `prefix` - The user's input prefix
///
/// # Returns
/// `Some(normalized)` with the cleaned suggestion, or `None` if the
/// suggestion is invalid or malformed.
///
/// # Example
/// ```
/// use aprender_shell::quality::validate_suggestion;
///
/// assert!(validate_suggestion("git status", "git").is_some());
/// assert!(validate_suggestion("git", "git").is_none()); // Not longer than prefix
/// assert!(validate_suggestion("git status \\", "git").is_none()); // Trailing backslash
/// ```
pub fn validate_suggestion(suggestion: &str, prefix: &str) -> Option<String> {
    // Characters that leave a command line dangling when they come last.
    const DANGLING: &[char] = &['\\', '|', '&'];

    // Must start with the prefix (case-sensitive for commands).
    if !suggestion.starts_with(prefix) {
        return None;
    }

    // Collapse every run of whitespace into one space and trim both ends.
    let collapse = |text: &str| text.split_whitespace().collect::<Vec<_>>().join(" ");
    let normalized = collapse(suggestion);

    // Compare like with like: the prefix is normalized the same way, so a run
    // of spaces inside the prefix cannot make a real extension look too short.
    if normalized.len() <= collapse(prefix).len() {
        return None;
    }

    if normalized.ends_with(DANGLING) {
        return None;
    }

    // Whitespace controls (tab, newline, ...) are already gone at this point,
    // so anything still flagged here is a genuine non-printable character.
    if normalized.chars().any(char::is_control) {
        return None;
    }

    Some(normalized)
}
90
91/// Apply common typo corrections to a suggestion.
92///
93/// This helps when the model has learned typos from actual history.
94///
95/// # Example
96/// ```
97/// use aprender_shell::quality::apply_typo_corrections;
98///
99/// assert_eq!(apply_typo_corrections("git stauts"), "git status");
100/// assert_eq!(apply_typo_corrections("dokcer ps"), "docker ps");
101/// ```
102pub fn apply_typo_corrections(suggestion: &str) -> String {
103    let mut corrected = suggestion.to_string();
104    for (typo, fix) in CORRECTIONS {
105        if corrected.contains(typo) {
106            corrected = corrected.replace(typo, fix);
107        }
108    }
109    corrected
110}
111
112/// Score suggestion quality (0.0 to 1.0).
113///
114/// Higher scores indicate better quality suggestions.
115///
116/// # Scoring Factors
117/// - Length: Very short suggestions are penalized
118/// - Characters: Unusual characters reduce score
119/// - Completeness: Trailing spaces/dashes are penalized
120/// - Tool recognition: Known tools boost score
121///
122/// # Example
123/// ```
124/// use aprender_shell::quality::suggestion_quality_score;
125///
126/// assert!(suggestion_quality_score("git status") > 0.8);
127/// assert!(suggestion_quality_score("git") < 0.6);
128/// ```
129pub fn suggestion_quality_score(suggestion: &str) -> f32 {
130    // Start with base score based on known tool
131    let first_word = suggestion.split_whitespace().next().unwrap_or("");
132    let mut score = if KNOWN_TOOLS.contains(&first_word) {
133        1.0_f32
134    } else {
135        0.8_f32 // Unknown tools start lower
136    };
137
138    // Penalize very short suggestions
139    if suggestion.len() < 5 {
140        score *= 0.5;
141    }
142
143    // Penalize suggestions with unusual characters
144    let unusual_char_count = suggestion
145        .chars()
146        .filter(|c| !c.is_alphanumeric() && !" -_./=:@".contains(*c))
147        .count();
148    score *= 1.0 - (unusual_char_count as f32 * 0.1).min(0.5);
149
150    // Penalize incomplete-looking commands
151    if suggestion.ends_with(' ') || suggestion.ends_with('-') {
152        score *= 0.7;
153    }
154
155    score.clamp(0.0, 1.0)
156}
157
158/// Filter and enhance suggestions with quality checks.
159///
160/// This function:
161/// 1. Validates each suggestion
162/// 2. Applies typo corrections
163/// 3. Filters by quality score threshold
164///
165/// # Arguments
166/// * `suggestions` - Raw suggestions from the model
167/// * `prefix` - The user's input prefix
168/// * `min_quality` - Minimum quality score (0.0 to 1.0)
169pub fn filter_quality_suggestions(
170    suggestions: Vec<(String, f32)>,
171    prefix: &str,
172    min_quality: f32,
173) -> Vec<(String, f32)> {
174    suggestions
175        .into_iter()
176        .filter_map(|(suggestion, model_score)| {
177            // Validate the suggestion
178            let validated = validate_suggestion(&suggestion, prefix)?;
179
180            // Apply typo corrections
181            let corrected = apply_typo_corrections(&validated);
182
183            // Check quality score
184            let quality = suggestion_quality_score(&corrected);
185            if quality < min_quality {
186                return None;
187            }
188
189            // Combine model score with quality score
190            let combined_score = model_score * quality;
191
192            Some((corrected, combined_score))
193        })
194        .collect()
195}
196
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // validate_suggestion
    // ---------------------------------------------------------------------

    /// Well-formed suggestions that extend the prefix are accepted.
    #[test]
    fn test_valid_suggestion_accepted() {
        let cases = [("git status", "git"), ("cargo build --release", "cargo")];
        for &(suggestion, prefix) in &cases {
            assert!(
                validate_suggestion(suggestion, prefix).is_some(),
                "expected {:?} to be accepted for prefix {:?}",
                suggestion,
                prefix
            );
        }
    }

    /// A suggestion that adds nothing beyond the prefix is rejected.
    #[test]
    fn test_suggestion_not_longer_than_prefix() {
        let cases = [("git", "git"), ("git", "git ")];
        for &(suggestion, prefix) in &cases {
            assert!(
                validate_suggestion(suggestion, prefix).is_none(),
                "expected {:?} to be rejected for prefix {:?}",
                suggestion,
                prefix
            );
        }
    }

    /// A suggestion for a different command than the prefix is rejected.
    #[test]
    fn test_suggestion_must_start_with_prefix() {
        let outcome = validate_suggestion("cargo build", "git");
        assert!(outcome.is_none());
    }

    /// A trailing line-continuation backslash marks an unfinished command.
    #[test]
    fn test_trailing_backslash_rejected() {
        let outcome = validate_suggestion("git status \\", "git");
        assert!(outcome.is_none());
    }

    /// A trailing pipe marks an unfinished command.
    #[test]
    fn test_trailing_pipe_rejected() {
        let outcome = validate_suggestion("git status |", "git");
        assert!(outcome.is_none());
    }

    /// A trailing ampersand marks an unfinished command.
    #[test]
    fn test_trailing_ampersand_rejected() {
        let outcome = validate_suggestion("git status &", "git");
        assert!(outcome.is_none());
    }

    /// Runs of spaces collapse to a single space in the returned text.
    #[test]
    fn test_whitespace_normalized() {
        assert_eq!(
            validate_suggestion("git  status   -v", "git"),
            Some("git status -v".to_string())
        );
    }

    /// A bell character (a non-whitespace control character) is rejected.
    #[test]
    fn test_control_chars_rejected() {
        let outcome = validate_suggestion("git\x07status", "git");
        assert!(outcome.is_none());
    }

    // ---------------------------------------------------------------------
    // apply_typo_corrections
    // ---------------------------------------------------------------------

    /// Misspelled git subcommands are rewritten.
    #[test]
    fn test_git_typos_corrected() {
        let cases = [
            ("git stauts", "git status"),
            ("git comit -m 'test'", "git commit -m 'test'"),
            ("git psuh origin", "git push origin"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(apply_typo_corrections(input), expected);
        }
    }

    /// Misspelled tool names are rewritten.
    #[test]
    fn test_tool_typos_corrected() {
        let cases = [
            ("dokcer ps", "docker ps"),
            ("kubeclt get pods", "kubectl get pods"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(apply_typo_corrections(input), expected);
        }
    }

    /// Correctly spelled commands pass through unchanged.
    #[test]
    fn test_no_false_corrections() {
        for &already_correct in &["git status", "docker run"] {
            assert_eq!(apply_typo_corrections(already_correct), already_correct);
        }
    }

    // ---------------------------------------------------------------------
    // suggestion_quality_score
    // ---------------------------------------------------------------------

    /// A recognized tool outranks an unknown one with the same arguments.
    #[test]
    fn test_known_tool_higher_score() {
        let known = suggestion_quality_score("git status");
        let unknown = suggestion_quality_score("xyz status");
        assert!(known > unknown);
    }

    /// Very short suggestions score below longer ones.
    #[test]
    fn test_short_suggestion_lower_score() {
        let brief = suggestion_quality_score("git");
        let detailed = suggestion_quality_score("git status --verbose");
        assert!(brief < detailed);
    }

    /// Unusual punctuation lowers the score.
    #[test]
    fn test_unusual_chars_lower_score() {
        let plain = suggestion_quality_score("git status");
        let noisy = suggestion_quality_score("git !@#$%");
        assert!(plain > noisy);
    }

    /// A trailing space makes the suggestion look incomplete.
    #[test]
    fn test_incomplete_lower_score() {
        let finished = suggestion_quality_score("git status");
        let dangling = suggestion_quality_score("git status ");
        assert!(finished > dangling);
    }

    /// Scores stay inside the closed interval [0.0, 1.0].
    #[test]
    fn test_score_bounded_zero_to_one() {
        for &input in &["git status", "!@#$%^&*"] {
            let score = suggestion_quality_score(input);
            assert!(score <= 1.0, "score for {:?} exceeds 1.0", input);
            assert!(score >= 0.0, "score for {:?} is below 0.0", input);
        }
    }

    // ---------------------------------------------------------------------
    // filter_quality_suggestions
    // ---------------------------------------------------------------------

    /// End-to-end: validation, typo correction and thresholding together.
    #[test]
    fn test_filter_quality_suggestions() {
        let raw: Vec<(String, f32)> = [
            ("git status", 0.9),
            ("git stauts", 0.8), // typo
            ("git", 0.7),        // too short
            ("git commit", 0.6),
        ]
        .iter()
        .map(|&(text, score)| (text.to_string(), score))
        .collect();

        let filtered = filter_quality_suggestions(raw, "git", 0.3);
        let has = |wanted: &str| filtered.iter().any(|(text, _)| text == wanted);

        // Should include status and commit (typo corrected)
        assert!(has("git status"));
        assert!(has("git commit"));

        // "git" alone should be filtered (not longer than prefix)
        assert!(!has("git"));
    }
}