Skip to main content

bashrs_oracle/
classifier.rs

1//! Keyword-based error classifier (fallback when ML model not trained).
2
3use crate::ErrorCategory;
4
5/// Keyword-based classifier for shell errors.
6///
7/// Maps error message keywords to the 24 error categories.
8/// Used as fallback when ML model is not trained.
9pub struct ErrorClassifier {
10    /// Keywords mapped to categories.
11    keyword_map: Vec<(Vec<&'static str>, ErrorCategory)>,
12}
13
14impl Default for ErrorClassifier {
15    fn default() -> Self {
16        Self::new()
17    }
18}
19
20impl ErrorClassifier {
21    /// Create a new keyword-based classifier with 24 category mappings.
22    #[must_use]
23    pub fn new() -> Self {
24        Self {
25            keyword_map: vec![
26                // Syntax errors (0-3)
27                (
28                    vec![
29                        "unexpected EOF",
30                        "unmatched",
31                        "unterminated",
32                        "missing '\"'",
33                        "missing \"'\"",
34                    ],
35                    ErrorCategory::SyntaxQuoteMismatch,
36                ),
37                (
38                    vec![
39                        "unexpected ')'",
40                        "unexpected ']'",
41                        "unexpected '}'",
42                        "missing ')'",
43                    ],
44                    ErrorCategory::SyntaxBracketMismatch,
45                ),
46                (
47                    vec![
48                        "unexpected token",
49                        "syntax error near",
50                        "parse error",
51                        "unexpected end",
52                    ],
53                    ErrorCategory::SyntaxUnexpectedToken,
54                ),
55                (
56                    vec!["operand expected", "missing operand", "expression expected"],
57                    ErrorCategory::SyntaxMissingOperand,
58                ),
59                // Command errors (10-13)
60                (
61                    vec!["command not found", "not found:", ": not found"],
62                    ErrorCategory::CommandNotFound,
63                ),
64                (
65                    vec![
66                        "Permission denied",
67                        "permission denied",
68                        "cannot execute",
69                        "not executable",
70                    ],
71                    ErrorCategory::CommandPermissionDenied,
72                ),
73                (
74                    vec!["invalid option", "unrecognized option", "illegal option"],
75                    ErrorCategory::CommandInvalidOption,
76                ),
77                (
78                    vec![
79                        "requires an argument",
80                        "missing argument",
81                        "option requires",
82                    ],
83                    ErrorCategory::CommandMissingArgument,
84                ),
85                // File errors (20-24)
86                (
87                    vec![
88                        "No such file",
89                        "no such file",
90                        "not found",
91                        "does not exist",
92                    ],
93                    ErrorCategory::FileNotFound,
94                ),
95                (
96                    vec!["Permission denied", "cannot access", "EACCES"],
97                    ErrorCategory::FilePermissionDenied,
98                ),
99                (
100                    vec!["Is a directory", "is a directory"],
101                    ErrorCategory::FileIsDirectory,
102                ),
103                (
104                    vec!["Not a directory", "not a directory"],
105                    ErrorCategory::FileNotDirectory,
106                ),
107                (
108                    vec!["Too many open files", "EMFILE", "ENFILE"],
109                    ErrorCategory::FileTooManyOpen,
110                ),
111                // Variable errors (30-32)
112                (
113                    vec![
114                        "unbound variable",
115                        "parameter not set",
116                        "undefined variable",
117                    ],
118                    ErrorCategory::VariableUnbound,
119                ),
120                (
121                    vec!["readonly variable", "read-only variable", "cannot assign"],
122                    ErrorCategory::VariableReadonly,
123                ),
124                (
125                    vec!["bad substitution", "bad parameter", "invalid substitution"],
126                    ErrorCategory::VariableBadSubstitution,
127                ),
128                // Process errors (40-42)
129                (
130                    vec!["Killed", "killed", "signal", "SIGKILL", "SIGTERM"],
131                    ErrorCategory::ProcessSignaled,
132                ),
133                (
134                    vec!["exit status", "exited with", "returned"],
135                    ErrorCategory::ProcessExitNonZero,
136                ),
137                (
138                    vec!["timed out", "timeout", "exceeded time"],
139                    ErrorCategory::ProcessTimeout,
140                ),
141                // Pipe/redirect errors (50-52)
142                (
143                    vec!["Broken pipe", "broken pipe", "SIGPIPE", "EPIPE"],
144                    ErrorCategory::PipeBroken,
145                ),
146                (
147                    vec![
148                        "cannot redirect",
149                        "redirect failed",
150                        "No space left",
151                        "ambiguous redirect",
152                    ],
153                    ErrorCategory::RedirectFailed,
154                ),
155                (
156                    vec![
157                        "here-document",
158                        "heredoc",
159                        "here document",
160                        "delimited by end-of-file",
161                    ],
162                    ErrorCategory::HereDocUnterminated,
163                ),
164            ],
165        }
166    }
167
168    /// Classify an error message by keywords.
169    #[must_use]
170    pub fn classify_by_keywords(&self, message: &str) -> ErrorCategory {
171        let message_lower = message.to_lowercase();
172
173        for (keywords, category) in &self.keyword_map {
174            for keyword in keywords {
175                if message_lower.contains(&keyword.to_lowercase()) {
176                    return *category;
177                }
178            }
179        }
180
181        ErrorCategory::Unknown
182    }
183
184    /// Calculate confidence based on keyword matches.
185    #[must_use]
186    pub fn confidence(&self, message: &str, category: ErrorCategory) -> f32 {
187        let message_lower = message.to_lowercase();
188        let mut matches = 0;
189        let mut total_keywords = 0;
190
191        for (keywords, cat) in &self.keyword_map {
192            if *cat == category {
193                total_keywords = keywords.len();
194                for keyword in keywords {
195                    if message_lower.contains(&keyword.to_lowercase()) {
196                        matches += 1;
197                    }
198                }
199                break;
200            }
201        }
202
203        if total_keywords == 0 {
204            return 0.5; // Default confidence for Unknown
205        }
206
207        // Base confidence + bonus for multiple matches
208        let base = 0.6;
209        let match_bonus = (matches as f32 / total_keywords as f32) * 0.35;
210        (base + match_bonus).min(0.95)
211    }
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that a freshly built classifier maps `message` to `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn assert_classified(message: &str, expected: ErrorCategory) {
        let classifier = ErrorClassifier::new();
        assert_eq!(classifier.classify_by_keywords(message), expected);
    }

    #[test]
    fn test_classify_syntax_quote_mismatch() {
        assert_classified(
            "unexpected EOF while looking for matching '\"'",
            ErrorCategory::SyntaxQuoteMismatch,
        );
    }

    #[test]
    fn test_classify_syntax_bracket_mismatch() {
        assert_classified(
            "syntax error near unexpected ')'",
            ErrorCategory::SyntaxBracketMismatch,
        );
    }

    #[test]
    fn test_classify_syntax_unexpected_token() {
        assert_classified(
            "syntax error near unexpected token 'done'",
            ErrorCategory::SyntaxUnexpectedToken,
        );
    }

    #[test]
    fn test_classify_command_not_found() {
        assert_classified(
            "bash: foobar: command not found",
            ErrorCategory::CommandNotFound,
        );
    }

    #[test]
    fn test_classify_command_permission_denied() {
        assert_classified(
            "bash: ./script.sh: Permission denied",
            ErrorCategory::CommandPermissionDenied,
        );
    }

    #[test]
    fn test_classify_file_not_found() {
        assert_classified(
            "cat: /nonexistent: No such file or directory",
            ErrorCategory::FileNotFound,
        );
    }

    #[test]
    fn test_classify_file_is_directory() {
        assert_classified("cat: /tmp: Is a directory", ErrorCategory::FileIsDirectory);
    }

    #[test]
    fn test_classify_variable_unbound() {
        assert_classified("bash: VAR: unbound variable", ErrorCategory::VariableUnbound);
    }

    #[test]
    fn test_classify_variable_readonly() {
        assert_classified(
            "bash: PATH: readonly variable",
            ErrorCategory::VariableReadonly,
        );
    }

    #[test]
    fn test_classify_variable_bad_substitution() {
        assert_classified(
            "bash: ${foo: bad substitution",
            ErrorCategory::VariableBadSubstitution,
        );
    }

    #[test]
    fn test_classify_pipe_broken() {
        assert_classified("Broken pipe", ErrorCategory::PipeBroken);
    }

    #[test]
    fn test_classify_redirect_failed() {
        assert_classified(
            "bash: /dev/full: No space left on device",
            ErrorCategory::RedirectFailed,
        );
    }

    #[test]
    fn test_classify_heredoc_unterminated() {
        assert_classified(
            "warning: here-document delimited by end-of-file",
            ErrorCategory::HereDocUnterminated,
        );
    }

    #[test]
    fn test_classify_unknown() {
        assert_classified("some random error message", ErrorCategory::Unknown);
    }

    #[test]
    fn test_confidence_high_match() {
        let conf = ErrorClassifier::new()
            .confidence("bash: foo: command not found", ErrorCategory::CommandNotFound);
        assert!(conf > 0.6, "Expected high confidence, got {conf}");
    }

    #[test]
    fn test_confidence_unknown() {
        let conf = ErrorClassifier::new().confidence("some message", ErrorCategory::Unknown);
        let deviation = (conf - 0.5).abs();
        assert!(
            deviation < f32::EPSILON,
            "Expected default confidence 0.5, got {conf}"
        );
    }
}