/// Fixed-length numeric feature vector describing a failed shell command.
///
/// Every value lies in `[0.0, 1.0]`. The meaning of each position is given by
/// [`ErrorFeatures::feature_name`], and the vector always holds exactly
/// [`ErrorFeatures::SIZE`] entries.
#[derive(Debug, Clone)]
pub struct ErrorFeatures {
    /// Feature values, indexed as described by [`ErrorFeatures::feature_name`].
    pub features: Vec<f32>,
}

impl ErrorFeatures {
    /// Number of entries in every extracted feature vector.
    pub const SIZE: usize = 64;

    /// Exit codes that each get a dedicated one-hot feature (indices 1..=9).
    const EXIT_CODES: [i32; 9] = [1, 2, 126, 127, 128, 130, 137, 141, 143];

    /// Lower-case substrings searched for in stderr, in feature order
    /// (indices 12..=31).
    ///
    /// NOTE(review): matching is plain substring search, so entries overlap:
    /// any text containing "command not found" also sets "not found".
    const STDERR_KEYWORDS: [&'static str; 20] = [
        "not found",
        "no such file",
        "permission denied",
        "is a directory",
        "not a directory",
        "too many open",
        "syntax error",
        "unexpected",
        "unmatched",
        "unterminated",
        "unbound variable",
        "bad substitution",
        "readonly",
        "command not found",
        "invalid option",
        "missing",
        "broken pipe",
        "killed",
        "timeout",
        "timed out",
    ];

    /// Shell prefixes and generic severity words searched for in stderr, in
    /// feature order (indices 50..=63).
    ///
    /// NOTE(review): "sh:" is a substring of "bash:", "zsh:", "dash:", "ksh:"
    /// and "fish:", so the `shell_sh` feature is set whenever any of those is.
    /// Left as-is because changing it alters the feature semantics.
    const TRAILING_KEYWORDS: [&'static str; 14] = [
        "bash:",
        "sh:",
        "zsh:",
        "dash:",
        "ksh:",
        "fish:",
        "cannot",
        "failed",
        "error",
        "warning",
        "fatal",
        "abort",
        "segmentation",
        "core dump",
    ];

    /// Human-readable name of every feature, indexed by position.
    const FEATURE_NAMES: [&'static str; Self::SIZE] = [
        "exit_code_normalized",
        "exit_code_is_1",
        "exit_code_is_2",
        "exit_code_is_126",
        "exit_code_is_127",
        "exit_code_is_128",
        "signal_sigint",
        "signal_sigkill",
        "signal_sigpipe",
        "signal_sigterm",
        "stderr_length",
        "stderr_line_count",
        "kw_not_found",
        "kw_no_such_file",
        "kw_permission_denied",
        "kw_is_directory",
        "kw_not_directory",
        "kw_too_many_open",
        "kw_syntax_error",
        "kw_unexpected",
        "kw_unmatched",
        "kw_unterminated",
        "kw_unbound_variable",
        "kw_bad_substitution",
        "kw_readonly",
        "kw_command_not_found",
        "kw_invalid_option",
        "kw_missing",
        "kw_broken_pipe",
        "kw_killed",
        "kw_timeout",
        "kw_timed_out",
        "single_quote_count",
        "double_quote_count",
        "single_quote_mismatch",
        "double_quote_mismatch",
        "bracket_count",
        "bracket_mismatch",
        "has_line_number",
        "has_column",
        "has_near",
        "has_expected",
        "cmd_length",
        "cmd_has_pipe",
        "cmd_has_output_redirect",
        "cmd_has_input_redirect",
        "cmd_has_stderr_redirect",
        "cmd_has_sudo",
        "cmd_is_compound",
        "cmd_has_variables",
        "shell_bash",
        "shell_sh",
        "shell_zsh",
        "shell_dash",
        "shell_ksh",
        "shell_fish",
        "kw_cannot",
        "kw_failed",
        "kw_error",
        "kw_warning",
        "kw_fatal",
        "kw_abort",
        "kw_segmentation",
        "kw_core_dump",
    ];

    /// Maps a boolean onto a binary feature value (`1.0` or `0.0`).
    fn flag(on: bool) -> f32 {
        if on {
            1.0
        } else {
            0.0
        }
    }

    /// Divides `count` by `scale` and saturates the result at `1.0`.
    fn saturating_ratio(count: usize, scale: f32) -> f32 {
        (count as f32 / scale).min(1.0)
    }

    /// Builds the feature vector for one failed command.
    ///
    /// * `exit_code` - process exit status. It is scaled by 255 and clamped to
    ///   `[0.0, 1.0]`, so negative codes map to `0.0` and codes above 255 to
    ///   `1.0`.
    /// * `stderr` - captured standard error. Keyword checks run against a
    ///   lower-cased copy; quote and bracket counts use the original text.
    /// * `command` - the command line, if known. When `None`, the eight
    ///   command features (indices 42..=49) are all `0.0`.
    ///
    /// The returned vector always has [`Self::SIZE`] entries; this is checked
    /// with a `debug_assert_eq!` in debug builds.
    #[must_use]
    pub fn extract(exit_code: i32, stderr: &str, command: Option<&str>) -> Self {
        let mut features = Vec::with_capacity(Self::SIZE);

        // [0] scaled exit code. Clamped so that out-of-range codes (negative
        // or above 255) cannot push the value outside [0, 1].
        features.push((exit_code as f32 / 255.0).clamp(0.0, 1.0));
        // [1..=9] one-hot flags for specific exit codes.
        features.extend(
            Self::EXIT_CODES
                .iter()
                .map(|&code| Self::flag(exit_code == code)),
        );

        // [10..=11] stderr size: byte length and line count, saturating.
        features.push(Self::saturating_ratio(stderr.len(), 1000.0));
        features.push(Self::saturating_ratio(stderr.lines().count(), 10.0));

        let stderr_lower = stderr.to_lowercase();
        let has = |needle: &str| Self::flag(stderr_lower.contains(needle));

        // [12..=31] keyword presence flags.
        features.extend(Self::STDERR_KEYWORDS.iter().map(|&kw| has(kw)));

        // [32..=37] quote and bracket statistics on the original stderr text.
        let single_quotes = stderr.matches('\'').count();
        let double_quotes = stderr.matches('"').count();
        let grouping = stderr
            .chars()
            .filter(|&c| matches!(c, '(' | ')' | '[' | ']' | '{' | '}'))
            .count();

        features.push(Self::saturating_ratio(single_quotes, 10.0));
        features.push(Self::saturating_ratio(double_quotes, 10.0));
        // An odd count is used as the "mismatch" signal.
        features.push(Self::flag(single_quotes % 2 == 1));
        features.push(Self::flag(double_quotes % 2 == 1));
        features.push(Self::saturating_ratio(grouping, 20.0));
        features.push(Self::flag(grouping % 2 == 1));

        // [38..=41] location hints in the message.
        features.push(has("line "));
        features.push(Self::flag(
            stderr_lower.contains("column ") || stderr_lower.contains("col "),
        ));
        features.push(has("near"));
        features.push(has("expected"));

        // [42..=49] command-line features; all zero when no command is given.
        match command {
            Some(cmd) => features.extend([
                Self::saturating_ratio(cmd.len(), 100.0),
                Self::flag(cmd.contains('|')),
                Self::flag(cmd.contains('>')),
                Self::flag(cmd.contains('<')),
                Self::flag(cmd.contains("2>")),
                // NOTE(review): prefix match only; also true for e.g. "sudoku".
                Self::flag(cmd.starts_with("sudo")),
                Self::flag(cmd.contains("&&") || cmd.contains("||")),
                Self::flag(cmd.contains('$')),
            ]),
            None => features.extend([0.0; 8]),
        }

        // [50..=63] shell prefixes and generic severity keywords.
        features.extend(Self::TRAILING_KEYWORDS.iter().map(|&kw| has(kw)));

        debug_assert_eq!(features.len(), Self::SIZE, "Feature count mismatch");

        Self { features }
    }

    /// Borrows the feature values as a slice.
    #[must_use]
    pub fn as_slice(&self) -> &[f32] {
        &self.features
    }

    /// Returns the name of the feature at `index`, or `"unknown"` when
    /// `index` is not below [`Self::SIZE`].
    #[must_use]
    pub fn feature_name(index: usize) -> &'static str {
        Self::FEATURE_NAMES
            .get(index)
            .copied()
            .unwrap_or("unknown")
    }
}
375
#[cfg(test)]
mod tests {
    use super::*;

    /// True when the feature at `index` equals 1.0 within `f32::EPSILON`.
    fn is_set(extracted: &ErrorFeatures, index: usize) -> bool {
        (extracted.features[index] - 1.0).abs() < f32::EPSILON
    }

    /// The vector length always matches the declared size.
    #[test]
    fn test_feature_vector_size() {
        let extracted = ErrorFeatures::extract(1, "test error", None);
        assert_eq!(extracted.features.len(), ErrorFeatures::SIZE);
    }

    /// Exit code 127 with a "command not found" message sets both flags.
    #[test]
    fn test_exit_code_127_features() {
        let extracted = ErrorFeatures::extract(127, "bash: foobar: command not found", None);
        assert!(is_set(&extracted, 4));
        assert!(is_set(&extracted, 25));
    }

    /// Exit code 126 with a mixed-case "Permission denied" message sets both flags.
    #[test]
    fn test_exit_code_126_features() {
        let extracted =
            ErrorFeatures::extract(126, "bash: ./script.sh: Permission denied", None);
        assert!(is_set(&extracted, 3));
        assert!(is_set(&extracted, 14));
    }

    /// Syntax-error messages set the syntax, unexpected and near flags.
    #[test]
    fn test_syntax_error_features() {
        let message = "bash: syntax error near unexpected token 'done'";
        let extracted = ErrorFeatures::extract(1, message, None);
        assert!(is_set(&extracted, 18));
        assert!(is_set(&extracted, 19));
        assert!(is_set(&extracted, 40));
    }

    /// A single double quote in stderr is reported as a double-quote mismatch.
    #[test]
    fn test_quote_mismatch_detection() {
        let message = "unexpected EOF looking for matching '\"'";
        let extracted = ErrorFeatures::extract(1, message, None);
        assert!(is_set(&extracted, 35));
    }

    /// Pipes and output redirection in the command are detected.
    #[test]
    fn test_command_features() {
        let command = "cat file.txt | grep 'test' > output.txt";
        let extracted = ErrorFeatures::extract(1, "error", Some(command));
        assert!(extracted.features[43] > 0.0);
        assert!(extracted.features[44] > 0.0);
    }

    /// Exit code 141 sets the SIGPIPE flag.
    #[test]
    fn test_signal_features() {
        let extracted = ErrorFeatures::extract(141, "", None);
        assert!(is_set(&extracted, 8));
    }

    /// Shell prefixes in stderr set the matching shell flag.
    #[test]
    fn test_shell_detection() {
        let from_bash = ErrorFeatures::extract(1, "bash: error", None);
        assert!(is_set(&from_bash, 50));

        let from_zsh = ErrorFeatures::extract(1, "zsh: error", None);
        assert!(is_set(&from_zsh, 52));
    }

    /// "No such file" is matched case-insensitively.
    #[test]
    fn test_file_not_found_features() {
        let message = "cat: /nonexistent: No such file or directory";
        let extracted = ErrorFeatures::extract(1, message, None);
        assert!(is_set(&extracted, 13));
    }

    /// "unbound variable" sets its dedicated flag.
    #[test]
    fn test_unbound_variable_features() {
        let extracted = ErrorFeatures::extract(1, "bash: VAR: unbound variable", None);
        assert!(is_set(&extracted, 22));
    }

    /// Every index below `SIZE` has a real name.
    #[test]
    fn test_feature_names_coverage() {
        for i in 0..ErrorFeatures::SIZE {
            assert_ne!(
                ErrorFeatures::feature_name(i),
                "unknown",
                "Feature {i} has no name"
            );
        }
    }

    /// Oversized inputs still produce values within [0, 1].
    #[test]
    fn test_normalization_bounds() {
        let long_stderr = "x".repeat(10000);
        let long_command = "x".repeat(1000);
        let extracted = ErrorFeatures::extract(255, &long_stderr, Some(&long_command));

        for (i, &val) in extracted.features.iter().enumerate() {
            assert!(
                (0.0..=1.0).contains(&val),
                "Feature {i} ({}) out of bounds: {val}",
                ErrorFeatures::feature_name(i)
            );
        }
    }
}