1#![allow(clippy::indexing_slicing)] #[derive(Debug, Clone)]
/// Fixed-length numeric feature vector derived from a command's exit code, its stderr
/// output and (optionally) the command line itself.
///
/// Vectors are built with [`ErrorFeatures::extract`]. Every entry lies in `[0.0, 1.0]`,
/// and the meaning of each position is reported by [`ErrorFeatures::feature_name`].
pub struct ErrorFeatures {
    /// Feature values, positionally aligned with [`ErrorFeatures::feature_name`].
    /// Holds [`ErrorFeatures::SIZE`] entries when produced by [`ErrorFeatures::extract`].
    pub features: Vec<f32>,
}

impl ErrorFeatures {
    /// Number of entries in every extracted feature vector.
    pub const SIZE: usize = 64;

    /// Exit codes that each get a dedicated indicator feature (indices 1..=9, in order).
    /// `feature_name` labels the last four (130, 137, 141, 143) as SIGINT, SIGKILL,
    /// SIGPIPE and SIGTERM.
    const ONE_HOT_EXIT_CODES: [i32; 9] = [1, 2, 126, 127, 128, 130, 137, 141, 143];

    /// Substrings searched for in the lower-cased stderr, mapped to indices 12..=31 in
    /// order. Matching is plain substring search, so overlapping entries fire together:
    /// `"not found"` is also set whenever `"command not found"` is.
    const DIAGNOSTIC_KEYWORDS: [&'static str; 20] = [
        // File-system problems.
        "not found",
        "no such file",
        "permission denied",
        "is a directory",
        "not a directory",
        "too many open",
        // Syntax problems.
        "syntax error",
        "unexpected",
        "unmatched",
        "unterminated",
        // Variable problems.
        "unbound variable",
        "bad substitution",
        "readonly",
        // Command-invocation problems.
        "command not found",
        "invalid option",
        "missing",
        // Process / signal problems.
        "broken pipe",
        "killed",
        "timeout",
        "timed out",
    ];

    /// Substrings searched for in the lower-cased stderr, mapped to indices 50..=63 in
    /// order. Note that `"sh:"` is a substring of every other shell prefix listed here,
    /// so the `shell_sh` feature is set whenever any of them is present.
    const SHELL_AND_SEVERITY_KEYWORDS: [&'static str; 14] = [
        "bash:",
        "sh:",
        "zsh:",
        "dash:",
        "ksh:",
        "fish:",
        "cannot",
        "failed",
        "error",
        "warning",
        "fatal",
        "abort",
        "segmentation",
        "core dump",
    ];

    /// Encodes a boolean as `1.0` (true) or `0.0` (false).
    fn flag(condition: bool) -> f32 {
        if condition {
            1.0
        } else {
            0.0
        }
    }

    /// Scales `count` by `full_scale` and caps the result at `1.0`.
    fn saturating_ratio(count: usize, full_scale: f32) -> f32 {
        (count as f32 / full_scale).min(1.0)
    }

    /// Builds the feature vector for one command result.
    ///
    /// * `exit_code` – process exit status. It is scaled by 255 and clamped to
    ///   `[0.0, 1.0]`, so negative or oversized codes cannot leave the unit range.
    /// * `stderr` – captured error output. Keyword features are matched
    ///   case-insensitively (against a lower-cased copy); quote and bracket counts use
    ///   the text as given. Length is measured in bytes.
    /// * `command` – the command line, if known. When `None`, the eight command
    ///   features (indices 42..=49) are all `0.0`. Command checks are case-sensitive.
    ///
    /// The returned vector always has [`Self::SIZE`] entries; this is verified with a
    /// `debug_assert_eq!` (debug builds only).
    #[must_use]
    pub fn extract(exit_code: i32, stderr: &str, command: Option<&str>) -> Self {
        let mut features = Vec::with_capacity(Self::SIZE);

        // 0: normalized exit code. The clamp keeps codes outside 0..=255 (for example
        // negative values) inside the [0, 1] range shared by every other feature.
        features.push((exit_code as f32 / 255.0).clamp(0.0, 1.0));

        // 1..=9: one indicator per well-known exit code.
        features.extend(
            Self::ONE_HOT_EXIT_CODES
                .iter()
                .map(|&code| Self::flag(exit_code == code)),
        );

        // 10..=11: stderr size, saturating at 1000 bytes and 10 lines.
        features.push(Self::saturating_ratio(stderr.len(), 1000.0));
        features.push(Self::saturating_ratio(stderr.lines().count(), 10.0));

        let stderr_lower = stderr.to_lowercase();

        // 12..=31: diagnostic keyword indicators.
        features.extend(
            Self::DIAGNOSTIC_KEYWORDS
                .iter()
                .map(|&keyword| Self::flag(stderr_lower.contains(keyword))),
        );

        // 32..=37: quote and bracket statistics. An odd count is used as the
        // "mismatch" signal for the corresponding delimiter family.
        let single_quotes = stderr.matches('\'').count();
        let double_quotes = stderr.matches('"').count();
        let delimiters = stderr
            .chars()
            .filter(|c| matches!(c, '(' | ')' | '[' | ']' | '{' | '}'))
            .count();

        features.push(Self::saturating_ratio(single_quotes, 10.0));
        features.push(Self::saturating_ratio(double_quotes, 10.0));
        features.push(Self::flag(single_quotes % 2 == 1));
        features.push(Self::flag(double_quotes % 2 == 1));
        features.push(Self::saturating_ratio(delimiters, 20.0));
        features.push(Self::flag(delimiters % 2 == 1));

        // 38..=41: hints that the message points at a source location.
        let has_line_num = stderr_lower.contains("line ");
        let has_column = stderr_lower.contains("column ") || stderr_lower.contains("col ");
        features.push(Self::flag(has_line_num));
        features.push(Self::flag(has_column));
        features.push(Self::flag(stderr_lower.contains("near")));
        features.push(Self::flag(stderr_lower.contains("expected")));

        // 42..=49: properties of the command line, or zeros when it is unknown.
        match command {
            Some(cmd) => features.extend([
                Self::saturating_ratio(cmd.len(), 100.0),
                Self::flag(cmd.contains('|')),
                Self::flag(cmd.contains('>')),
                Self::flag(cmd.contains('<')),
                Self::flag(cmd.contains("2>")),
                Self::flag(cmd.starts_with("sudo")),
                Self::flag(cmd.contains("&&") || cmd.contains("||")),
                Self::flag(cmd.contains('$')),
            ]),
            None => features.extend([0.0; 8]),
        }

        // 50..=63: shell prefixes and severity keywords.
        features.extend(
            Self::SHELL_AND_SEVERITY_KEYWORDS
                .iter()
                .map(|&keyword| Self::flag(stderr_lower.contains(keyword))),
        );

        debug_assert_eq!(features.len(), Self::SIZE, "Feature count mismatch");

        Self { features }
    }

    /// Borrows the feature values as a slice.
    #[must_use]
    pub fn as_slice(&self) -> &[f32] {
        &self.features
    }

    /// Returns the name of the feature stored at `index`, or `"unknown"` when `index`
    /// is not below [`Self::SIZE`].
    #[must_use]
    pub fn feature_name(index: usize) -> &'static str {
        // Sized by `SIZE` so that adding or removing a name without updating the
        // constant (or vice versa) fails to compile.
        const NAMES: [&str; ErrorFeatures::SIZE] = [
            "exit_code_normalized",
            "exit_code_is_1",
            "exit_code_is_2",
            "exit_code_is_126",
            "exit_code_is_127",
            "exit_code_is_128",
            "signal_sigint",
            "signal_sigkill",
            "signal_sigpipe",
            "signal_sigterm",
            "stderr_length",
            "stderr_line_count",
            "kw_not_found",
            "kw_no_such_file",
            "kw_permission_denied",
            "kw_is_directory",
            "kw_not_directory",
            "kw_too_many_open",
            "kw_syntax_error",
            "kw_unexpected",
            "kw_unmatched",
            "kw_unterminated",
            "kw_unbound_variable",
            "kw_bad_substitution",
            "kw_readonly",
            "kw_command_not_found",
            "kw_invalid_option",
            "kw_missing",
            "kw_broken_pipe",
            "kw_killed",
            "kw_timeout",
            "kw_timed_out",
            "single_quote_count",
            "double_quote_count",
            "single_quote_mismatch",
            "double_quote_mismatch",
            "bracket_count",
            "bracket_mismatch",
            "has_line_number",
            "has_column",
            "has_near",
            "has_expected",
            "cmd_length",
            "cmd_has_pipe",
            "cmd_has_output_redirect",
            "cmd_has_input_redirect",
            "cmd_has_stderr_redirect",
            "cmd_has_sudo",
            "cmd_is_compound",
            "cmd_has_variables",
            "shell_bash",
            "shell_sh",
            "shell_zsh",
            "shell_dash",
            "shell_ksh",
            "shell_fish",
            "kw_cannot",
            "kw_failed",
            "kw_error",
            "kw_warning",
            "kw_fatal",
            "kw_abort",
            "kw_segmentation",
            "kw_core_dump",
        ];
        NAMES.get(index).copied().unwrap_or("unknown")
    }
}
376
#[cfg(test)]
mod tests {
    //! Unit tests for `ErrorFeatures`: vector size, individual indicator positions,
    //! feature-name coverage and the `[0, 1]` normalization range.

    use super::*;

    /// Asserts that the indicator feature at `index` equals `1.0` (within `f32::EPSILON`).
    #[track_caller]
    fn assert_set(extracted: &ErrorFeatures, index: usize) {
        let value = extracted.features[index];
        assert!((value - 1.0).abs() < f32::EPSILON);
    }

    /// Extraction always yields `SIZE` entries.
    #[test]
    fn test_feature_vector_size() {
        let extracted = ErrorFeatures::extract(1, "test error", None);
        assert_eq!(extracted.features.len(), ErrorFeatures::SIZE);
    }

    /// Exit code 127 with a "command not found" message.
    #[test]
    fn test_exit_code_127_features() {
        let extracted = ErrorFeatures::extract(127, "bash: foobar: command not found", None);
        for index in [4, 25] {
            assert_set(&extracted, index);
        }
    }

    /// Exit code 126 with a "Permission denied" message.
    #[test]
    fn test_exit_code_126_features() {
        let extracted =
            ErrorFeatures::extract(126, "bash: ./script.sh: Permission denied", None);
        for index in [3, 14] {
            assert_set(&extracted, index);
        }
    }

    /// Syntax-error message sets the syntax, "unexpected" and "near" indicators.
    #[test]
    fn test_syntax_error_features() {
        let extracted =
            ErrorFeatures::extract(1, "bash: syntax error near unexpected token 'done'", None);
        for index in [18, 19, 40] {
            assert_set(&extracted, index);
        }
    }

    /// An odd number of double quotes is reported as a mismatch.
    #[test]
    fn test_quote_mismatch_detection() {
        let extracted =
            ErrorFeatures::extract(1, "unexpected EOF looking for matching '\"'", None);
        assert_set(&extracted, 35);
    }

    /// Pipe and output-redirect indicators are non-zero for a piped, redirected command.
    #[test]
    fn test_command_features() {
        let command = "cat file.txt | grep 'test' > output.txt";
        let extracted = ErrorFeatures::extract(1, "error", Some(command));
        assert!(extracted.features[43] > 0.0);
        assert!(extracted.features[44] > 0.0);
    }

    /// Exit code 141 sets the indicator at index 8.
    #[test]
    fn test_signal_features() {
        let extracted = ErrorFeatures::extract(141, "", None);
        assert_set(&extracted, 8);
    }

    /// Shell prefixes in stderr set the matching shell indicator.
    #[test]
    fn test_shell_detection() {
        let from_bash = ErrorFeatures::extract(1, "bash: error", None);
        assert_set(&from_bash, 50);

        let from_zsh = ErrorFeatures::extract(1, "zsh: error", None);
        assert_set(&from_zsh, 52);
    }

    /// "No such file" is matched case-insensitively.
    #[test]
    fn test_file_not_found_features() {
        let extracted =
            ErrorFeatures::extract(1, "cat: /nonexistent: No such file or directory", None);
        assert_set(&extracted, 13);
    }

    /// "unbound variable" sets the indicator at index 22.
    #[test]
    fn test_unbound_variable_features() {
        let extracted = ErrorFeatures::extract(1, "bash: VAR: unbound variable", None);
        assert_set(&extracted, 22);
    }

    /// Every index below `SIZE` has a real name.
    #[test]
    fn test_feature_names_coverage() {
        (0..ErrorFeatures::SIZE).for_each(|i| {
            assert_ne!(
                ErrorFeatures::feature_name(i),
                "unknown",
                "Feature {i} has no name"
            );
        });
    }

    /// Oversized inputs still produce values inside `[0, 1]`.
    #[test]
    fn test_normalization_bounds() {
        let long_stderr = "x".repeat(10000);
        let long_command = "x".repeat(1000);
        let extracted = ErrorFeatures::extract(255, &long_stderr, Some(&long_command));

        for (i, &val) in extracted.features.iter().enumerate() {
            assert!(
                (0.0..=1.0).contains(&val),
                "Feature {i} ({}) out of bounds: {val}",
                ErrorFeatures::feature_name(i)
            );
        }
    }
}