tandem_server/bug_monitor/
error_provenance.rs1use std::path::Path;
15use std::time::Duration;
16
17use tokio::process::Command;
18use tokio::time::timeout;
19
20const GREP_TIMEOUT: Duration = Duration::from_secs(5);
21const MAX_HITS: usize = 10;
22const MAX_SUBSTRINGS: usize = 3;
23const MIN_SUBSTRING_CHARS: usize = 20;
24const MIN_SUBSTRING_WORDS: usize = 3;
25const MATCH_LINE_TRUNCATE: usize = 240;
26
/// One source location where a needle derived from an error message was found.
///
/// Values are produced by `parse_git_grep_output` from `git grep -n` output
/// and consumed by `render_provenance_section`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProvenanceHit {
    /// File path exactly as it appears in the `git grep` output line (the
    /// text before the `:<line>:` separator).
    pub path: String,
    /// Line number parsed from the `git grep -n` output line.
    pub line: u32,
    /// Text of the matching line. When built by `parse_git_grep_output` it is
    /// cut to at most `MATCH_LINE_TRUNCATE` bytes, with a trailing `…` when
    /// it was shortened.
    pub snippet: String,
}
39
40pub fn distinctive_substrings(error: &str) -> Vec<String> {
44 let cleaned = strip_dynamic_tokens(error);
45 let mut runs = collect_runs(&cleaned);
46 runs.sort_by(|a, b| b.len().cmp(&a.len()));
47 runs.dedup();
48 runs.truncate(MAX_SUBSTRINGS);
49 runs
50}
51
/// Replaces the parts of an error message that look templated with a single
/// space, leaving the static text in place.
///
/// Three kinds of content are blanked out:
/// * a backtick-quoted segment (an unclosed backtick swallows the rest of the
///   input);
/// * a single-quoted segment whose closing quote appears within the next 64
///   characters and which is non-empty;
/// * every run of ASCII digits.
///
/// A lone apostrophe (for example in "don't") is kept as-is and the text that
/// follows it is still scanned normally, so digits and backtick segments after
/// a contraction are stripped like anywhere else. An empty pair `''` collapses
/// to a single `'`.
fn strip_dynamic_tokens(error: &str) -> String {
    let mut out = String::with_capacity(error.len());
    let mut chars = error.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '`' => {
                // Discard everything up to and including the closing backtick.
                for inner in chars.by_ref() {
                    if inner == '`' {
                        break;
                    }
                }
                out.push(' ');
            }
            '\'' => {
                // Look ahead on a clone so nothing is consumed unless a
                // closing quote really exists within the 64-character window.
                let close = chars.clone().take(64).position(|inner| inner == '\'');
                match close {
                    // `''`: consume the second quote and keep one literal quote.
                    Some(0) => {
                        chars.next();
                        out.push('\'');
                    }
                    // Quoted segment: skip its content plus the closing quote.
                    Some(len) => {
                        for _ in 0..=len {
                            chars.next();
                        }
                        out.push(' ');
                    }
                    // Unmatched apostrophe: keep it and let the main loop
                    // process the following characters as usual.
                    None => out.push('\''),
                }
            }
            d if d.is_ascii_digit() => {
                // Collapse the whole digit run into one space.
                while chars.next_if(char::is_ascii_digit).is_some() {}
                out.push(' ');
            }
            _ => out.push(ch),
        }
    }
    out
}
105
106fn collect_runs(cleaned: &str) -> Vec<String> {
107 let words = cleaned
108 .split_whitespace()
109 .filter(|w| !looks_like_dynamic_token(w))
110 .collect::<Vec<_>>();
111 if words.is_empty() {
112 return Vec::new();
113 }
114 let mut out = Vec::new();
117 let joined = words.join(" ");
118 if substring_qualifies(&joined) {
119 out.push(joined.clone());
120 }
121 for chunk in cleaned.split(|c: char| c == ':' || c == ',' || c == ';' || c == '\n') {
124 let chunk = chunk
125 .split_whitespace()
126 .filter(|w| !looks_like_dynamic_token(w))
127 .collect::<Vec<_>>()
128 .join(" ");
129 if substring_qualifies(&chunk) && !out.iter().any(|existing| existing == &chunk) {
130 out.push(chunk);
131 }
132 }
133 out
134}
135
136fn substring_qualifies(s: &str) -> bool {
137 let trimmed = s.trim();
138 if trimmed.is_empty() {
139 return false;
140 }
141 if trimmed.len() < MIN_SUBSTRING_CHARS
142 && trimmed.split_whitespace().count() < MIN_SUBSTRING_WORDS
143 {
144 return false;
145 }
146 true
147}
148
/// Heuristically flags a single word as runtime-generated rather than static
/// message text.
///
/// A word is flagged when any of these holds:
/// * it is empty;
/// * it contains a `/` (path-like);
/// * it is at least 8 bytes long and made only of ASCII hex digits, `-` and
///   `_` (id / uuid-like);
/// * it is at least 12 bytes long, purely ASCII alphanumeric, and contains no
///   vowel (opaque token-like).
fn looks_like_dynamic_token(word: &str) -> bool {
    let is_id_char = |c: char| c.is_ascii_hexdigit() || matches!(c, '-' | '_');
    let is_vowel = |c: char| matches!(c.to_ascii_lowercase(), 'a' | 'e' | 'i' | 'o' | 'u');

    word.is_empty()
        || word.contains('/')
        || (word.len() >= 8 && word.chars().all(is_id_char))
        || (word.len() >= 12
            && word.chars().all(|c| c.is_ascii_alphanumeric())
            && !word.chars().any(is_vowel))
}
176
177pub async fn locate_error_provenance(
182 workspace_root: &Path,
183 error_message: &str,
184) -> Vec<ProvenanceHit> {
185 let substrings = distinctive_substrings(error_message);
186 if substrings.is_empty() {
187 let preview = error_message.chars().take(160).collect::<String>();
188 tracing::info!(
189 error_message_preview = %preview,
190 workspace_root = %workspace_root.display(),
191 "error provenance: distinctive_substrings produced no usable needles for this error",
192 );
193 return Vec::new();
194 }
195 let workspace_root = workspace_root.to_path_buf();
196 let mut hits: Vec<ProvenanceHit> = Vec::new();
197 let mut tried = 0usize;
198 let mut timeouts = 0usize;
199 let mut grep_errors = 0usize;
200 for needle in &substrings {
201 if hits.len() >= MAX_HITS {
202 break;
203 }
204 tried += 1;
205 match timeout(GREP_TIMEOUT, git_grep(&workspace_root, needle)).await {
206 Ok(Ok(found)) => {
207 for hit in found {
208 if hits.len() >= MAX_HITS {
209 break;
210 }
211 if hits
212 .iter()
213 .any(|existing| existing.path == hit.path && existing.line == hit.line)
214 {
215 continue;
216 }
217 hits.push(hit);
218 }
219 }
220 Ok(Err(error)) => {
221 grep_errors += 1;
222 tracing::info!(
223 needle = %needle,
224 workspace_root = %workspace_root.display(),
225 error = %error,
226 "error provenance: git grep returned an io error for this needle",
227 );
228 }
229 Err(_) => {
230 timeouts += 1;
231 tracing::info!(
232 needle = %needle,
233 workspace_root = %workspace_root.display(),
234 timeout_ms = GREP_TIMEOUT.as_millis() as u64,
235 "error provenance: git grep timed out for this needle",
236 );
237 }
238 }
239 }
240 if hits.is_empty() {
241 tracing::info!(
242 workspace_root = %workspace_root.display(),
243 substring_count = substrings.len(),
244 tried,
245 timeouts,
246 grep_errors,
247 "error provenance: every needle came back empty (no source matches found)",
248 );
249 }
250 hits
251}
252
253async fn git_grep(workspace_root: &Path, needle: &str) -> std::io::Result<Vec<ProvenanceHit>> {
254 let output = Command::new("git")
255 .arg("-C")
256 .arg(workspace_root)
257 .arg("grep")
258 .arg("-n")
259 .arg("-F")
260 .arg("--no-color")
261 .arg(needle)
262 .arg("--")
263 .args([
264 "*.rs", "*.ts", "*.tsx", "*.js", "*.jsx", "*.py", "*.go", "*.java", "*.kt", "*.swift",
265 ])
266 .output()
267 .await?;
268 let exit_code = output.status.code();
269 if !output.status.success() {
275 if exit_code != Some(1) {
276 let stderr = String::from_utf8_lossy(&output.stderr);
277 let stderr_preview: String = stderr.chars().take(240).collect();
278 tracing::info!(
279 needle = %needle,
280 workspace_root = %workspace_root.display(),
281 exit_code = ?exit_code,
282 stderr_preview = %stderr_preview,
283 "error provenance: git grep exited non-zero (likely not a git repo or grep config error)",
284 );
285 }
286 return Ok(Vec::new());
287 }
288 Ok(parse_git_grep_output(&String::from_utf8_lossy(
289 &output.stdout,
290 )))
291}
292
293fn parse_git_grep_output(stdout: &str) -> Vec<ProvenanceHit> {
294 let mut hits = Vec::new();
298 for raw in stdout.lines() {
299 if raw.is_empty() {
300 continue;
301 }
302 let Some((path, line_no, body)) = split_grep_line(raw) else {
303 continue;
304 };
305 let snippet = truncate_on_char_boundary(body, MATCH_LINE_TRUNCATE);
306 hits.push(ProvenanceHit {
307 path: path.to_string(),
308 line: line_no,
309 snippet,
310 });
311 }
312 hits
313}
314
/// Splits one `git grep -n` output line into `(path, line_number, body)`.
///
/// The split point is the first `:` that is immediately followed by one or
/// more ASCII digits, another `:`, and whose digits fit in a `u32`. Colons
/// that do not satisfy this are treated as part of the path. Returns `None`
/// when no such separator exists.
fn split_grep_line(raw: &str) -> Option<(&str, u32, &str)> {
    raw.match_indices(':').find_map(|(colon, _)| {
        let after_colon = &raw[colon + 1..];
        let digit_count = after_colon
            .bytes()
            .take_while(u8::is_ascii_digit)
            .count();
        if digit_count == 0 {
            return None;
        }
        // The digit run must be terminated by a second colon.
        let body = after_colon[digit_count..].strip_prefix(':')?;
        // A digit run too large for `u32` is not a line number; keep looking.
        let line_no = after_colon[..digit_count].parse::<u32>().ok()?;
        Some((&raw[..colon], line_no, body))
    })
}
344
345pub fn render_provenance_section(hits: &[ProvenanceHit]) -> Option<String> {
348 if hits.is_empty() {
349 return None;
350 }
351 let mut out = String::from("### Error provenance\n\n");
352 out.push_str("Likely emission sites for the failure message in this workspace:\n\n");
353 let mut total = 0usize;
354 for hit in hits {
355 let entry = format!(
356 "- `{}:{}`\n ```\n{}\n ```\n",
357 hit.path,
358 hit.line,
359 indent_snippet(&hit.snippet)
360 );
361 if total + entry.len() > 3_000 {
362 break;
363 }
364 total += entry.len();
365 out.push_str(&entry);
366 }
367 Some(out)
368}
369
370fn indent_snippet(snippet: &str) -> String {
371 snippet
372 .lines()
373 .map(|line| format!(" {}", truncate_on_char_boundary_no_ellipsis(line, 200)))
374 .collect::<Vec<_>>()
375 .join("\n")
376}
377
/// Shortens `s` to at most `max_bytes` bytes of original text and appends `…`.
///
/// Input that already fits is returned unchanged (no ellipsis). Otherwise the
/// cut is moved back to the nearest UTF-8 character boundary at or below
/// `max_bytes`, so the slice never splits a multi-byte character.
fn truncate_on_char_boundary(s: &str, max_bytes: usize) -> String {
    if s.len() <= max_bytes {
        return s.to_owned();
    }
    // Offset 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0);
    let mut clipped = String::with_capacity(cut + '…'.len_utf8());
    clipped.push_str(&s[..cut]);
    clipped.push('…');
    clipped
}
394
/// Returns the longest prefix of `s` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary. Nothing is appended.
fn truncate_on_char_boundary_no_ellipsis(s: &str, max_bytes: usize) -> String {
    // End offset of the last character that still fits within the limit;
    // 0 when not even the first character fits (or `s` is empty).
    let keep = s
        .char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
        .take_while(|&end| end <= max_bytes)
        .last()
        .unwrap_or(0);
    s[..keep].to_owned()
}
405
// Unit tests for needle extraction, `git grep` output parsing, truncation and
// Markdown rendering, plus two async tests that drive `locate_error_provenance`.
#[cfg(test)]
mod tests {
    use super::*;

    // Backtick-quoted names and digit runs are removed; surrounding static
    // text survives.
    #[test]
    fn distinctive_substrings_strips_backtick_segments() {
        let result = distinctive_substrings(
            "automation node `search_multi_agent` timed out after 180000 ms",
        );
        let joined = result.join(" | ");
        assert!(
            joined.contains("automation node") && joined.contains("timed out after"),
            "should keep static text: {joined}"
        );
        assert!(
            !joined.contains("search_multi_agent"),
            "should drop backtick-quoted node name: {joined}"
        );
        assert!(
            !joined.contains("180000"),
            "should drop the templated duration: {joined}"
        );
    }

    // A message with nothing dynamic in it comes back verbatim as the first
    // (longest) needle.
    #[test]
    fn distinctive_substrings_passes_through_fully_static_message() {
        let result = distinctive_substrings("automation run blocked by upstream node outcome");
        assert_eq!(
            result.first().map(String::as_str),
            Some("automation run blocked by upstream node outcome")
        );
    }

    // The uuid's digit runs are blanked, so its leading group no longer
    // appears in any needle.
    #[test]
    fn distinctive_substrings_strips_uuid_like_tokens() {
        let result =
            distinctive_substrings("draft 9ee33834-bf6d-4f86-acb3-3cd41d9cef19 failed to publish");
        let joined = result.join(" | ");
        assert!(joined.contains("failed to publish"), "got: {joined}");
        assert!(
            !joined.contains("9ee33834"),
            "should strip uuid-like token: {joined}"
        );
    }

    // Digit runs are blanked and words containing `/` are dropped.
    #[test]
    fn distinctive_substrings_strips_durations_and_paths() {
        let result =
            distinctive_substrings("no provider activity for at least 300s on /tmp/run-1/state");
        let joined = result.join(" | ");
        assert!(
            joined.contains("no provider activity for at least"),
            "got: {joined}"
        );
        assert!(!joined.contains("300"), "should strip number: {joined}");
        assert!(!joined.contains("/tmp"), "should drop path: {joined}");
    }

    // Inputs that leave nothing, or too little, after scrubbing yield no
    // needles at all.
    #[test]
    fn distinctive_substrings_returns_empty_for_trivial_input() {
        assert!(distinctive_substrings("").is_empty());
        assert!(distinctive_substrings("ok").is_empty());
        assert!(distinctive_substrings("`x` 123").is_empty());
    }

    // The input splits into more qualifying pieces than `MAX_SUBSTRINGS`; the
    // result must still respect the cap.
    #[test]
    fn distinctive_substrings_caps_at_max() {
        let input = "alpha bravo charlie delta: echo foxtrot golf hotel; india juliet kilo lima, mike november oscar papa, quebec romeo sierra tango";
        let result = distinctive_substrings(input);
        assert!(result.len() <= MAX_SUBSTRINGS);
    }

    // No hits means no section.
    #[test]
    fn render_provenance_section_returns_none_for_empty_hits() {
        assert!(render_provenance_section(&[]).is_none());
    }

    // A single hit renders the heading, the `path:line` location and every
    // line of a multi-line snippet.
    #[test]
    fn render_provenance_section_includes_path_line_and_snippet() {
        let hits = vec![ProvenanceHit {
            path: "crates/foo/src/bar.rs".to_string(),
            line: 42,
            snippet: "let x = 1;\nlet y = 2;\nlet z = 3;".to_string(),
        }];
        let rendered = render_provenance_section(&hits).expect("section");
        assert!(rendered.contains("Error provenance"));
        assert!(rendered.contains("crates/foo/src/bar.rs:42"));
        assert!(rendered.contains("let y = 2;"));
    }

    // The first entry (20 lines, each clipped to 200 bytes) already exceeds
    // the 3_000-byte entry budget, so rendering stops before `b.rs` is reached.
    #[test]
    fn render_provenance_section_caps_total_size() {
        let big_snippet = (0..20)
            .map(|_| "x".repeat(220))
            .collect::<Vec<_>>()
            .join("\n");
        let hits = vec![
            ProvenanceHit {
                path: "a.rs".to_string(),
                line: 1,
                snippet: big_snippet.clone(),
            },
            ProvenanceHit {
                path: "b.rs".to_string(),
                line: 1,
                snippet: "small".to_string(),
            },
        ];
        let rendered = render_provenance_section(&hits).expect("section");
        assert!(!rendered.contains("b.rs"));
    }

    // Two well-formed `path:line:body` lines produce two hits with the path
    // and line number split out.
    #[test]
    fn parse_git_grep_output_extracts_path_line_body() {
        let stdout = "\
src/lib.rs:11: bail!(\"automation run blocked by upstream node outcome\");
crates/foo/bar.rs:42:fn x() {}
";
        let hits = parse_git_grep_output(stdout);
        assert_eq!(hits.len(), 2);
        assert_eq!(hits[0].path, "src/lib.rs");
        assert_eq!(hits[0].line, 11);
        assert!(hits[0].snippet.contains("blocked by upstream"));
        assert_eq!(hits[1].path, "crates/foo/bar.rs");
        assert_eq!(hits[1].line, 42);
    }

    // Dashes and underscores in the path must not confuse the
    // `:<digits>:` separator search.
    #[test]
    fn parse_git_grep_output_handles_paths_with_dashes() {
        let stdout = "node_modules/some-package/file.js:7:throw new Error('boom');\n";
        let hits = parse_git_grep_output(stdout);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].path, "node_modules/some-package/file.js");
        assert_eq!(hits[0].line, 7);
        assert!(hits[0].snippet.contains("throw new Error"));
    }

    // A 1_000-byte body is cut to `MATCH_LINE_TRUNCATE` bytes plus the
    // ellipsis (at most 4 bytes of slack allowed here).
    #[test]
    fn parse_git_grep_output_truncates_long_lines() {
        let body = "x".repeat(1_000);
        let stdout = format!("file.rs:1:{body}\n");
        let hits = parse_git_grep_output(&stdout);
        assert_eq!(hits.len(), 1);
        assert!(hits[0].snippet.len() <= MATCH_LINE_TRUNCATE + 4);
        assert!(hits[0].snippet.ends_with('…'));
    }

    // `MATCH_LINE_TRUNCATE - 1` ASCII bytes followed by 3-byte characters puts
    // the cut offset inside a multi-byte character; slicing there would panic
    // if the boundary were not adjusted.
    #[test]
    fn parse_git_grep_output_does_not_panic_on_multibyte_boundary() {
        let mut body = "x".repeat(MATCH_LINE_TRUNCATE - 1);
        body.push_str("漢字漢字漢字");
        let stdout = format!("file.rs:1:{body}\n");
        let hits = parse_git_grep_output(&stdout);
        assert_eq!(hits.len(), 1);
        // Walking the chars exercises the snippet as a whole UTF-8 string.
        let _ = hits[0].snippet.chars().count();
        assert!(hits[0].snippet.ends_with('…'));
    }

    // Input shorter than the limit is returned unchanged, without ellipsis.
    #[test]
    fn truncate_on_char_boundary_passes_through_short_input() {
        assert_eq!(truncate_on_char_boundary("hello", 240), "hello");
    }

    // 238 ASCII bytes plus one 3-byte character: offset 240 lies inside that
    // character, so the cut has to step back to 238.
    #[test]
    fn truncate_on_char_boundary_steps_back_for_multibyte() {
        let s = format!("{}漢", "x".repeat(238));
        let out = truncate_on_char_boundary(&s, 240);
        assert!(out.ends_with('…'));
        assert!(out.is_char_boundary(out.len() - '…'.len_utf8()));
    }

    // Lines without a `:<digits>:` separator are skipped rather than reported.
    #[test]
    fn parse_git_grep_output_skips_malformed_lines() {
        let stdout = "no colon here at all\nstill nothing\n";
        let hits = parse_git_grep_output(stdout);
        assert!(hits.is_empty());
    }

    // End-to-end check against a throwaway git repository containing one
    // source file with a known message on line 2.
    #[tokio::test]
    async fn locate_error_provenance_finds_known_string_in_temp_workspace() {
        let dir = tempfile::tempdir().expect("tempdir");
        let root = dir.path();
        let init = std::process::Command::new("git")
            .arg("-C")
            .arg(root)
            .arg("init")
            .arg("-q")
            .output();
        if init.is_err() {
            // The `git` binary could not be spawned; skip instead of failing.
            return;
        }
        // Identity settings so the commit below does not depend on the host's
        // git configuration; failures of these setup steps are ignored.
        let _ = std::process::Command::new("git")
            .arg("-C")
            .arg(root)
            .args(["config", "user.email", "test@example.com"])
            .output();
        let _ = std::process::Command::new("git")
            .arg("-C")
            .arg(root)
            .args(["config", "user.name", "test"])
            .output();
        std::fs::write(
            root.join("source.rs"),
            "fn main() {\n panic!(\"the oracle has spoken from the void\");\n}\n",
        )
        .expect("write source");
        // Stage and commit the file so that `git grep` can see it.
        let _ = std::process::Command::new("git")
            .arg("-C")
            .arg(root)
            .args(["add", "."])
            .output();
        let _ = std::process::Command::new("git")
            .arg("-C")
            .arg(root)
            .args(["commit", "-q", "-m", "init"])
            .output();
        let hits = locate_error_provenance(root, "the oracle has spoken from the void").await;
        assert!(
            hits.iter().any(|h| h.path == "source.rs" && h.line == 2),
            "expected hit at source.rs:2, got: {hits:?}"
        );
    }

    // An empty message yields no needles, so the function returns early with
    // no hits (the directory is not even a git repository).
    #[tokio::test]
    async fn locate_error_provenance_returns_empty_for_nonsense() {
        let dir = tempfile::tempdir().expect("tempdir");
        let hits = locate_error_provenance(dir.path(), "").await;
        assert!(hits.is_empty());
    }
}