Skip to main content

oo_ide/
diagnostics_extractor.rs

1//! Diagnostics extraction from parsed task output.
2//!
3//! [`DiagnosticsExtractor`] consumes [`StyledLine`] values produced by the
4//! VT100 parser and emits [`NewIssue`] entries that are fed back to
5//! [`crate::issue_registry::IssueRegistry`] via `Operation::AddIssue`.
6//!
7//! # Scope (MVP)
8//!
9//! Matches two categories of output:
10//!
11//! 1. **GNU-style diagnostic lines** (Rust, C, Go, most compilers):
12//!    ```text
13//!    path/to/file.rs:42:10: error: something bad happened
14//!    path/to/file.rs:42: warning: something is suspicious
15//!    ```
16//!
17//! 2. **TODO / FIXME / HACK / XXX comments** in any output line:
18//!    ```text
19//!    // TODO: refactor this function
20//!    FIXME: this is broken
21//!    ```
22//!
23//! Unknown or malformed lines are silently ignored — the extractor never
24//! panics on bad input.
25//!
26//! # Usage
27//!
28//! One `DiagnosticsExtractor` is created per task execution (called from
29//! [`crate::task_executor::TaskExecutor::spawn`]).  It is shared across the
30//! stdout and stderr streams via [`std::sync::Arc`].
31//!
32//! ```ignore
33//! let extractor = Arc::new(DiagnosticsExtractor::new("task:build:crate:a", "build"));
34//! for line in styled_lines {
35//!     for issue in extractor.extract_from_line(&line) {
36//!         let _ = op_tx.send(vec![Operation::AddIssue { issue }]);
37//!     }
38//! }
39//! ```
40
41use std::path::PathBuf;
42
43use regex::Regex;
44
45use crate::issue_registry::{NewIssue, Severity};
46use crate::vt_parser::StyledLine;
47
48// ---------------------------------------------------------------------------
49// DiagnosticsExtractor
50// ---------------------------------------------------------------------------
51
52/// Stateless, `Send + Sync` diagnostics extractor.
53///
54/// Each task execution gets one instance (shared between stdout and stderr
55/// streams via `Arc`).  All extracted issues receive the same `marker` and
56/// `source` so they can be cleared together when the task reruns.
57pub struct DiagnosticsExtractor {
58    /// Issue registry marker — `"task:{queue}:{target}"`.
59    marker: String,
60    /// Human-readable source tag for the registry (e.g. `"build"`, `"test"`).
61    source: String,
62    /// Matches `path:line:col: severity: message` (three-component prefix).
63    gnu_3: Regex,
64    /// Matches `path:line: severity: message` (two-component prefix, no column).
65    gnu_2: Regex,
66    /// Matches TODO/FIXME/HACK/XXX annotations anywhere in a line.
67    todo_pat: Regex,
68    /// Matches a rustc/cargo severity header: `error[E0425]: message` or
69    /// `warning: message`.  Group 1 = severity keyword, group 2 = message.
70    rustc_header: Regex,
71    /// Matches a rustc/cargo source location line: ` --> path:line:col`.
72    /// Group 1 = path, group 2 = line, group 3 = col.
73    rustc_arrow: Regex,
74}
75
76impl DiagnosticsExtractor {
77    /// Construct a new extractor.
78    ///
79    /// * `marker` — issue registry marker, usually `"task:{queue}:{target}"`.
80    /// * `source` — human-readable tag for the registry (e.g. `"build"`).
81    pub fn new(marker: impl Into<String>, source: impl Into<String>) -> Self {
82        // GNU-style: `path:line:col: severity: message`
83        // Severity keywords: error, warning, note, info, hint.
84        // We capture group 1=path, 2=line, 3=col, 4=severity, 5=message.
85        // The path group handles Windows drive letters (C:\...) by allowing an
86        // optional `X:\` prefix before the non-colon path characters.
87        let gnu_3 = Regex::new(
88            r"(?i)^((?:[A-Za-z]:\\[^:\n]*|[^:\n]+)):(\d+):(\d+):\s*(error|warning|note|info|hint):\s+(.+)$",
89        )
90        .expect("gnu_3 regex is valid");
91
92        // GNU-style without column: `path:line: severity: message`
93        let gnu_2 = Regex::new(
94            r"(?i)^((?:[A-Za-z]:\\[^:\n]*|[^:\n]+)):(\d+):\s*(error|warning|note|info|hint):\s+(.+)$",
95        )
96        .expect("gnu_2 regex is valid");
97
98        // TODO/FIXME/HACK/XXX — word-boundary anchored; captures the tag and the message.
99        let todo_pat = Regex::new(r"(?i)\b(TODO|FIXME|HACK|XXX)\b[:\s]+(.+)$")
100            .expect("todo_pat regex is valid");
101
102        // Rustc/cargo severity header: `error[E0425]: msg` or `warning: msg`.
103        // Group 1 = severity keyword, group 2 = optional `[Exxxx]` code (ignored),
104        // group 3 = message.
105        let rustc_header = Regex::new(
106            r"(?i)^(error|warning|note|info|hint)(\[.*?\])?:\s+(.+)$",
107        )
108        .expect("rustc_header regex is valid");
109
110        // Rustc/cargo source location: ` --> path:line:col`
111        // Leading whitespace is required (distinguishes from other text).
112        // Windows path handled same as gnu_3: optional drive prefix.
113        let rustc_arrow = Regex::new(
114            r"^\s+-->\s+((?:[A-Za-z]:\\[^:\n]*|[^:\n]+)):(\d+):(\d+)\s*$",
115        )
116        .expect("rustc_arrow regex is valid");
117
118        Self {
119            marker: marker.into(),
120            source: source.into(),
121            gnu_3,
122            gnu_2,
123            todo_pat,
124            rustc_header,
125            rustc_arrow,
126        }
127    }
128
129    /// Extract zero or more [`NewIssue`]s from a single styled output line.
130    ///
131    /// The plain text content of the line is used; ANSI styling is ignored.
132    /// Returns an empty `Vec` if the line matches no known pattern.
133    pub fn extract_from_line(&self, line: &StyledLine) -> Vec<NewIssue> {
134        self.extract_from_str(&line.text)
135    }
136
137    /// Extract zero or more [`NewIssue`]s from a plain text line.
138    ///
139    /// Equivalent to [`Self::extract_from_line`] but accepts a `&str` directly,
140    /// useful when the caller already has plain text (e.g. from a terminal PTY
141    /// output buffer where ANSI codes have already been stripped by the VT100
142    /// parser).
143    pub fn extract_from_str(&self, text: &str) -> Vec<NewIssue> {
144        let text = text.trim_end();
145        if text.is_empty() {
146            return Vec::new();
147        }
148
149        // Try the three-component GNU pattern first (more specific).
150        if let Some(caps) = self.gnu_3.captures(text) {
151            let path = PathBuf::from(&caps[1]);
152            let lineno: usize = caps[2].parse().unwrap_or(0);
153            let col: usize = caps[3].parse().unwrap_or(0);
154            let severity = parse_severity(&caps[4]);
155            let message = caps[5].trim().to_string();
156
157            return vec![self.make_issue(
158                severity,
159                message,
160                Some(path),
161                Some(lineno),
162                Some(col),
163            )];
164        }
165
166        // Fall back to the two-component GNU pattern (no column).
167        if let Some(caps) = self.gnu_2.captures(text) {
168            let path = PathBuf::from(&caps[1]);
169            let lineno: usize = caps[2].parse().unwrap_or(0);
170            let severity = parse_severity(&caps[3]);
171            let message = caps[4].trim().to_string();
172
173            return vec![self.make_issue(
174                severity,
175                message,
176                Some(path),
177                Some(lineno),
178                None,
179            )];
180        }
181
182        // Check for TODO/FIXME/HACK/XXX anywhere in the line.
183        if let Some(caps) = self.todo_pat.captures(text) {
184            let message = format!("{}: {}", &caps[1].to_uppercase(), caps[2].trim());
185            return vec![self.make_issue(Severity::Todo, message, None, None, None)];
186        }
187
188        Vec::new()
189    }
190
191    // --- rustc / cargo multi-line helpers ----------------------------------
192
193    /// Try to match a rustc/cargo severity header line such as
194    /// `error[E0425]: cannot find value \`x\`` or `warning: unused variable`.
195    ///
196    /// Returns `(severity, message)` if matched, `None` otherwise.
197    pub fn try_rustc_header(&self, text: &str) -> Option<(Severity, String)> {
198        let text = text.trim_end();
199        self.rustc_header.captures(text).map(|caps| {
200            let severity = parse_severity(&caps[1]);
201            let message = caps[3].trim().to_string();
202            (severity, message)
203        })
204    }
205
206    /// Try to match a rustc/cargo source-location line such as
207    /// ` --> src/main.rs:5:10`.
208    ///
209    /// Returns `(path, line, col)` if matched, `None` otherwise.
210    pub fn try_rustc_arrow(&self, text: &str) -> Option<(PathBuf, usize, usize)> {
211        let text = text.trim_end();
212        self.rustc_arrow.captures(text).map(|caps| {
213            let path = PathBuf::from(&caps[1]);
214            let line: usize = caps[2].parse().unwrap_or(0);
215            let col: usize = caps[3].parse().unwrap_or(0);
216            (path, line, col)
217        })
218    }
219
220    // --- helpers -----------------------------------------------------------
221
222    /// Build a [`NewIssue`] from individual components.
223    ///
224    /// `line` and `column` are 1-based; they are converted to 0-based
225    /// [`Position`] internally.
226    pub fn make_issue(
227        &self,
228        severity: Severity,
229        message: String,
230        path: Option<PathBuf>,
231        line: Option<usize>,
232        column: Option<usize>,
233    ) -> NewIssue {
234        use crate::editor::position::Position;
235
236        let range = match (line, column) {
237            (Some(l), Some(c)) => {
238                let pos = Position { line: l.saturating_sub(1), column: c.saturating_sub(1) };
239                Some((pos, pos))
240            }
241            (Some(l), None) => {
242                let pos = Position { line: l.saturating_sub(1), column: 0 };
243                Some((pos, pos))
244            }
245            _ => None,
246        };
247
248        NewIssue {
249            marker: Some(self.marker.clone()),
250            source: self.source.clone(),
251            path,
252            range,
253            message,
254            severity,
255        }
256    }
257}
258
259// ---------------------------------------------------------------------------
260// Helpers
261// ---------------------------------------------------------------------------
262
263/// Map a matched severity keyword to [`Severity`].
264fn parse_severity(s: &str) -> Severity {
265    match s.to_ascii_lowercase().as_str() {
266        "error" => Severity::Error,
267        "warning" => Severity::Warning,
268        _ => Severity::Info,
269    }
270}
271
272// ---------------------------------------------------------------------------
273// Tests
274// ---------------------------------------------------------------------------
275
#[cfg(test)]
mod tests {
    use super::*;

    /// Extractor with a fixed marker/source, shared by most tests.
    fn extractor() -> DiagnosticsExtractor {
        DiagnosticsExtractor::new("task:build:crate_a", "build")
    }

    /// Wrap plain text in an unstyled [`StyledLine`].
    fn line(text: &str) -> StyledLine {
        StyledLine { text: text.to_string(), spans: vec![] }
    }

    /// Run extraction on `text`, asserting that at most one issue comes back.
    fn single(text: &str) -> Option<NewIssue> {
        let results = extractor().extract_from_line(&line(text));
        assert!(
            results.len() <= 1,
            "expected at most 1 result, got {}: {results:?}",
            results.len()
        );
        results.into_iter().next()
    }

    // 1. GNU 3-component: file:line:col: error: message
    #[test]
    fn gnu_error_with_column() {
        let issue = single("src/main.rs:42:10: error: type mismatch").unwrap();
        assert_eq!(issue.severity, Severity::Error);
        assert_eq!(issue.message, "type mismatch");
        assert_eq!(issue.path, Some(PathBuf::from("src/main.rs")));
        // Range: line 41 (0-indexed), col 9 (0-indexed)
        let (start, _) = issue.range.unwrap();
        assert_eq!(start.line, 41);
        assert_eq!(start.column, 9);
    }

    // 2. GNU 3-component: warning
    #[test]
    fn gnu_warning_with_column() {
        let issue = single("lib/foo.rs:10:5: warning: unused variable").unwrap();
        assert_eq!(issue.severity, Severity::Warning);
        assert_eq!(issue.message, "unused variable");
        assert_eq!(issue.path, Some(PathBuf::from("lib/foo.rs")));
    }

    // 3. GNU 2-component: file:line: error: message (no column)
    #[test]
    fn gnu_error_without_column() {
        let issue = single("build/Makefile:7: error: missing separator").unwrap();
        assert_eq!(issue.severity, Severity::Error);
        assert_eq!(issue.message, "missing separator");
        assert_eq!(issue.path, Some(PathBuf::from("build/Makefile")));
        let (start, _) = issue.range.unwrap();
        assert_eq!(start.line, 6); // 0-indexed
        assert_eq!(start.column, 0);
    }

    // 4. TODO comment → Severity::Todo
    #[test]
    fn todo_comment() {
        let issue = single("  // TODO: refactor this function").unwrap();
        assert_eq!(issue.severity, Severity::Todo);
        assert!(issue.message.contains("refactor this function"), "msg: {}", issue.message);
        assert!(issue.path.is_none());
    }

    // 5. FIXME → Severity::Todo
    #[test]
    fn fixme_comment() {
        let issue = single("FIXME: this is broken").unwrap();
        assert_eq!(issue.severity, Severity::Todo);
    }

    // 6. HACK → Severity::Todo
    #[test]
    fn hack_comment() {
        let issue = single("  HACK: workaround for upstream bug").unwrap();
        assert_eq!(issue.severity, Severity::Todo);
    }

    // 7. XXX → Severity::Todo
    #[test]
    fn xxx_comment() {
        let issue = single("XXX: needs review").unwrap();
        assert_eq!(issue.severity, Severity::Todo);
    }

    // 8. Marker and source are set correctly on extracted issues.
    #[test]
    fn marker_and_source_are_set() {
        let ext = DiagnosticsExtractor::new("task:lint:mylib", "lint");
        let results = ext.extract_from_line(&line("src/lib.rs:1:1: error: oops"));
        let issue = &results[0];
        assert_eq!(issue.marker, Some("task:lint:mylib".into()));
        assert_eq!(issue.source, "lint");
    }

    // 9. Malformed / plain line → no issues.
    #[test]
    fn plain_line_produces_no_issues() {
        assert!(single("   Compiling mylib v0.1.0").is_none());
    }

    // 10. Empty line → no issues.
    #[test]
    fn empty_line_produces_no_issues() {
        assert!(single("").is_none());
    }

    // 11. Note / info keywords → Severity::Info.
    #[test]
    fn note_keyword_maps_to_info() {
        let issue = single("src/main.rs:5:3: note: consider using a semicolon").unwrap();
        assert_eq!(issue.severity, Severity::Info);
    }

    // 12. Multiple tasks produce diagnostics associated to correct marker.
    #[test]
    fn different_markers_are_independent() {
        let ext_a = DiagnosticsExtractor::new("task:build:crate_a", "build");
        let ext_b = DiagnosticsExtractor::new("task:build:crate_b", "build");
        let results_a = ext_a.extract_from_line(&line("a.rs:1:1: error: a broke"));
        let results_b = ext_b.extract_from_line(&line("b.rs:1:1: error: b broke"));
        assert_eq!(results_a[0].marker, Some("task:build:crate_a".into()));
        assert_eq!(results_b[0].marker, Some("task:build:crate_b".into()));
    }

    // 13. Case-insensitive severity matching.
    #[test]
    fn case_insensitive_severity() {
        let issue = single("src/main.rs:1:1: ERROR: uppercase error").unwrap();
        assert_eq!(issue.severity, Severity::Error);
        let issue2 = single("src/main.rs:1:1: Warning: mixed case").unwrap();
        assert_eq!(issue2.severity, Severity::Warning);
    }

    // 14. A `label: text` line is not mistaken for a diagnostic (safety:
    //     avoid false positives on lines like `Running: some task`).
    #[test]
    fn path_with_colon_only_no_line_number_not_matched() {
        // "Running: some task" has no digit after the first colon → no match.
        assert!(single("Running: building the project").is_none());
    }

    // 15. extract_from_str produces the same result as extract_from_line.
    #[test]
    fn extract_from_str_matches_extract_from_line() {
        let ext = extractor();
        let text = "src/main.rs:10:5: warning: dead code";
        let from_line = ext.extract_from_line(&line(text));
        let from_str = ext.extract_from_str(text);
        assert_eq!(from_line.len(), from_str.len());
        assert_eq!(from_line[0].severity, from_str[0].severity);
        assert_eq!(from_line[0].message, from_str[0].message);
    }

    // 16. extract_from_str works on plain terminal text.
    #[test]
    fn extract_from_str_ansi_stripped() {
        // The input is what a diagnostic looks like once escape sequences
        // have been removed upstream; it contains no escape bytes itself.
        let ext = extractor();
        let text = "src/main.rs:1:1: error: oh no";
        let issue = ext.extract_from_str(text).into_iter().next().unwrap();
        assert_eq!(issue.severity, Severity::Error);
        assert_eq!(issue.message, "oh no");
    }

    // 17. Whitespace-only line → no issues (trailing whitespace is trimmed).
    #[test]
    fn whitespace_only_line_produces_no_issues() {
        assert!(single("   \t").is_none());
    }

    // 18. Windows drive-letter path: the drive colon is part of the path.
    #[test]
    fn windows_drive_path_is_kept_whole() {
        let issue = single(r"C:\src\main.rs:42:10: error: bad").unwrap();
        assert_eq!(issue.path, Some(PathBuf::from(r"C:\src\main.rs")));
        let (start, _) = issue.range.unwrap();
        assert_eq!(start.line, 41);
        assert_eq!(start.column, 9);
    }

    // 19. Annotation tags are normalised to upper case in the message.
    #[test]
    fn todo_tag_is_uppercased() {
        let issue = single("todo: lower case tag").unwrap();
        assert_eq!(issue.severity, Severity::Todo);
        assert_eq!(issue.message, "TODO: lower case tag");
    }

    // 20. A GNU diagnostic wins over a TODO tag inside its message.
    #[test]
    fn gnu_pattern_takes_precedence_over_todo() {
        let issue = single("src/a.rs:1:1: warning: TODO: later").unwrap();
        assert_eq!(issue.severity, Severity::Warning);
        assert_eq!(issue.message, "TODO: later");
        assert_eq!(issue.path, Some(PathBuf::from("src/a.rs")));
    }

    // 21. rustc header with an error code: the code is dropped.
    #[test]
    fn rustc_header_with_error_code() {
        let (severity, message) = extractor()
            .try_rustc_header("error[E0425]: cannot find value `x` in this scope")
            .unwrap();
        assert_eq!(severity, Severity::Error);
        assert_eq!(message, "cannot find value `x` in this scope");
    }

    // 22. rustc header without an error code.
    #[test]
    fn rustc_header_without_code() {
        let (severity, message) =
            extractor().try_rustc_header("warning: unused variable: `y`").unwrap();
        assert_eq!(severity, Severity::Warning);
        assert_eq!(message, "unused variable: `y`");
    }

    // 23. Non-header lines are rejected by try_rustc_header.
    #[test]
    fn rustc_header_rejects_plain_line() {
        assert!(extractor().try_rustc_header("   Compiling mylib v0.1.0").is_none());
    }

    // 24. rustc arrow line: numbers are returned as printed (1-based).
    #[test]
    fn rustc_arrow_location() {
        let location = extractor().try_rustc_arrow("  --> src/main.rs:5:10");
        assert_eq!(location, Some((PathBuf::from("src/main.rs"), 5, 10)));
    }

    // 25. rustc arrow requires leading whitespace before `-->`.
    #[test]
    fn rustc_arrow_requires_leading_whitespace() {
        assert!(extractor().try_rustc_arrow("--> src/main.rs:5:10").is_none());
    }
}