Skip to main content

oo_ide/log_matcher/
engine.rs

1//! Runtime log-matcher execution engine (streaming state machine).
2//!
3//! [`MatcherEngine`] processes task output line-by-line, executing
4//! [`super::CompiledMatcher`] rules against each line to accumulate
5//! multi-line diagnostic blocks and emit [`crate::issue_registry::NewIssue`]s
6//! when a block is complete.
7//!
8//! # Usage
9//!
10//! ```ignore
11//! let mut engine = MatcherEngine::new(matchers, "task:build:crate:a");
12//! for line in output_lines {
13//!     for issue in engine.process_line(line) {
14//!         // send Operation::AddIssue { issue } …
15//!     }
16//! }
17//! // On task completion or cancellation (end-of-stream):
18//! for issue in engine.flush() { /* … */ }
19//! ```
20//!
21//! # Design guarantees
22//!
23//! * **Deterministic** – given identical input and matchers, produces identical
24//!   output.  Priority tie-breaking uses original insertion order.
25//! * **No panics** – invalid template references and non-numeric position
26//!   strings are silently discarded; the engine never panics on bad input.
27//! * **Single active block** – at most one block accumulates at any time; any
28//!   new `start` match terminates the current block before starting another.
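//! * **Best-effort recovery** – if a required body rule fails to match, the
//!   block stays active and the line is silently consumed.  The block still
//!   emits when it terminates: on the next `start` match, on a blank line
//!   (for matchers whose end condition is `EndCondition::BlankLine`), when
//!   `max_lines` is exceeded, or on `flush()`.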
32
33use std::collections::HashMap;
34use std::path::PathBuf;
35
36use once_cell::sync::Lazy;
37use regex::Regex;
38
39use crate::editor::position::Position;
40use crate::issue_registry::{NewIssue, Severity};
41
42use super::types::{BodyRule, CompiledMatcher, EmitSeverity, EndCondition};
43
44// ---------------------------------------------------------------------------
45// Public types
46// ---------------------------------------------------------------------------
47
/// Map from a regex's named capture group to the text that group matched.
pub type CaptureMap = HashMap<String, String>;
50
51// ---------------------------------------------------------------------------
52// Internal types
53// ---------------------------------------------------------------------------
54
55struct ActiveBlock {
56    /// Index into the engine's sorted `matchers` slice.
57    matcher_index: usize,
58    captures: CaptureMap,
59    /// Which body rule we are currently evaluating.
60    body_index: usize,
61    /// Number of non-start body lines accumulated (for `max_lines` safety cap).
62    lines_accumulated: u32,
63}
64
65// ---------------------------------------------------------------------------
66// MatcherEngine
67// ---------------------------------------------------------------------------
68
69/// Streaming log-matcher execution engine.
70///
71/// Create one instance **per output stream** (stdout / stderr) per task.
72/// Call [`process_line`](MatcherEngine::process_line) for each line, then
73/// [`flush`](MatcherEngine::flush) once the stream ends.
74pub struct MatcherEngine {
75    /// Matchers sorted *priority descending*, ties broken by *insertion order
76    /// ascending*.  Iterating finds the highest-priority winner first.
77    matchers: Vec<CompiledMatcher>,
78    active: Option<ActiveBlock>,
79    marker: String,
80}
81
82impl MatcherEngine {
83    /// Create a new engine.
84    ///
85    /// * `matchers` – compiled matchers for this stream.  An empty slice
86    ///   makes the engine a deterministic no-op.
87    /// * `marker` – ephemeral issue marker applied to every emitted issue
88    ///   (typically `"task:{queue}:{target}"`).
89    pub fn new(mut matchers: Vec<CompiledMatcher>, marker: impl Into<String>) -> Self {
90        // Sort: highest priority first; ties broken by original insertion order
91        // (stable sort preserves relative order within equal-priority groups).
92        matchers.sort_by(|a, b| b.priority.cmp(&a.priority));
93
94        Self {
95            matchers,
96            active: None,
97            marker: marker.into(),
98        }
99    }
100
101    /// Process a single text line from task output.
102    ///
103    /// Returns zero or more [`NewIssue`]s that became complete because of this
104    /// line (typically zero; usually one only when a block terminates).
105    pub fn process_line(&mut self, line: &str) -> Vec<NewIssue> {
106        let mut issues = Vec::new();
107
108        // ── 1. Start-match scan (highest-priority matcher wins) ──────────────
109        let start_winner = self.matchers.iter().enumerate().find_map(|(idx, m)| {
110            if m.start.is_match(line) {
111                Some((idx, extract_captures(&m.start, line)))
112            } else {
113                None
114            }
115        });
116
117        if let Some((sorted_idx, captures)) = start_winner {
118            // Any start unconditionally terminates the current block.
119            issues.extend(
120                self.active.take().and_then(|old| {
121                    emit_issue(&self.matchers[old.matcher_index], &old.captures, &self.marker)
122                }),
123            );
124            self.active = Some(ActiveBlock {
125                matcher_index: sorted_idx,
126                captures,
127                body_index: 0,
128                lines_accumulated: 0,
129            });
130            return issues;
131        }
132
133        // ── 2. No start match ────────────────────────────────────────────────
134        if self.active.is_none() {
135            return issues; // nothing active — ignore line
136        }
137
138        // ── 3. BlankLine end condition ───────────────────────────────────────
139        let is_blank_end = {
140            let block = self.active.as_ref().unwrap();
141            self.matchers[block.matcher_index].end == EndCondition::BlankLine
142                && line.trim().is_empty()
143        };
144        if is_blank_end {
145            let old = self.active.take().unwrap();
146            if let Some(issue) =
147                emit_issue(&self.matchers[old.matcher_index], &old.captures, &self.marker)
148            {
149                issues.push(issue);
150            }
151            return issues;
152        }
153
154        // ── 4. Accumulate body line ──────────────────────────────────────────
155        {
156            let block = self.active.as_mut().unwrap();
157            block.lines_accumulated += 1;
158        }
159
160        // ── 5. max_lines safety cap ──────────────────────────────────────────
161        let max_exceeded = {
162            let block = self.active.as_ref().unwrap();
163            self.matchers[block.matcher_index]
164                .max_lines
165                .is_some_and(|max| block.lines_accumulated > max)
166        };
167        if max_exceeded {
168            let old = self.active.take().unwrap();
169            if let Some(issue) =
170                emit_issue(&self.matchers[old.matcher_index], &old.captures, &self.marker)
171            {
172                issues.push(issue);
173            }
174            return issues;
175        }
176
177        // ── 6. Body rule processing ──────────────────────────────────────────
178        // Clone body rules to satisfy the borrow checker (Arc<Regex> is O(1)).
179        let body = {
180            let block = self.active.as_ref().unwrap();
181            self.matchers[block.matcher_index].body.clone()
182        };
183        if let Some(ref mut block) = self.active {
184            process_body_line(block, &body, line);
185        }
186
187        issues
188    }
189
190    /// Emit any pending block (end-of-stream).
191    ///
192    /// Call once when the output stream closes (task completed or cancelled).
193    /// Returns at most one issue (the pending block, if any).
194    pub fn flush(&mut self) -> Vec<NewIssue> {
195        self.active
196            .take()
197            .and_then(|old| {
198                emit_issue(&self.matchers[old.matcher_index], &old.captures, &self.marker)
199            })
200            .into_iter()
201            .collect()
202    }
203}
204
205// ---------------------------------------------------------------------------
206// Body-rule state machine
207// ---------------------------------------------------------------------------
208
209fn process_body_line(block: &mut ActiveBlock, body: &[BodyRule], line: &str) {
210    if body.is_empty() || block.body_index >= body.len() {
211        return; // all rules satisfied — accept line as noise
212    }
213
214    // Bounded retry loop: skipping an optional rule retries the same line
215    // against the next rule.  Cap at `body.len() + 1` to prevent any infinite
216    // loop (the +1 covers the edge case of skipping the last optional rule).
217    let max_iter = body.len() + 1;
218    let mut iter = 0;
219
220    while iter < max_iter && block.body_index < body.len() {
221        iter += 1;
222        let rule = &body[block.body_index];
223
224        if rule.pattern.is_match(line) {
225            // Merge named captures, overwriting any existing keys.
226            let new_caps = extract_captures(&rule.pattern, line);
227            block.captures.extend(new_caps);
228
229            if rule.repeat {
230                // Stay on this rule — the same rule may match the next line.
231            } else {
232                block.body_index += 1;
233            }
234            return; // line consumed
235        } else if rule.optional {
236            // Skip optional rule and retry same line against next rule.
237            block.body_index += 1;
238            // Continue loop.
239        } else {
240            // Required rule did not match — best-effort: stop body processing.
241            // The block stays active; future lines may still match subsequent
242            // rules once the body_index is eventually advanced.
243            return;
244        }
245    }
246    // body_index >= body.len(): all rules done; line is accepted as noise.
247}
248
249// ---------------------------------------------------------------------------
250// Helpers
251// ---------------------------------------------------------------------------
252
253/// Placeholder regex: `{{ capture_name }}`.
254static PLACEHOLDER_RE: Lazy<Regex> =
255    Lazy::new(|| Regex::new(r"\{\{\s*(\w+)\s*\}\}").expect("static regex"));
256
257/// Render a template string, replacing `{{ name }}` placeholders with values
258/// from `captures`.  Missing keys produce an empty string (no panic).
259pub fn render_template(template: &str, captures: &CaptureMap) -> String {
260    PLACEHOLDER_RE
261        .replace_all(template, |caps: &regex::Captures<'_>| {
262            captures
263                .get(caps[1].trim())
264                .cloned()
265                .unwrap_or_default()
266        })
267        .into_owned()
268}
269
270/// Extract all named capture group values from `line` using `re`.
271///
272/// Only named groups are collected; the full-match group (index 0) is ignored.
273/// Returns an empty map if `re` does not match or has no named groups.
274pub fn extract_captures(re: &Regex, line: &str) -> CaptureMap {
275    let mut map = CaptureMap::new();
276    if let Some(caps) = re.captures(line) {
277        for name in re.capture_names().flatten() {
278            if let Some(m) = caps.name(name) {
279                map.insert(name.to_string(), m.as_str().to_string());
280            }
281        }
282    }
283    map
284}
285
286/// Build a [`NewIssue`] from a completed block's captures and the matcher's
287/// emit template.
288///
289/// Returns `None` if the rendered message is empty (safety guard).
290fn emit_issue(matcher: &CompiledMatcher, captures: &CaptureMap, marker: &str) -> Option<NewIssue> {
291    let message = render_template(&matcher.emit.message, captures);
292    if message.is_empty() {
293        return None;
294    }
295
296    let severity = match matcher.emit.severity {
297        EmitSeverity::Error => Severity::Error,
298        EmitSeverity::Warning => Severity::Warning,
299        EmitSeverity::Info | EmitSeverity::Hint => Severity::Info,
300    };
301
302    // Resolve file path template (skip if the rendered value is empty).
303    let path = matcher
304        .emit
305        .file
306        .as_deref()
307        .and_then(|tmpl| {
308            let s = render_template(tmpl, captures);
309            if s.is_empty() { None } else { Some(PathBuf::from(s)) }
310        });
311
312    // Resolve line / column templates.
313    // `line` in the template is 1-indexed; `Position.line` is 0-indexed.
314    let range = {
315        let line_num = matcher.emit.line.as_deref().and_then(|tmpl| {
316            render_template(tmpl, captures)
317                .parse::<usize>()
318                .ok()
319                .map(|n| n.saturating_sub(1))
320        });
321        line_num.map(|ln| {
322            let col = matcher
323                .emit
324                .column
325                .as_deref()
326                .and_then(|tmpl| render_template(tmpl, captures).parse::<usize>().ok())
327                .unwrap_or(0);
328            let pos = Position::new(ln, col);
329            (pos, pos)
330        })
331    };
332
333    Some(NewIssue {
334        marker: Some(marker.to_string()),
335        source: matcher.source.clone(),
336        path,
337        range,
338        message,
339        severity,
340    })
341}
342
343// ---------------------------------------------------------------------------
344// Unit tests
345// ---------------------------------------------------------------------------
346
#[cfg(test)]
mod tests {
    use super::*;
    use crate::log_matcher::types::{BodyRule, CompiledMatcher, EmitSeverity, EmitTemplate, EndCondition, MatcherId};
    use std::sync::Arc;

    /// Build an error-severity matcher with no file/line/column templates and
    /// no `max_lines` cap; tests tweak the returned value where needed.
    fn make_matcher(
        id: &str,
        start_pat: &str,
        body: Vec<BodyRule>,
        end: EndCondition,
        emit_msg: &str,
        priority: u32,
    ) -> CompiledMatcher {
        CompiledMatcher {
            id: MatcherId(id.to_string()),
            source: "test".to_string(),
            priority,
            schema_version: 1,
            start: Arc::new(Regex::new(start_pat).unwrap()),
            body,
            max_lines: None,
            end,
            emit: EmitTemplate {
                severity: EmitSeverity::Error,
                message: emit_msg.to_string(),
                file: None,
                line: None,
                column: None,
                code: None,
            },
        }
    }

    /// Build a body rule from a pattern and its `optional` / `repeat` flags.
    fn body_rule(pat: &str, optional: bool, repeat: bool) -> BodyRule {
        BodyRule {
            pattern: Arc::new(Regex::new(pat).unwrap()),
            optional,
            repeat,
        }
    }

    // ── render_template ─────────────────────────────────────────────────────

    #[test]
    fn render_template_basic() {
        let mut caps = CaptureMap::new();
        caps.insert("msg".to_string(), "hello world".to_string());
        assert_eq!(render_template("error: {{ msg }}", &caps), "error: hello world");
    }

    #[test]
    fn render_template_missing_key() {
        let caps = CaptureMap::new();
        assert_eq!(render_template("{{ missing }}", &caps), "");
    }

    #[test]
    fn render_template_multiple() {
        let mut caps = CaptureMap::new();
        caps.insert("file".to_string(), "src/main.rs".to_string());
        caps.insert("line".to_string(), "42".to_string());
        assert_eq!(
            render_template("{{ file }}:{{ line }}", &caps),
            "src/main.rs:42"
        );
    }

    // ── extract_captures ────────────────────────────────────────────────────

    #[test]
    fn extract_captures_basic() {
        let re = Regex::new(r"^(?P<file>.+):(?P<line>\d+)").unwrap();
        let caps = extract_captures(&re, "src/main.rs:42");
        assert_eq!(caps.get("file").map(|s| s.as_str()), Some("src/main.rs"));
        assert_eq!(caps.get("line").map(|s| s.as_str()), Some("42"));
    }

    #[test]
    fn extract_captures_no_match() {
        let re = Regex::new(r"^(?P<file>.+):(?P<line>\d+)").unwrap();
        let caps = extract_captures(&re, "no match here");
        assert!(caps.is_empty());
    }

    // ── single-line start → flush emits ─────────────────────────────────────

    #[test]
    fn single_start_flush() {
        let m = make_matcher(
            "t.err",
            r"^error: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            "{{ message }}",
            0,
        );
        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        let issues = engine.process_line("error: something broke");
        assert!(issues.is_empty(), "not emitted yet");
        let flushed = engine.flush();
        assert_eq!(flushed.len(), 1);
        assert_eq!(flushed[0].message, "something broke");
    }

    // ── multi-line block captures body ───────────────────────────────────────

    #[test]
    fn multiline_block() {
        let m = {
            let mut m = make_matcher(
                "t.err",
                r"^error: (?P<message>.+)",
                vec![body_rule(r"^ --> (?P<file>.+):(?P<line>\d+)", false, false)],
                EndCondition::NextStart,
                "{{ message }}",
                0,
            );
            m.emit.file = Some("{{ file }}".to_string());
            m.emit.line = Some("{{ line }}".to_string());
            m
        };
        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        engine.process_line("error: my error");
        engine.process_line(" --> src/lib.rs:10");
        let issues = engine.flush();
        assert_eq!(issues.len(), 1);
        let issue = &issues[0];
        assert_eq!(issue.message, "my error");
        assert_eq!(issue.path.as_deref(), Some(std::path::Path::new("src/lib.rs")));
        // line 10 → 0-indexed position 9
        assert_eq!(issue.range.map(|(s, _)| s.line), Some(9));
    }

    // ── next_start terminates block ──────────────────────────────────────────

    #[test]
    fn next_start_terminates_block() {
        let m = make_matcher(
            "t.err",
            r"^error: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            "{{ message }}",
            0,
        );
        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        let i1 = engine.process_line("error: first error");
        assert!(i1.is_empty());
        let i2 = engine.process_line("error: second error");
        assert_eq!(i2.len(), 1, "first block emitted on second start");
        assert_eq!(i2[0].message, "first error");
        let flushed = engine.flush();
        assert_eq!(flushed.len(), 1);
        assert_eq!(flushed[0].message, "second error");
    }

    // ── blank_line terminates block ──────────────────────────────────────────

    #[test]
    fn blank_line_terminates_block() {
        let m = make_matcher(
            "t.warn",
            r"^warning: (?P<message>.+)",
            vec![],
            EndCondition::BlankLine,
            "{{ message }}",
            0,
        );
        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        engine.process_line("warning: some warning");
        let issues = engine.process_line("");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].message, "some warning");
        // No more active block.
        assert!(engine.flush().is_empty());
    }

    // ── optional body rule skipped when non-matching ─────────────────────────

    #[test]
    fn optional_body_skipped() {
        let m = make_matcher(
            "t.err",
            r"^error: (?P<message>.+)",
            vec![body_rule(r"^ --> (?P<file>.+)", true, false)], // optional
            EndCondition::NextStart,
            "{{ message }}",
            0,
        );
        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        engine.process_line("error: oops");
        // No body line is ever supplied; the block must still emit on flush.
        let issues = engine.flush();
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].message, "oops");
    }

    // ── repeat body rule matches multiple lines ───────────────────────────────

    #[test]
    fn repeat_body_rule() {
        let m = make_matcher(
            "t.note",
            r"^note: (?P<message>.+)",
            vec![body_rule(r"^\s+\| (?P<detail>.+)", true, true)], // optional+repeat
            EndCondition::NextStart,
            // Include `detail` so the test observes which body line won.
            "{{ message }}: {{ detail }}",
            0,
        );
        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        engine.process_line("note: context");
        engine.process_line("   | line one");
        engine.process_line("   | line two");
        let issues = engine.flush();
        assert_eq!(issues.len(), 1);
        // Both body lines matched the repeating rule; the last match
        // overwrites `detail`.
        assert_eq!(issues[0].message, "context: line two");
    }

    // ── priority resolution ──────────────────────────────────────────────────

    #[test]
    fn priority_resolution() {
        let low = make_matcher(
            "t.low",
            r"^msg: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            "LOW: {{ message }}",
            1,
        );
        let high = make_matcher(
            "t.high",
            r"^msg: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            "HIGH: {{ message }}",
            100,
        );
        // Insert low-priority first, high-priority second — engine should pick high.
        let mut engine = MatcherEngine::new(vec![low, high], "task:q:t");
        engine.process_line("msg: hello");
        let issues = engine.flush();
        assert_eq!(issues.len(), 1);
        assert!(issues[0].message.starts_with("HIGH:"), "high-priority matcher should win");
    }

    #[test]
    fn equal_priority_keeps_insertion_order() {
        let first = make_matcher(
            "t.first",
            r"^msg: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            "FIRST: {{ message }}",
            5,
        );
        let second = make_matcher(
            "t.second",
            r"^msg: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            "SECOND: {{ message }}",
            5,
        );
        // Same priority on both: the matcher inserted first must win.
        let mut engine = MatcherEngine::new(vec![first, second], "task:q:t");
        engine.process_line("msg: hello");
        let issues = engine.flush();
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].message, "FIRST: hello");
    }

    // ── flush on empty engine ────────────────────────────────────────────────

    #[test]
    fn flush_empty() {
        let mut engine = MatcherEngine::new(vec![], "task:q:t");
        assert!(engine.flush().is_empty());
    }

    // ── emit_issue: empty message returns None ───────────────────────────────

    #[test]
    fn emit_issue_empty_message() {
        let m = make_matcher(
            "t.empty",
            r"^error: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            // The start pattern captures `message`, but the template refers
            // to a name that is never captured.
            "{{ missing_key }}",
            0,
        );
        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        engine.process_line("error: something");
        // `missing_key` renders as "", so the message is empty and the block
        // is dropped instead of producing an issue.
        let issues = engine.flush();
        assert!(issues.is_empty(), "empty rendered message should produce no issue");
    }

    // ── non-start line with no active block is ignored ────────────────────────

    #[test]
    fn idle_line_ignored() {
        let m = make_matcher(
            "t.err",
            r"^error: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            "{{ message }}",
            0,
        );
        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        let issues = engine.process_line("just some random output");
        assert!(issues.is_empty());
        assert!(engine.flush().is_empty());
    }

    // ── max_lines cap force-emits block ──────────────────────────────────────

    #[test]
    fn max_lines_cap() {
        let mut m = make_matcher(
            "t.err",
            r"^error: (?P<message>.+)",
            vec![],
            EndCondition::NextStart,
            "{{ message }}",
            0,
        );
        m.max_lines = Some(2); // Up to 2 body lines are tolerated.

        let mut engine = MatcherEngine::new(vec![m], "task:q:t");
        engine.process_line("error: my message");
        engine.process_line("body line 1");
        engine.process_line("body line 2");
        // Third body line (lines_accumulated == 3 > 2) triggers force-emit.
        let issues = engine.process_line("body line 3");
        assert_eq!(issues.len(), 1, "force-emit on max_lines exceeded");
        assert!(engine.flush().is_empty(), "block already consumed");
    }
}