Skip to main content

oo_ide/log_matcher/
compiler.rs

1//! Compiles [`LogMatcherDef`] instances into [`CompiledMatcher`] structs.
2//!
3//! This module is purely deterministic and synchronous — no I/O, no async.
4
5use std::collections::{HashMap, HashSet};
6use std::sync::Arc;
7
8use once_cell::sync::Lazy;
9use regex::Regex;
10
11use super::message::{has_errors, Message};
12use super::schema::LogMatcherDef;
13use super::types::{
14    BodyRule, CompiledMatcher, EmitSeverity, EmitTemplate, EndCondition, MatcherId,
15};
16
17// ---------------------------------------------------------------------------
18// CompileOptions
19// ---------------------------------------------------------------------------
20
/// Options controlling the compilation process.
///
/// Use [`Default::default`] for the standard configuration and override
/// individual fields with struct-update syntax.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompileOptions {
    /// Source file name embedded in diagnostic references (e.g. `"extension.yaml"`).
    /// `None` renders as `"unknown"`.
    pub source_file: Option<String>,
    /// Emit a [`crate::log_matcher::MessageLevel::Warning`] for each `{{ group }}` template reference
    /// that does not correspond to a named capture group in any compiled regex.
    ///
    /// Note that, despite the field name, this flags template references that
    /// cannot be resolved — not capture groups that are never referenced.
    pub warn_unused_captures: bool,
    /// Maximum schema version accepted. Matchers with a higher `schema_version`
    /// are rejected. Current maximum is `1`.
    pub max_schema_version: u32,
}

impl Default for CompileOptions {
    /// Returns options with no source file name, the capture lint disabled,
    /// and schema version `1` as the highest accepted version.
    fn default() -> Self {
        Self {
            source_file: None,
            warn_unused_captures: false,
            max_schema_version: 1,
        }
    }
}
44
45// ---------------------------------------------------------------------------
46// CompileResult
47// ---------------------------------------------------------------------------
48
49/// Successful compilation output.
50///
51/// Even on success, `messages` may contain warnings or info diagnostics.
52#[derive(Debug)]
53pub struct CompileResult {
54    pub matchers: Vec<CompiledMatcher>,
55    pub messages: Vec<Message>,
56}
57
58// ---------------------------------------------------------------------------
59// compile_matchers
60// ---------------------------------------------------------------------------
61
62/// Compile a list of [`LogMatcherDef`] into [`CompiledMatcher`] instances.
63///
64/// All errors are collected before returning so callers can fix everything
65/// in one pass.
66///
67/// Returns `Ok(CompileResult)` if no errors occurred (warnings may still be
68/// present in `CompileResult::messages`). Returns `Err(messages)` if any
69/// error was found; the message list contains the full diagnostic set.
70pub fn compile_matchers(
71    defs: Vec<LogMatcherDef>,
72    options: CompileOptions,
73) -> Result<CompileResult, Vec<Message>> {
74    let src = options.source_file.as_deref().unwrap_or("unknown");
75    let mut messages: Vec<Message> = Vec::new();
76    let mut matchers: Vec<CompiledMatcher> = Vec::new();
77
78    // -----------------------------------------------------------------------
79    // Pass 1 — detect empty and duplicate IDs.
80    // Indices in this set are skipped in pass 2.
81    // -----------------------------------------------------------------------
82    let mut skip_indices: HashSet<usize> = HashSet::new();
83    let mut first_occurrence: HashMap<String, usize> = HashMap::new();
84
85    for (idx, def) in defs.iter().enumerate() {
86        let id_field = format!("{}:matchers[{}].id", src, idx);
87        if def.id.is_empty() {
88            messages.push(Message::error_at("matcher id must not be empty", &id_field));
89            skip_indices.insert(idx);
90        } else if let Some(&first_idx) = first_occurrence.get(&def.id) {
91            let first_path = format!("{}:matchers[{}].id", src, first_idx);
92            messages.push(
93                Message::error_at(
94                    format!("duplicate matcher id: {}", def.id),
95                    &id_field,
96                )
97                .with_related(&first_path, "first defined here"),
98            );
99            skip_indices.insert(idx);
100        } else {
101            first_occurrence.insert(def.id.clone(), idx);
102        }
103    }
104
105    // -----------------------------------------------------------------------
106    // Pass 2 — compile each non-skipped matcher.
107    // -----------------------------------------------------------------------
108    for (idx, def) in defs.iter().enumerate() {
109        if skip_indices.contains(&idx) {
110            continue;
111        }
112
113        let field = |suffix: &str| format!("{}:matchers[{}].{}", src, idx, suffix);
114        let mut matcher_msgs: Vec<Message> = Vec::new();
115
116        // schema_version
117        if def.schema_version > options.max_schema_version {
118            matcher_msgs.push(Message::error_at(
119                format!(
120                    "unsupported schema version {}; max is {}",
121                    def.schema_version, options.max_schema_version
122                ),
123                field("schema_version"),
124            ));
125        }
126
127        // source non-empty
128        if def.source.is_empty() {
129            matcher_msgs.push(Message::error_at("source must not be empty", field("source")));
130        }
131
132        // start regex
133        let start_regex = compile_regex(&def.start.pattern, field("start.match"), &mut matcher_msgs);
134
135        // end condition (computed before body rules for the `repeat` check)
136        let has_valid_end =
137            matches!(def.end.condition.as_str(), "next_start" | "blank_line");
138        let end = match def.end.condition.as_str() {
139            "next_start" => Some(EndCondition::NextStart),
140            "blank_line" => Some(EndCondition::BlankLine),
141            other => {
142                matcher_msgs.push(Message::error_at(
143                    format!(
144                        "unknown condition '{}'; expected next_start or blank_line",
145                        other
146                    ),
147                    field("end.condition"),
148                ));
149                None
150            }
151        };
152
153        // body rules
154        let mut body_rules: Vec<BodyRule> = Vec::new();
155        for (bidx, brule) in def.body.iter().enumerate() {
156            let brule_field =
157                |s: &str| format!("{}:matchers[{}].body[{}].{}", src, idx, bidx, s);
158
159            if brule.repeat && !has_valid_end {
160                matcher_msgs.push(Message::error_at(
161                    "repeat: true requires a valid end condition",
162                    brule_field("repeat"),
163                ));
164            }
165
166            let pattern =
167                compile_regex(&brule.pattern, brule_field("match"), &mut matcher_msgs);
168            if let Some(r) = pattern {
169                body_rules.push(BodyRule {
170                    pattern: r,
171                    optional: brule.optional,
172                    repeat: brule.repeat,
173                });
174            }
175        }
176
177        // emit.message
178        if def.emit.message.is_empty() {
179            matcher_msgs.push(Message::error_at(
180                "emit.message is required",
181                field("emit.message"),
182            ));
183        }
184
185        // emit.severity
186        let severity =
187            parse_severity(&def.emit.severity, field("emit.severity"), &mut matcher_msgs);
188
189        // Capture group lint (optional warnings)
190        if options.warn_unused_captures {
191            let all_names: HashSet<String> = {
192                let mut names = HashSet::new();
193                if let Some(r) = &start_regex {
194                    collect_capture_names(r, &mut names);
195                }
196                for rule in &body_rules {
197                    collect_capture_names(&rule.pattern, &mut names);
198                }
199                names
200            };
201
202            let emit = &def.emit;
203            let template_fields: &[(&str, Option<&String>)] = &[
204                ("emit.message", Some(&emit.message)),
205                ("emit.file", emit.file.as_ref()),
206                ("emit.line", emit.line.as_ref()),
207                ("emit.column", emit.column.as_ref()),
208                ("emit.code", emit.code.as_ref()),
209            ];
210
211            for (fname, tmpl) in template_fields.iter() {
212                if let Some(t) = tmpl {
213                    for group_name in extract_template_refs(t) {
214                        if !all_names.contains(&group_name) {
215                            matcher_msgs.push(Message::warning_at(
216                                format!(
217                                    "template '{{{{ {} }}}}' references capture group not found in any regex",
218                                    group_name
219                                ),
220                                field(fname),
221                            ));
222                        }
223                    }
224                }
225            }
226        }
227
228        messages.extend(matcher_msgs.iter().cloned());
229
230        // Build CompiledMatcher only if this matcher has no errors.
231        if !has_errors(&matcher_msgs)
232            && let (Some(start), Some(end), Some(severity)) = (start_regex, end, severity)
233        {
234            matchers.push(CompiledMatcher {
235                id: MatcherId(def.id.clone()),
236                source: def.source.clone(),
237                priority: def.priority,
238                schema_version: def.schema_version,
239                start,
240                body: body_rules,
241                max_lines: def.max_lines,
242                end,
243                emit: EmitTemplate {
244                    severity,
245                    message: def.emit.message.clone(),
246                    file: def.emit.file.clone(),
247                    line: def.emit.line.clone(),
248                    column: def.emit.column.clone(),
249                    code: def.emit.code.clone(),
250                },
251            });
252        }
253    }
254
255    if has_errors(&messages) {
256        Err(messages)
257    } else {
258        Ok(CompileResult { matchers, messages })
259    }
260}
261
262// ---------------------------------------------------------------------------
263// Internal helpers
264// ---------------------------------------------------------------------------
265
266fn compile_regex(
267    pattern: &str,
268    field_path: impl Into<String>,
269    messages: &mut Vec<Message>,
270) -> Option<Arc<Regex>> {
271    match Regex::new(pattern) {
272        Ok(r) => Some(Arc::new(r)),
273        Err(e) => {
274            messages.push(Message::error_at(
275                format!("invalid regex: {}", e),
276                field_path,
277            ));
278            None
279        }
280    }
281}
282
283fn parse_severity(
284    s: &str,
285    field_path: impl Into<String>,
286    messages: &mut Vec<Message>,
287) -> Option<EmitSeverity> {
288    match s {
289        "error" => Some(EmitSeverity::Error),
290        "warning" => Some(EmitSeverity::Warning),
291        "info" => Some(EmitSeverity::Info),
292        "hint" => Some(EmitSeverity::Hint),
293        other => {
294            messages.push(Message::error_at(
295                format!(
296                    "unknown severity '{}'; expected error, warning, info, or hint",
297                    other
298                ),
299                field_path,
300            ));
301            None
302        }
303    }
304}
305
306fn collect_capture_names(regex: &Regex, out: &mut HashSet<String>) {
307    for name in regex.capture_names().flatten() {
308        out.insert(name.to_string());
309    }
310}
311
312static TEMPLATE_REF_RE: Lazy<Regex> =
313    Lazy::new(|| Regex::new(r"\{\{\s*(\w+)\s*\}\}").expect("static regex"));
314
315fn extract_template_refs(template: &str) -> Vec<String> {
316    TEMPLATE_REF_RE
317        .captures_iter(template)
318        .map(|c| c[1].to_string())
319        .collect()
320}
321
322// ---------------------------------------------------------------------------
323// Unit tests
324// ---------------------------------------------------------------------------
325
#[cfg(test)]
mod tests {
    use super::*;
    use crate::log_matcher::schema::{BodyRuleDef, EmitDef, EndDef, StartDef};

    // -- fixtures and helpers ------------------------------------------------

    /// Smallest definition that compiles cleanly; each test mutates one aspect
    /// of this baseline.
    fn minimal_def(id: &str) -> LogMatcherDef {
        let start = StartDef {
            pattern: String::from("^test"),
        };
        let end = EndDef {
            condition: String::from("next_start"),
        };
        let emit = EmitDef {
            severity: String::from("error"),
            message: String::from("test message"),
            file: None,
            line: None,
            column: None,
            code: None,
        };
        LogMatcherDef {
            id: String::from(id),
            source: String::from("test"),
            priority: 0,
            schema_version: 1,
            start,
            body: Vec::new(),
            max_lines: None,
            end,
            emit,
        }
    }

    /// Non-optional body rule with the given pattern and `repeat` flag.
    fn body_rule(pattern: &str, repeat: bool) -> BodyRuleDef {
        BodyRuleDef {
            pattern: String::from(pattern),
            optional: false,
            repeat,
        }
    }

    /// Compiles a single definition with default options.
    fn compile_one(def: LogMatcherDef) -> Result<CompileResult, Vec<Message>> {
        compile_matchers(vec![def], CompileOptions::default())
    }

    /// Compiles a single definition with the capture lint switched on.
    fn compile_with_capture_lint(def: LogMatcherDef) -> Result<CompileResult, Vec<Message>> {
        let options = CompileOptions {
            warn_unused_captures: true,
            ..Default::default()
        };
        compile_matchers(vec![def], options)
    }

    /// True if any message's primary reference path contains `needle`.
    fn any_ref_contains(msgs: &[Message], needle: &str) -> bool {
        msgs.iter().any(|m| {
            m.reference
                .as_ref()
                .is_some_and(|r| r.filename.contains(needle))
        })
    }

    /// True if any message's text contains `needle`.
    fn any_text_contains(msgs: &[Message], needle: &str) -> bool {
        msgs.iter().any(|m| m.text.contains(needle))
    }

    // -- happy paths ---------------------------------------------------------

    #[test]
    fn valid_matcher_compiles() {
        let result = compile_one(minimal_def("my.matcher")).expect("should succeed");
        assert_eq!(result.matchers.len(), 1);
        let compiled = &result.matchers[0];
        assert_eq!(compiled.id.0, "my.matcher");
        assert_eq!(compiled.priority, 0);
        assert_eq!(compiled.schema_version, 1);
        assert!(result.messages.is_empty());
    }

    #[test]
    fn empty_defs_succeeds() {
        let result =
            compile_matchers(Vec::new(), CompileOptions::default()).expect("should succeed");
        assert!(result.matchers.is_empty());
        assert!(result.messages.is_empty());
    }

    // -- regex errors --------------------------------------------------------

    #[test]
    fn invalid_start_regex_rejected_with_field_path() {
        let mut def = minimal_def("bad.regex");
        def.start.pattern = String::from("^error(unclosed");
        let msgs = compile_one(def).expect_err("should fail");
        let ref_paths: Vec<&str> = msgs
            .iter()
            .filter_map(|m| m.reference.as_ref().map(|r| r.filename.as_str()))
            .collect();
        assert!(
            ref_paths.iter().any(|p| p.contains("start.match")),
            "expected start.match in refs, got: {:?}",
            ref_paths
        );
        assert!(any_text_contains(&msgs, "invalid regex"));
    }

    #[test]
    fn invalid_body_regex_rejected() {
        let mut def = minimal_def("bad.body");
        def.body.push(body_rule("^(bad", false));
        let msgs = compile_one(def).expect_err("should fail");
        assert!(any_ref_contains(&msgs, "body[0].match"));
    }

    // -- emit template -------------------------------------------------------

    #[test]
    fn missing_emit_message_rejected() {
        let mut def = minimal_def("no.message");
        def.emit.message = String::new();
        let msgs = compile_one(def).expect_err("should fail");
        assert!(any_ref_contains(&msgs, "emit.message"));
        assert!(any_text_contains(&msgs, "emit.message is required"));
    }

    // -- multiple definitions and ids ----------------------------------------

    #[test]
    fn multiple_matchers_compile() {
        let defs: Vec<LogMatcherDef> = ["a.matcher", "b.matcher", "c.matcher"]
            .into_iter()
            .map(minimal_def)
            .collect();
        let result = compile_matchers(defs, CompileOptions::default()).expect("should succeed");
        assert_eq!(result.matchers.len(), 3);
    }

    #[test]
    fn duplicate_id_error_has_related_ref() {
        let defs = vec![minimal_def("dup.id"), minimal_def("dup.id")];
        let msgs = compile_matchers(defs, CompileOptions::default()).expect_err("should fail");
        let dup_msg = msgs
            .iter()
            .find(|m| m.text.contains("duplicate matcher id"))
            .expect("should have duplicate error");
        assert!(
            !dup_msg.related.is_empty(),
            "expected related reference for duplicate"
        );
        assert!(dup_msg.related[0].label.contains("first defined here"));
    }

    #[test]
    fn first_of_duplicate_still_compiles() {
        // Only the second definition is at fault. Because any error turns the
        // whole result into `Err`, the check is made on the diagnostics: the
        // duplicate must be reported exactly once, not once per definition.
        let defs = vec![minimal_def("dup"), minimal_def("dup")];
        let msgs = compile_matchers(defs, CompileOptions::default()).expect_err("should fail");
        let dup_error_count = msgs
            .iter()
            .filter(|m| m.text.contains("duplicate matcher id: dup"))
            .count();
        assert_eq!(dup_error_count, 1, "expected exactly one duplicate error");
    }

    #[test]
    fn defaults_applied() {
        let result = compile_one(minimal_def("defaults")).expect("should succeed");
        let m = &result.matchers[0];
        assert_eq!(m.priority, 0);
        assert_eq!(m.schema_version, 1);
        assert!(m.body.is_empty());
        assert!(m.max_lines.is_none());
    }

    // -- schema version, end condition, severity -----------------------------

    #[test]
    fn schema_version_too_high_rejected() {
        let mut def = minimal_def("future");
        def.schema_version = 99;
        let msgs = compile_one(def).expect_err("should fail");
        assert!(any_ref_contains(&msgs, "schema_version"));
        assert!(any_text_contains(&msgs, "unsupported schema version"));
    }

    #[test]
    fn unknown_end_condition_rejected() {
        let mut def = minimal_def("bad.end");
        def.end.condition = String::from("timeout");
        let msgs = compile_one(def).expect_err("should fail");
        assert!(any_ref_contains(&msgs, "end.condition"));
    }

    #[test]
    fn blank_line_end_condition_compiles() {
        let mut def = minimal_def("blank.end");
        def.end.condition = String::from("blank_line");
        let result = compile_one(def).expect("should succeed");
        assert_eq!(
            result.matchers[0].end,
            crate::log_matcher::types::EndCondition::BlankLine
        );
    }

    #[test]
    fn unknown_severity_rejected() {
        let mut def = minimal_def("bad.sev");
        def.emit.severity = String::from("fatal");
        let msgs = compile_one(def).expect_err("should fail");
        assert!(any_text_contains(&msgs, "unknown severity"));
    }

    #[test]
    fn repeat_without_end_condition_rejected() {
        let mut def = minimal_def("bad.repeat");
        def.end.condition = String::from("bad_cond");
        def.body.push(body_rule("^.*", true));
        let msgs = compile_one(def).expect_err("should fail");
        assert!(any_text_contains(
            &msgs,
            "repeat: true requires a valid end condition"
        ));
    }

    // -- capture lint --------------------------------------------------------

    #[test]
    fn unknown_capture_warning_emitted() {
        let mut def = minimal_def("warn.captures");
        def.start.pattern = String::from("^error (?P<message>.+)");
        def.emit.message = String::from("{{ unknown_group }}");
        let result = compile_with_capture_lint(def).expect("should succeed with warnings");
        assert!(
            any_text_contains(&result.messages, "unknown_group"),
            "expected warning about unknown_group"
        );
    }

    #[test]
    fn no_warning_for_known_capture() {
        let mut def = minimal_def("known.capture");
        def.start.pattern = String::from("^error (?P<message>.+)");
        def.emit.message = String::from("{{ message }}");
        let result = compile_with_capture_lint(def).expect("should succeed");
        // "message" IS a named group, so nothing may mention it.
        assert!(
            !any_text_contains(&result.messages, "message"),
            "should not warn about known capture group"
        );
    }

    // -- diagnostic references -----------------------------------------------

    #[test]
    fn source_file_embedded_in_references() {
        let mut def = minimal_def("ref.test");
        def.start.pattern = String::from("unclosed[");
        let options = CompileOptions {
            source_file: Some(String::from("my-extension.yaml")),
            ..Default::default()
        };
        let msgs = compile_matchers(vec![def], options).expect_err("should fail");
        assert!(any_ref_contains(&msgs, "my-extension.yaml"));
    }

    #[test]
    fn all_errors_collected_across_matchers() {
        // Two broken matchers: both must be reported, not just the first.
        let mut def1 = minimal_def("broken.one");
        def1.start.pattern = String::from("unclosed[");
        let mut def2 = minimal_def("broken.two");
        def2.emit.message = String::new();
        let msgs =
            compile_matchers(vec![def1, def2], CompileOptions::default()).expect_err("should fail");
        let has_broken_one = any_ref_contains(&msgs, "matchers[0]");
        let has_broken_two = any_ref_contains(&msgs, "matchers[1]");
        assert!(has_broken_one, "expected error for matchers[0]");
        assert!(has_broken_two, "expected error for matchers[1]");
    }
}