1use std::collections::{HashMap, HashSet};
6use std::sync::Arc;
7
8use once_cell::sync::Lazy;
9use regex::Regex;
10
11use super::message::{has_errors, Message};
12use super::schema::LogMatcherDef;
13use super::types::{
14 BodyRule, CompiledMatcher, EmitSeverity, EmitTemplate, EndCondition, MatcherId,
15};
16
/// Settings that control how [`compile_matchers`] validates definitions and labels its
/// diagnostics.
#[derive(Debug, Clone)]
pub struct CompileOptions {
    /// Name of the file the definitions came from. It is used as the prefix of every field
    /// path attached to a diagnostic; when `None`, the prefix is `"unknown"`.
    pub source_file: Option<String>,

    /// When `true`, a warning is emitted for each `{{ name }}` reference in an emit template
    /// whose `name` is not a named capture group of any successfully compiled start or body
    /// regex of the same matcher.
    ///
    /// NOTE(review): despite the field name, the check reports *unknown references*, not
    /// capture groups that are defined but never used.
    pub warn_unused_captures: bool,

    /// Highest `schema_version` a definition may declare; anything greater is rejected
    /// with an error.
    pub max_schema_version: u32,
}
34
35impl Default for CompileOptions {
36 fn default() -> Self {
37 Self {
38 source_file: None,
39 warn_unused_captures: false,
40 max_schema_version: 1,
41 }
42 }
43}
44
45#[derive(Debug)]
53pub struct CompileResult {
54 pub matchers: Vec<CompiledMatcher>,
55 pub messages: Vec<Message>,
56}
57
58pub fn compile_matchers(
71 defs: Vec<LogMatcherDef>,
72 options: CompileOptions,
73) -> Result<CompileResult, Vec<Message>> {
74 let src = options.source_file.as_deref().unwrap_or("unknown");
75 let mut messages: Vec<Message> = Vec::new();
76 let mut matchers: Vec<CompiledMatcher> = Vec::new();
77
78 let mut skip_indices: HashSet<usize> = HashSet::new();
83 let mut first_occurrence: HashMap<String, usize> = HashMap::new();
84
85 for (idx, def) in defs.iter().enumerate() {
86 let id_field = format!("{}:matchers[{}].id", src, idx);
87 if def.id.is_empty() {
88 messages.push(Message::error_at("matcher id must not be empty", &id_field));
89 skip_indices.insert(idx);
90 } else if let Some(&first_idx) = first_occurrence.get(&def.id) {
91 let first_path = format!("{}:matchers[{}].id", src, first_idx);
92 messages.push(
93 Message::error_at(
94 format!("duplicate matcher id: {}", def.id),
95 &id_field,
96 )
97 .with_related(&first_path, "first defined here"),
98 );
99 skip_indices.insert(idx);
100 } else {
101 first_occurrence.insert(def.id.clone(), idx);
102 }
103 }
104
105 for (idx, def) in defs.iter().enumerate() {
109 if skip_indices.contains(&idx) {
110 continue;
111 }
112
113 let field = |suffix: &str| format!("{}:matchers[{}].{}", src, idx, suffix);
114 let mut matcher_msgs: Vec<Message> = Vec::new();
115
116 if def.schema_version > options.max_schema_version {
118 matcher_msgs.push(Message::error_at(
119 format!(
120 "unsupported schema version {}; max is {}",
121 def.schema_version, options.max_schema_version
122 ),
123 field("schema_version"),
124 ));
125 }
126
127 if def.source.is_empty() {
129 matcher_msgs.push(Message::error_at("source must not be empty", field("source")));
130 }
131
132 let start_regex = compile_regex(&def.start.pattern, field("start.match"), &mut matcher_msgs);
134
135 let has_valid_end =
137 matches!(def.end.condition.as_str(), "next_start" | "blank_line");
138 let end = match def.end.condition.as_str() {
139 "next_start" => Some(EndCondition::NextStart),
140 "blank_line" => Some(EndCondition::BlankLine),
141 other => {
142 matcher_msgs.push(Message::error_at(
143 format!(
144 "unknown condition '{}'; expected next_start or blank_line",
145 other
146 ),
147 field("end.condition"),
148 ));
149 None
150 }
151 };
152
153 let mut body_rules: Vec<BodyRule> = Vec::new();
155 for (bidx, brule) in def.body.iter().enumerate() {
156 let brule_field =
157 |s: &str| format!("{}:matchers[{}].body[{}].{}", src, idx, bidx, s);
158
159 if brule.repeat && !has_valid_end {
160 matcher_msgs.push(Message::error_at(
161 "repeat: true requires a valid end condition",
162 brule_field("repeat"),
163 ));
164 }
165
166 let pattern =
167 compile_regex(&brule.pattern, brule_field("match"), &mut matcher_msgs);
168 if let Some(r) = pattern {
169 body_rules.push(BodyRule {
170 pattern: r,
171 optional: brule.optional,
172 repeat: brule.repeat,
173 });
174 }
175 }
176
177 if def.emit.message.is_empty() {
179 matcher_msgs.push(Message::error_at(
180 "emit.message is required",
181 field("emit.message"),
182 ));
183 }
184
185 let severity =
187 parse_severity(&def.emit.severity, field("emit.severity"), &mut matcher_msgs);
188
189 if options.warn_unused_captures {
191 let all_names: HashSet<String> = {
192 let mut names = HashSet::new();
193 if let Some(r) = &start_regex {
194 collect_capture_names(r, &mut names);
195 }
196 for rule in &body_rules {
197 collect_capture_names(&rule.pattern, &mut names);
198 }
199 names
200 };
201
202 let emit = &def.emit;
203 let template_fields: &[(&str, Option<&String>)] = &[
204 ("emit.message", Some(&emit.message)),
205 ("emit.file", emit.file.as_ref()),
206 ("emit.line", emit.line.as_ref()),
207 ("emit.column", emit.column.as_ref()),
208 ("emit.code", emit.code.as_ref()),
209 ];
210
211 for (fname, tmpl) in template_fields.iter() {
212 if let Some(t) = tmpl {
213 for group_name in extract_template_refs(t) {
214 if !all_names.contains(&group_name) {
215 matcher_msgs.push(Message::warning_at(
216 format!(
217 "template '{{{{ {} }}}}' references capture group not found in any regex",
218 group_name
219 ),
220 field(fname),
221 ));
222 }
223 }
224 }
225 }
226 }
227
228 messages.extend(matcher_msgs.iter().cloned());
229
230 if !has_errors(&matcher_msgs)
232 && let (Some(start), Some(end), Some(severity)) = (start_regex, end, severity)
233 {
234 matchers.push(CompiledMatcher {
235 id: MatcherId(def.id.clone()),
236 source: def.source.clone(),
237 priority: def.priority,
238 schema_version: def.schema_version,
239 start,
240 body: body_rules,
241 max_lines: def.max_lines,
242 end,
243 emit: EmitTemplate {
244 severity,
245 message: def.emit.message.clone(),
246 file: def.emit.file.clone(),
247 line: def.emit.line.clone(),
248 column: def.emit.column.clone(),
249 code: def.emit.code.clone(),
250 },
251 });
252 }
253 }
254
255 if has_errors(&messages) {
256 Err(messages)
257 } else {
258 Ok(CompileResult { matchers, messages })
259 }
260}
261
262fn compile_regex(
267 pattern: &str,
268 field_path: impl Into<String>,
269 messages: &mut Vec<Message>,
270) -> Option<Arc<Regex>> {
271 match Regex::new(pattern) {
272 Ok(r) => Some(Arc::new(r)),
273 Err(e) => {
274 messages.push(Message::error_at(
275 format!("invalid regex: {}", e),
276 field_path,
277 ));
278 None
279 }
280 }
281}
282
283fn parse_severity(
284 s: &str,
285 field_path: impl Into<String>,
286 messages: &mut Vec<Message>,
287) -> Option<EmitSeverity> {
288 match s {
289 "error" => Some(EmitSeverity::Error),
290 "warning" => Some(EmitSeverity::Warning),
291 "info" => Some(EmitSeverity::Info),
292 "hint" => Some(EmitSeverity::Hint),
293 other => {
294 messages.push(Message::error_at(
295 format!(
296 "unknown severity '{}'; expected error, warning, info, or hint",
297 other
298 ),
299 field_path,
300 ));
301 None
302 }
303 }
304}
305
306fn collect_capture_names(regex: &Regex, out: &mut HashSet<String>) {
307 for name in regex.capture_names().flatten() {
308 out.insert(name.to_string());
309 }
310}
311
312static TEMPLATE_REF_RE: Lazy<Regex> =
313 Lazy::new(|| Regex::new(r"\{\{\s*(\w+)\s*\}\}").expect("static regex"));
314
315fn extract_template_refs(template: &str) -> Vec<String> {
316 TEMPLATE_REF_RE
317 .captures_iter(template)
318 .map(|c| c[1].to_string())
319 .collect()
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use crate::log_matcher::schema::{BodyRuleDef, EmitDef, EndDef, StartDef};

    /// Builds a definition that passes every validation; tests mutate one field at a time.
    fn minimal_def(id: &str) -> LogMatcherDef {
        LogMatcherDef {
            id: String::from(id),
            source: String::from("test"),
            priority: 0,
            schema_version: 1,
            start: StartDef {
                pattern: String::from("^test"),
            },
            body: Vec::new(),
            max_lines: None,
            end: EndDef {
                condition: String::from("next_start"),
            },
            emit: EmitDef {
                severity: String::from("error"),
                message: String::from("test message"),
                file: None,
                line: None,
                column: None,
                code: None,
            },
        }
    }

    /// Compiles a single definition with default options.
    fn compile_one(def: LogMatcherDef) -> Result<CompileResult, Vec<Message>> {
        compile_matchers(vec![def], CompileOptions::default())
    }

    /// Compiles a single definition with the capture-reference warnings switched on.
    fn compile_one_with_capture_warnings(
        def: LogMatcherDef,
    ) -> Result<CompileResult, Vec<Message>> {
        let options = CompileOptions {
            warn_unused_captures: true,
            ..Default::default()
        };
        compile_matchers(vec![def], options)
    }

    /// True if any message carries a reference whose filename contains `needle`.
    fn has_ref_containing(msgs: &[Message], needle: &str) -> bool {
        msgs.iter().any(|m| {
            m.reference
                .as_ref()
                .is_some_and(|r| r.filename.contains(needle))
        })
    }

    /// True if the text of any message contains `needle`.
    fn has_text_containing(msgs: &[Message], needle: &str) -> bool {
        msgs.iter().any(|m| m.text.contains(needle))
    }

    #[test]
    fn valid_matcher_compiles() {
        let compiled = compile_one(minimal_def("my.matcher")).expect("should succeed");
        assert_eq!(compiled.matchers.len(), 1);
        let only = &compiled.matchers[0];
        assert_eq!(only.id.0, "my.matcher");
        assert_eq!(only.priority, 0);
        assert_eq!(only.schema_version, 1);
        assert!(compiled.messages.is_empty());
    }

    #[test]
    fn empty_defs_succeeds() {
        let compiled =
            compile_matchers(Vec::new(), CompileOptions::default()).expect("should succeed");
        assert!(compiled.matchers.is_empty());
        assert!(compiled.messages.is_empty());
    }

    #[test]
    fn invalid_start_regex_rejected_with_field_path() {
        let mut def = minimal_def("bad.regex");
        def.start.pattern = String::from("^error(unclosed");
        let msgs = compile_one(def).expect_err("should fail");

        let mut ref_paths: Vec<&str> = Vec::new();
        for m in &msgs {
            if let Some(r) = m.reference.as_ref() {
                ref_paths.push(r.filename.as_str());
            }
        }
        assert!(
            ref_paths.iter().any(|p| p.contains("start.match")),
            "expected start.match in refs, got: {:?}",
            ref_paths
        );
        assert!(has_text_containing(&msgs, "invalid regex"));
    }

    #[test]
    fn invalid_body_regex_rejected() {
        let mut def = minimal_def("bad.body");
        def.body.push(BodyRuleDef {
            pattern: String::from("^(bad"),
            optional: false,
            repeat: false,
        });
        let msgs = compile_one(def).expect_err("should fail");
        assert!(has_ref_containing(&msgs, "body[0].match"));
    }

    #[test]
    fn missing_emit_message_rejected() {
        let mut def = minimal_def("no.message");
        def.emit.message = String::new();
        let msgs = compile_one(def).expect_err("should fail");
        assert!(has_ref_containing(&msgs, "emit.message"));
        assert!(has_text_containing(&msgs, "emit.message is required"));
    }

    #[test]
    fn multiple_matchers_compile() {
        let defs: Vec<LogMatcherDef> = ["a.matcher", "b.matcher", "c.matcher"]
            .into_iter()
            .map(minimal_def)
            .collect();
        let compiled = compile_matchers(defs, CompileOptions::default()).expect("should succeed");
        assert_eq!(compiled.matchers.len(), 3);
    }

    #[test]
    fn duplicate_id_error_has_related_ref() {
        let defs = vec![minimal_def("dup.id"), minimal_def("dup.id")];
        let msgs = compile_matchers(defs, CompileOptions::default()).expect_err("should fail");
        let dup_msg = msgs
            .iter()
            .find(|m| m.text.contains("duplicate matcher id"))
            .expect("should have duplicate error");
        assert!(
            !dup_msg.related.is_empty(),
            "expected related reference for duplicate"
        );
        assert!(dup_msg.related[0].label.contains("first defined here"));
    }

    #[test]
    fn first_of_duplicate_still_compiles() {
        let defs = vec![minimal_def("dup"), minimal_def("dup")];
        // The call fails as a whole, so the compiled matchers are not observable here; what
        // can be checked is that only the second definition is flagged as a duplicate.
        let msgs = compile_matchers(defs, CompileOptions::default()).expect_err("should fail");
        let dup_error_count = msgs
            .iter()
            .filter(|m| m.text.contains("duplicate matcher id: dup"))
            .count();
        assert_eq!(dup_error_count, 1, "expected exactly one duplicate error");
    }

    #[test]
    fn defaults_applied() {
        let compiled = compile_one(minimal_def("defaults")).expect("should succeed");
        let m = &compiled.matchers[0];
        assert_eq!(m.priority, 0);
        assert_eq!(m.schema_version, 1);
        assert!(m.body.is_empty());
        assert!(m.max_lines.is_none());
    }

    #[test]
    fn schema_version_too_high_rejected() {
        let mut def = minimal_def("future");
        def.schema_version = 99;
        let msgs = compile_one(def).expect_err("should fail");
        assert!(has_ref_containing(&msgs, "schema_version"));
        assert!(has_text_containing(&msgs, "unsupported schema version"));
    }

    #[test]
    fn unknown_end_condition_rejected() {
        let mut def = minimal_def("bad.end");
        def.end.condition = String::from("timeout");
        let msgs = compile_one(def).expect_err("should fail");
        assert!(has_ref_containing(&msgs, "end.condition"));
    }

    #[test]
    fn blank_line_end_condition_compiles() {
        let mut def = minimal_def("blank.end");
        def.end.condition = String::from("blank_line");
        let compiled = compile_one(def).expect("should succeed");
        assert_eq!(compiled.matchers[0].end, EndCondition::BlankLine);
    }

    #[test]
    fn unknown_severity_rejected() {
        let mut def = minimal_def("bad.sev");
        def.emit.severity = String::from("fatal");
        let msgs = compile_one(def).expect_err("should fail");
        assert!(has_text_containing(&msgs, "unknown severity"));
    }

    #[test]
    fn repeat_without_end_condition_rejected() {
        let mut def = minimal_def("bad.repeat");
        def.end.condition = String::from("bad_cond");
        def.body.push(BodyRuleDef {
            pattern: String::from("^.*"),
            optional: false,
            repeat: true,
        });
        let msgs = compile_one(def).expect_err("should fail");
        assert!(has_text_containing(
            &msgs,
            "repeat: true requires a valid end condition"
        ));
    }

    #[test]
    fn unknown_capture_warning_emitted() {
        let mut def = minimal_def("warn.captures");
        def.start.pattern = String::from("^error (?P<message>.+)");
        def.emit.message = String::from("{{ unknown_group }}");
        let compiled =
            compile_one_with_capture_warnings(def).expect("should succeed with warnings");
        assert!(
            has_text_containing(&compiled.messages, "unknown_group"),
            "expected warning about unknown_group"
        );
    }

    #[test]
    fn no_warning_for_known_capture() {
        let mut def = minimal_def("known.capture");
        def.start.pattern = String::from("^error (?P<message>.+)");
        def.emit.message = String::from("{{ message }}");
        let compiled = compile_one_with_capture_warnings(def).expect("should succeed");
        assert!(
            !has_text_containing(&compiled.messages, "message"),
            "should not warn about known capture group"
        );
    }

    #[test]
    fn source_file_embedded_in_references() {
        let mut def = minimal_def("ref.test");
        def.start.pattern = String::from("unclosed[");
        let options = CompileOptions {
            source_file: Some(String::from("my-extension.yaml")),
            ..Default::default()
        };
        let msgs = compile_matchers(vec![def], options).expect_err("should fail");
        assert!(has_ref_containing(&msgs, "my-extension.yaml"));
    }

    #[test]
    fn all_errors_collected_across_matchers() {
        // Two definitions that fail for unrelated reasons: both must be reported.
        let mut def1 = minimal_def("broken.one");
        def1.start.pattern = String::from("unclosed[");
        let mut def2 = minimal_def("broken.two");
        def2.emit.message = String::new();

        let msgs =
            compile_matchers(vec![def1, def2], CompileOptions::default()).expect_err("should fail");

        let has_broken_one = has_ref_containing(&msgs, "matchers[0]");
        let has_broken_two = has_ref_containing(&msgs, "matchers[1]");
        assert!(has_broken_one, "expected error for matchers[0]");
        assert!(has_broken_two, "expected error for matchers[1]");
    }
}