// rsigma_parser/lint/mod.rs

//! Built-in linter for Sigma rules, correlations, and filters.
//!
//! Validates raw `yaml_serde::Value` documents against the Sigma specification
//! v2.1.0 constraints — catching metadata issues that the parser silently
//! ignores (invalid enums, date formats, tag patterns, etc.).
//!
//! # Usage
//!
//! ```rust
//! use rsigma_parser::lint::{lint_yaml_value, Severity};
//!
//! let yaml = "title: Test\nlogsource:\n  category: test\ndetection:\n  sel:\n    field: value\n  condition: sel\n";
//! let value: yaml_serde::Value = yaml_serde::from_str(yaml).unwrap();
//! let warnings = lint_yaml_value(&value);
//! for w in &warnings {
//!     if w.severity == Severity::Error {
//!         eprintln!("{}", w.message);
//!     }
//! }
//! ```
21
pub mod catalogue;
pub mod fix;
mod rules;

use std::collections::{HashMap, HashSet};
use std::fmt;
use std::path::Path;
use std::sync::LazyLock;

use serde::{Deserialize, Serialize};
use yaml_serde::Value;
33
// =============================================================================
// Public types
// =============================================================================
37
38/// Severity of a lint finding.
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
40pub enum Severity {
41    /// Spec violation — the rule is invalid.
42    Error,
43    /// Best-practice issue — the rule works but is not spec-ideal.
44    Warning,
45    /// Informational suggestion — soft best-practice hint (e.g. missing author).
46    Info,
47    /// Subtle hint — lowest severity, for stylistic suggestions.
48    Hint,
49}
50
51impl fmt::Display for Severity {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        match self {
54            Severity::Error => write!(f, "error"),
55            Severity::Warning => write!(f, "warning"),
56            Severity::Info => write!(f, "info"),
57            Severity::Hint => write!(f, "hint"),
58        }
59    }
60}
61
62/// Identifies which lint rule fired.
63#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
64pub enum LintRule {
65    // ── Infrastructure / parse errors ────────────────────────────────────
66    YamlParseError,
67    NotAMapping,
68    FileReadError,
69    SchemaViolation,
70
71    // ── Shared (all document types) ──────────────────────────────────────
72    MissingTitle,
73    EmptyTitle,
74    TitleTooLong,
75    MissingDescription,
76    MissingAuthor,
77    InvalidId,
78    InvalidStatus,
79    MissingLevel,
80    InvalidLevel,
81    InvalidDate,
82    InvalidModified,
83    ModifiedBeforeDate,
84    DescriptionTooLong,
85    NameTooLong,
86    TaxonomyTooLong,
87    NonLowercaseKey,
88
89    // ── Detection rules ──────────────────────────────────────────────────
90    MissingLogsource,
91    MissingDetection,
92    MissingCondition,
93    EmptyDetection,
94    InvalidRelatedType,
95    InvalidRelatedId,
96    RelatedMissingRequired,
97    DeprecatedWithoutRelated,
98    InvalidTag,
99    UnknownTagNamespace,
100    DuplicateTags,
101    DuplicateReferences,
102    DuplicateFields,
103    FalsepositiveTooShort,
104    ScopeTooShort,
105    LogsourceValueNotLowercase,
106    ConditionReferencesUnknown,
107    DeprecatedAggregationSyntax,
108
109    // ── Correlation rules ────────────────────────────────────────────────
110    MissingCorrelation,
111    MissingCorrelationType,
112    InvalidCorrelationType,
113    MissingCorrelationRules,
114    EmptyCorrelationRules,
115    MissingCorrelationTimespan,
116    InvalidTimespanFormat,
117    InvalidWindowMode,
118    MissingSessionGap,
119    GapWithoutSession,
120    InvalidGapFormat,
121    MissingGroupBy,
122    MissingCorrelationCondition,
123    MissingConditionField,
124    InvalidConditionOperator,
125    ConditionValueNotNumeric,
126    GenerateNotBoolean,
127
128    // ── Filter rules ─────────────────────────────────────────────────────
129    MissingFilter,
130    MissingFilterRules,
131    EmptyFilterRules,
132    MissingFilterSelection,
133    MissingFilterCondition,
134    FilterHasLevel,
135    FilterHasStatus,
136    MissingFilterLogsource,
137
138    // ── Detection logic (cross-cutting) ──────────────────────────────────
139    NullInValueList,
140    SingleValueAllModifier,
141    AllWithRe,
142    IncompatibleModifiers,
143    EmptyValueList,
144    WildcardOnlyValue,
145    FlattenedArrayCorrelation,
146    UnsupportedSigmaVersion,
147    ArrayMatchingWithoutVersion,
148    SigmaVersionMismatch,
149    UnknownRuleReference,
150    UnknownKey,
151}
152
153impl fmt::Display for LintRule {
154    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155        let s = match self {
156            LintRule::YamlParseError => "yaml_parse_error",
157            LintRule::NotAMapping => "not_a_mapping",
158            LintRule::FileReadError => "file_read_error",
159            LintRule::SchemaViolation => "schema_violation",
160            LintRule::MissingTitle => "missing_title",
161            LintRule::EmptyTitle => "empty_title",
162            LintRule::TitleTooLong => "title_too_long",
163            LintRule::MissingDescription => "missing_description",
164            LintRule::MissingAuthor => "missing_author",
165            LintRule::InvalidId => "invalid_id",
166            LintRule::InvalidStatus => "invalid_status",
167            LintRule::MissingLevel => "missing_level",
168            LintRule::InvalidLevel => "invalid_level",
169            LintRule::InvalidDate => "invalid_date",
170            LintRule::InvalidModified => "invalid_modified",
171            LintRule::ModifiedBeforeDate => "modified_before_date",
172            LintRule::DescriptionTooLong => "description_too_long",
173            LintRule::NameTooLong => "name_too_long",
174            LintRule::TaxonomyTooLong => "taxonomy_too_long",
175            LintRule::NonLowercaseKey => "non_lowercase_key",
176            LintRule::MissingLogsource => "missing_logsource",
177            LintRule::MissingDetection => "missing_detection",
178            LintRule::MissingCondition => "missing_condition",
179            LintRule::EmptyDetection => "empty_detection",
180            LintRule::InvalidRelatedType => "invalid_related_type",
181            LintRule::InvalidRelatedId => "invalid_related_id",
182            LintRule::RelatedMissingRequired => "related_missing_required",
183            LintRule::DeprecatedWithoutRelated => "deprecated_without_related",
184            LintRule::InvalidTag => "invalid_tag",
185            LintRule::UnknownTagNamespace => "unknown_tag_namespace",
186            LintRule::DuplicateTags => "duplicate_tags",
187            LintRule::DuplicateReferences => "duplicate_references",
188            LintRule::DuplicateFields => "duplicate_fields",
189            LintRule::FalsepositiveTooShort => "falsepositive_too_short",
190            LintRule::ScopeTooShort => "scope_too_short",
191            LintRule::LogsourceValueNotLowercase => "logsource_value_not_lowercase",
192            LintRule::ConditionReferencesUnknown => "condition_references_unknown",
193            LintRule::DeprecatedAggregationSyntax => "deprecated_aggregation_syntax",
194            LintRule::MissingCorrelation => "missing_correlation",
195            LintRule::MissingCorrelationType => "missing_correlation_type",
196            LintRule::InvalidCorrelationType => "invalid_correlation_type",
197            LintRule::MissingCorrelationRules => "missing_correlation_rules",
198            LintRule::EmptyCorrelationRules => "empty_correlation_rules",
199            LintRule::MissingCorrelationTimespan => "missing_correlation_timespan",
200            LintRule::InvalidTimespanFormat => "invalid_timespan_format",
201            LintRule::InvalidWindowMode => "invalid_window_mode",
202            LintRule::MissingSessionGap => "missing_session_gap",
203            LintRule::GapWithoutSession => "gap_without_session",
204            LintRule::InvalidGapFormat => "invalid_gap_format",
205            LintRule::MissingGroupBy => "missing_group_by",
206            LintRule::MissingCorrelationCondition => "missing_correlation_condition",
207            LintRule::MissingConditionField => "missing_condition_field",
208            LintRule::InvalidConditionOperator => "invalid_condition_operator",
209            LintRule::ConditionValueNotNumeric => "condition_value_not_numeric",
210            LintRule::GenerateNotBoolean => "generate_not_boolean",
211            LintRule::MissingFilter => "missing_filter",
212            LintRule::MissingFilterRules => "missing_filter_rules",
213            LintRule::EmptyFilterRules => "empty_filter_rules",
214            LintRule::MissingFilterSelection => "missing_filter_selection",
215            LintRule::MissingFilterCondition => "missing_filter_condition",
216            LintRule::FilterHasLevel => "filter_has_level",
217            LintRule::FilterHasStatus => "filter_has_status",
218            LintRule::MissingFilterLogsource => "missing_filter_logsource",
219            LintRule::NullInValueList => "null_in_value_list",
220            LintRule::SingleValueAllModifier => "single_value_all_modifier",
221            LintRule::AllWithRe => "all_with_re",
222            LintRule::IncompatibleModifiers => "incompatible_modifiers",
223            LintRule::EmptyValueList => "empty_value_list",
224            LintRule::WildcardOnlyValue => "wildcard_only_value",
225            LintRule::FlattenedArrayCorrelation => "flattened_array_correlation",
226            LintRule::UnsupportedSigmaVersion => "unsupported_sigma_version",
227            LintRule::ArrayMatchingWithoutVersion => "array_matching_without_version",
228            LintRule::SigmaVersionMismatch => "sigma_version_mismatch",
229            LintRule::UnknownRuleReference => "unknown_rule_reference",
230            LintRule::UnknownKey => "unknown_key",
231        };
232        write!(f, "{s}")
233    }
234}
235
236/// A source span (line/column, both 0-indexed).
237#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
238pub struct Span {
239    pub start_line: u32,
240    pub start_col: u32,
241    pub end_line: u32,
242    pub end_col: u32,
243}
244
// =============================================================================
// Auto-fix types
// =============================================================================
248
249/// Whether a fix is safe to apply automatically or needs manual review.
250#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
251pub enum FixDisposition {
252    Safe,
253    Unsafe,
254}
255
256/// A single patch operation within a [`Fix`].
257#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
258pub enum FixPatch {
259    ReplaceValue { path: String, new_value: String },
260    ReplaceKey { path: String, new_key: String },
261    Remove { path: String },
262}
263
264/// A suggested fix for a lint finding.
265#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
266pub struct Fix {
267    pub title: String,
268    pub disposition: FixDisposition,
269    pub patches: Vec<FixPatch>,
270}
271
272/// A single lint finding.
273#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
274pub struct LintWarning {
275    pub rule: LintRule,
276    pub severity: Severity,
277    pub message: String,
278    pub path: String,
279    pub span: Option<Span>,
280    pub fix: Option<Fix>,
281}
282
283impl fmt::Display for LintWarning {
284    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
285        write!(
286            f,
287            "{}[{}]: {}\n    --> {}",
288            self.severity, self.rule, self.message, self.path
289        )
290    }
291}
292
293/// Result of linting a single file (may contain multiple YAML documents).
294#[derive(Debug, Clone, Serialize)]
295pub struct FileLintResult {
296    pub path: std::path::PathBuf,
297    pub warnings: Vec<LintWarning>,
298}
299
300impl FileLintResult {
301    pub fn has_errors(&self) -> bool {
302        self.warnings.iter().any(|w| w.severity == Severity::Error)
303    }
304
305    pub fn error_count(&self) -> usize {
306        self.warnings
307            .iter()
308            .filter(|w| w.severity == Severity::Error)
309            .count()
310    }
311
312    pub fn warning_count(&self) -> usize {
313        self.warnings
314            .iter()
315            .filter(|w| w.severity == Severity::Warning)
316            .count()
317    }
318
319    pub fn info_count(&self) -> usize {
320        self.warnings
321            .iter()
322            .filter(|w| w.severity == Severity::Info)
323            .count()
324    }
325
326    pub fn hint_count(&self) -> usize {
327        self.warnings
328            .iter()
329            .filter(|w| w.severity == Severity::Hint)
330            .count()
331    }
332}
333
// =============================================================================
// Helpers (shared with rule submodules)
// =============================================================================
337
338static KEY_CACHE: LazyLock<HashMap<&'static str, Value>> = LazyLock::new(|| {
339    [
340        "action",
341        "author",
342        "category",
343        "condition",
344        "correlation",
345        "date",
346        "description",
347        "detection",
348        "falsepositives",
349        "field",
350        "fields",
351        "filter",
352        "gap",
353        "generate",
354        "group-by",
355        "id",
356        "level",
357        "logsource",
358        "modified",
359        "name",
360        "product",
361        "references",
362        "related",
363        "rsigma.gap",
364        "rsigma.window",
365        "rules",
366        "scope",
367        "selection",
368        "service",
369        "sigma-version",
370        "status",
371        "tags",
372        "taxonomy",
373        "timeframe",
374        "timespan",
375        "title",
376        "type",
377        "window",
378    ]
379    .into_iter()
380    .map(|n| (n, Value::String(n.into())))
381    .collect()
382});
383
384pub(crate) fn key(s: &str) -> &'static Value {
385    KEY_CACHE
386        .get(s)
387        .unwrap_or_else(|| panic!("lint key not pre-cached: \"{s}\" — add it to KEY_CACHE"))
388}
389
390pub(crate) fn get_str<'a>(m: &'a yaml_serde::Mapping, k: &str) -> Option<&'a str> {
391    m.get(key(k)).and_then(|v| v.as_str())
392}
393
394pub(crate) fn get_mapping<'a>(
395    m: &'a yaml_serde::Mapping,
396    k: &str,
397) -> Option<&'a yaml_serde::Mapping> {
398    m.get(key(k)).and_then(|v| v.as_mapping())
399}
400
401pub(crate) fn get_seq<'a>(m: &'a yaml_serde::Mapping, k: &str) -> Option<&'a yaml_serde::Sequence> {
402    m.get(key(k)).and_then(|v| v.as_sequence())
403}
404
405pub(crate) fn warn(
406    rule: LintRule,
407    severity: Severity,
408    message: impl Into<String>,
409    path: impl Into<String>,
410) -> LintWarning {
411    LintWarning {
412        rule,
413        severity,
414        message: message.into(),
415        path: path.into(),
416        span: None,
417        fix: None,
418    }
419}
420
421pub(crate) fn err(
422    rule: LintRule,
423    message: impl Into<String>,
424    path: impl Into<String>,
425) -> LintWarning {
426    warn(rule, Severity::Error, message, path)
427}
428
429pub(crate) fn warning(
430    rule: LintRule,
431    message: impl Into<String>,
432    path: impl Into<String>,
433) -> LintWarning {
434    warn(rule, Severity::Warning, message, path)
435}
436
437pub(crate) fn info(
438    rule: LintRule,
439    message: impl Into<String>,
440    path: impl Into<String>,
441) -> LintWarning {
442    warn(rule, Severity::Info, message, path)
443}
444
445pub(crate) fn safe_fix(title: impl Into<String>, patches: Vec<FixPatch>) -> Option<Fix> {
446    Some(Fix {
447        title: title.into(),
448        disposition: FixDisposition::Safe,
449        patches,
450    })
451}
452
453/// Find the closest match for `input` among `candidates` using edit distance.
454pub(crate) fn closest_match<'a>(
455    input: &str,
456    candidates: &[&'a str],
457    max_distance: usize,
458) -> Option<&'a str> {
459    candidates
460        .iter()
461        .filter(|c| edit_distance(input, c) <= max_distance)
462        .min_by_key(|c| edit_distance(input, c))
463        .copied()
464}
465
/// Levenshtein edit distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `a` into `b`. Characters are
/// Unicode scalar values (`char`), not bytes, so replacing one non-ASCII
/// character counts as one edit regardless of its UTF-8 length.
pub(crate) fn edit_distance(a: &str, b: &str) -> usize {
    let b_chars: Vec<char> = b.chars().collect();
    let b_len = b_chars.len();
    if a.is_empty() {
        return b_len;
    }
    if b_len == 0 {
        return a.chars().count();
    }

    // Two-row dynamic programme: `prev` holds the distances for the prefix of
    // `a` processed so far, `curr` is filled in for the next character.
    let mut prev: Vec<usize> = (0..=b_len).collect();
    let mut curr = vec![0; b_len + 1];
    for (i, ca) in a.chars().enumerate() {
        curr[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let cost = usize::from(ca != cb);
            curr[j + 1] = (prev[j] + cost) // substitute (or keep)
                .min(prev[j + 1] + 1) // delete from `a`
                .min(curr[j] + 1); // insert into `a`
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[b_len]
}
487
/// Edit-distance budget for typo detection.
///
/// NOTE(review): presumably passed as `max_distance` to [`closest_match`]
/// when suggesting a correction for a misspelled name; the call sites are in
/// the rule submodules and not visible here — confirm there.
pub(crate) const TYPO_MAX_EDIT_DISTANCE: usize = 2;
489
// =============================================================================
// Document type detection
// =============================================================================
493
/// The kind of Sigma document being linted, as decided by
/// `detect_doc_type` from the document's top-level keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DocType {
    /// A detection rule — the default when neither a `correlation` nor a
    /// `filter` key is present.
    Detection,
    /// A document with a top-level `correlation` key.
    Correlation,
    /// A document with a top-level `filter` key (and no `correlation` key).
    Filter,
}
500
501impl DocType {
502    pub(crate) fn known_keys(&self) -> &'static [&'static str] {
503        match self {
504            DocType::Detection => rules::shared::KNOWN_KEYS_DETECTION,
505            DocType::Correlation => rules::shared::KNOWN_KEYS_CORRELATION,
506            DocType::Filter => rules::shared::KNOWN_KEYS_FILTER,
507        }
508    }
509}
510
511fn detect_doc_type(m: &yaml_serde::Mapping) -> DocType {
512    if m.contains_key(key("correlation")) {
513        DocType::Correlation
514    } else if m.contains_key(key("filter")) {
515        DocType::Filter
516    } else {
517        DocType::Detection
518    }
519}
520
521fn is_action_fragment(m: &yaml_serde::Mapping) -> bool {
522    matches!(get_str(m, "action"), Some("global" | "reset" | "repeat"))
523}
524
// =============================================================================
// Cross-document reference resolution
// =============================================================================
528
/// An index of referenceable rules (detection rules and correlation rules) by
/// their identifiers (`id` and `name`), each mapped to its resolved
/// specification major. Built file-local for single-text linting and
/// directory-global for directory linting.
struct RuleIndex {
    /// Resolved specification major, keyed by rule `id` and by rule `name`
    /// (a rule that has both appears under both keys).
    majors: HashMap<String, u32>,
    /// Whether the index covers the whole set being linted. Only then is an
    /// unresolved reference genuinely missing rather than living in a file
    /// outside the linted scope.
    complete: bool,
}
540
541impl RuleIndex {
542    fn new(complete: bool) -> Self {
543        Self {
544            majors: HashMap::new(),
545            complete,
546        }
547    }
548
549    /// Index every referenceable document in one multi-document YAML text.
550    fn add_text(&mut self, text: &str) {
551        for doc in yaml_serde::Deserializer::from_str(text) {
552            let Ok(value) = Value::deserialize(doc) else {
553                break;
554            };
555            self.add_value(&value);
556        }
557    }
558
559    fn add_value(&mut self, value: &Value) {
560        let Some(m) = value.as_mapping() else {
561            return;
562        };
563        if is_action_fragment(m) {
564            return;
565        }
566        // Only detection rules and correlation rules can be referenced.
567        if matches!(
568            detect_doc_type(m),
569            DocType::Detection | DocType::Correlation
570        ) {
571            let major = crate::version::resolve_major(
572                m.get(key("sigma-version"))
573                    .and_then(crate::version::major_from_value),
574            );
575            for id_key in ["id", "name"] {
576                if let Some(v) = get_str(m, id_key) {
577                    self.majors.insert(v.to_string(), major);
578                }
579            }
580        }
581    }
582}
583
584/// Extract a `rules:` reference list (a single string or a sequence of strings).
585fn reference_list(v: Option<&Value>) -> Vec<String> {
586    match v {
587        Some(Value::String(s)) => vec![s.clone()],
588        Some(Value::Sequence(seq)) => seq
589            .iter()
590            .filter_map(|x| x.as_str().map(str::to_string))
591            .collect(),
592        _ => Vec::new(),
593    }
594}
595
596/// References declared by a correlation rule (`correlation.rules`).
597fn correlation_rule_refs(m: &yaml_serde::Mapping) -> Vec<String> {
598    m.get(key("correlation"))
599        .and_then(|c| c.as_mapping())
600        .map(|c| reference_list(c.get(key("rules"))))
601        .unwrap_or_default()
602}
603
604/// References declared by a filter rule (`filter.rules`). Returns `None` when the
605/// filter targets every rule (`rules: any`), which is not resolvable.
606fn filter_rule_refs(m: &yaml_serde::Mapping) -> Option<Vec<String>> {
607    let f = m.get(key("filter"))?.as_mapping()?;
608    let rules = f.get(key("rules"))?;
609    if let Some(s) = rules.as_str()
610        && s.eq_ignore_ascii_case("any")
611    {
612        return None;
613    }
614    Some(reference_list(Some(rules)))
615}
616
617/// Cross-document lints over the documents in one YAML text, resolving each
618/// correlation/filter reference against `index`:
619///
620/// - `sigma_version_mismatch` (warning): a referencing document and a resolved
621///   referenced rule declare different specification majors.
622/// - `unknown_rule_reference` (warning): a reference resolves to no rule and the
623///   index is complete (so it is genuinely missing, not out of the linted scope).
624fn lint_cross_references(docs: &[Value], index: &RuleIndex, warnings: &mut Vec<LintWarning>) {
625    for value in docs {
626        let Some(m) = value.as_mapping() else {
627            continue;
628        };
629        if is_action_fragment(m) {
630            continue;
631        }
632        let (refs, path) = match detect_doc_type(m) {
633            DocType::Correlation => (correlation_rule_refs(m), "/correlation/rules"),
634            DocType::Filter => match filter_rule_refs(m) {
635                Some(refs) => (refs, "/filter/rules"),
636                None => continue,
637            },
638            DocType::Detection => continue,
639        };
640        if refs.is_empty() {
641            continue;
642        }
643        let self_major = crate::version::resolve_major(
644            m.get(key("sigma-version"))
645                .and_then(crate::version::major_from_value),
646        );
647        let label = get_str(m, "title")
648            .or_else(|| get_str(m, "name"))
649            .unwrap_or("<rule>");
650        for r in refs {
651            match index.majors.get(&r).copied() {
652                Some(target) if target != self_major => warnings.push(warning(
653                    LintRule::SigmaVersionMismatch,
654                    format!(
655                        "'{label}' targets sigma-version major {self_major} but references rule \
656                         '{r}' which targets major {target}; cross-referencing rules must share a \
657                         specification major"
658                    ),
659                    path,
660                )),
661                Some(_) => {}
662                None if index.complete => warnings.push(warning(
663                    LintRule::UnknownRuleReference,
664                    format!(
665                        "'{label}' references rule '{r}', which was not found among the linted \
666                         rules (matched by id or name)"
667                    ),
668                    path,
669                )),
670                None => {}
671            }
672        }
673    }
674}
675
// =============================================================================
// Public API
// =============================================================================
679
680fn lint_yaml_value_ext(value: &Value, extra_ns: &[String]) -> Vec<LintWarning> {
681    let Some(m) = value.as_mapping() else {
682        return vec![err(
683            LintRule::NotAMapping,
684            "document is not a YAML mapping",
685            "/",
686        )];
687    };
688
689    if is_action_fragment(m) {
690        return Vec::new();
691    }
692
693    let mut warnings = Vec::new();
694
695    rules::metadata::lint_shared(m, &mut warnings);
696
697    let doc_type = detect_doc_type(m);
698    match doc_type {
699        DocType::Detection => rules::detection::lint_detection_rule(m, &mut warnings, extra_ns),
700        DocType::Correlation => rules::correlation::lint_correlation_rule(m, &mut warnings),
701        DocType::Filter => rules::filter::lint_filter_rule(m, &mut warnings),
702    }
703
704    rules::version::lint_sigma_version(m, doc_type, &mut warnings);
705    rules::shared::lint_unknown_keys(m, doc_type, &mut warnings);
706
707    warnings
708}
709
710/// Lint a single YAML document value.
711pub fn lint_yaml_value(value: &Value) -> Vec<LintWarning> {
712    lint_yaml_value_ext(value, &[])
713}
714
715fn lint_yaml_str_ext(text: &str, extra_ns: &[String]) -> Vec<LintWarning> {
716    lint_yaml_str_indexed(text, extra_ns, None)
717}
718
719/// Lint one YAML text. When `external_index` is `Some` (directory linting) it is
720/// the directory-global rule index used for cross-reference checks; when `None`,
721/// a file-local index is built from this text, so cross-file references are out
722/// of scope and `unknown_rule_reference` does not fire.
723fn lint_yaml_str_indexed(
724    text: &str,
725    extra_ns: &[String],
726    external_index: Option<&RuleIndex>,
727) -> Vec<LintWarning> {
728    let mut all_warnings = Vec::new();
729    let mut docs: Vec<Value> = Vec::new();
730
731    for doc in yaml_serde::Deserializer::from_str(text) {
732        let value: Value = match Value::deserialize(doc) {
733            Ok(v) => v,
734            Err(e) => {
735                let mut w = err(
736                    LintRule::YamlParseError,
737                    format!("YAML parse error: {e}"),
738                    "/",
739                );
740                if let Some(loc) = e.location() {
741                    w.span = Some(Span {
742                        start_line: loc.line().saturating_sub(1) as u32,
743                        start_col: loc.column() as u32,
744                        end_line: loc.line().saturating_sub(1) as u32,
745                        end_col: loc.column() as u32 + 1,
746                    });
747                }
748                all_warnings.push(w);
749                break;
750            }
751        };
752
753        for mut w in lint_yaml_value_ext(&value, extra_ns) {
754            w.span = resolve_path_to_span(text, &w.path);
755            all_warnings.push(w);
756        }
757        docs.push(value);
758    }
759
760    // Cross-document checks resolve references against the directory-global index
761    // when given, otherwise a file-local index built from this text's documents.
762    let local_index;
763    let index = match external_index {
764        Some(idx) => idx,
765        None => {
766            let mut idx = RuleIndex::new(false);
767            for v in &docs {
768                idx.add_value(v);
769            }
770            local_index = idx;
771            &local_index
772        }
773    };
774    let mut xref = Vec::new();
775    lint_cross_references(&docs, index, &mut xref);
776    for mut w in xref {
777        w.span = resolve_path_to_span(text, &w.path);
778        all_warnings.push(w);
779    }
780
781    all_warnings
782}
783
784/// Lint a raw YAML string, returning warnings with resolved source spans.
785pub fn lint_yaml_str(text: &str) -> Vec<LintWarning> {
786    lint_yaml_str_ext(text, &[])
787}
788
789fn resolve_path_to_span(text: &str, path: &str) -> Option<Span> {
790    if path == "/" || path.is_empty() {
791        for (i, line) in text.lines().enumerate() {
792            let trimmed = line.trim();
793            if !trimmed.is_empty() && !trimmed.starts_with('#') && trimmed != "---" {
794                return Some(Span {
795                    start_line: i as u32,
796                    start_col: 0,
797                    end_line: i as u32,
798                    end_col: line.len() as u32,
799                });
800            }
801        }
802        return None;
803    }
804
805    let segments: Vec<&str> = path.strip_prefix('/').unwrap_or(path).split('/').collect();
806
807    if segments.is_empty() {
808        return None;
809    }
810
811    let lines: Vec<&str> = text.lines().collect();
812    let mut current_indent: i32 = -1;
813    let mut search_start = 0usize;
814    let mut last_matched_line: Option<usize> = None;
815
816    for segment in &segments {
817        let array_index: Option<usize> = segment.parse().ok();
818        let mut found = false;
819
820        let mut line_num = search_start;
821        while line_num < lines.len() {
822            let line = lines[line_num];
823            let trimmed = line.trim();
824            if trimmed.is_empty() || trimmed.starts_with('#') {
825                line_num += 1;
826                continue;
827            }
828
829            let indent = (line.len() - trimmed.len()) as i32;
830
831            if indent <= current_indent && found {
832                break;
833            }
834            if indent <= current_indent {
835                line_num += 1;
836                continue;
837            }
838
839            if let Some(idx) = array_index {
840                if trimmed.starts_with("- ") && indent > current_indent {
841                    let mut count = 0usize;
842                    for (offset, sl) in lines[search_start..].iter().enumerate() {
843                        let scan = search_start + offset;
844                        let st = sl.trim();
845                        if st.is_empty() || st.starts_with('#') {
846                            continue;
847                        }
848                        let si = (sl.len() - st.len()) as i32;
849                        if si == indent && st.starts_with("- ") {
850                            if count == idx {
851                                last_matched_line = Some(scan);
852                                search_start = scan + 1;
853                                current_indent = indent;
854                                found = true;
855                                break;
856                            }
857                            count += 1;
858                        }
859                        if si < indent && count > 0 {
860                            break;
861                        }
862                    }
863                    break;
864                }
865            } else {
866                let key_pattern = format!("{segment}:");
867                if trimmed.starts_with(&key_pattern) || trimmed == *segment {
868                    last_matched_line = Some(line_num);
869                    search_start = line_num + 1;
870                    current_indent = indent;
871                    found = true;
872                    break;
873                }
874            }
875
876            line_num += 1;
877        }
878
879        if !found && last_matched_line.is_none() {
880            break;
881        }
882    }
883
884    last_matched_line.map(|line_num| {
885        let line = lines[line_num];
886        Span {
887            start_line: line_num as u32,
888            start_col: 0,
889            end_line: line_num as u32,
890            end_col: line.len() as u32,
891        }
892    })
893}
894
895/// Lint all YAML documents in a file.
896pub fn lint_yaml_file(path: &Path) -> crate::error::Result<FileLintResult> {
897    let content = std::fs::read_to_string(path)?;
898    let warnings = lint_yaml_str(&content);
899    Ok(FileLintResult {
900        path: path.to_path_buf(),
901        warnings,
902    })
903}
904
905/// Recursively collect `.yml`/`.yaml` file paths under `dir`, in sorted
906/// depth-first order, skipping hidden directories and any path matching the
907/// exclude set (relative to `base`). Symlink loops are guarded by `visited`.
908fn collect_yaml_files(
909    dir: &Path,
910    base: &Path,
911    exclude_set: Option<&globset::GlobSet>,
912    files: &mut Vec<std::path::PathBuf>,
913    visited: &mut HashSet<std::path::PathBuf>,
914) -> crate::error::Result<()> {
915    let canonical = match dir.canonicalize() {
916        Ok(p) => p,
917        Err(_) => return Ok(()),
918    };
919    if !visited.insert(canonical) {
920        return Ok(());
921    }
922
923    let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
924    entries.sort_by_key(|e| e.path());
925
926    for entry in entries {
927        let path = entry.path();
928
929        if let Some(gs) = exclude_set
930            && let Ok(rel) = path.strip_prefix(base)
931            && gs.is_match(rel)
932        {
933            continue;
934        }
935
936        if path.is_dir() {
937            if path
938                .file_name()
939                .and_then(|n| n.to_str())
940                .is_some_and(|n| n.starts_with('.'))
941            {
942                continue;
943            }
944            collect_yaml_files(&path, base, exclude_set, files, visited)?;
945        } else if matches!(
946            path.extension().and_then(|e| e.to_str()),
947            Some("yml" | "yaml")
948        ) {
949            files.push(path);
950        }
951    }
952    Ok(())
953}
954
955/// Two-pass directory lint: collect and read every file once to build a
956/// directory-global rule index, then lint each file against it so
957/// cross-reference checks see rules defined in sibling files.
958fn lint_directory_impl(
959    dir: &Path,
960    config: Option<&LintConfig>,
961) -> crate::error::Result<Vec<FileLintResult>> {
962    let exclude_set = config.and_then(LintConfig::build_exclude_set);
963    let mut files = Vec::new();
964    let mut visited = HashSet::new();
965    collect_yaml_files(dir, dir, exclude_set.as_ref(), &mut files, &mut visited)?;
966
967    // Read each file once and index every referenceable rule across the tree.
968    let mut index = RuleIndex::new(true);
969    let mut contents: Vec<(std::path::PathBuf, std::result::Result<String, String>)> =
970        Vec::with_capacity(files.len());
971    for path in files {
972        match std::fs::read_to_string(&path) {
973            Ok(text) => {
974                index.add_text(&text);
975                contents.push((path, Ok(text)));
976            }
977            Err(e) => contents.push((path, Err(format!("error reading file: {e}")))),
978        }
979    }
980
981    let mut results = Vec::with_capacity(contents.len());
982    for (path, content) in contents {
983        match content {
984            Ok(text) => {
985                let warnings = match config {
986                    Some(cfg) => {
987                        let w = lint_yaml_str_indexed(&text, &cfg.tag_namespaces, Some(&index));
988                        apply_suppressions(w, cfg, &parse_inline_suppressions(&text))
989                    }
990                    None => lint_yaml_str_indexed(&text, &[], Some(&index)),
991                };
992                results.push(FileLintResult { path, warnings });
993            }
994            Err(msg) => results.push(FileLintResult {
995                path,
996                warnings: vec![err(LintRule::FileReadError, msg, "/")],
997            }),
998        }
999    }
1000    Ok(results)
1001}
1002
/// Lint all `.yml`/`.yaml` files in a directory recursively.
///
/// Delegates to `lint_directory_impl` with no configuration, so no exclude
/// patterns or extra tag namespaces are used and `apply_suppressions` is not
/// run on the findings.
///
/// # Errors
///
/// Propagates any error returned by `lint_directory_impl`.
pub fn lint_yaml_directory(dir: &Path) -> crate::error::Result<Vec<FileLintResult>> {
    lint_directory_impl(dir, None)
}
1007
1008// =============================================================================
1009// Lint configuration & suppression
1010// =============================================================================
1011
/// Configuration for lint rule suppression and severity overrides.
#[derive(Debug, Clone, Default, Serialize)]
pub struct LintConfig {
    /// Names of rules whose findings are dropped entirely. Compared against
    /// the rule's string form (`rule.to_string()`), see `is_disabled`.
    pub disabled_rules: HashSet<String>,
    /// Replacement severity per rule name, applied by `apply_suppressions`
    /// to findings that were not filtered out.
    pub severity_overrides: HashMap<String, Severity>,
    /// Glob patterns compiled by `build_exclude_set`; matching paths are
    /// skipped during directory linting.
    pub exclude_patterns: Vec<String>,
    /// Extra tag namespaces recognised in addition to the built-in set.
    pub tag_namespaces: Vec<String>,
}
1021
/// On-disk shape of the lint configuration file, deserialized by
/// `LintConfig::load` and then validated/normalised into a `LintConfig`.
///
/// Every key is optional in the file; a missing key deserializes to its
/// empty default.
#[derive(Debug, Deserialize)]
struct RawLintConfig {
    /// Rule names to disable.
    #[serde(default)]
    disabled_rules: Vec<String>,
    /// Rule name → severity string; `load` accepts `error`, `warning`,
    /// `info` and `hint` and rejects anything else.
    #[serde(default)]
    severity_overrides: HashMap<String, String>,
    /// Exclude glob patterns; becomes `LintConfig::exclude_patterns`.
    #[serde(default)]
    exclude: Vec<String>,
    /// Extra tag namespaces; lower-cased by `load`.
    #[serde(default)]
    tag_namespaces: Vec<String>,
}
1033
/// Drop repeated entries from `items`, keeping only the first occurrence of
/// each value and leaving the relative order of the survivors untouched.
///
/// Used so that merged `exclude_patterns` / `tag_namespaces` stay stable and
/// don't repeat a value that appears in both the config file and a CLI flag.
fn dedup_preserving_order(items: &mut Vec<String>) {
    let mut seen: HashSet<String> = HashSet::with_capacity(items.len());
    let mut unique = Vec::with_capacity(items.len());
    for item in items.drain(..) {
        if !seen.contains(&item) {
            seen.insert(item.clone());
            unique.push(item);
        }
    }
    *items = unique;
}
1041
1042impl LintConfig {
1043    pub fn load(path: &Path) -> crate::error::Result<Self> {
1044        let content = std::fs::read_to_string(path)?;
1045        let raw: RawLintConfig = yaml_serde::from_str(&content)?;
1046
1047        let disabled_rules: HashSet<String> = raw.disabled_rules.into_iter().collect();
1048        let mut severity_overrides = HashMap::new();
1049        for (rule, sev_str) in &raw.severity_overrides {
1050            let sev = match sev_str.as_str() {
1051                "error" => Severity::Error,
1052                "warning" => Severity::Warning,
1053                "info" => Severity::Info,
1054                "hint" => Severity::Hint,
1055                other => {
1056                    return Err(crate::error::SigmaParserError::InvalidRule(format!(
1057                        "invalid severity '{other}' for rule '{rule}' in lint config"
1058                    )));
1059                }
1060            };
1061            severity_overrides.insert(rule.clone(), sev);
1062        }
1063
1064        let mut exclude_patterns = raw.exclude;
1065        dedup_preserving_order(&mut exclude_patterns);
1066
1067        let mut tag_namespaces: Vec<String> = raw
1068            .tag_namespaces
1069            .into_iter()
1070            .map(|s| s.to_lowercase())
1071            .collect();
1072        dedup_preserving_order(&mut tag_namespaces);
1073
1074        Ok(LintConfig {
1075            disabled_rules,
1076            severity_overrides,
1077            exclude_patterns,
1078            tag_namespaces,
1079        })
1080    }
1081
1082    pub fn find_in_ancestors(start_path: &Path) -> Option<std::path::PathBuf> {
1083        let dir = if start_path.is_file() {
1084            start_path.parent()?
1085        } else {
1086            start_path
1087        };
1088
1089        let mut current = dir;
1090        loop {
1091            let candidate = current.join(".rsigma-lint.yml");
1092            if candidate.is_file() {
1093                return Some(candidate);
1094            }
1095            let candidate_yaml = current.join(".rsigma-lint.yaml");
1096            if candidate_yaml.is_file() {
1097                return Some(candidate_yaml);
1098            }
1099            current = current.parent()?;
1100        }
1101    }
1102
1103    pub fn merge(&mut self, other: &LintConfig) {
1104        self.disabled_rules
1105            .extend(other.disabled_rules.iter().cloned());
1106        for (rule, sev) in &other.severity_overrides {
1107            self.severity_overrides.insert(rule.clone(), *sev);
1108        }
1109        self.exclude_patterns
1110            .extend(other.exclude_patterns.iter().cloned());
1111        dedup_preserving_order(&mut self.exclude_patterns);
1112        self.tag_namespaces
1113            .extend(other.tag_namespaces.iter().cloned());
1114        dedup_preserving_order(&mut self.tag_namespaces);
1115    }
1116
1117    pub fn is_disabled(&self, rule: &LintRule) -> bool {
1118        self.disabled_rules.contains(&rule.to_string())
1119    }
1120
1121    pub fn build_exclude_set(&self) -> Option<globset::GlobSet> {
1122        if self.exclude_patterns.is_empty() {
1123            return None;
1124        }
1125        let mut builder = globset::GlobSetBuilder::new();
1126        for pat in &self.exclude_patterns {
1127            if let Ok(glob) = globset::GlobBuilder::new(pat)
1128                .literal_separator(false)
1129                .build()
1130            {
1131                builder.add(glob);
1132            }
1133        }
1134        builder.build().ok()
1135    }
1136}
1137
1138// =============================================================================
1139// Inline suppression comments
1140// =============================================================================
1141
/// Suppression directives collected from `rsigma-disable` comments in a
/// YAML text (see `parse_inline_suppressions`).
#[derive(Debug, Clone, Default)]
pub struct InlineSuppressions {
    /// Set by a bare `# rsigma-disable` comment: every finding is suppressed.
    pub disable_all: bool,
    /// Rule names suppressed for the whole text, from
    /// `# rsigma-disable rule_a, rule_b`.
    pub file_disabled: HashSet<String>,
    /// Suppressions from `# rsigma-disable-next-line`, keyed by the
    /// zero-based index of the line directly after the comment. `None`
    /// suppresses every rule on that line; `Some(set)` only the named rules.
    pub line_disabled: HashMap<u32, Option<HashSet<String>>>,
}
1148
1149pub fn parse_inline_suppressions(text: &str) -> InlineSuppressions {
1150    let mut result = InlineSuppressions::default();
1151
1152    for (i, line) in text.lines().enumerate() {
1153        let trimmed = line.trim();
1154
1155        let comment = if let Some(pos) = find_yaml_comment(trimmed) {
1156            trimmed[pos + 1..].trim()
1157        } else {
1158            continue;
1159        };
1160
1161        if let Some(rest) = comment.strip_prefix("rsigma-disable-next-line") {
1162            let rest = rest.trim();
1163            let next_line = (i + 1) as u32;
1164            if rest.is_empty() {
1165                result.line_disabled.insert(next_line, None);
1166            } else {
1167                let rules: HashSet<String> = rest
1168                    .split(',')
1169                    .map(|s| s.trim().to_string())
1170                    .filter(|s| !s.is_empty())
1171                    .collect();
1172                if !rules.is_empty() {
1173                    result
1174                        .line_disabled
1175                        .entry(next_line)
1176                        .and_modify(|existing| {
1177                            if let Some(existing_set) = existing {
1178                                existing_set.extend(rules.iter().cloned());
1179                            }
1180                        })
1181                        .or_insert(Some(rules));
1182                }
1183            }
1184        } else if let Some(rest) = comment.strip_prefix("rsigma-disable") {
1185            let rest = rest.trim();
1186            if rest.is_empty() {
1187                result.disable_all = true;
1188            } else {
1189                for rule in rest.split(',') {
1190                    let rule = rule.trim();
1191                    if !rule.is_empty() {
1192                        result.file_disabled.insert(rule.to_string());
1193                    }
1194                }
1195            }
1196        }
1197    }
1198
1199    result
1200}
1201
/// Return the byte offset of the `#` that starts a YAML comment on `line`,
/// or `None` if the line has no comment.
///
/// This is a single-line heuristic, not a YAML parser:
///
/// - A `#` only starts a comment at the beginning of the line or after
///   whitespace, so `http://host/#frag` is not a comment.
/// - A `#` inside a single- or double-quoted scalar is ignored.
/// - Inside double quotes a backslash escapes the next character, so `\"`
///   does not end the scalar. Inside single quotes `''` is an escaped quote.
/// - A quote character only opens a quoted scalar where a scalar can begin
///   (line start, after whitespace, or after `[`, `{`, `,`, `:`), so an
///   apostrophe inside plain text such as `it's` is not treated as a quote.
///
/// Quoting state is not carried across lines, so multi-line scalars are not
/// recognised.
fn find_yaml_comment(line: &str) -> Option<usize> {
    #[derive(Clone, Copy)]
    enum Scalar {
        Plain,
        Single,
        Double,
    }

    let mut state = Scalar::Plain;
    let mut prev: Option<char> = None;
    let mut chars = line.char_indices().peekable();

    while let Some((i, c)) = chars.next() {
        match state {
            Scalar::Single => {
                if c == '\'' {
                    if matches!(chars.peek(), Some(&(_, '\''))) {
                        // `''` is an escaped quote: consume it, stay quoted.
                        chars.next();
                    } else {
                        state = Scalar::Plain;
                    }
                }
            }
            Scalar::Double => match c {
                // Skip whatever the backslash escapes (notably `\"`).
                '\\' => {
                    chars.next();
                }
                '"' => state = Scalar::Plain,
                _ => {}
            },
            Scalar::Plain => {
                let after_space = prev.map_or(true, char::is_whitespace);
                let at_scalar_start =
                    after_space || matches!(prev, Some('[' | '{' | ',' | ':'));
                match c {
                    '#' if after_space => return Some(i),
                    '\'' if at_scalar_start => state = Scalar::Single,
                    '"' if at_scalar_start => state = Scalar::Double,
                    _ => {}
                }
            }
        }
        prev = Some(c);
    }
    None
}
1215
1216impl InlineSuppressions {
1217    pub fn is_suppressed(&self, warning: &LintWarning) -> bool {
1218        if self.disable_all {
1219            return true;
1220        }
1221
1222        let rule_name = warning.rule.to_string();
1223        if self.file_disabled.contains(&rule_name) {
1224            return true;
1225        }
1226
1227        if let Some(span) = &warning.span
1228            && let Some(line_rules) = self.line_disabled.get(&span.start_line)
1229        {
1230            return match line_rules {
1231                None => true,
1232                Some(rules) => rules.contains(&rule_name),
1233            };
1234        }
1235
1236        false
1237    }
1238}
1239
1240// =============================================================================
1241// Suppression filtering
1242// =============================================================================
1243
1244pub fn apply_suppressions(
1245    warnings: Vec<LintWarning>,
1246    config: &LintConfig,
1247    inline: &InlineSuppressions,
1248) -> Vec<LintWarning> {
1249    warnings
1250        .into_iter()
1251        .filter(|w| !config.is_disabled(&w.rule))
1252        .filter(|w| !inline.is_suppressed(w))
1253        .map(|mut w| {
1254            let rule_name = w.rule.to_string();
1255            if let Some(sev) = config.severity_overrides.get(&rule_name) {
1256                w.severity = *sev;
1257            }
1258            w
1259        })
1260        .collect()
1261}
1262
1263pub fn lint_yaml_str_with_config(text: &str, config: &LintConfig) -> Vec<LintWarning> {
1264    let warnings = lint_yaml_str_ext(text, &config.tag_namespaces);
1265    let inline = parse_inline_suppressions(text);
1266    apply_suppressions(warnings, config, &inline)
1267}
1268
1269pub fn lint_yaml_file_with_config(
1270    path: &Path,
1271    config: &LintConfig,
1272) -> crate::error::Result<FileLintResult> {
1273    let content = std::fs::read_to_string(path)?;
1274    let warnings = lint_yaml_str_with_config(&content, config);
1275    Ok(FileLintResult {
1276        path: path.to_path_buf(),
1277        warnings,
1278    })
1279}
1280
/// Lint all `.yml`/`.yaml` files under `dir` recursively with `config`
/// applied.
///
/// Delegates to `lint_directory_impl`, which uses the config's exclude
/// patterns and tag namespaces and runs `apply_suppressions` (together with
/// each file's inline suppressions) on the findings.
///
/// # Errors
///
/// Propagates any error returned by `lint_directory_impl`.
pub fn lint_yaml_directory_with_config(
    dir: &Path,
    config: &LintConfig,
) -> crate::error::Result<Vec<FileLintResult>> {
    lint_directory_impl(dir, Some(config))
}
1287
1288// =============================================================================
1289// Tests
1290// =============================================================================
1291
1292#[cfg(test)]
1293mod tests {
1294    use super::*;
1295
1296    fn yaml_value(yaml: &str) -> Value {
1297        yaml_serde::from_str(yaml).unwrap()
1298    }
1299
1300    fn lint(yaml: &str) -> Vec<LintWarning> {
1301        lint_yaml_value(&yaml_value(yaml))
1302    }
1303
1304    fn has_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
1305        warnings.iter().any(|w| w.rule == rule)
1306    }
1307
1308    fn has_no_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
1309        !has_rule(warnings, rule)
1310    }
1311
1312    #[test]
1313    fn valid_detection_rule_no_errors() {
1314        let w = lint(
1315            r#"
1316title: Test Rule
1317id: 929a690e-bef0-4204-a928-ef5e620d6fcc
1318status: test
1319logsource:
1320    category: process_creation
1321    product: windows
1322detection:
1323    selection:
1324        CommandLine|contains: 'whoami'
1325    condition: selection
1326level: medium
1327tags:
1328    - attack.execution
1329    - attack.t1059
1330"#,
1331        );
1332        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
1333        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
1334    }
1335
1336    #[test]
1337    fn not_a_mapping() {
1338        let v: yaml_serde::Value = yaml_serde::from_str("- item1\n- item2").unwrap();
1339        let w = lint_yaml_value(&v);
1340        assert!(has_rule(&w, LintRule::NotAMapping));
1341    }
1342
1343    #[test]
1344    fn lint_yaml_str_produces_spans() {
1345        let text = r#"title: Test
1346status: invalid_status
1347logsource:
1348    category: test
1349detection:
1350    selection:
1351        field: value
1352    condition: selection
1353level: medium
1354"#;
1355        let warnings = lint_yaml_str(text);
1356        let invalid_status = warnings.iter().find(|w| w.rule == LintRule::InvalidStatus);
1357        assert!(invalid_status.is_some(), "expected InvalidStatus warning");
1358        let span = invalid_status.unwrap().span;
1359        assert!(span.is_some(), "expected span to be resolved");
1360        assert_eq!(span.unwrap().start_line, 1);
1361    }
1362
1363    #[test]
1364    fn yaml_parse_error_uses_correct_rule() {
1365        let text = "title: [unclosed";
1366        let warnings = lint_yaml_str(text);
1367        assert!(has_rule(&warnings, LintRule::YamlParseError));
1368        assert!(has_no_rule(&warnings, LintRule::MissingTitle));
1369    }
1370
1371    #[test]
1372    fn action_global_skipped() {
1373        let w = lint(
1374            r#"
1375action: global
1376title: Global Template
1377logsource:
1378    product: windows
1379"#,
1380        );
1381        assert!(w.is_empty());
1382    }
1383
1384    #[test]
1385    fn action_reset_skipped() {
1386        let w = lint(
1387            r#"
1388action: reset
1389"#,
1390        );
1391        assert!(w.is_empty());
1392    }
1393
1394    #[test]
1395    fn resolve_path_to_span_root() {
1396        let text = "title: Test\nstatus: test\n";
1397        let span = resolve_path_to_span(text, "/");
1398        assert!(span.is_some());
1399        assert_eq!(span.unwrap().start_line, 0);
1400    }
1401
1402    #[test]
1403    fn resolve_path_to_span_top_level_key() {
1404        let text = "title: Test\nstatus: test\nlevel: high\n";
1405        let span = resolve_path_to_span(text, "/status");
1406        assert!(span.is_some());
1407        assert_eq!(span.unwrap().start_line, 1);
1408    }
1409
1410    #[test]
1411    fn resolve_path_to_span_nested_key() {
1412        let text = "title: Test\nlogsource:\n    category: test\n    product: windows\n";
1413        let span = resolve_path_to_span(text, "/logsource/product");
1414        assert!(span.is_some());
1415        assert_eq!(span.unwrap().start_line, 3);
1416    }
1417
1418    #[test]
1419    fn resolve_path_to_span_missing_key() {
1420        let text = "title: Test\nstatus: test\n";
1421        let span = resolve_path_to_span(text, "/nonexistent");
1422        assert!(span.is_none());
1423    }
1424
1425    #[test]
1426    fn multi_doc_yaml_lints_all_documents() {
1427        let text = r#"title: Rule 1
1428logsource:
1429    category: test
1430detection:
1431    selection:
1432        field: value
1433    condition: selection
1434level: medium
1435---
1436title: Rule 2
1437status: bad_status
1438logsource:
1439    category: test
1440detection:
1441    selection:
1442        field: value
1443    condition: selection
1444level: medium
1445"#;
1446        let warnings = lint_yaml_str(text);
1447        assert!(has_rule(&warnings, LintRule::InvalidStatus));
1448    }
1449
1450    #[test]
1451    fn severity_display() {
1452        assert_eq!(format!("{}", Severity::Error), "error");
1453        assert_eq!(format!("{}", Severity::Warning), "warning");
1454        assert_eq!(format!("{}", Severity::Info), "info");
1455        assert_eq!(format!("{}", Severity::Hint), "hint");
1456    }
1457
1458    #[test]
1459    fn file_lint_result_has_errors() {
1460        let result = FileLintResult {
1461            path: std::path::PathBuf::from("test.yml"),
1462            warnings: vec![
1463                warning(LintRule::TitleTooLong, "too long", "/title"),
1464                err(
1465                    LintRule::MissingCondition,
1466                    "missing",
1467                    "/detection/condition",
1468                ),
1469            ],
1470        };
1471        assert!(result.has_errors());
1472        assert_eq!(result.error_count(), 1);
1473        assert_eq!(result.warning_count(), 1);
1474    }
1475
1476    #[test]
1477    fn file_lint_result_no_errors() {
1478        let result = FileLintResult {
1479            path: std::path::PathBuf::from("test.yml"),
1480            warnings: vec![warning(LintRule::TitleTooLong, "too long", "/title")],
1481        };
1482        assert!(!result.has_errors());
1483        assert_eq!(result.error_count(), 0);
1484        assert_eq!(result.warning_count(), 1);
1485    }
1486
1487    #[test]
1488    fn file_lint_result_empty() {
1489        let result = FileLintResult {
1490            path: std::path::PathBuf::from("test.yml"),
1491            warnings: vec![],
1492        };
1493        assert!(!result.has_errors());
1494        assert_eq!(result.error_count(), 0);
1495        assert_eq!(result.warning_count(), 0);
1496    }
1497
1498    #[test]
1499    fn lint_warning_display() {
1500        let w = err(
1501            LintRule::MissingTitle,
1502            "missing required field 'title'",
1503            "/title",
1504        );
1505        let display = format!("{w}");
1506        assert!(display.contains("error"));
1507        assert!(display.contains("missing_title"));
1508        assert!(display.contains("/title"));
1509    }
1510
1511    #[test]
1512    fn file_lint_result_info_count() {
1513        let result = FileLintResult {
1514            path: std::path::PathBuf::from("test.yml"),
1515            warnings: vec![
1516                info(LintRule::MissingDescription, "missing desc", "/description"),
1517                info(LintRule::MissingAuthor, "missing author", "/author"),
1518                warning(LintRule::TitleTooLong, "too long", "/title"),
1519            ],
1520        };
1521        assert_eq!(result.info_count(), 2);
1522        assert_eq!(result.warning_count(), 1);
1523        assert_eq!(result.error_count(), 0);
1524        assert!(!result.has_errors());
1525    }
1526
1527    #[test]
1528    fn parse_inline_disable_all() {
1529        let text = "# rsigma-disable\ntitle: Test\n";
1530        let sup = parse_inline_suppressions(text);
1531        assert!(sup.disable_all);
1532    }
1533
1534    #[test]
1535    fn parse_inline_disable_specific_rules() {
1536        let text = "# rsigma-disable missing_description, missing_author\ntitle: Test\n";
1537        let sup = parse_inline_suppressions(text);
1538        assert!(!sup.disable_all);
1539        assert!(sup.file_disabled.contains("missing_description"));
1540        assert!(sup.file_disabled.contains("missing_author"));
1541    }
1542
1543    #[test]
1544    fn parse_inline_disable_next_line_all() {
1545        let text = "# rsigma-disable-next-line\ntitle: Test\n";
1546        let sup = parse_inline_suppressions(text);
1547        assert!(!sup.disable_all);
1548        assert!(sup.line_disabled.contains_key(&1));
1549        assert!(sup.line_disabled[&1].is_none());
1550    }
1551
1552    #[test]
1553    fn parse_inline_disable_next_line_specific() {
1554        let text = "title: Test\n# rsigma-disable-next-line missing_level\nlevel: medium\n";
1555        let sup = parse_inline_suppressions(text);
1556        assert!(sup.line_disabled.contains_key(&2));
1557        let rules = sup.line_disabled[&2].as_ref().unwrap();
1558        assert!(rules.contains("missing_level"));
1559    }
1560
1561    #[test]
1562    fn parse_inline_no_comments() {
1563        let text = "title: Test\nstatus: test\n";
1564        let sup = parse_inline_suppressions(text);
1565        assert!(!sup.disable_all);
1566        assert!(sup.file_disabled.is_empty());
1567        assert!(sup.line_disabled.is_empty());
1568    }
1569
1570    #[test]
1571    fn parse_inline_comment_in_quoted_string() {
1572        let text = "description: 'no # rsigma-disable here'\ntitle: Test\n";
1573        let sup = parse_inline_suppressions(text);
1574        assert!(!sup.disable_all);
1575        assert!(sup.file_disabled.is_empty());
1576    }
1577
1578    #[test]
1579    fn apply_suppressions_disables_rule() {
1580        let warnings = vec![
1581            info(LintRule::MissingDescription, "desc", "/description"),
1582            info(LintRule::MissingAuthor, "author", "/author"),
1583            warning(LintRule::TitleTooLong, "title", "/title"),
1584        ];
1585        let mut config = LintConfig::default();
1586        config
1587            .disabled_rules
1588            .insert("missing_description".to_string());
1589        let inline = InlineSuppressions::default();
1590
1591        let result = apply_suppressions(warnings, &config, &inline);
1592        assert_eq!(result.len(), 2);
1593        assert!(
1594            result
1595                .iter()
1596                .all(|w| w.rule != LintRule::MissingDescription)
1597        );
1598    }
1599
1600    #[test]
1601    fn apply_suppressions_severity_override() {
1602        let warnings = vec![warning(LintRule::TitleTooLong, "title too long", "/title")];
1603        let mut config = LintConfig::default();
1604        config
1605            .severity_overrides
1606            .insert("title_too_long".to_string(), Severity::Info);
1607        let inline = InlineSuppressions::default();
1608
1609        let result = apply_suppressions(warnings, &config, &inline);
1610        assert_eq!(result.len(), 1);
1611        assert_eq!(result[0].severity, Severity::Info);
1612    }
1613
1614    #[test]
1615    fn apply_suppressions_inline_file_disable() {
1616        let warnings = vec![
1617            info(LintRule::MissingDescription, "desc", "/description"),
1618            info(LintRule::MissingAuthor, "author", "/author"),
1619        ];
1620        let config = LintConfig::default();
1621        let mut inline = InlineSuppressions::default();
1622        inline.file_disabled.insert("missing_author".to_string());
1623
1624        let result = apply_suppressions(warnings, &config, &inline);
1625        assert_eq!(result.len(), 1);
1626        assert_eq!(result[0].rule, LintRule::MissingDescription);
1627    }
1628
1629    #[test]
1630    fn apply_suppressions_inline_disable_all() {
1631        let warnings = vec![
1632            err(LintRule::MissingTitle, "title", "/title"),
1633            warning(LintRule::TitleTooLong, "long", "/title"),
1634        ];
1635        let config = LintConfig::default();
1636        let inline = InlineSuppressions {
1637            disable_all: true,
1638            ..Default::default()
1639        };
1640
1641        let result = apply_suppressions(warnings, &config, &inline);
1642        assert!(result.is_empty());
1643    }
1644
1645    #[test]
1646    fn apply_suppressions_inline_next_line() {
1647        let mut w1 = warning(LintRule::TitleTooLong, "long", "/title");
1648        w1.span = Some(Span {
1649            start_line: 5,
1650            start_col: 0,
1651            end_line: 5,
1652            end_col: 10,
1653        });
1654        let mut w2 = err(LintRule::InvalidStatus, "bad", "/status");
1655        w2.span = Some(Span {
1656            start_line: 6,
1657            start_col: 0,
1658            end_line: 6,
1659            end_col: 10,
1660        });
1661
1662        let config = LintConfig::default();
1663        let mut inline = InlineSuppressions::default();
1664        inline.line_disabled.insert(5, None);
1665
1666        let result = apply_suppressions(vec![w1, w2], &config, &inline);
1667        assert_eq!(result.len(), 1);
1668        assert_eq!(result[0].rule, LintRule::InvalidStatus);
1669    }
1670
1671    #[test]
1672    fn lint_with_config_disables_rules() {
1673        let text = r#"title: Test
1674logsource:
1675    category: test
1676detection:
1677    selection:
1678        field: value
1679    condition: selection
1680level: medium
1681"#;
1682        let mut config = LintConfig::default();
1683        config
1684            .disabled_rules
1685            .insert("missing_description".to_string());
1686        config.disabled_rules.insert("missing_author".to_string());
1687
1688        let warnings = lint_yaml_str_with_config(text, &config);
1689        assert!(
1690            !warnings
1691                .iter()
1692                .any(|w| w.rule == LintRule::MissingDescription)
1693        );
1694        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
1695    }
1696
1697    #[test]
1698    fn lint_with_inline_disable_next_line() {
1699        let text = r#"title: Test
1700# rsigma-disable-next-line missing_level
1701logsource:
1702    category: test
1703detection:
1704    selection:
1705        field: value
1706    condition: selection
1707"#;
1708        let config = LintConfig::default();
1709        let warnings = lint_yaml_str_with_config(text, &config);
1710        assert!(warnings.iter().any(|w| w.rule == LintRule::MissingLevel));
1711    }
1712
1713    #[test]
1714    fn lint_with_inline_file_disable() {
1715        let text = r#"# rsigma-disable missing_description, missing_author
1716title: Test
1717logsource:
1718    category: test
1719detection:
1720    selection:
1721        field: value
1722    condition: selection
1723level: medium
1724"#;
1725        let config = LintConfig::default();
1726        let warnings = lint_yaml_str_with_config(text, &config);
1727        assert!(
1728            !warnings
1729                .iter()
1730                .any(|w| w.rule == LintRule::MissingDescription)
1731        );
1732        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
1733    }
1734
1735    #[test]
1736    fn lint_with_inline_disable_all() {
1737        let text = r#"# rsigma-disable
1738title: Test
1739status: invalid_status
1740logsource:
1741    category: test
1742detection:
1743    selection:
1744        field: value
1745    condition: selection
1746"#;
1747        let config = LintConfig::default();
1748        let warnings = lint_yaml_str_with_config(text, &config);
1749        assert!(warnings.is_empty());
1750    }
1751
1752    #[test]
1753    fn lint_config_merge() {
1754        let mut base = LintConfig::default();
1755        base.disabled_rules.insert("rule_a".to_string());
1756        base.severity_overrides
1757            .insert("rule_b".to_string(), Severity::Info);
1758
1759        let other = LintConfig {
1760            disabled_rules: ["rule_c".to_string()].into_iter().collect(),
1761            severity_overrides: [("rule_d".to_string(), Severity::Hint)]
1762                .into_iter()
1763                .collect(),
1764            exclude_patterns: vec!["test/**".to_string()],
1765            tag_namespaces: vec!["myns".to_string()],
1766        };
1767
1768        base.merge(&other);
1769        assert!(base.disabled_rules.contains("rule_a"));
1770        assert!(base.disabled_rules.contains("rule_c"));
1771        assert_eq!(base.severity_overrides.get("rule_b"), Some(&Severity::Info));
1772        assert_eq!(base.severity_overrides.get("rule_d"), Some(&Severity::Hint));
1773        assert_eq!(base.exclude_patterns, vec!["test/**".to_string()]);
1774        assert!(base.tag_namespaces.contains(&"myns".to_string()));
1775    }
1776
1777    #[test]
1778    fn lint_config_merge_dedups_lists() {
1779        let mut base = LintConfig {
1780            exclude_patterns: vec!["config/**".to_string(), "shared/**".to_string()],
1781            tag_namespaces: vec!["myorg".to_string(), "shared".to_string()],
1782            ..Default::default()
1783        };
1784        let other = LintConfig {
1785            // "shared/**" and "shared" overlap with base on purpose.
1786            exclude_patterns: vec!["shared/**".to_string(), "extra/**".to_string()],
1787            tag_namespaces: vec!["shared".to_string(), "internal".to_string()],
1788            ..Default::default()
1789        };
1790
1791        base.merge(&other);
1792
1793        assert_eq!(
1794            base.exclude_patterns,
1795            vec![
1796                "config/**".to_string(),
1797                "shared/**".to_string(),
1798                "extra/**".to_string()
1799            ]
1800        );
1801        assert_eq!(
1802            base.tag_namespaces,
1803            vec![
1804                "myorg".to_string(),
1805                "shared".to_string(),
1806                "internal".to_string()
1807            ]
1808        );
1809    }
1810
1811    #[test]
1812    fn lint_config_load_dedups_and_normalises() {
1813        let yaml = r#"
1814exclude:
1815  - "config/**"
1816  - "config/**"
1817tag_namespaces:
1818  - MyOrg
1819  - myorg
1820  - internal
1821"#;
1822        let mut tmp = tempfile::NamedTempFile::with_suffix(".yml").unwrap();
1823        std::io::Write::write_all(&mut tmp, yaml.as_bytes()).unwrap();
1824        let config = LintConfig::load(tmp.path()).unwrap();
1825
1826        assert_eq!(config.exclude_patterns, vec!["config/**".to_string()]);
1827        // "MyOrg" lowercases to "myorg" and then collapses with the duplicate.
1828        assert_eq!(
1829            config.tag_namespaces,
1830            vec!["myorg".to_string(), "internal".to_string()]
1831        );
1832    }
1833
1834    #[test]
1835    fn lint_config_is_disabled() {
1836        let mut config = LintConfig::default();
1837        config.disabled_rules.insert("missing_title".to_string());
1838        assert!(config.is_disabled(&LintRule::MissingTitle));
1839        assert!(!config.is_disabled(&LintRule::EmptyTitle));
1840    }
1841
1842    #[test]
1843    fn find_yaml_comment_basic() {
1844        assert_eq!(find_yaml_comment("# comment"), Some(0));
1845        assert_eq!(find_yaml_comment("key: value # comment"), Some(11));
1846        assert_eq!(find_yaml_comment("key: 'value # not comment'"), None);
1847        assert_eq!(find_yaml_comment("key: \"value # not comment\""), None);
1848        assert_eq!(find_yaml_comment("key: value"), None);
1849    }
1850
1851    #[test]
1852    fn no_fix_for_unfixable_rule() {
1853        let w = lint(
1854            r#"
1855title: Test
1856logsource:
1857    category: test
1858"#,
1859        );
1860        assert!(has_rule(&w, LintRule::MissingDetection));
1861        let fix = w
1862            .iter()
1863            .find(|w| w.rule == LintRule::MissingDetection)
1864            .and_then(|w| w.fix.as_ref());
1865        assert!(fix.is_none());
1866    }
1867
1868    #[test]
1869    fn lint_config_exclude_from_yaml() {
1870        let yaml = r#"
1871disabled_rules:
1872  - missing_description
1873exclude:
1874  - "config/**"
1875  - "**/unsupported/**"
1876"#;
1877        let tmp = std::env::temp_dir().join("rsigma_test_exclude.yml");
1878        std::fs::write(&tmp, yaml).unwrap();
1879        let config = LintConfig::load(&tmp).unwrap();
1880        std::fs::remove_file(&tmp).ok();
1881
1882        assert!(config.disabled_rules.contains("missing_description"));
1883        assert_eq!(config.exclude_patterns.len(), 2);
1884        assert_eq!(config.exclude_patterns[0], "config/**");
1885        assert_eq!(config.exclude_patterns[1], "**/unsupported/**");
1886    }
1887
1888    #[test]
1889    fn lint_config_build_exclude_set_empty() {
1890        let config = LintConfig::default();
1891        assert!(config.build_exclude_set().is_none());
1892    }
1893
1894    #[test]
1895    fn lint_config_build_exclude_set_matches() {
1896        let config = LintConfig {
1897            exclude_patterns: vec!["config/**".to_string()],
1898            ..Default::default()
1899        };
1900        let gs = config.build_exclude_set().expect("should build");
1901        assert!(gs.is_match("config/data_mapping/foo.yaml"));
1902        assert!(gs.is_match("config/nested/deep/bar.yml"));
1903        assert!(!gs.is_match("rules/windows/test.yml"));
1904    }
1905
1906    #[test]
1907    fn cross_ref_version_mismatch_within_file() {
1908        // A correlation (major 3) referencing a base rule (major 2) by name, in
1909        // the same file, flags the mismatch. unknown_rule_reference does NOT
1910        // fire for a single file (the index is not complete).
1911        let yaml = r#"
1912title: Base Rule
1913name: base_rule
1914sigma-version: 2
1915logsource:
1916    category: test
1917detection:
1918    selection:
1919        EventID: 1
1920    condition: selection
1921---
1922title: Brute Force
1923sigma-version: 3
1924correlation:
1925    type: event_count
1926    rules:
1927        - base_rule
1928    group-by:
1929        - SourceIP
1930    timespan: 5m
1931    condition:
1932        gte: 10
1933"#;
1934        let w = lint_yaml_str(yaml);
1935        assert!(has_rule(&w, LintRule::SigmaVersionMismatch));
1936        assert!(has_no_rule(&w, LintRule::UnknownRuleReference));
1937    }
1938
1939    #[test]
1940    fn cross_ref_matching_version_no_mismatch() {
1941        let yaml = r#"
1942title: Base Rule
1943name: base_rule
1944sigma-version: 3
1945logsource:
1946    category: test
1947detection:
1948    selection:
1949        EventID: 1
1950    condition: selection
1951---
1952title: Brute Force
1953sigma-version: 3
1954correlation:
1955    type: event_count
1956    rules:
1957        - base_rule
1958    group-by:
1959        - SourceIP
1960    timespan: 5m
1961    condition:
1962        gte: 10
1963"#;
1964        assert!(has_no_rule(
1965            &lint_yaml_str(yaml),
1966            LintRule::SigmaVersionMismatch
1967        ));
1968    }
1969
1970    #[test]
1971    fn cross_ref_unknown_only_with_complete_index() {
1972        let yaml = r#"
1973title: Brute Force
1974correlation:
1975    type: event_count
1976    rules:
1977        - nonexistent_rule
1978    group-by:
1979        - SourceIP
1980    timespan: 5m
1981    condition:
1982        gte: 10
1983"#;
1984        // Single file: the referenced rule may live elsewhere, so it is out of
1985        // scope and unknown_rule_reference must not fire.
1986        assert!(has_no_rule(
1987            &lint_yaml_str(yaml),
1988            LintRule::UnknownRuleReference
1989        ));
1990
1991        // Directory: the index is complete, so the missing reference is flagged.
1992        let tmp = tempfile::tempdir().unwrap();
1993        std::fs::write(tmp.path().join("corr.yml"), yaml).unwrap();
1994        let results = lint_yaml_directory(tmp.path()).unwrap();
1995        assert!(
1996            results
1997                .iter()
1998                .flat_map(|r| &r.warnings)
1999                .any(|w| w.rule == LintRule::UnknownRuleReference)
2000        );
2001    }
2002
2003    #[test]
2004    fn cross_ref_resolves_across_files() {
2005        // Base rule in one file, correlation in another: the directory index
2006        // resolves the reference and flags the major mismatch across files.
2007        let tmp = tempfile::tempdir().unwrap();
2008        std::fs::write(
2009            tmp.path().join("base.yml"),
2010            r#"
2011title: Base Rule
2012name: base_rule
2013sigma-version: 2
2014logsource:
2015    category: test
2016detection:
2017    selection:
2018        EventID: 1
2019    condition: selection
2020"#,
2021        )
2022        .unwrap();
2023        std::fs::write(
2024            tmp.path().join("corr.yml"),
2025            r#"
2026title: Brute Force
2027sigma-version: 3
2028correlation:
2029    type: event_count
2030    rules:
2031        - base_rule
2032    group-by:
2033        - SourceIP
2034    timespan: 5m
2035    condition:
2036        gte: 10
2037"#,
2038        )
2039        .unwrap();
2040        let results = lint_yaml_directory(tmp.path()).unwrap();
2041        let all: Vec<_> = results.iter().flat_map(|r| &r.warnings).collect();
2042        assert!(all.iter().any(|w| w.rule == LintRule::SigmaVersionMismatch));
2043        assert!(!all.iter().any(|w| w.rule == LintRule::UnknownRuleReference));
2044    }
2045
2046    #[test]
2047    fn lint_directory_with_excludes() {
2048        let tmp = tempfile::tempdir().unwrap();
2049        let rules_dir = tmp.path().join("rules");
2050        let config_dir = tmp.path().join("config");
2051        std::fs::create_dir_all(&rules_dir).unwrap();
2052        std::fs::create_dir_all(&config_dir).unwrap();
2053
2054        std::fs::write(
2055            rules_dir.join("good.yml"),
2056            r#"
2057title: Good Rule
2058logsource:
2059    category: test
2060detection:
2061    sel:
2062        field: value
2063    condition: sel
2064level: medium
2065"#,
2066        )
2067        .unwrap();
2068
2069        std::fs::write(
2070            config_dir.join("mapping.yaml"),
2071            r#"
2072Title: Logon
2073Channel: Security
2074EventID: 4624
2075"#,
2076        )
2077        .unwrap();
2078
2079        let no_exclude = LintConfig::default();
2080        let results = lint_yaml_directory_with_config(tmp.path(), &no_exclude).unwrap();
2081        let config_warnings: Vec<_> = results
2082            .iter()
2083            .filter(|r| r.path.to_string_lossy().contains("config"))
2084            .flat_map(|r| &r.warnings)
2085            .collect();
2086        assert!(
2087            !config_warnings.is_empty(),
2088            "config file should produce warnings without excludes"
2089        );
2090
2091        let with_exclude = LintConfig {
2092            exclude_patterns: vec!["config/**".to_string()],
2093            ..Default::default()
2094        };
2095        let results = lint_yaml_directory_with_config(tmp.path(), &with_exclude).unwrap();
2096        let config_results: Vec<_> = results
2097            .iter()
2098            .filter(|r| r.path.to_string_lossy().contains("config"))
2099            .collect();
2100        assert!(config_results.is_empty(), "config file should be excluded");
2101
2102        let rule_results: Vec<_> = results
2103            .iter()
2104            .filter(|r| r.path.to_string_lossy().contains("good.yml"))
2105            .collect();
2106        assert_eq!(rule_results.len(), 1);
2107    }
2108
2109    #[test]
2110    fn all_lint_keys_are_cached() {
2111        const ALL_LINT_KEYS: &[&str] = &[
2112            "action",
2113            "author",
2114            "condition",
2115            "correlation",
2116            "date",
2117            "description",
2118            "detection",
2119            "field",
2120            "filter",
2121            "generate",
2122            "group-by",
2123            "id",
2124            "level",
2125            "logsource",
2126            "modified",
2127            "name",
2128            "rules",
2129            "selection",
2130            "status",
2131            "tags",
2132            "taxonomy",
2133            "timeframe",
2134            "timespan",
2135            "title",
2136            "type",
2137        ];
2138        for key_str in ALL_LINT_KEYS {
2139            assert!(KEY_CACHE.contains_key(key_str), "key not cached: {key_str}");
2140        }
2141    }
2142
2143    #[test]
2144    fn extra_tag_namespace_suppresses_warning() {
2145        let text = r#"title: Test
2146logsource:
2147    category: test
2148detection:
2149    selection:
2150        field: value
2151    condition: selection
2152level: medium
2153tags:
2154    - myorg.custom_tag
2155"#;
2156        // Without extra namespaces, unknown_tag_namespace fires.
2157        let warnings = lint_yaml_str(text);
2158        assert!(has_rule(&warnings, LintRule::UnknownTagNamespace));
2159
2160        // With "myorg" added, the warning is gone.
2161        let config = LintConfig {
2162            tag_namespaces: vec!["myorg".to_string()],
2163            ..Default::default()
2164        };
2165        let warnings = lint_yaml_str_with_config(text, &config);
2166        assert!(has_no_rule(&warnings, LintRule::UnknownTagNamespace));
2167    }
2168
2169    #[test]
2170    fn extra_tag_namespace_from_config_file() {
2171        let yaml = r#"
2172tag_namespaces:
2173  - myorg
2174  - internal
2175"#;
2176        let mut tmp = tempfile::NamedTempFile::with_suffix(".yml").unwrap();
2177        std::io::Write::write_all(&mut tmp, yaml.as_bytes()).unwrap();
2178        let config = LintConfig::load(tmp.path()).unwrap();
2179
2180        assert!(config.tag_namespaces.contains(&"myorg".to_string()));
2181        assert!(config.tag_namespaces.contains(&"internal".to_string()));
2182    }
2183}