Skip to main content

rsigma_parser/lint/
mod.rs

1//! Built-in linter for Sigma rules, correlations, and filters.
2//!
3//! Validates raw `yaml_serde::Value` documents against the Sigma specification
4//! v2.1.0 constraints — catching metadata issues that the parser silently
5//! ignores (invalid enums, date formats, tag patterns, etc.).
6//!
7//! # Usage
8//!
9//! ```rust
10//! use rsigma_parser::lint::{lint_yaml_value, Severity};
11//!
12//! let yaml = "title: Test\nlogsource:\n  category: test\ndetection:\n  sel:\n    field: value\n  condition: sel\n";
13//! let value: yaml_serde::Value = yaml_serde::from_str(yaml).unwrap();
14//! let warnings = lint_yaml_value(&value);
15//! for w in &warnings {
16//!     if w.severity == Severity::Error {
17//!         eprintln!("{}", w.message);
18//!     }
19//! }
20//! ```
21
22pub mod catalogue;
23pub mod fix;
24mod rules;
25
26use std::collections::{HashMap, HashSet};
27use std::fmt;
28use std::path::Path;
29use std::sync::LazyLock;
30
31use serde::{Deserialize, Serialize};
32use yaml_serde::Value;
33
34use crate::ads::AdsSection;
35
36// =============================================================================
37// Public types
38// =============================================================================
39
40/// Severity of a lint finding.
41#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
42pub enum Severity {
43    /// Spec violation — the rule is invalid.
44    Error,
45    /// Best-practice issue — the rule works but is not spec-ideal.
46    Warning,
47    /// Informational suggestion — soft best-practice hint (e.g. missing author).
48    Info,
49    /// Subtle hint — lowest severity, for stylistic suggestions.
50    Hint,
51}
52
53impl fmt::Display for Severity {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        match self {
56            Severity::Error => write!(f, "error"),
57            Severity::Warning => write!(f, "warning"),
58            Severity::Info => write!(f, "info"),
59            Severity::Hint => write!(f, "hint"),
60        }
61    }
62}
63
64/// Identifies which lint rule fired.
65#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
66pub enum LintRule {
67    // ── Infrastructure / parse errors ────────────────────────────────────
68    YamlParseError,
69    NotAMapping,
70    FileReadError,
71    SchemaViolation,
72
73    // ── Shared (all document types) ──────────────────────────────────────
74    MissingTitle,
75    EmptyTitle,
76    TitleTooLong,
77    MissingDescription,
78    MissingAuthor,
79    InvalidId,
80    InvalidStatus,
81    MissingLevel,
82    InvalidLevel,
83    InvalidDate,
84    InvalidModified,
85    ModifiedBeforeDate,
86    DescriptionTooLong,
87    NameTooLong,
88    TaxonomyTooLong,
89    NonLowercaseKey,
90
91    // ── Detection rules ──────────────────────────────────────────────────
92    MissingLogsource,
93    MissingDetection,
94    MissingCondition,
95    EmptyDetection,
96    InvalidRelatedType,
97    InvalidRelatedId,
98    RelatedMissingRequired,
99    DeprecatedWithoutRelated,
100    InvalidTag,
101    UnknownTagNamespace,
102    DuplicateTags,
103    DuplicateReferences,
104    DuplicateFields,
105    FalsepositiveTooShort,
106    ScopeTooShort,
107    LogsourceValueNotLowercase,
108    ConditionReferencesUnknown,
109    DeprecatedAggregationSyntax,
110
111    // ── Correlation rules ────────────────────────────────────────────────
112    MissingCorrelation,
113    MissingCorrelationType,
114    InvalidCorrelationType,
115    MissingCorrelationRules,
116    EmptyCorrelationRules,
117    MissingCorrelationTimespan,
118    InvalidTimespanFormat,
119    InvalidWindowMode,
120    MissingSessionGap,
121    GapWithoutSession,
122    InvalidGapFormat,
123    MissingGroupBy,
124    MissingCorrelationCondition,
125    MissingConditionField,
126    InvalidConditionOperator,
127    ConditionValueNotNumeric,
128    GenerateNotBoolean,
129
130    // ── Filter rules ─────────────────────────────────────────────────────
131    MissingFilter,
132    MissingFilterRules,
133    EmptyFilterRules,
134    MissingFilterSelection,
135    MissingFilterCondition,
136    FilterHasLevel,
137    FilterHasStatus,
138    MissingFilterLogsource,
139
140    // ── Detection logic (cross-cutting) ──────────────────────────────────
141    NullInValueList,
142    SingleValueAllModifier,
143    AllWithRe,
144    IncompatibleModifiers,
145    EmptyValueList,
146    WildcardOnlyValue,
147    FlattenedArrayCorrelation,
148    UnsupportedSigmaVersion,
149    ArrayMatchingWithoutVersion,
150    SigmaVersionMismatch,
151    UnknownRuleReference,
152    UnknownKey,
153
154    // ── ADS detection-strategy metadata ──────────────────────────────────
155    AdsMissingGoal,
156    AdsMissingCategorization,
157    AdsMissingStrategy,
158    AdsMissingTechnicalContext,
159    AdsMissingBlindSpots,
160    AdsMissingFalsePositives,
161    AdsMissingValidation,
162    AdsMissingPriority,
163    AdsMissingResponse,
164    AdsEmptySection,
165    AdsUnknownSection,
166}
167
168impl fmt::Display for LintRule {
169    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
170        let s = match self {
171            LintRule::YamlParseError => "yaml_parse_error",
172            LintRule::NotAMapping => "not_a_mapping",
173            LintRule::FileReadError => "file_read_error",
174            LintRule::SchemaViolation => "schema_violation",
175            LintRule::MissingTitle => "missing_title",
176            LintRule::EmptyTitle => "empty_title",
177            LintRule::TitleTooLong => "title_too_long",
178            LintRule::MissingDescription => "missing_description",
179            LintRule::MissingAuthor => "missing_author",
180            LintRule::InvalidId => "invalid_id",
181            LintRule::InvalidStatus => "invalid_status",
182            LintRule::MissingLevel => "missing_level",
183            LintRule::InvalidLevel => "invalid_level",
184            LintRule::InvalidDate => "invalid_date",
185            LintRule::InvalidModified => "invalid_modified",
186            LintRule::ModifiedBeforeDate => "modified_before_date",
187            LintRule::DescriptionTooLong => "description_too_long",
188            LintRule::NameTooLong => "name_too_long",
189            LintRule::TaxonomyTooLong => "taxonomy_too_long",
190            LintRule::NonLowercaseKey => "non_lowercase_key",
191            LintRule::MissingLogsource => "missing_logsource",
192            LintRule::MissingDetection => "missing_detection",
193            LintRule::MissingCondition => "missing_condition",
194            LintRule::EmptyDetection => "empty_detection",
195            LintRule::InvalidRelatedType => "invalid_related_type",
196            LintRule::InvalidRelatedId => "invalid_related_id",
197            LintRule::RelatedMissingRequired => "related_missing_required",
198            LintRule::DeprecatedWithoutRelated => "deprecated_without_related",
199            LintRule::InvalidTag => "invalid_tag",
200            LintRule::UnknownTagNamespace => "unknown_tag_namespace",
201            LintRule::DuplicateTags => "duplicate_tags",
202            LintRule::DuplicateReferences => "duplicate_references",
203            LintRule::DuplicateFields => "duplicate_fields",
204            LintRule::FalsepositiveTooShort => "falsepositive_too_short",
205            LintRule::ScopeTooShort => "scope_too_short",
206            LintRule::LogsourceValueNotLowercase => "logsource_value_not_lowercase",
207            LintRule::ConditionReferencesUnknown => "condition_references_unknown",
208            LintRule::DeprecatedAggregationSyntax => "deprecated_aggregation_syntax",
209            LintRule::MissingCorrelation => "missing_correlation",
210            LintRule::MissingCorrelationType => "missing_correlation_type",
211            LintRule::InvalidCorrelationType => "invalid_correlation_type",
212            LintRule::MissingCorrelationRules => "missing_correlation_rules",
213            LintRule::EmptyCorrelationRules => "empty_correlation_rules",
214            LintRule::MissingCorrelationTimespan => "missing_correlation_timespan",
215            LintRule::InvalidTimespanFormat => "invalid_timespan_format",
216            LintRule::InvalidWindowMode => "invalid_window_mode",
217            LintRule::MissingSessionGap => "missing_session_gap",
218            LintRule::GapWithoutSession => "gap_without_session",
219            LintRule::InvalidGapFormat => "invalid_gap_format",
220            LintRule::MissingGroupBy => "missing_group_by",
221            LintRule::MissingCorrelationCondition => "missing_correlation_condition",
222            LintRule::MissingConditionField => "missing_condition_field",
223            LintRule::InvalidConditionOperator => "invalid_condition_operator",
224            LintRule::ConditionValueNotNumeric => "condition_value_not_numeric",
225            LintRule::GenerateNotBoolean => "generate_not_boolean",
226            LintRule::MissingFilter => "missing_filter",
227            LintRule::MissingFilterRules => "missing_filter_rules",
228            LintRule::EmptyFilterRules => "empty_filter_rules",
229            LintRule::MissingFilterSelection => "missing_filter_selection",
230            LintRule::MissingFilterCondition => "missing_filter_condition",
231            LintRule::FilterHasLevel => "filter_has_level",
232            LintRule::FilterHasStatus => "filter_has_status",
233            LintRule::MissingFilterLogsource => "missing_filter_logsource",
234            LintRule::NullInValueList => "null_in_value_list",
235            LintRule::SingleValueAllModifier => "single_value_all_modifier",
236            LintRule::AllWithRe => "all_with_re",
237            LintRule::IncompatibleModifiers => "incompatible_modifiers",
238            LintRule::EmptyValueList => "empty_value_list",
239            LintRule::WildcardOnlyValue => "wildcard_only_value",
240            LintRule::FlattenedArrayCorrelation => "flattened_array_correlation",
241            LintRule::UnsupportedSigmaVersion => "unsupported_sigma_version",
242            LintRule::ArrayMatchingWithoutVersion => "array_matching_without_version",
243            LintRule::SigmaVersionMismatch => "sigma_version_mismatch",
244            LintRule::UnknownRuleReference => "unknown_rule_reference",
245            LintRule::UnknownKey => "unknown_key",
246            LintRule::AdsMissingGoal => "ads_missing_goal",
247            LintRule::AdsMissingCategorization => "ads_missing_categorization",
248            LintRule::AdsMissingStrategy => "ads_missing_strategy",
249            LintRule::AdsMissingTechnicalContext => "ads_missing_technical_context",
250            LintRule::AdsMissingBlindSpots => "ads_missing_blind_spots",
251            LintRule::AdsMissingFalsePositives => "ads_missing_false_positives",
252            LintRule::AdsMissingValidation => "ads_missing_validation",
253            LintRule::AdsMissingPriority => "ads_missing_priority",
254            LintRule::AdsMissingResponse => "ads_missing_response",
255            LintRule::AdsEmptySection => "ads_empty_section",
256            LintRule::AdsUnknownSection => "ads_unknown_section",
257        };
258        write!(f, "{s}")
259    }
260}
261
262/// A source span (line/column, both 0-indexed).
263#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
264pub struct Span {
265    pub start_line: u32,
266    pub start_col: u32,
267    pub end_line: u32,
268    pub end_col: u32,
269}
270
271// =============================================================================
272// Auto-fix types
273// =============================================================================
274
275/// Whether a fix is safe to apply automatically or needs manual review.
276#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
277pub enum FixDisposition {
278    Safe,
279    Unsafe,
280}
281
282/// A single patch operation within a [`Fix`].
283#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
284pub enum FixPatch {
285    ReplaceValue { path: String, new_value: String },
286    ReplaceKey { path: String, new_key: String },
287    Remove { path: String },
288}
289
290/// A suggested fix for a lint finding.
291#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
292pub struct Fix {
293    pub title: String,
294    pub disposition: FixDisposition,
295    pub patches: Vec<FixPatch>,
296}
297
298/// A single lint finding.
299#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
300pub struct LintWarning {
301    pub rule: LintRule,
302    pub severity: Severity,
303    pub message: String,
304    pub path: String,
305    pub span: Option<Span>,
306    pub fix: Option<Fix>,
307}
308
309impl fmt::Display for LintWarning {
310    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
311        write!(
312            f,
313            "{}[{}]: {}\n    --> {}",
314            self.severity, self.rule, self.message, self.path
315        )
316    }
317}
318
319/// Result of linting a single file (may contain multiple YAML documents).
320#[derive(Debug, Clone, Serialize)]
321pub struct FileLintResult {
322    pub path: std::path::PathBuf,
323    pub warnings: Vec<LintWarning>,
324}
325
326impl FileLintResult {
327    pub fn has_errors(&self) -> bool {
328        self.warnings.iter().any(|w| w.severity == Severity::Error)
329    }
330
331    pub fn error_count(&self) -> usize {
332        self.warnings
333            .iter()
334            .filter(|w| w.severity == Severity::Error)
335            .count()
336    }
337
338    pub fn warning_count(&self) -> usize {
339        self.warnings
340            .iter()
341            .filter(|w| w.severity == Severity::Warning)
342            .count()
343    }
344
345    pub fn info_count(&self) -> usize {
346        self.warnings
347            .iter()
348            .filter(|w| w.severity == Severity::Info)
349            .count()
350    }
351
352    pub fn hint_count(&self) -> usize {
353        self.warnings
354            .iter()
355            .filter(|w| w.severity == Severity::Hint)
356            .count()
357    }
358}
359
360// =============================================================================
361// Helpers (shared with rule submodules)
362// =============================================================================
363
364static KEY_CACHE: LazyLock<HashMap<&'static str, Value>> = LazyLock::new(|| {
365    [
366        "action",
367        "author",
368        "category",
369        "condition",
370        "correlation",
371        "custom_attributes",
372        "date",
373        "description",
374        "detection",
375        "falsepositives",
376        "field",
377        "fields",
378        "filter",
379        "gap",
380        "generate",
381        "group-by",
382        "id",
383        "level",
384        "logsource",
385        "modified",
386        "name",
387        "product",
388        "references",
389        "related",
390        "rsigma.gap",
391        "rsigma.window",
392        "rules",
393        "scope",
394        "selection",
395        "service",
396        "sigma-version",
397        "status",
398        "tags",
399        "taxonomy",
400        "timeframe",
401        "timespan",
402        "title",
403        "type",
404        "window",
405    ]
406    .into_iter()
407    .map(|n| (n, Value::String(n.into())))
408    .collect()
409});
410
411pub(crate) fn key(s: &str) -> &'static Value {
412    KEY_CACHE
413        .get(s)
414        .unwrap_or_else(|| panic!("lint key not pre-cached: \"{s}\" — add it to KEY_CACHE"))
415}
416
417pub(crate) fn get_str<'a>(m: &'a yaml_serde::Mapping, k: &str) -> Option<&'a str> {
418    m.get(key(k)).and_then(|v| v.as_str())
419}
420
421pub(crate) fn get_mapping<'a>(
422    m: &'a yaml_serde::Mapping,
423    k: &str,
424) -> Option<&'a yaml_serde::Mapping> {
425    m.get(key(k)).and_then(|v| v.as_mapping())
426}
427
428pub(crate) fn get_seq<'a>(m: &'a yaml_serde::Mapping, k: &str) -> Option<&'a yaml_serde::Sequence> {
429    m.get(key(k)).and_then(|v| v.as_sequence())
430}
431
432pub(crate) fn warn(
433    rule: LintRule,
434    severity: Severity,
435    message: impl Into<String>,
436    path: impl Into<String>,
437) -> LintWarning {
438    LintWarning {
439        rule,
440        severity,
441        message: message.into(),
442        path: path.into(),
443        span: None,
444        fix: None,
445    }
446}
447
448pub(crate) fn err(
449    rule: LintRule,
450    message: impl Into<String>,
451    path: impl Into<String>,
452) -> LintWarning {
453    warn(rule, Severity::Error, message, path)
454}
455
456pub(crate) fn warning(
457    rule: LintRule,
458    message: impl Into<String>,
459    path: impl Into<String>,
460) -> LintWarning {
461    warn(rule, Severity::Warning, message, path)
462}
463
464pub(crate) fn info(
465    rule: LintRule,
466    message: impl Into<String>,
467    path: impl Into<String>,
468) -> LintWarning {
469    warn(rule, Severity::Info, message, path)
470}
471
472pub(crate) fn safe_fix(title: impl Into<String>, patches: Vec<FixPatch>) -> Option<Fix> {
473    Some(Fix {
474        title: title.into(),
475        disposition: FixDisposition::Safe,
476        patches,
477    })
478}
479
480/// Find the closest match for `input` among `candidates` using edit distance.
481pub(crate) fn closest_match<'a>(
482    input: &str,
483    candidates: &[&'a str],
484    max_distance: usize,
485) -> Option<&'a str> {
486    candidates
487        .iter()
488        .filter(|c| edit_distance(input, c) <= max_distance)
489        .min_by_key(|c| edit_distance(input, c))
490        .copied()
491}
492
/// Levenshtein edit distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `a` into `b`. It is measured in
/// Unicode scalar values (`char`s), not bytes, so one differing non-ASCII
/// character counts as a single edit. For ASCII input the two are the same.
///
/// Uses two rolling rows, so memory is proportional to the length of `b`.
pub(crate) fn edit_distance(a: &str, b: &str) -> usize {
    if a.is_empty() {
        return b.chars().count();
    }
    if b.is_empty() {
        return a.chars().count();
    }

    // `b` is scanned once per character of `a`, so decode it up front.
    let b_chars: Vec<char> = b.chars().collect();
    let b_len = b_chars.len();

    // `prev[j]` is the distance between the processed prefix of `a` and the
    // first `j` characters of `b`; it starts as the distance from "".
    let mut prev: Vec<usize> = (0..=b_len).collect();
    let mut curr = vec![0; b_len + 1];

    for (i, ca) in a.chars().enumerate() {
        curr[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let substitution = prev[j] + usize::from(ca != cb);
            let deletion = prev[j + 1] + 1;
            let insertion = curr[j] + 1;
            curr[j + 1] = substitution.min(deletion).min(insertion);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[b_len]
}
514
/// Largest edit distance at which a name is still treated as a likely typo.
///
/// NOTE(review): presumably passed as `max_distance` to `closest_match` by the
/// rule submodules — no use is visible in this part of the file.
pub(crate) const TYPO_MAX_EDIT_DISTANCE: usize = 2;
516
517// =============================================================================
518// Document type detection
519// =============================================================================
520
/// The kind of Sigma document being linted, as decided by `detect_doc_type`
/// from the document's top-level keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DocType {
    /// A detection rule: a document with neither a `correlation` nor a
    /// `filter` top-level key.
    Detection,
    /// A correlation rule: a document with a `correlation` top-level key.
    Correlation,
    /// A filter rule: a document with a `filter` top-level key and no
    /// `correlation` key.
    Filter,
}
527
528impl DocType {
529    pub(crate) fn known_keys(&self) -> &'static [&'static str] {
530        match self {
531            DocType::Detection => rules::shared::KNOWN_KEYS_DETECTION,
532            DocType::Correlation => rules::shared::KNOWN_KEYS_CORRELATION,
533            DocType::Filter => rules::shared::KNOWN_KEYS_FILTER,
534        }
535    }
536}
537
538fn detect_doc_type(m: &yaml_serde::Mapping) -> DocType {
539    if m.contains_key(key("correlation")) {
540        DocType::Correlation
541    } else if m.contains_key(key("filter")) {
542        DocType::Filter
543    } else {
544        DocType::Detection
545    }
546}
547
548fn is_action_fragment(m: &yaml_serde::Mapping) -> bool {
549    matches!(get_str(m, "action"), Some("global" | "reset" | "repeat"))
550}
551
552// =============================================================================
553// Cross-document reference resolution
554// =============================================================================
555
/// Lookup table of the rules that other documents may reference.
///
/// Detection rules and correlation rules are indexed under both their `id`
/// and their `name`, each mapped to the rule's resolved specification major.
/// The index is built per file for single-text linting and across the whole
/// directory for directory linting.
struct RuleIndex {
    /// Rule identifier (`id` or `name`) mapped to its specification major.
    majors: HashMap<String, u32>,
    /// `true` when the index covers the entire set being linted. Only then
    /// does an unresolved reference mean the rule is really missing, rather
    /// than defined in a file outside the linted scope.
    complete: bool,
}
567
568impl RuleIndex {
569    fn new(complete: bool) -> Self {
570        Self {
571            majors: HashMap::new(),
572            complete,
573        }
574    }
575
576    /// Index every referenceable document in one multi-document YAML text.
577    fn add_text(&mut self, text: &str) {
578        for doc in yaml_serde::Deserializer::from_str(text) {
579            let Ok(value) = Value::deserialize(doc) else {
580                break;
581            };
582            self.add_value(&value);
583        }
584    }
585
586    fn add_value(&mut self, value: &Value) {
587        let Some(m) = value.as_mapping() else {
588            return;
589        };
590        if is_action_fragment(m) {
591            return;
592        }
593        // Only detection rules and correlation rules can be referenced.
594        if matches!(
595            detect_doc_type(m),
596            DocType::Detection | DocType::Correlation
597        ) {
598            let major = crate::version::resolve_major(
599                m.get(key("sigma-version"))
600                    .and_then(crate::version::major_from_value),
601            );
602            for id_key in ["id", "name"] {
603                if let Some(v) = get_str(m, id_key) {
604                    self.majors.insert(v.to_string(), major);
605                }
606            }
607        }
608    }
609}
610
611/// Extract a `rules:` reference list (a single string or a sequence of strings).
612fn reference_list(v: Option<&Value>) -> Vec<String> {
613    match v {
614        Some(Value::String(s)) => vec![s.clone()],
615        Some(Value::Sequence(seq)) => seq
616            .iter()
617            .filter_map(|x| x.as_str().map(str::to_string))
618            .collect(),
619        _ => Vec::new(),
620    }
621}
622
623/// References declared by a correlation rule (`correlation.rules`).
624fn correlation_rule_refs(m: &yaml_serde::Mapping) -> Vec<String> {
625    m.get(key("correlation"))
626        .and_then(|c| c.as_mapping())
627        .map(|c| reference_list(c.get(key("rules"))))
628        .unwrap_or_default()
629}
630
631/// References declared by a filter rule (`filter.rules`). Returns `None` when the
632/// filter targets every rule (`rules: any`), which is not resolvable.
633fn filter_rule_refs(m: &yaml_serde::Mapping) -> Option<Vec<String>> {
634    let f = m.get(key("filter"))?.as_mapping()?;
635    let rules = f.get(key("rules"))?;
636    if let Some(s) = rules.as_str()
637        && s.eq_ignore_ascii_case("any")
638    {
639        return None;
640    }
641    Some(reference_list(Some(rules)))
642}
643
644/// Cross-document lints over the documents in one YAML text, resolving each
645/// correlation/filter reference against `index`:
646///
647/// - `sigma_version_mismatch` (warning): a referencing document and a resolved
648///   referenced rule declare different specification majors.
649/// - `unknown_rule_reference` (warning): a reference resolves to no rule and the
650///   index is complete (so it is genuinely missing, not out of the linted scope).
651fn lint_cross_references(docs: &[Value], index: &RuleIndex, warnings: &mut Vec<LintWarning>) {
652    for value in docs {
653        let Some(m) = value.as_mapping() else {
654            continue;
655        };
656        if is_action_fragment(m) {
657            continue;
658        }
659        let (refs, path) = match detect_doc_type(m) {
660            DocType::Correlation => (correlation_rule_refs(m), "/correlation/rules"),
661            DocType::Filter => match filter_rule_refs(m) {
662                Some(refs) => (refs, "/filter/rules"),
663                None => continue,
664            },
665            DocType::Detection => continue,
666        };
667        if refs.is_empty() {
668            continue;
669        }
670        let self_major = crate::version::resolve_major(
671            m.get(key("sigma-version"))
672                .and_then(crate::version::major_from_value),
673        );
674        let label = get_str(m, "title")
675            .or_else(|| get_str(m, "name"))
676            .unwrap_or("<rule>");
677        for r in refs {
678            match index.majors.get(&r).copied() {
679                Some(target) if target != self_major => warnings.push(warning(
680                    LintRule::SigmaVersionMismatch,
681                    format!(
682                        "'{label}' targets sigma-version major {self_major} but references rule \
683                         '{r}' which targets major {target}; cross-referencing rules must share a \
684                         specification major"
685                    ),
686                    path,
687                )),
688                Some(_) => {}
689                None if index.complete => warnings.push(warning(
690                    LintRule::UnknownRuleReference,
691                    format!(
692                        "'{label}' references rule '{r}', which was not found among the linted \
693                         rules (matched by id or name)"
694                    ),
695                    path,
696                )),
697                None => {}
698            }
699        }
700    }
701}
702
703// =============================================================================
704// Public API
705// =============================================================================
706
707fn lint_yaml_value_ext(
708    value: &Value,
709    extra_ns: &[String],
710    ads: Option<&AdsConfig>,
711) -> Vec<LintWarning> {
712    let Some(m) = value.as_mapping() else {
713        return vec![err(
714            LintRule::NotAMapping,
715            "document is not a YAML mapping",
716            "/",
717        )];
718    };
719
720    if is_action_fragment(m) {
721        return Vec::new();
722    }
723
724    let mut warnings = Vec::new();
725
726    rules::metadata::lint_shared(m, &mut warnings);
727
728    let doc_type = detect_doc_type(m);
729    match doc_type {
730        DocType::Detection => rules::detection::lint_detection_rule(m, &mut warnings, extra_ns),
731        DocType::Correlation => rules::correlation::lint_correlation_rule(m, &mut warnings),
732        DocType::Filter => rules::filter::lint_filter_rule(m, &mut warnings),
733    }
734
735    rules::version::lint_sigma_version(m, doc_type, &mut warnings);
736    rules::shared::lint_unknown_keys(m, doc_type, &mut warnings);
737
738    // ADS enforcement applies to detection rules only and only when an `ads:`
739    // block is configured.
740    if let Some(ads_cfg) = ads
741        && doc_type == DocType::Detection
742    {
743        rules::ads::lint_ads(m, ads_cfg, extra_ns, &mut warnings);
744    }
745
746    warnings
747}
748
749/// Lint a single YAML document value.
750pub fn lint_yaml_value(value: &Value) -> Vec<LintWarning> {
751    lint_yaml_value_ext(value, &[], None)
752}
753
754fn lint_yaml_str_ext(text: &str, extra_ns: &[String], ads: Option<&AdsConfig>) -> Vec<LintWarning> {
755    lint_yaml_str_indexed(text, extra_ns, ads, None)
756}
757
758/// Lint one YAML text. When `external_index` is `Some` (directory linting) it is
759/// the directory-global rule index used for cross-reference checks; when `None`,
760/// a file-local index is built from this text, so cross-file references are out
761/// of scope and `unknown_rule_reference` does not fire.
762fn lint_yaml_str_indexed(
763    text: &str,
764    extra_ns: &[String],
765    ads: Option<&AdsConfig>,
766    external_index: Option<&RuleIndex>,
767) -> Vec<LintWarning> {
768    let mut all_warnings = Vec::new();
769    let mut docs: Vec<Value> = Vec::new();
770
771    for doc in yaml_serde::Deserializer::from_str(text) {
772        let value: Value = match Value::deserialize(doc) {
773            Ok(v) => v,
774            Err(e) => {
775                let mut w = err(
776                    LintRule::YamlParseError,
777                    format!("YAML parse error: {e}"),
778                    "/",
779                );
780                if let Some(loc) = e.location() {
781                    w.span = Some(Span {
782                        start_line: loc.line().saturating_sub(1) as u32,
783                        start_col: loc.column() as u32,
784                        end_line: loc.line().saturating_sub(1) as u32,
785                        end_col: loc.column() as u32 + 1,
786                    });
787                }
788                all_warnings.push(w);
789                break;
790            }
791        };
792
793        for mut w in lint_yaml_value_ext(&value, extra_ns, ads) {
794            w.span = resolve_path_to_span(text, &w.path);
795            all_warnings.push(w);
796        }
797        docs.push(value);
798    }
799
800    // Cross-document checks resolve references against the directory-global index
801    // when given, otherwise a file-local index built from this text's documents.
802    let local_index;
803    let index = match external_index {
804        Some(idx) => idx,
805        None => {
806            let mut idx = RuleIndex::new(false);
807            for v in &docs {
808                idx.add_value(v);
809            }
810            local_index = idx;
811            &local_index
812        }
813    };
814    let mut xref = Vec::new();
815    lint_cross_references(&docs, index, &mut xref);
816    for mut w in xref {
817        w.span = resolve_path_to_span(text, &w.path);
818        all_warnings.push(w);
819    }
820
821    all_warnings
822}
823
824/// Lint a raw YAML string, returning warnings with resolved source spans.
825pub fn lint_yaml_str(text: &str) -> Vec<LintWarning> {
826    lint_yaml_str_ext(text, &[], None)
827}
828
829fn resolve_path_to_span(text: &str, path: &str) -> Option<Span> {
830    if path == "/" || path.is_empty() {
831        for (i, line) in text.lines().enumerate() {
832            let trimmed = line.trim();
833            if !trimmed.is_empty() && !trimmed.starts_with('#') && trimmed != "---" {
834                return Some(Span {
835                    start_line: i as u32,
836                    start_col: 0,
837                    end_line: i as u32,
838                    end_col: line.len() as u32,
839                });
840            }
841        }
842        return None;
843    }
844
845    let segments: Vec<&str> = path.strip_prefix('/').unwrap_or(path).split('/').collect();
846
847    if segments.is_empty() {
848        return None;
849    }
850
851    let lines: Vec<&str> = text.lines().collect();
852    let mut current_indent: i32 = -1;
853    let mut search_start = 0usize;
854    let mut last_matched_line: Option<usize> = None;
855
856    for segment in &segments {
857        let array_index: Option<usize> = segment.parse().ok();
858        let mut found = false;
859
860        let mut line_num = search_start;
861        while line_num < lines.len() {
862            let line = lines[line_num];
863            let trimmed = line.trim();
864            if trimmed.is_empty() || trimmed.starts_with('#') {
865                line_num += 1;
866                continue;
867            }
868
869            let indent = (line.len() - trimmed.len()) as i32;
870
871            if indent <= current_indent && found {
872                break;
873            }
874            if indent <= current_indent {
875                line_num += 1;
876                continue;
877            }
878
879            if let Some(idx) = array_index {
880                if trimmed.starts_with("- ") && indent > current_indent {
881                    let mut count = 0usize;
882                    for (offset, sl) in lines[search_start..].iter().enumerate() {
883                        let scan = search_start + offset;
884                        let st = sl.trim();
885                        if st.is_empty() || st.starts_with('#') {
886                            continue;
887                        }
888                        let si = (sl.len() - st.len()) as i32;
889                        if si == indent && st.starts_with("- ") {
890                            if count == idx {
891                                last_matched_line = Some(scan);
892                                search_start = scan + 1;
893                                current_indent = indent;
894                                found = true;
895                                break;
896                            }
897                            count += 1;
898                        }
899                        if si < indent && count > 0 {
900                            break;
901                        }
902                    }
903                    break;
904                }
905            } else {
906                let key_pattern = format!("{segment}:");
907                if trimmed.starts_with(&key_pattern) || trimmed == *segment {
908                    last_matched_line = Some(line_num);
909                    search_start = line_num + 1;
910                    current_indent = indent;
911                    found = true;
912                    break;
913                }
914            }
915
916            line_num += 1;
917        }
918
919        if !found && last_matched_line.is_none() {
920            break;
921        }
922    }
923
924    last_matched_line.map(|line_num| {
925        let line = lines[line_num];
926        Span {
927            start_line: line_num as u32,
928            start_col: 0,
929            end_line: line_num as u32,
930            end_col: line.len() as u32,
931        }
932    })
933}
934
935/// Lint all YAML documents in a file.
936pub fn lint_yaml_file(path: &Path) -> crate::error::Result<FileLintResult> {
937    let content = std::fs::read_to_string(path)?;
938    let warnings = lint_yaml_str(&content);
939    Ok(FileLintResult {
940        path: path.to_path_buf(),
941        warnings,
942    })
943}
944
945/// Recursively collect `.yml`/`.yaml` file paths under `dir`, in sorted
946/// depth-first order, skipping hidden directories and any path matching the
947/// exclude set (relative to `base`). Symlink loops are guarded by `visited`.
948fn collect_yaml_files(
949    dir: &Path,
950    base: &Path,
951    exclude_set: Option<&globset::GlobSet>,
952    files: &mut Vec<std::path::PathBuf>,
953    visited: &mut HashSet<std::path::PathBuf>,
954) -> crate::error::Result<()> {
955    let canonical = match dir.canonicalize() {
956        Ok(p) => p,
957        Err(_) => return Ok(()),
958    };
959    if !visited.insert(canonical) {
960        return Ok(());
961    }
962
963    let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
964    entries.sort_by_key(|e| e.path());
965
966    for entry in entries {
967        let path = entry.path();
968
969        if let Some(gs) = exclude_set
970            && let Ok(rel) = path.strip_prefix(base)
971            && gs.is_match(rel)
972        {
973            continue;
974        }
975
976        if path.is_dir() {
977            if path
978                .file_name()
979                .and_then(|n| n.to_str())
980                .is_some_and(|n| n.starts_with('.'))
981            {
982                continue;
983            }
984            collect_yaml_files(&path, base, exclude_set, files, visited)?;
985        } else if matches!(
986            path.extension().and_then(|e| e.to_str()),
987            Some("yml" | "yaml")
988        ) {
989            files.push(path);
990        }
991    }
992    Ok(())
993}
994
995/// Two-pass directory lint: collect and read every file once to build a
996/// directory-global rule index, then lint each file against it so
997/// cross-reference checks see rules defined in sibling files.
998fn lint_directory_impl(
999    dir: &Path,
1000    config: Option<&LintConfig>,
1001) -> crate::error::Result<Vec<FileLintResult>> {
1002    let exclude_set = config.and_then(LintConfig::build_exclude_set);
1003    let mut files = Vec::new();
1004    let mut visited = HashSet::new();
1005    collect_yaml_files(dir, dir, exclude_set.as_ref(), &mut files, &mut visited)?;
1006
1007    // Read each file once and index every referenceable rule across the tree.
1008    let mut index = RuleIndex::new(true);
1009    let mut contents: Vec<(std::path::PathBuf, std::result::Result<String, String>)> =
1010        Vec::with_capacity(files.len());
1011    for path in files {
1012        match std::fs::read_to_string(&path) {
1013            Ok(text) => {
1014                index.add_text(&text);
1015                contents.push((path, Ok(text)));
1016            }
1017            Err(e) => contents.push((path, Err(format!("error reading file: {e}")))),
1018        }
1019    }
1020
1021    let mut results = Vec::with_capacity(contents.len());
1022    for (path, content) in contents {
1023        match content {
1024            Ok(text) => {
1025                let warnings = match config {
1026                    Some(cfg) => {
1027                        let w = lint_yaml_str_indexed(
1028                            &text,
1029                            &cfg.tag_namespaces,
1030                            cfg.ads.as_ref(),
1031                            Some(&index),
1032                        );
1033                        apply_suppressions(w, cfg, &parse_inline_suppressions(&text))
1034                    }
1035                    None => lint_yaml_str_indexed(&text, &[], None, Some(&index)),
1036                };
1037                results.push(FileLintResult { path, warnings });
1038            }
1039            Err(msg) => results.push(FileLintResult {
1040                path,
1041                warnings: vec![err(LintRule::FileReadError, msg, "/")],
1042            }),
1043        }
1044    }
1045    Ok(results)
1046}
1047
1048/// Lint all `.yml`/`.yaml` files in a directory recursively.
1049pub fn lint_yaml_directory(dir: &Path) -> crate::error::Result<Vec<FileLintResult>> {
1050    lint_directory_impl(dir, None)
1051}
1052
1053// =============================================================================
1054// Lint configuration & suppression
1055// =============================================================================
1056
/// Configuration for lint rule suppression and severity overrides.
#[derive(Debug, Clone, Default, Serialize)]
pub struct LintConfig {
    /// Names of lint rules whose findings are dropped; compared against the
    /// rule's `Display` form.
    pub disabled_rules: HashSet<String>,
    /// Replacement severity per lint rule name, applied to findings that
    /// survive suppression.
    pub severity_overrides: HashMap<String, Severity>,
    /// Glob patterns for paths to skip during directory linting; matched
    /// against paths relative to the linted directory.
    pub exclude_patterns: Vec<String>,
    /// Extra tag namespaces recognised in addition to the built-in set.
    pub tag_namespaces: Vec<String>,
    /// ADS enforcement configuration. `None` (the default) leaves the ADS
    /// presence checks off; an `ads:` block in the config enables them.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ads: Option<AdsConfig>,
}
1070
/// ADS (Alerting and Detection Strategy) enforcement configuration.
///
/// Present (`Some`) only when an `ads:` block appears in the layered lint
/// config; the ADS presence checks are off otherwise. When enabled, the checks
/// fire on detection rules whose `status` is in [`enforce_status`](Self::enforce_status)
/// and flag each missing [`required`](Self::required) section.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct AdsConfig {
    /// Rule statuses that require ADS sections (lowercased). Compared
    /// verbatim against a rule's status string.
    pub enforce_status: Vec<String>,
    /// The ADS section ids that are mandatory. Defaults to every known
    /// section id.
    pub required: Vec<String>,
    /// A single severity applied to every ADS finding, overriding the
    /// per-section default. `None` keeps the catalogue defaults.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub severity: Option<Severity>,
}
1088
1089impl Default for AdsConfig {
1090    fn default() -> Self {
1091        AdsConfig {
1092            enforce_status: vec!["stable".to_string()],
1093            required: AdsSection::all()
1094                .iter()
1095                .map(|s| s.id().to_string())
1096                .collect(),
1097            severity: None,
1098        }
1099    }
1100}
1101
1102impl AdsConfig {
1103    /// Whether a rule with the given `status` string is in scope for ADS
1104    /// enforcement.
1105    pub fn enforces_status(&self, status: Option<&str>) -> bool {
1106        match status {
1107            Some(s) => self.enforce_status.iter().any(|e| e == s),
1108            None => false,
1109        }
1110    }
1111
1112    /// Whether the section id is required.
1113    pub fn requires(&self, section_id: &str) -> bool {
1114        self.required.iter().any(|r| r == section_id)
1115    }
1116}
1117
/// On-disk shape of a lint config file, as deserialized before validation.
///
/// Every key is optional. `LintConfig::load` converts this into a
/// [`LintConfig`], parsing severity strings and normalising the lists.
#[derive(Debug, Deserialize)]
struct RawLintConfig {
    /// Names of lint rules to disable.
    #[serde(default)]
    disabled_rules: Vec<String>,
    /// Rule name → severity wire string (validated by `parse_severity`).
    #[serde(default)]
    severity_overrides: HashMap<String, String>,
    /// Exclude patterns; becomes `LintConfig::exclude_patterns`.
    #[serde(default)]
    exclude: Vec<String>,
    /// Additional tag namespaces; lowercased on load.
    #[serde(default)]
    tag_namespaces: Vec<String>,
    /// Optional `ads:` block; absent means ADS checks stay off.
    #[serde(default)]
    ads: Option<RawAdsConfig>,
}
1131
/// On-disk shape of the `ads:` config block, before validation.
///
/// A field left as `None` keeps the corresponding [`AdsConfig`] default when
/// converted by `ads_config_from_raw`.
#[derive(Debug, Deserialize)]
struct RawAdsConfig {
    /// Rule statuses to enforce ADS on; validated and lowercased on conversion.
    #[serde(default)]
    enforce_status: Option<Vec<String>>,
    /// Required ADS section ids; validated and lowercased on conversion.
    #[serde(default)]
    required: Option<Vec<String>>,
    /// Severity wire string applied to every ADS finding.
    #[serde(default)]
    severity: Option<String>,
}
1141
1142/// Parse a lint severity wire string.
1143fn parse_severity(s: &str) -> Option<Severity> {
1144    match s {
1145        "error" => Some(Severity::Error),
1146        "warning" => Some(Severity::Warning),
1147        "info" => Some(Severity::Info),
1148        "hint" => Some(Severity::Hint),
1149        _ => None,
1150    }
1151}
1152
1153/// Build a validated [`AdsConfig`] from its raw, deserialized form, layering
1154/// any provided fields over the defaults.
1155fn ads_config_from_raw(raw: RawAdsConfig) -> crate::error::Result<AdsConfig> {
1156    let mut config = AdsConfig::default();
1157
1158    if let Some(statuses) = raw.enforce_status {
1159        const VALID_STATUSES: &[&str] = &[
1160            "stable",
1161            "test",
1162            "experimental",
1163            "deprecated",
1164            "unsupported",
1165        ];
1166        let mut normalised = Vec::with_capacity(statuses.len());
1167        for s in statuses {
1168            let lower = s.to_lowercase();
1169            if !VALID_STATUSES.contains(&lower.as_str()) {
1170                return Err(crate::error::SigmaParserError::InvalidRule(format!(
1171                    "invalid ads.enforce_status '{s}'; expected one of: {}",
1172                    VALID_STATUSES.join(", ")
1173                )));
1174            }
1175            normalised.push(lower);
1176        }
1177        dedup_preserving_order(&mut normalised);
1178        config.enforce_status = normalised;
1179    }
1180
1181    if let Some(required) = raw.required {
1182        let mut ids = Vec::with_capacity(required.len());
1183        for id in required {
1184            let lower = id.to_lowercase();
1185            if AdsSection::from_id(&lower).is_none() {
1186                return Err(crate::error::SigmaParserError::InvalidRule(format!(
1187                    "invalid ads.required section '{id}'; expected one of: {}",
1188                    AdsSection::all()
1189                        .iter()
1190                        .map(|s| s.id())
1191                        .collect::<Vec<_>>()
1192                        .join(", ")
1193                )));
1194            }
1195            ids.push(lower);
1196        }
1197        dedup_preserving_order(&mut ids);
1198        config.required = ids;
1199    }
1200
1201    if let Some(sev) = raw.severity {
1202        config.severity = Some(parse_severity(&sev).ok_or_else(|| {
1203            crate::error::SigmaParserError::InvalidRule(format!(
1204                "invalid ads.severity '{sev}'; expected error, warning, info, or hint"
1205            ))
1206        })?);
1207    }
1208
1209    Ok(config)
1210}
1211
/// Drop repeated strings from `items` in place, keeping the first occurrence
/// of each and leaving the relative order of the survivors unchanged.
///
/// Used so that merged lists (e.g. a value present in both a config file and a
/// CLI flag) stay stable and free of repeats.
fn dedup_preserving_order(items: &mut Vec<String>) {
    let mut seen: HashSet<String> = HashSet::with_capacity(items.len());
    items.retain(|candidate| {
        if seen.contains(candidate) {
            false
        } else {
            seen.insert(candidate.clone());
            true
        }
    });
}
1219
1220impl LintConfig {
1221    pub fn load(path: &Path) -> crate::error::Result<Self> {
1222        let content = std::fs::read_to_string(path)?;
1223        let raw: RawLintConfig = yaml_serde::from_str(&content)?;
1224
1225        let disabled_rules: HashSet<String> = raw.disabled_rules.into_iter().collect();
1226        let mut severity_overrides = HashMap::new();
1227        for (rule, sev_str) in &raw.severity_overrides {
1228            let sev = parse_severity(sev_str).ok_or_else(|| {
1229                crate::error::SigmaParserError::InvalidRule(format!(
1230                    "invalid severity '{sev_str}' for rule '{rule}' in lint config"
1231                ))
1232            })?;
1233            severity_overrides.insert(rule.clone(), sev);
1234        }
1235
1236        let mut exclude_patterns = raw.exclude;
1237        dedup_preserving_order(&mut exclude_patterns);
1238
1239        let mut tag_namespaces: Vec<String> = raw
1240            .tag_namespaces
1241            .into_iter()
1242            .map(|s| s.to_lowercase())
1243            .collect();
1244        dedup_preserving_order(&mut tag_namespaces);
1245
1246        let ads = raw.ads.map(ads_config_from_raw).transpose()?;
1247
1248        Ok(LintConfig {
1249            disabled_rules,
1250            severity_overrides,
1251            exclude_patterns,
1252            tag_namespaces,
1253            ads,
1254        })
1255    }
1256
1257    pub fn find_in_ancestors(start_path: &Path) -> Option<std::path::PathBuf> {
1258        let dir = if start_path.is_file() {
1259            start_path.parent()?
1260        } else {
1261            start_path
1262        };
1263
1264        let mut current = dir;
1265        loop {
1266            let candidate = current.join(".rsigma-lint.yml");
1267            if candidate.is_file() {
1268                return Some(candidate);
1269            }
1270            let candidate_yaml = current.join(".rsigma-lint.yaml");
1271            if candidate_yaml.is_file() {
1272                return Some(candidate_yaml);
1273            }
1274            current = current.parent()?;
1275        }
1276    }
1277
1278    pub fn merge(&mut self, other: &LintConfig) {
1279        self.disabled_rules
1280            .extend(other.disabled_rules.iter().cloned());
1281        for (rule, sev) in &other.severity_overrides {
1282            self.severity_overrides.insert(rule.clone(), *sev);
1283        }
1284        self.exclude_patterns
1285            .extend(other.exclude_patterns.iter().cloned());
1286        dedup_preserving_order(&mut self.exclude_patterns);
1287        self.tag_namespaces
1288            .extend(other.tag_namespaces.iter().cloned());
1289        dedup_preserving_order(&mut self.tag_namespaces);
1290        // A nearer-layer `ads:` block replaces the inherited one wholesale, so
1291        // a project can set its own ADS bar without merging stale section lists.
1292        if other.ads.is_some() {
1293            self.ads = other.ads.clone();
1294        }
1295    }
1296
1297    pub fn is_disabled(&self, rule: &LintRule) -> bool {
1298        self.disabled_rules.contains(&rule.to_string())
1299    }
1300
1301    pub fn build_exclude_set(&self) -> Option<globset::GlobSet> {
1302        if self.exclude_patterns.is_empty() {
1303            return None;
1304        }
1305        let mut builder = globset::GlobSetBuilder::new();
1306        for pat in &self.exclude_patterns {
1307            if let Ok(glob) = globset::GlobBuilder::new(pat)
1308                .literal_separator(false)
1309                .build()
1310            {
1311                builder.add(glob);
1312            }
1313        }
1314        builder.build().ok()
1315    }
1316}
1317
1318// =============================================================================
1319// Inline suppression comments
1320// =============================================================================
1321
/// Suppression directives collected from `# rsigma-disable…` comments in a
/// YAML source text.
#[derive(Debug, Clone, Default)]
pub struct InlineSuppressions {
    /// Set by a bare `rsigma-disable` comment: every finding is suppressed.
    pub disable_all: bool,
    /// Rule names listed after `rsigma-disable`; suppressed for the whole text.
    pub file_disabled: HashSet<String>,
    /// Per-line suppressions from `rsigma-disable-next-line`, keyed by the
    /// zero-based index of the line after the comment. `None` suppresses every
    /// rule on that line; `Some(set)` suppresses only the named rules.
    pub line_disabled: HashMap<u32, Option<HashSet<String>>>,
}
1328
1329pub fn parse_inline_suppressions(text: &str) -> InlineSuppressions {
1330    let mut result = InlineSuppressions::default();
1331
1332    for (i, line) in text.lines().enumerate() {
1333        let trimmed = line.trim();
1334
1335        let comment = if let Some(pos) = find_yaml_comment(trimmed) {
1336            trimmed[pos + 1..].trim()
1337        } else {
1338            continue;
1339        };
1340
1341        if let Some(rest) = comment.strip_prefix("rsigma-disable-next-line") {
1342            let rest = rest.trim();
1343            let next_line = (i + 1) as u32;
1344            if rest.is_empty() {
1345                result.line_disabled.insert(next_line, None);
1346            } else {
1347                let rules: HashSet<String> = rest
1348                    .split(',')
1349                    .map(|s| s.trim().to_string())
1350                    .filter(|s| !s.is_empty())
1351                    .collect();
1352                if !rules.is_empty() {
1353                    result
1354                        .line_disabled
1355                        .entry(next_line)
1356                        .and_modify(|existing| {
1357                            if let Some(existing_set) = existing {
1358                                existing_set.extend(rules.iter().cloned());
1359                            }
1360                        })
1361                        .or_insert(Some(rules));
1362                }
1363            }
1364        } else if let Some(rest) = comment.strip_prefix("rsigma-disable") {
1365            let rest = rest.trim();
1366            if rest.is_empty() {
1367                result.disable_all = true;
1368            } else {
1369                for rule in rest.split(',') {
1370                    let rule = rule.trim();
1371                    if !rule.is_empty() {
1372                        result.file_disabled.insert(rule.to_string());
1373                    }
1374                }
1375            }
1376        }
1377    }
1378
1379    result
1380}
1381
/// Return the byte offset of the `#` that starts a YAML comment on `line`, or
/// `None` when the line has no comment.
///
/// `line` is expected to be a single line with leading whitespace already
/// trimmed. The scan follows YAML's lexical rules closely enough to avoid the
/// common false positives:
///
/// * `#` starts a comment only at the beginning of the line or directly after
///   whitespace, so `http://host/#frag` is not a comment.
/// * A quote opens a quoted scalar only at the start of a token (line start,
///   after whitespace, or after one of `[ { , :`), so the apostrophe in
///   `don't` does not hide a later comment.
/// * Inside single quotes `''` is an escaped quote; inside double quotes a
///   backslash escapes the next character.
fn find_yaml_comment(line: &str) -> Option<usize> {
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    let mut quote = Quote::None;
    // Character before the current one; only consulted outside quotes.
    let mut prev: Option<char> = None;
    let mut chars = line.char_indices().peekable();

    while let Some((i, c)) = chars.next() {
        match quote {
            Quote::Single => {
                if c == '\'' {
                    if matches!(chars.peek(), Some(&(_, '\''))) {
                        // `''` is a literal quote, not the end of the scalar.
                        chars.next();
                    } else {
                        quote = Quote::None;
                    }
                }
            }
            Quote::Double => match c {
                // Skip the escaped character so `\"` cannot close the scalar.
                '\\' => {
                    chars.next();
                }
                '"' => quote = Quote::None,
                _ => {}
            },
            Quote::None => {
                let after_space = prev.map_or(true, char::is_whitespace);
                let token_start = after_space || matches!(prev, Some('[' | '{' | ',' | ':'));
                match c {
                    '#' if after_space => return Some(i),
                    '\'' if token_start => quote = Quote::Single,
                    '"' if token_start => quote = Quote::Double,
                    _ => {}
                }
            }
        }
        prev = Some(c);
    }
    None
}
1395
1396impl InlineSuppressions {
1397    pub fn is_suppressed(&self, warning: &LintWarning) -> bool {
1398        if self.disable_all {
1399            return true;
1400        }
1401
1402        let rule_name = warning.rule.to_string();
1403        if self.file_disabled.contains(&rule_name) {
1404            return true;
1405        }
1406
1407        if let Some(span) = &warning.span
1408            && let Some(line_rules) = self.line_disabled.get(&span.start_line)
1409        {
1410            return match line_rules {
1411                None => true,
1412                Some(rules) => rules.contains(&rule_name),
1413            };
1414        }
1415
1416        false
1417    }
1418}
1419
1420// =============================================================================
1421// Suppression filtering
1422// =============================================================================
1423
1424pub fn apply_suppressions(
1425    warnings: Vec<LintWarning>,
1426    config: &LintConfig,
1427    inline: &InlineSuppressions,
1428) -> Vec<LintWarning> {
1429    warnings
1430        .into_iter()
1431        .filter(|w| !config.is_disabled(&w.rule))
1432        .filter(|w| !inline.is_suppressed(w))
1433        .map(|mut w| {
1434            let rule_name = w.rule.to_string();
1435            if let Some(sev) = config.severity_overrides.get(&rule_name) {
1436                w.severity = *sev;
1437            }
1438            w
1439        })
1440        .collect()
1441}
1442
1443pub fn lint_yaml_str_with_config(text: &str, config: &LintConfig) -> Vec<LintWarning> {
1444    let warnings = lint_yaml_str_ext(text, &config.tag_namespaces, config.ads.as_ref());
1445    let inline = parse_inline_suppressions(text);
1446    apply_suppressions(warnings, config, &inline)
1447}
1448
1449pub fn lint_yaml_file_with_config(
1450    path: &Path,
1451    config: &LintConfig,
1452) -> crate::error::Result<FileLintResult> {
1453    let content = std::fs::read_to_string(path)?;
1454    let warnings = lint_yaml_str_with_config(&content, config);
1455    Ok(FileLintResult {
1456        path: path.to_path_buf(),
1457        warnings,
1458    })
1459}
1460
1461pub fn lint_yaml_directory_with_config(
1462    dir: &Path,
1463    config: &LintConfig,
1464) -> crate::error::Result<Vec<FileLintResult>> {
1465    lint_directory_impl(dir, Some(config))
1466}
1467
1468// =============================================================================
1469// Tests
1470// =============================================================================
1471
1472#[cfg(test)]
1473mod tests {
1474    use super::*;
1475
1476    fn yaml_value(yaml: &str) -> Value {
1477        yaml_serde::from_str(yaml).unwrap()
1478    }
1479
1480    fn lint(yaml: &str) -> Vec<LintWarning> {
1481        lint_yaml_value(&yaml_value(yaml))
1482    }
1483
1484    fn has_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
1485        warnings.iter().any(|w| w.rule == rule)
1486    }
1487
1488    fn has_no_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
1489        !has_rule(warnings, rule)
1490    }
1491
1492    #[test]
1493    fn valid_detection_rule_no_errors() {
1494        let w = lint(
1495            r#"
1496title: Test Rule
1497id: 929a690e-bef0-4204-a928-ef5e620d6fcc
1498status: test
1499logsource:
1500    category: process_creation
1501    product: windows
1502detection:
1503    selection:
1504        CommandLine|contains: 'whoami'
1505    condition: selection
1506level: medium
1507tags:
1508    - attack.execution
1509    - attack.t1059
1510"#,
1511        );
1512        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
1513        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
1514    }
1515
1516    #[test]
1517    fn not_a_mapping() {
1518        let v: yaml_serde::Value = yaml_serde::from_str("- item1\n- item2").unwrap();
1519        let w = lint_yaml_value(&v);
1520        assert!(has_rule(&w, LintRule::NotAMapping));
1521    }
1522
1523    #[test]
1524    fn lint_yaml_str_produces_spans() {
1525        let text = r#"title: Test
1526status: invalid_status
1527logsource:
1528    category: test
1529detection:
1530    selection:
1531        field: value
1532    condition: selection
1533level: medium
1534"#;
1535        let warnings = lint_yaml_str(text);
1536        let invalid_status = warnings.iter().find(|w| w.rule == LintRule::InvalidStatus);
1537        assert!(invalid_status.is_some(), "expected InvalidStatus warning");
1538        let span = invalid_status.unwrap().span;
1539        assert!(span.is_some(), "expected span to be resolved");
1540        assert_eq!(span.unwrap().start_line, 1);
1541    }
1542
1543    #[test]
1544    fn yaml_parse_error_uses_correct_rule() {
1545        let text = "title: [unclosed";
1546        let warnings = lint_yaml_str(text);
1547        assert!(has_rule(&warnings, LintRule::YamlParseError));
1548        assert!(has_no_rule(&warnings, LintRule::MissingTitle));
1549    }
1550
1551    #[test]
1552    fn action_global_skipped() {
1553        let w = lint(
1554            r#"
1555action: global
1556title: Global Template
1557logsource:
1558    product: windows
1559"#,
1560        );
1561        assert!(w.is_empty());
1562    }
1563
1564    #[test]
1565    fn action_reset_skipped() {
1566        let w = lint(
1567            r#"
1568action: reset
1569"#,
1570        );
1571        assert!(w.is_empty());
1572    }
1573
1574    #[test]
1575    fn resolve_path_to_span_root() {
1576        let text = "title: Test\nstatus: test\n";
1577        let span = resolve_path_to_span(text, "/");
1578        assert!(span.is_some());
1579        assert_eq!(span.unwrap().start_line, 0);
1580    }
1581
1582    #[test]
1583    fn resolve_path_to_span_top_level_key() {
1584        let text = "title: Test\nstatus: test\nlevel: high\n";
1585        let span = resolve_path_to_span(text, "/status");
1586        assert!(span.is_some());
1587        assert_eq!(span.unwrap().start_line, 1);
1588    }
1589
1590    #[test]
1591    fn resolve_path_to_span_nested_key() {
1592        let text = "title: Test\nlogsource:\n    category: test\n    product: windows\n";
1593        let span = resolve_path_to_span(text, "/logsource/product");
1594        assert!(span.is_some());
1595        assert_eq!(span.unwrap().start_line, 3);
1596    }
1597
1598    #[test]
1599    fn resolve_path_to_span_missing_key() {
1600        let text = "title: Test\nstatus: test\n";
1601        let span = resolve_path_to_span(text, "/nonexistent");
1602        assert!(span.is_none());
1603    }
1604
1605    #[test]
1606    fn multi_doc_yaml_lints_all_documents() {
1607        let text = r#"title: Rule 1
1608logsource:
1609    category: test
1610detection:
1611    selection:
1612        field: value
1613    condition: selection
1614level: medium
1615---
1616title: Rule 2
1617status: bad_status
1618logsource:
1619    category: test
1620detection:
1621    selection:
1622        field: value
1623    condition: selection
1624level: medium
1625"#;
1626        let warnings = lint_yaml_str(text);
1627        assert!(has_rule(&warnings, LintRule::InvalidStatus));
1628    }
1629
1630    #[test]
1631    fn severity_display() {
1632        assert_eq!(format!("{}", Severity::Error), "error");
1633        assert_eq!(format!("{}", Severity::Warning), "warning");
1634        assert_eq!(format!("{}", Severity::Info), "info");
1635        assert_eq!(format!("{}", Severity::Hint), "hint");
1636    }
1637
1638    #[test]
1639    fn file_lint_result_has_errors() {
1640        let result = FileLintResult {
1641            path: std::path::PathBuf::from("test.yml"),
1642            warnings: vec![
1643                warning(LintRule::TitleTooLong, "too long", "/title"),
1644                err(
1645                    LintRule::MissingCondition,
1646                    "missing",
1647                    "/detection/condition",
1648                ),
1649            ],
1650        };
1651        assert!(result.has_errors());
1652        assert_eq!(result.error_count(), 1);
1653        assert_eq!(result.warning_count(), 1);
1654    }
1655
1656    #[test]
1657    fn file_lint_result_no_errors() {
1658        let result = FileLintResult {
1659            path: std::path::PathBuf::from("test.yml"),
1660            warnings: vec![warning(LintRule::TitleTooLong, "too long", "/title")],
1661        };
1662        assert!(!result.has_errors());
1663        assert_eq!(result.error_count(), 0);
1664        assert_eq!(result.warning_count(), 1);
1665    }
1666
1667    #[test]
1668    fn file_lint_result_empty() {
1669        let result = FileLintResult {
1670            path: std::path::PathBuf::from("test.yml"),
1671            warnings: vec![],
1672        };
1673        assert!(!result.has_errors());
1674        assert_eq!(result.error_count(), 0);
1675        assert_eq!(result.warning_count(), 0);
1676    }
1677
1678    #[test]
1679    fn lint_warning_display() {
1680        let w = err(
1681            LintRule::MissingTitle,
1682            "missing required field 'title'",
1683            "/title",
1684        );
1685        let display = format!("{w}");
1686        assert!(display.contains("error"));
1687        assert!(display.contains("missing_title"));
1688        assert!(display.contains("/title"));
1689    }
1690
1691    #[test]
1692    fn file_lint_result_info_count() {
1693        let result = FileLintResult {
1694            path: std::path::PathBuf::from("test.yml"),
1695            warnings: vec![
1696                info(LintRule::MissingDescription, "missing desc", "/description"),
1697                info(LintRule::MissingAuthor, "missing author", "/author"),
1698                warning(LintRule::TitleTooLong, "too long", "/title"),
1699            ],
1700        };
1701        assert_eq!(result.info_count(), 2);
1702        assert_eq!(result.warning_count(), 1);
1703        assert_eq!(result.error_count(), 0);
1704        assert!(!result.has_errors());
1705    }
1706
1707    #[test]
1708    fn parse_inline_disable_all() {
1709        let text = "# rsigma-disable\ntitle: Test\n";
1710        let sup = parse_inline_suppressions(text);
1711        assert!(sup.disable_all);
1712    }
1713
1714    #[test]
1715    fn parse_inline_disable_specific_rules() {
1716        let text = "# rsigma-disable missing_description, missing_author\ntitle: Test\n";
1717        let sup = parse_inline_suppressions(text);
1718        assert!(!sup.disable_all);
1719        assert!(sup.file_disabled.contains("missing_description"));
1720        assert!(sup.file_disabled.contains("missing_author"));
1721    }
1722
1723    #[test]
1724    fn parse_inline_disable_next_line_all() {
1725        let text = "# rsigma-disable-next-line\ntitle: Test\n";
1726        let sup = parse_inline_suppressions(text);
1727        assert!(!sup.disable_all);
1728        assert!(sup.line_disabled.contains_key(&1));
1729        assert!(sup.line_disabled[&1].is_none());
1730    }
1731
1732    #[test]
1733    fn parse_inline_disable_next_line_specific() {
1734        let text = "title: Test\n# rsigma-disable-next-line missing_level\nlevel: medium\n";
1735        let sup = parse_inline_suppressions(text);
1736        assert!(sup.line_disabled.contains_key(&2));
1737        let rules = sup.line_disabled[&2].as_ref().unwrap();
1738        assert!(rules.contains("missing_level"));
1739    }
1740
1741    #[test]
1742    fn parse_inline_no_comments() {
1743        let text = "title: Test\nstatus: test\n";
1744        let sup = parse_inline_suppressions(text);
1745        assert!(!sup.disable_all);
1746        assert!(sup.file_disabled.is_empty());
1747        assert!(sup.line_disabled.is_empty());
1748    }
1749
1750    #[test]
1751    fn parse_inline_comment_in_quoted_string() {
1752        let text = "description: 'no # rsigma-disable here'\ntitle: Test\n";
1753        let sup = parse_inline_suppressions(text);
1754        assert!(!sup.disable_all);
1755        assert!(sup.file_disabled.is_empty());
1756    }
1757
1758    #[test]
1759    fn apply_suppressions_disables_rule() {
1760        let warnings = vec![
1761            info(LintRule::MissingDescription, "desc", "/description"),
1762            info(LintRule::MissingAuthor, "author", "/author"),
1763            warning(LintRule::TitleTooLong, "title", "/title"),
1764        ];
1765        let mut config = LintConfig::default();
1766        config
1767            .disabled_rules
1768            .insert("missing_description".to_string());
1769        let inline = InlineSuppressions::default();
1770
1771        let result = apply_suppressions(warnings, &config, &inline);
1772        assert_eq!(result.len(), 2);
1773        assert!(
1774            result
1775                .iter()
1776                .all(|w| w.rule != LintRule::MissingDescription)
1777        );
1778    }
1779
1780    #[test]
1781    fn apply_suppressions_severity_override() {
1782        let warnings = vec![warning(LintRule::TitleTooLong, "title too long", "/title")];
1783        let mut config = LintConfig::default();
1784        config
1785            .severity_overrides
1786            .insert("title_too_long".to_string(), Severity::Info);
1787        let inline = InlineSuppressions::default();
1788
1789        let result = apply_suppressions(warnings, &config, &inline);
1790        assert_eq!(result.len(), 1);
1791        assert_eq!(result[0].severity, Severity::Info);
1792    }
1793
1794    #[test]
1795    fn apply_suppressions_inline_file_disable() {
1796        let warnings = vec![
1797            info(LintRule::MissingDescription, "desc", "/description"),
1798            info(LintRule::MissingAuthor, "author", "/author"),
1799        ];
1800        let config = LintConfig::default();
1801        let mut inline = InlineSuppressions::default();
1802        inline.file_disabled.insert("missing_author".to_string());
1803
1804        let result = apply_suppressions(warnings, &config, &inline);
1805        assert_eq!(result.len(), 1);
1806        assert_eq!(result[0].rule, LintRule::MissingDescription);
1807    }
1808
1809    #[test]
1810    fn apply_suppressions_inline_disable_all() {
1811        let warnings = vec![
1812            err(LintRule::MissingTitle, "title", "/title"),
1813            warning(LintRule::TitleTooLong, "long", "/title"),
1814        ];
1815        let config = LintConfig::default();
1816        let inline = InlineSuppressions {
1817            disable_all: true,
1818            ..Default::default()
1819        };
1820
1821        let result = apply_suppressions(warnings, &config, &inline);
1822        assert!(result.is_empty());
1823    }
1824
1825    #[test]
1826    fn apply_suppressions_inline_next_line() {
1827        let mut w1 = warning(LintRule::TitleTooLong, "long", "/title");
1828        w1.span = Some(Span {
1829            start_line: 5,
1830            start_col: 0,
1831            end_line: 5,
1832            end_col: 10,
1833        });
1834        let mut w2 = err(LintRule::InvalidStatus, "bad", "/status");
1835        w2.span = Some(Span {
1836            start_line: 6,
1837            start_col: 0,
1838            end_line: 6,
1839            end_col: 10,
1840        });
1841
1842        let config = LintConfig::default();
1843        let mut inline = InlineSuppressions::default();
1844        inline.line_disabled.insert(5, None);
1845
1846        let result = apply_suppressions(vec![w1, w2], &config, &inline);
1847        assert_eq!(result.len(), 1);
1848        assert_eq!(result[0].rule, LintRule::InvalidStatus);
1849    }
1850
1851    #[test]
1852    fn lint_with_config_disables_rules() {
1853        let text = r#"title: Test
1854logsource:
1855    category: test
1856detection:
1857    selection:
1858        field: value
1859    condition: selection
1860level: medium
1861"#;
1862        let mut config = LintConfig::default();
1863        config
1864            .disabled_rules
1865            .insert("missing_description".to_string());
1866        config.disabled_rules.insert("missing_author".to_string());
1867
1868        let warnings = lint_yaml_str_with_config(text, &config);
1869        assert!(
1870            !warnings
1871                .iter()
1872                .any(|w| w.rule == LintRule::MissingDescription)
1873        );
1874        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
1875    }
1876
1877    #[test]
1878    fn lint_with_inline_disable_next_line() {
1879        let text = r#"title: Test
1880# rsigma-disable-next-line missing_level
1881logsource:
1882    category: test
1883detection:
1884    selection:
1885        field: value
1886    condition: selection
1887"#;
1888        let config = LintConfig::default();
1889        let warnings = lint_yaml_str_with_config(text, &config);
1890        assert!(warnings.iter().any(|w| w.rule == LintRule::MissingLevel));
1891    }
1892
1893    #[test]
1894    fn lint_with_inline_file_disable() {
1895        let text = r#"# rsigma-disable missing_description, missing_author
1896title: Test
1897logsource:
1898    category: test
1899detection:
1900    selection:
1901        field: value
1902    condition: selection
1903level: medium
1904"#;
1905        let config = LintConfig::default();
1906        let warnings = lint_yaml_str_with_config(text, &config);
1907        assert!(
1908            !warnings
1909                .iter()
1910                .any(|w| w.rule == LintRule::MissingDescription)
1911        );
1912        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
1913    }
1914
1915    #[test]
1916    fn lint_with_inline_disable_all() {
1917        let text = r#"# rsigma-disable
1918title: Test
1919status: invalid_status
1920logsource:
1921    category: test
1922detection:
1923    selection:
1924        field: value
1925    condition: selection
1926"#;
1927        let config = LintConfig::default();
1928        let warnings = lint_yaml_str_with_config(text, &config);
1929        assert!(warnings.is_empty());
1930    }
1931
1932    #[test]
1933    fn lint_config_merge() {
1934        let mut base = LintConfig::default();
1935        base.disabled_rules.insert("rule_a".to_string());
1936        base.severity_overrides
1937            .insert("rule_b".to_string(), Severity::Info);
1938
1939        let other = LintConfig {
1940            disabled_rules: ["rule_c".to_string()].into_iter().collect(),
1941            severity_overrides: [("rule_d".to_string(), Severity::Hint)]
1942                .into_iter()
1943                .collect(),
1944            exclude_patterns: vec!["test/**".to_string()],
1945            tag_namespaces: vec!["myns".to_string()],
1946            ads: None,
1947        };
1948
1949        base.merge(&other);
1950        assert!(base.disabled_rules.contains("rule_a"));
1951        assert!(base.disabled_rules.contains("rule_c"));
1952        assert_eq!(base.severity_overrides.get("rule_b"), Some(&Severity::Info));
1953        assert_eq!(base.severity_overrides.get("rule_d"), Some(&Severity::Hint));
1954        assert_eq!(base.exclude_patterns, vec!["test/**".to_string()]);
1955        assert!(base.tag_namespaces.contains(&"myns".to_string()));
1956    }
1957
1958    #[test]
1959    fn lint_config_merge_dedups_lists() {
1960        let mut base = LintConfig {
1961            exclude_patterns: vec!["config/**".to_string(), "shared/**".to_string()],
1962            tag_namespaces: vec!["myorg".to_string(), "shared".to_string()],
1963            ..Default::default()
1964        };
1965        let other = LintConfig {
1966            // "shared/**" and "shared" overlap with base on purpose.
1967            exclude_patterns: vec!["shared/**".to_string(), "extra/**".to_string()],
1968            tag_namespaces: vec!["shared".to_string(), "internal".to_string()],
1969            ..Default::default()
1970        };
1971
1972        base.merge(&other);
1973
1974        assert_eq!(
1975            base.exclude_patterns,
1976            vec![
1977                "config/**".to_string(),
1978                "shared/**".to_string(),
1979                "extra/**".to_string()
1980            ]
1981        );
1982        assert_eq!(
1983            base.tag_namespaces,
1984            vec![
1985                "myorg".to_string(),
1986                "shared".to_string(),
1987                "internal".to_string()
1988            ]
1989        );
1990    }
1991
1992    #[test]
1993    fn lint_config_load_dedups_and_normalises() {
1994        let yaml = r#"
1995exclude:
1996  - "config/**"
1997  - "config/**"
1998tag_namespaces:
1999  - MyOrg
2000  - myorg
2001  - internal
2002"#;
2003        let mut tmp = tempfile::NamedTempFile::with_suffix(".yml").unwrap();
2004        std::io::Write::write_all(&mut tmp, yaml.as_bytes()).unwrap();
2005        let config = LintConfig::load(tmp.path()).unwrap();
2006
2007        assert_eq!(config.exclude_patterns, vec!["config/**".to_string()]);
2008        // "MyOrg" lowercases to "myorg" and then collapses with the duplicate.
2009        assert_eq!(
2010            config.tag_namespaces,
2011            vec!["myorg".to_string(), "internal".to_string()]
2012        );
2013    }
2014
2015    #[test]
2016    fn lint_config_is_disabled() {
2017        let mut config = LintConfig::default();
2018        config.disabled_rules.insert("missing_title".to_string());
2019        assert!(config.is_disabled(&LintRule::MissingTitle));
2020        assert!(!config.is_disabled(&LintRule::EmptyTitle));
2021    }
2022
2023    #[test]
2024    fn find_yaml_comment_basic() {
2025        assert_eq!(find_yaml_comment("# comment"), Some(0));
2026        assert_eq!(find_yaml_comment("key: value # comment"), Some(11));
2027        assert_eq!(find_yaml_comment("key: 'value # not comment'"), None);
2028        assert_eq!(find_yaml_comment("key: \"value # not comment\""), None);
2029        assert_eq!(find_yaml_comment("key: value"), None);
2030    }
2031
2032    #[test]
2033    fn no_fix_for_unfixable_rule() {
2034        let w = lint(
2035            r#"
2036title: Test
2037logsource:
2038    category: test
2039"#,
2040        );
2041        assert!(has_rule(&w, LintRule::MissingDetection));
2042        let fix = w
2043            .iter()
2044            .find(|w| w.rule == LintRule::MissingDetection)
2045            .and_then(|w| w.fix.as_ref());
2046        assert!(fix.is_none());
2047    }
2048
2049    #[test]
2050    fn lint_config_exclude_from_yaml() {
2051        let yaml = r#"
2052disabled_rules:
2053  - missing_description
2054exclude:
2055  - "config/**"
2056  - "**/unsupported/**"
2057"#;
2058        let tmp = std::env::temp_dir().join("rsigma_test_exclude.yml");
2059        std::fs::write(&tmp, yaml).unwrap();
2060        let config = LintConfig::load(&tmp).unwrap();
2061        std::fs::remove_file(&tmp).ok();
2062
2063        assert!(config.disabled_rules.contains("missing_description"));
2064        assert_eq!(config.exclude_patterns.len(), 2);
2065        assert_eq!(config.exclude_patterns[0], "config/**");
2066        assert_eq!(config.exclude_patterns[1], "**/unsupported/**");
2067    }
2068
2069    #[test]
2070    fn lint_config_build_exclude_set_empty() {
2071        let config = LintConfig::default();
2072        assert!(config.build_exclude_set().is_none());
2073    }
2074
2075    #[test]
2076    fn lint_config_build_exclude_set_matches() {
2077        let config = LintConfig {
2078            exclude_patterns: vec!["config/**".to_string()],
2079            ..Default::default()
2080        };
2081        let gs = config.build_exclude_set().expect("should build");
2082        assert!(gs.is_match("config/data_mapping/foo.yaml"));
2083        assert!(gs.is_match("config/nested/deep/bar.yml"));
2084        assert!(!gs.is_match("rules/windows/test.yml"));
2085    }
2086
2087    #[test]
2088    fn cross_ref_version_mismatch_within_file() {
2089        // A correlation (major 3) referencing a base rule (major 2) by name, in
2090        // the same file, flags the mismatch. unknown_rule_reference does NOT
2091        // fire for a single file (the index is not complete).
2092        let yaml = r#"
2093title: Base Rule
2094name: base_rule
2095sigma-version: 2
2096logsource:
2097    category: test
2098detection:
2099    selection:
2100        EventID: 1
2101    condition: selection
2102---
2103title: Brute Force
2104sigma-version: 3
2105correlation:
2106    type: event_count
2107    rules:
2108        - base_rule
2109    group-by:
2110        - SourceIP
2111    timespan: 5m
2112    condition:
2113        gte: 10
2114"#;
2115        let w = lint_yaml_str(yaml);
2116        assert!(has_rule(&w, LintRule::SigmaVersionMismatch));
2117        assert!(has_no_rule(&w, LintRule::UnknownRuleReference));
2118    }
2119
2120    #[test]
2121    fn cross_ref_matching_version_no_mismatch() {
2122        let yaml = r#"
2123title: Base Rule
2124name: base_rule
2125sigma-version: 3
2126logsource:
2127    category: test
2128detection:
2129    selection:
2130        EventID: 1
2131    condition: selection
2132---
2133title: Brute Force
2134sigma-version: 3
2135correlation:
2136    type: event_count
2137    rules:
2138        - base_rule
2139    group-by:
2140        - SourceIP
2141    timespan: 5m
2142    condition:
2143        gte: 10
2144"#;
2145        assert!(has_no_rule(
2146            &lint_yaml_str(yaml),
2147            LintRule::SigmaVersionMismatch
2148        ));
2149    }
2150
2151    #[test]
2152    fn cross_ref_unknown_only_with_complete_index() {
2153        let yaml = r#"
2154title: Brute Force
2155correlation:
2156    type: event_count
2157    rules:
2158        - nonexistent_rule
2159    group-by:
2160        - SourceIP
2161    timespan: 5m
2162    condition:
2163        gte: 10
2164"#;
2165        // Single file: the referenced rule may live elsewhere, so it is out of
2166        // scope and unknown_rule_reference must not fire.
2167        assert!(has_no_rule(
2168            &lint_yaml_str(yaml),
2169            LintRule::UnknownRuleReference
2170        ));
2171
2172        // Directory: the index is complete, so the missing reference is flagged.
2173        let tmp = tempfile::tempdir().unwrap();
2174        std::fs::write(tmp.path().join("corr.yml"), yaml).unwrap();
2175        let results = lint_yaml_directory(tmp.path()).unwrap();
2176        assert!(
2177            results
2178                .iter()
2179                .flat_map(|r| &r.warnings)
2180                .any(|w| w.rule == LintRule::UnknownRuleReference)
2181        );
2182    }
2183
2184    #[test]
2185    fn cross_ref_resolves_across_files() {
2186        // Base rule in one file, correlation in another: the directory index
2187        // resolves the reference and flags the major mismatch across files.
2188        let tmp = tempfile::tempdir().unwrap();
2189        std::fs::write(
2190            tmp.path().join("base.yml"),
2191            r#"
2192title: Base Rule
2193name: base_rule
2194sigma-version: 2
2195logsource:
2196    category: test
2197detection:
2198    selection:
2199        EventID: 1
2200    condition: selection
2201"#,
2202        )
2203        .unwrap();
2204        std::fs::write(
2205            tmp.path().join("corr.yml"),
2206            r#"
2207title: Brute Force
2208sigma-version: 3
2209correlation:
2210    type: event_count
2211    rules:
2212        - base_rule
2213    group-by:
2214        - SourceIP
2215    timespan: 5m
2216    condition:
2217        gte: 10
2218"#,
2219        )
2220        .unwrap();
2221        let results = lint_yaml_directory(tmp.path()).unwrap();
2222        let all: Vec<_> = results.iter().flat_map(|r| &r.warnings).collect();
2223        assert!(all.iter().any(|w| w.rule == LintRule::SigmaVersionMismatch));
2224        assert!(!all.iter().any(|w| w.rule == LintRule::UnknownRuleReference));
2225    }
2226
2227    #[test]
2228    fn lint_directory_with_excludes() {
2229        let tmp = tempfile::tempdir().unwrap();
2230        let rules_dir = tmp.path().join("rules");
2231        let config_dir = tmp.path().join("config");
2232        std::fs::create_dir_all(&rules_dir).unwrap();
2233        std::fs::create_dir_all(&config_dir).unwrap();
2234
2235        std::fs::write(
2236            rules_dir.join("good.yml"),
2237            r#"
2238title: Good Rule
2239logsource:
2240    category: test
2241detection:
2242    sel:
2243        field: value
2244    condition: sel
2245level: medium
2246"#,
2247        )
2248        .unwrap();
2249
2250        std::fs::write(
2251            config_dir.join("mapping.yaml"),
2252            r#"
2253Title: Logon
2254Channel: Security
2255EventID: 4624
2256"#,
2257        )
2258        .unwrap();
2259
2260        let no_exclude = LintConfig::default();
2261        let results = lint_yaml_directory_with_config(tmp.path(), &no_exclude).unwrap();
2262        let config_warnings: Vec<_> = results
2263            .iter()
2264            .filter(|r| r.path.to_string_lossy().contains("config"))
2265            .flat_map(|r| &r.warnings)
2266            .collect();
2267        assert!(
2268            !config_warnings.is_empty(),
2269            "config file should produce warnings without excludes"
2270        );
2271
2272        let with_exclude = LintConfig {
2273            exclude_patterns: vec!["config/**".to_string()],
2274            ..Default::default()
2275        };
2276        let results = lint_yaml_directory_with_config(tmp.path(), &with_exclude).unwrap();
2277        let config_results: Vec<_> = results
2278            .iter()
2279            .filter(|r| r.path.to_string_lossy().contains("config"))
2280            .collect();
2281        assert!(config_results.is_empty(), "config file should be excluded");
2282
2283        let rule_results: Vec<_> = results
2284            .iter()
2285            .filter(|r| r.path.to_string_lossy().contains("good.yml"))
2286            .collect();
2287        assert_eq!(rule_results.len(), 1);
2288    }
2289
2290    #[test]
2291    fn all_lint_keys_are_cached() {
2292        const ALL_LINT_KEYS: &[&str] = &[
2293            "action",
2294            "author",
2295            "condition",
2296            "correlation",
2297            "date",
2298            "description",
2299            "detection",
2300            "field",
2301            "filter",
2302            "generate",
2303            "group-by",
2304            "id",
2305            "level",
2306            "logsource",
2307            "modified",
2308            "name",
2309            "rules",
2310            "selection",
2311            "status",
2312            "tags",
2313            "taxonomy",
2314            "timeframe",
2315            "timespan",
2316            "title",
2317            "type",
2318        ];
2319        for key_str in ALL_LINT_KEYS {
2320            assert!(KEY_CACHE.contains_key(key_str), "key not cached: {key_str}");
2321        }
2322    }
2323
2324    #[test]
2325    fn extra_tag_namespace_suppresses_warning() {
2326        let text = r#"title: Test
2327logsource:
2328    category: test
2329detection:
2330    selection:
2331        field: value
2332    condition: selection
2333level: medium
2334tags:
2335    - myorg.custom_tag
2336"#;
2337        // Without extra namespaces, unknown_tag_namespace fires.
2338        let warnings = lint_yaml_str(text);
2339        assert!(has_rule(&warnings, LintRule::UnknownTagNamespace));
2340
2341        // With "myorg" added, the warning is gone.
2342        let config = LintConfig {
2343            tag_namespaces: vec!["myorg".to_string()],
2344            ..Default::default()
2345        };
2346        let warnings = lint_yaml_str_with_config(text, &config);
2347        assert!(has_no_rule(&warnings, LintRule::UnknownTagNamespace));
2348    }
2349
2350    #[test]
2351    fn extra_tag_namespace_from_config_file() {
2352        let yaml = r#"
2353tag_namespaces:
2354  - myorg
2355  - internal
2356"#;
2357        let mut tmp = tempfile::NamedTempFile::with_suffix(".yml").unwrap();
2358        std::io::Write::write_all(&mut tmp, yaml.as_bytes()).unwrap();
2359        let config = LintConfig::load(tmp.path()).unwrap();
2360
2361        assert!(config.tag_namespaces.contains(&"myorg".to_string()));
2362        assert!(config.tag_namespaces.contains(&"internal".to_string()));
2363    }
2364}