Skip to main content

rsigma_parser/lint/
mod.rs

1//! Built-in linter for Sigma rules, correlations, and filters.
2//!
3//! Validates raw `yaml_serde::Value` documents against the Sigma specification
4//! v2.1.0 constraints — catching metadata issues that the parser silently
5//! ignores (invalid enums, date formats, tag patterns, etc.).
6//!
7//! # Usage
8//!
9//! ```rust
10//! use rsigma_parser::lint::{lint_yaml_value, Severity};
11//!
12//! let yaml = "title: Test\nlogsource:\n  category: test\ndetection:\n  sel:\n    field: value\n  condition: sel\n";
13//! let value: yaml_serde::Value = yaml_serde::from_str(yaml).unwrap();
14//! let warnings = lint_yaml_value(&value);
15//! for w in &warnings {
16//!     if w.severity == Severity::Error {
17//!         eprintln!("{}", w.message);
18//!     }
19//! }
20//! ```
21
22mod rules;
23
24use std::collections::{HashMap, HashSet};
25use std::fmt;
26use std::path::Path;
27use std::sync::LazyLock;
28
29use serde::{Deserialize, Serialize};
30use yaml_serde::Value;
31
32// =============================================================================
33// Public types
34// =============================================================================
35
/// Severity of a lint finding.
///
/// The `Display` impl renders each variant as its lowercase name
/// (`error`, `warning`, `info`, `hint`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
pub enum Severity {
    /// Spec violation — the rule is invalid.
    Error,
    /// Best-practice issue — the rule works but is not spec-ideal.
    Warning,
    /// Informational suggestion — soft best-practice hint (e.g. missing author).
    Info,
    /// Subtle hint — lowest severity, for stylistic suggestions.
    Hint,
}
48
49impl fmt::Display for Severity {
50    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51        match self {
52            Severity::Error => write!(f, "error"),
53            Severity::Warning => write!(f, "warning"),
54            Severity::Info => write!(f, "info"),
55            Severity::Hint => write!(f, "hint"),
56        }
57    }
58}
59
/// Identifies which lint rule fired.
///
/// The `Display` impl renders each variant as its snake_case name
/// (e.g. `MissingTitle` is shown as `missing_title`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
pub enum LintRule {
    // ── Infrastructure / parse errors ────────────────────────────────────
    YamlParseError,
    NotAMapping,
    FileReadError,
    SchemaViolation,

    // ── Shared (all document types) ──────────────────────────────────────
    MissingTitle,
    EmptyTitle,
    TitleTooLong,
    MissingDescription,
    MissingAuthor,
    InvalidId,
    InvalidStatus,
    MissingLevel,
    InvalidLevel,
    InvalidDate,
    InvalidModified,
    ModifiedBeforeDate,
    DescriptionTooLong,
    NameTooLong,
    TaxonomyTooLong,
    NonLowercaseKey,

    // ── Detection rules ──────────────────────────────────────────────────
    MissingLogsource,
    MissingDetection,
    MissingCondition,
    EmptyDetection,
    InvalidRelatedType,
    InvalidRelatedId,
    RelatedMissingRequired,
    DeprecatedWithoutRelated,
    InvalidTag,
    UnknownTagNamespace,
    DuplicateTags,
    DuplicateReferences,
    DuplicateFields,
    FalsepositiveTooShort,
    ScopeTooShort,
    LogsourceValueNotLowercase,
    ConditionReferencesUnknown,
    DeprecatedAggregationSyntax,

    // ── Correlation rules ────────────────────────────────────────────────
    MissingCorrelation,
    MissingCorrelationType,
    InvalidCorrelationType,
    MissingCorrelationRules,
    EmptyCorrelationRules,
    MissingCorrelationTimespan,
    InvalidTimespanFormat,
    InvalidWindowMode,
    MissingSessionGap,
    GapWithoutSession,
    InvalidGapFormat,
    MissingGroupBy,
    MissingCorrelationCondition,
    MissingConditionField,
    InvalidConditionOperator,
    ConditionValueNotNumeric,
    GenerateNotBoolean,

    // ── Filter rules ─────────────────────────────────────────────────────
    MissingFilter,
    MissingFilterRules,
    EmptyFilterRules,
    MissingFilterSelection,
    MissingFilterCondition,
    FilterHasLevel,
    FilterHasStatus,
    MissingFilterLogsource,

    // ── Detection logic (cross-cutting) ──────────────────────────────────
    NullInValueList,
    SingleValueAllModifier,
    AllWithRe,
    IncompatibleModifiers,
    EmptyValueList,
    WildcardOnlyValue,
    FlattenedArrayCorrelation,
    UnsupportedSigmaVersion,
    ArrayMatchingWithoutVersion,
    SigmaVersionMismatch,
    UnknownRuleReference,
    UnknownKey,
}
150
151impl fmt::Display for LintRule {
152    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
153        let s = match self {
154            LintRule::YamlParseError => "yaml_parse_error",
155            LintRule::NotAMapping => "not_a_mapping",
156            LintRule::FileReadError => "file_read_error",
157            LintRule::SchemaViolation => "schema_violation",
158            LintRule::MissingTitle => "missing_title",
159            LintRule::EmptyTitle => "empty_title",
160            LintRule::TitleTooLong => "title_too_long",
161            LintRule::MissingDescription => "missing_description",
162            LintRule::MissingAuthor => "missing_author",
163            LintRule::InvalidId => "invalid_id",
164            LintRule::InvalidStatus => "invalid_status",
165            LintRule::MissingLevel => "missing_level",
166            LintRule::InvalidLevel => "invalid_level",
167            LintRule::InvalidDate => "invalid_date",
168            LintRule::InvalidModified => "invalid_modified",
169            LintRule::ModifiedBeforeDate => "modified_before_date",
170            LintRule::DescriptionTooLong => "description_too_long",
171            LintRule::NameTooLong => "name_too_long",
172            LintRule::TaxonomyTooLong => "taxonomy_too_long",
173            LintRule::NonLowercaseKey => "non_lowercase_key",
174            LintRule::MissingLogsource => "missing_logsource",
175            LintRule::MissingDetection => "missing_detection",
176            LintRule::MissingCondition => "missing_condition",
177            LintRule::EmptyDetection => "empty_detection",
178            LintRule::InvalidRelatedType => "invalid_related_type",
179            LintRule::InvalidRelatedId => "invalid_related_id",
180            LintRule::RelatedMissingRequired => "related_missing_required",
181            LintRule::DeprecatedWithoutRelated => "deprecated_without_related",
182            LintRule::InvalidTag => "invalid_tag",
183            LintRule::UnknownTagNamespace => "unknown_tag_namespace",
184            LintRule::DuplicateTags => "duplicate_tags",
185            LintRule::DuplicateReferences => "duplicate_references",
186            LintRule::DuplicateFields => "duplicate_fields",
187            LintRule::FalsepositiveTooShort => "falsepositive_too_short",
188            LintRule::ScopeTooShort => "scope_too_short",
189            LintRule::LogsourceValueNotLowercase => "logsource_value_not_lowercase",
190            LintRule::ConditionReferencesUnknown => "condition_references_unknown",
191            LintRule::DeprecatedAggregationSyntax => "deprecated_aggregation_syntax",
192            LintRule::MissingCorrelation => "missing_correlation",
193            LintRule::MissingCorrelationType => "missing_correlation_type",
194            LintRule::InvalidCorrelationType => "invalid_correlation_type",
195            LintRule::MissingCorrelationRules => "missing_correlation_rules",
196            LintRule::EmptyCorrelationRules => "empty_correlation_rules",
197            LintRule::MissingCorrelationTimespan => "missing_correlation_timespan",
198            LintRule::InvalidTimespanFormat => "invalid_timespan_format",
199            LintRule::InvalidWindowMode => "invalid_window_mode",
200            LintRule::MissingSessionGap => "missing_session_gap",
201            LintRule::GapWithoutSession => "gap_without_session",
202            LintRule::InvalidGapFormat => "invalid_gap_format",
203            LintRule::MissingGroupBy => "missing_group_by",
204            LintRule::MissingCorrelationCondition => "missing_correlation_condition",
205            LintRule::MissingConditionField => "missing_condition_field",
206            LintRule::InvalidConditionOperator => "invalid_condition_operator",
207            LintRule::ConditionValueNotNumeric => "condition_value_not_numeric",
208            LintRule::GenerateNotBoolean => "generate_not_boolean",
209            LintRule::MissingFilter => "missing_filter",
210            LintRule::MissingFilterRules => "missing_filter_rules",
211            LintRule::EmptyFilterRules => "empty_filter_rules",
212            LintRule::MissingFilterSelection => "missing_filter_selection",
213            LintRule::MissingFilterCondition => "missing_filter_condition",
214            LintRule::FilterHasLevel => "filter_has_level",
215            LintRule::FilterHasStatus => "filter_has_status",
216            LintRule::MissingFilterLogsource => "missing_filter_logsource",
217            LintRule::NullInValueList => "null_in_value_list",
218            LintRule::SingleValueAllModifier => "single_value_all_modifier",
219            LintRule::AllWithRe => "all_with_re",
220            LintRule::IncompatibleModifiers => "incompatible_modifiers",
221            LintRule::EmptyValueList => "empty_value_list",
222            LintRule::WildcardOnlyValue => "wildcard_only_value",
223            LintRule::FlattenedArrayCorrelation => "flattened_array_correlation",
224            LintRule::UnsupportedSigmaVersion => "unsupported_sigma_version",
225            LintRule::ArrayMatchingWithoutVersion => "array_matching_without_version",
226            LintRule::SigmaVersionMismatch => "sigma_version_mismatch",
227            LintRule::UnknownRuleReference => "unknown_rule_reference",
228            LintRule::UnknownKey => "unknown_key",
229        };
230        write!(f, "{s}")
231    }
232}
233
/// A source span (line/column, both 0-indexed).
///
/// Spans produced by this module's path resolver always cover one whole
/// line: `start_col` is 0 and `end_col` is that line's `str::len()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub struct Span {
    /// Line on which the span starts.
    pub start_line: u32,
    /// Column at which the span starts.
    pub start_col: u32,
    /// Line on which the span ends.
    pub end_line: u32,
    /// Column at which the span ends.
    pub end_col: u32,
}
242
243// =============================================================================
244// Auto-fix types
245// =============================================================================
246
/// Whether a fix is safe to apply automatically or needs manual review.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum FixDisposition {
    /// The fix may be applied automatically.
    Safe,
    /// The fix needs manual review before being applied.
    Unsafe,
}
253
/// A single patch operation within a [`Fix`].
// NOTE(review): the variant docs below are inferred from the variant and field
// names; the code that applies patches is not in this file section — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum FixPatch {
    /// Presumably replaces the value found at `path` with `new_value`.
    ReplaceValue { path: String, new_value: String },
    /// Presumably renames the key found at `path` to `new_key`.
    ReplaceKey { path: String, new_key: String },
    /// Presumably removes the node found at `path`.
    Remove { path: String },
}
261
/// A suggested fix for a lint finding.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Fix {
    /// Short human-readable label for the fix.
    pub title: String,
    /// Whether the fix is safe to apply automatically.
    pub disposition: FixDisposition,
    /// The patch operations that make up the fix.
    pub patches: Vec<FixPatch>,
}
269
/// A single lint finding.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct LintWarning {
    /// Which lint rule produced the finding.
    pub rule: LintRule,
    /// How serious the finding is.
    pub severity: Severity,
    /// Human-readable description of the problem.
    pub message: String,
    /// `/`-separated location inside the document (e.g. `/correlation/rules`;
    /// `/` for the document root).
    pub path: String,
    /// Source location of `path`; only filled in when linting from text.
    pub span: Option<Span>,
    /// Optional suggested fix.
    pub fix: Option<Fix>,
}
280
281impl fmt::Display for LintWarning {
282    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
283        write!(
284            f,
285            "{}[{}]: {}\n    --> {}",
286            self.severity, self.rule, self.message, self.path
287        )
288    }
289}
290
/// Result of linting a single file (may contain multiple YAML documents).
#[derive(Debug, Clone, Serialize)]
pub struct FileLintResult {
    /// Path of the file that was linted.
    pub path: std::path::PathBuf,
    /// Every finding reported for the file, across all of its documents.
    pub warnings: Vec<LintWarning>,
}
297
298impl FileLintResult {
299    pub fn has_errors(&self) -> bool {
300        self.warnings.iter().any(|w| w.severity == Severity::Error)
301    }
302
303    pub fn error_count(&self) -> usize {
304        self.warnings
305            .iter()
306            .filter(|w| w.severity == Severity::Error)
307            .count()
308    }
309
310    pub fn warning_count(&self) -> usize {
311        self.warnings
312            .iter()
313            .filter(|w| w.severity == Severity::Warning)
314            .count()
315    }
316
317    pub fn info_count(&self) -> usize {
318        self.warnings
319            .iter()
320            .filter(|w| w.severity == Severity::Info)
321            .count()
322    }
323
324    pub fn hint_count(&self) -> usize {
325        self.warnings
326            .iter()
327            .filter(|w| w.severity == Severity::Hint)
328            .count()
329    }
330}
331
332// =============================================================================
333// Helpers (shared with rule submodules)
334// =============================================================================
335
336static KEY_CACHE: LazyLock<HashMap<&'static str, Value>> = LazyLock::new(|| {
337    [
338        "action",
339        "author",
340        "category",
341        "condition",
342        "correlation",
343        "date",
344        "description",
345        "detection",
346        "falsepositives",
347        "field",
348        "fields",
349        "filter",
350        "gap",
351        "generate",
352        "group-by",
353        "id",
354        "level",
355        "logsource",
356        "modified",
357        "name",
358        "product",
359        "references",
360        "related",
361        "rsigma.gap",
362        "rsigma.window",
363        "rules",
364        "scope",
365        "selection",
366        "service",
367        "sigma-version",
368        "status",
369        "tags",
370        "taxonomy",
371        "timeframe",
372        "timespan",
373        "title",
374        "type",
375        "window",
376    ]
377    .into_iter()
378    .map(|n| (n, Value::String(n.into())))
379    .collect()
380});
381
382pub(crate) fn key(s: &str) -> &'static Value {
383    KEY_CACHE
384        .get(s)
385        .unwrap_or_else(|| panic!("lint key not pre-cached: \"{s}\" — add it to KEY_CACHE"))
386}
387
388pub(crate) fn get_str<'a>(m: &'a yaml_serde::Mapping, k: &str) -> Option<&'a str> {
389    m.get(key(k)).and_then(|v| v.as_str())
390}
391
392pub(crate) fn get_mapping<'a>(
393    m: &'a yaml_serde::Mapping,
394    k: &str,
395) -> Option<&'a yaml_serde::Mapping> {
396    m.get(key(k)).and_then(|v| v.as_mapping())
397}
398
399pub(crate) fn get_seq<'a>(m: &'a yaml_serde::Mapping, k: &str) -> Option<&'a yaml_serde::Sequence> {
400    m.get(key(k)).and_then(|v| v.as_sequence())
401}
402
403pub(crate) fn warn(
404    rule: LintRule,
405    severity: Severity,
406    message: impl Into<String>,
407    path: impl Into<String>,
408) -> LintWarning {
409    LintWarning {
410        rule,
411        severity,
412        message: message.into(),
413        path: path.into(),
414        span: None,
415        fix: None,
416    }
417}
418
419pub(crate) fn err(
420    rule: LintRule,
421    message: impl Into<String>,
422    path: impl Into<String>,
423) -> LintWarning {
424    warn(rule, Severity::Error, message, path)
425}
426
427pub(crate) fn warning(
428    rule: LintRule,
429    message: impl Into<String>,
430    path: impl Into<String>,
431) -> LintWarning {
432    warn(rule, Severity::Warning, message, path)
433}
434
435pub(crate) fn info(
436    rule: LintRule,
437    message: impl Into<String>,
438    path: impl Into<String>,
439) -> LintWarning {
440    warn(rule, Severity::Info, message, path)
441}
442
443pub(crate) fn safe_fix(title: impl Into<String>, patches: Vec<FixPatch>) -> Option<Fix> {
444    Some(Fix {
445        title: title.into(),
446        disposition: FixDisposition::Safe,
447        patches,
448    })
449}
450
451/// Find the closest match for `input` among `candidates` using edit distance.
452pub(crate) fn closest_match<'a>(
453    input: &str,
454    candidates: &[&'a str],
455    max_distance: usize,
456) -> Option<&'a str> {
457    candidates
458        .iter()
459        .filter(|c| edit_distance(input, c) <= max_distance)
460        .min_by_key(|c| edit_distance(input, c))
461        .copied()
462}
463
/// Levenshtein edit distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions that turn `a` into `b`. Characters are
/// compared as Unicode scalar values (`char`), not UTF-8 bytes, so replacing
/// one non-ASCII character counts as one edit rather than one per byte.
pub(crate) fn edit_distance(a: &str, b: &str) -> usize {
    let b_chars: Vec<char> = b.chars().collect();
    let b_len = b_chars.len();
    if a.is_empty() {
        return b_len;
    }
    if b_len == 0 {
        return a.chars().count();
    }

    // Two-row dynamic programme: `prev` holds the distances for the previous
    // prefix of `a`, `curr` is filled in for the current one.
    let mut prev: Vec<usize> = (0..=b_len).collect();
    let mut curr = vec![0; b_len + 1];
    for (i, ca) in a.chars().enumerate() {
        curr[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let cost = usize::from(ca != cb);
            curr[j + 1] = (prev[j] + cost).min(prev[j + 1] + 1).min(curr[j] + 1);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[b_len]
}
485
/// Edit-distance threshold for typo detection.
// NOTE(review): presumably passed as `max_distance` to `closest_match` by the
// rule submodules; no use is visible in this part of the file — confirm.
pub(crate) const TYPO_MAX_EDIT_DISTANCE: usize = 2;
487
488// =============================================================================
489// Document type detection
490// =============================================================================
491
/// The kind of Sigma document being linted; see `detect_doc_type` for how it
/// is chosen.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DocType {
    /// A detection rule (the default when no other kind is recognised).
    Detection,
    /// A document with a top-level `correlation` key.
    Correlation,
    /// A document with a top-level `filter` key (and no `correlation` key).
    Filter,
}
498
499impl DocType {
500    pub(crate) fn known_keys(&self) -> &'static [&'static str] {
501        match self {
502            DocType::Detection => rules::shared::KNOWN_KEYS_DETECTION,
503            DocType::Correlation => rules::shared::KNOWN_KEYS_CORRELATION,
504            DocType::Filter => rules::shared::KNOWN_KEYS_FILTER,
505        }
506    }
507}
508
509fn detect_doc_type(m: &yaml_serde::Mapping) -> DocType {
510    if m.contains_key(key("correlation")) {
511        DocType::Correlation
512    } else if m.contains_key(key("filter")) {
513        DocType::Filter
514    } else {
515        DocType::Detection
516    }
517}
518
519fn is_action_fragment(m: &yaml_serde::Mapping) -> bool {
520    matches!(get_str(m, "action"), Some("global" | "reset" | "repeat"))
521}
522
523// =============================================================================
524// Cross-document reference resolution
525// =============================================================================
526
/// An index of referenceable rules (detection rules and correlation rules) by
/// their identifiers (`id` and `name`), each mapped to its resolved
/// specification major. Built file-local for single-text linting and
/// directory-global for directory linting.
struct RuleIndex {
    /// Rule identifier (`id` or `name` value) mapped to the rule's resolved
    /// specification major. A later rule with the same identifier overwrites
    /// an earlier one.
    majors: HashMap<String, u32>,
    /// Whether the index covers the whole set being linted. Only then is an
    /// unresolved reference genuinely missing rather than living in a file
    /// outside the linted scope.
    complete: bool,
}
538
539impl RuleIndex {
540    fn new(complete: bool) -> Self {
541        Self {
542            majors: HashMap::new(),
543            complete,
544        }
545    }
546
547    /// Index every referenceable document in one multi-document YAML text.
548    fn add_text(&mut self, text: &str) {
549        for doc in yaml_serde::Deserializer::from_str(text) {
550            let Ok(value) = Value::deserialize(doc) else {
551                break;
552            };
553            self.add_value(&value);
554        }
555    }
556
557    fn add_value(&mut self, value: &Value) {
558        let Some(m) = value.as_mapping() else {
559            return;
560        };
561        if is_action_fragment(m) {
562            return;
563        }
564        // Only detection rules and correlation rules can be referenced.
565        if matches!(
566            detect_doc_type(m),
567            DocType::Detection | DocType::Correlation
568        ) {
569            let major = crate::version::resolve_major(
570                m.get(key("sigma-version"))
571                    .and_then(crate::version::major_from_value),
572            );
573            for id_key in ["id", "name"] {
574                if let Some(v) = get_str(m, id_key) {
575                    self.majors.insert(v.to_string(), major);
576                }
577            }
578        }
579    }
580}
581
582/// Extract a `rules:` reference list (a single string or a sequence of strings).
583fn reference_list(v: Option<&Value>) -> Vec<String> {
584    match v {
585        Some(Value::String(s)) => vec![s.clone()],
586        Some(Value::Sequence(seq)) => seq
587            .iter()
588            .filter_map(|x| x.as_str().map(str::to_string))
589            .collect(),
590        _ => Vec::new(),
591    }
592}
593
594/// References declared by a correlation rule (`correlation.rules`).
595fn correlation_rule_refs(m: &yaml_serde::Mapping) -> Vec<String> {
596    m.get(key("correlation"))
597        .and_then(|c| c.as_mapping())
598        .map(|c| reference_list(c.get(key("rules"))))
599        .unwrap_or_default()
600}
601
602/// References declared by a filter rule (`filter.rules`). Returns `None` when the
603/// filter targets every rule (`rules: any`), which is not resolvable.
604fn filter_rule_refs(m: &yaml_serde::Mapping) -> Option<Vec<String>> {
605    let f = m.get(key("filter"))?.as_mapping()?;
606    let rules = f.get(key("rules"))?;
607    if let Some(s) = rules.as_str()
608        && s.eq_ignore_ascii_case("any")
609    {
610        return None;
611    }
612    Some(reference_list(Some(rules)))
613}
614
615/// Cross-document lints over the documents in one YAML text, resolving each
616/// correlation/filter reference against `index`:
617///
618/// - `sigma_version_mismatch` (warning): a referencing document and a resolved
619///   referenced rule declare different specification majors.
620/// - `unknown_rule_reference` (warning): a reference resolves to no rule and the
621///   index is complete (so it is genuinely missing, not out of the linted scope).
622fn lint_cross_references(docs: &[Value], index: &RuleIndex, warnings: &mut Vec<LintWarning>) {
623    for value in docs {
624        let Some(m) = value.as_mapping() else {
625            continue;
626        };
627        if is_action_fragment(m) {
628            continue;
629        }
630        let (refs, path) = match detect_doc_type(m) {
631            DocType::Correlation => (correlation_rule_refs(m), "/correlation/rules"),
632            DocType::Filter => match filter_rule_refs(m) {
633                Some(refs) => (refs, "/filter/rules"),
634                None => continue,
635            },
636            DocType::Detection => continue,
637        };
638        if refs.is_empty() {
639            continue;
640        }
641        let self_major = crate::version::resolve_major(
642            m.get(key("sigma-version"))
643                .and_then(crate::version::major_from_value),
644        );
645        let label = get_str(m, "title")
646            .or_else(|| get_str(m, "name"))
647            .unwrap_or("<rule>");
648        for r in refs {
649            match index.majors.get(&r).copied() {
650                Some(target) if target != self_major => warnings.push(warning(
651                    LintRule::SigmaVersionMismatch,
652                    format!(
653                        "'{label}' targets sigma-version major {self_major} but references rule \
654                         '{r}' which targets major {target}; cross-referencing rules must share a \
655                         specification major"
656                    ),
657                    path,
658                )),
659                Some(_) => {}
660                None if index.complete => warnings.push(warning(
661                    LintRule::UnknownRuleReference,
662                    format!(
663                        "'{label}' references rule '{r}', which was not found among the linted \
664                         rules (matched by id or name)"
665                    ),
666                    path,
667                )),
668                None => {}
669            }
670        }
671    }
672}
673
674// =============================================================================
675// Public API
676// =============================================================================
677
678fn lint_yaml_value_ext(value: &Value, extra_ns: &[String]) -> Vec<LintWarning> {
679    let Some(m) = value.as_mapping() else {
680        return vec![err(
681            LintRule::NotAMapping,
682            "document is not a YAML mapping",
683            "/",
684        )];
685    };
686
687    if is_action_fragment(m) {
688        return Vec::new();
689    }
690
691    let mut warnings = Vec::new();
692
693    rules::metadata::lint_shared(m, &mut warnings);
694
695    let doc_type = detect_doc_type(m);
696    match doc_type {
697        DocType::Detection => rules::detection::lint_detection_rule(m, &mut warnings, extra_ns),
698        DocType::Correlation => rules::correlation::lint_correlation_rule(m, &mut warnings),
699        DocType::Filter => rules::filter::lint_filter_rule(m, &mut warnings),
700    }
701
702    rules::version::lint_sigma_version(m, doc_type, &mut warnings);
703    rules::shared::lint_unknown_keys(m, doc_type, &mut warnings);
704
705    warnings
706}
707
708/// Lint a single YAML document value.
709pub fn lint_yaml_value(value: &Value) -> Vec<LintWarning> {
710    lint_yaml_value_ext(value, &[])
711}
712
713fn lint_yaml_str_ext(text: &str, extra_ns: &[String]) -> Vec<LintWarning> {
714    lint_yaml_str_indexed(text, extra_ns, None)
715}
716
717/// Lint one YAML text. When `external_index` is `Some` (directory linting) it is
718/// the directory-global rule index used for cross-reference checks; when `None`,
719/// a file-local index is built from this text, so cross-file references are out
720/// of scope and `unknown_rule_reference` does not fire.
721fn lint_yaml_str_indexed(
722    text: &str,
723    extra_ns: &[String],
724    external_index: Option<&RuleIndex>,
725) -> Vec<LintWarning> {
726    let mut all_warnings = Vec::new();
727    let mut docs: Vec<Value> = Vec::new();
728
729    for doc in yaml_serde::Deserializer::from_str(text) {
730        let value: Value = match Value::deserialize(doc) {
731            Ok(v) => v,
732            Err(e) => {
733                let mut w = err(
734                    LintRule::YamlParseError,
735                    format!("YAML parse error: {e}"),
736                    "/",
737                );
738                if let Some(loc) = e.location() {
739                    w.span = Some(Span {
740                        start_line: loc.line().saturating_sub(1) as u32,
741                        start_col: loc.column() as u32,
742                        end_line: loc.line().saturating_sub(1) as u32,
743                        end_col: loc.column() as u32 + 1,
744                    });
745                }
746                all_warnings.push(w);
747                break;
748            }
749        };
750
751        for mut w in lint_yaml_value_ext(&value, extra_ns) {
752            w.span = resolve_path_to_span(text, &w.path);
753            all_warnings.push(w);
754        }
755        docs.push(value);
756    }
757
758    // Cross-document checks resolve references against the directory-global index
759    // when given, otherwise a file-local index built from this text's documents.
760    let local_index;
761    let index = match external_index {
762        Some(idx) => idx,
763        None => {
764            let mut idx = RuleIndex::new(false);
765            for v in &docs {
766                idx.add_value(v);
767            }
768            local_index = idx;
769            &local_index
770        }
771    };
772    let mut xref = Vec::new();
773    lint_cross_references(&docs, index, &mut xref);
774    for mut w in xref {
775        w.span = resolve_path_to_span(text, &w.path);
776        all_warnings.push(w);
777    }
778
779    all_warnings
780}
781
782/// Lint a raw YAML string, returning warnings with resolved source spans.
783pub fn lint_yaml_str(text: &str) -> Vec<LintWarning> {
784    lint_yaml_str_ext(text, &[])
785}
786
787fn resolve_path_to_span(text: &str, path: &str) -> Option<Span> {
788    if path == "/" || path.is_empty() {
789        for (i, line) in text.lines().enumerate() {
790            let trimmed = line.trim();
791            if !trimmed.is_empty() && !trimmed.starts_with('#') && trimmed != "---" {
792                return Some(Span {
793                    start_line: i as u32,
794                    start_col: 0,
795                    end_line: i as u32,
796                    end_col: line.len() as u32,
797                });
798            }
799        }
800        return None;
801    }
802
803    let segments: Vec<&str> = path.strip_prefix('/').unwrap_or(path).split('/').collect();
804
805    if segments.is_empty() {
806        return None;
807    }
808
809    let lines: Vec<&str> = text.lines().collect();
810    let mut current_indent: i32 = -1;
811    let mut search_start = 0usize;
812    let mut last_matched_line: Option<usize> = None;
813
814    for segment in &segments {
815        let array_index: Option<usize> = segment.parse().ok();
816        let mut found = false;
817
818        let mut line_num = search_start;
819        while line_num < lines.len() {
820            let line = lines[line_num];
821            let trimmed = line.trim();
822            if trimmed.is_empty() || trimmed.starts_with('#') {
823                line_num += 1;
824                continue;
825            }
826
827            let indent = (line.len() - trimmed.len()) as i32;
828
829            if indent <= current_indent && found {
830                break;
831            }
832            if indent <= current_indent {
833                line_num += 1;
834                continue;
835            }
836
837            if let Some(idx) = array_index {
838                if trimmed.starts_with("- ") && indent > current_indent {
839                    let mut count = 0usize;
840                    for (offset, sl) in lines[search_start..].iter().enumerate() {
841                        let scan = search_start + offset;
842                        let st = sl.trim();
843                        if st.is_empty() || st.starts_with('#') {
844                            continue;
845                        }
846                        let si = (sl.len() - st.len()) as i32;
847                        if si == indent && st.starts_with("- ") {
848                            if count == idx {
849                                last_matched_line = Some(scan);
850                                search_start = scan + 1;
851                                current_indent = indent;
852                                found = true;
853                                break;
854                            }
855                            count += 1;
856                        }
857                        if si < indent && count > 0 {
858                            break;
859                        }
860                    }
861                    break;
862                }
863            } else {
864                let key_pattern = format!("{segment}:");
865                if trimmed.starts_with(&key_pattern) || trimmed == *segment {
866                    last_matched_line = Some(line_num);
867                    search_start = line_num + 1;
868                    current_indent = indent;
869                    found = true;
870                    break;
871                }
872            }
873
874            line_num += 1;
875        }
876
877        if !found && last_matched_line.is_none() {
878            break;
879        }
880    }
881
882    last_matched_line.map(|line_num| {
883        let line = lines[line_num];
884        Span {
885            start_line: line_num as u32,
886            start_col: 0,
887            end_line: line_num as u32,
888            end_col: line.len() as u32,
889        }
890    })
891}
892
893/// Lint all YAML documents in a file.
894pub fn lint_yaml_file(path: &Path) -> crate::error::Result<FileLintResult> {
895    let content = std::fs::read_to_string(path)?;
896    let warnings = lint_yaml_str(&content);
897    Ok(FileLintResult {
898        path: path.to_path_buf(),
899        warnings,
900    })
901}
902
903/// Recursively collect `.yml`/`.yaml` file paths under `dir`, in sorted
904/// depth-first order, skipping hidden directories and any path matching the
905/// exclude set (relative to `base`). Symlink loops are guarded by `visited`.
906fn collect_yaml_files(
907    dir: &Path,
908    base: &Path,
909    exclude_set: Option<&globset::GlobSet>,
910    files: &mut Vec<std::path::PathBuf>,
911    visited: &mut HashSet<std::path::PathBuf>,
912) -> crate::error::Result<()> {
913    let canonical = match dir.canonicalize() {
914        Ok(p) => p,
915        Err(_) => return Ok(()),
916    };
917    if !visited.insert(canonical) {
918        return Ok(());
919    }
920
921    let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
922    entries.sort_by_key(|e| e.path());
923
924    for entry in entries {
925        let path = entry.path();
926
927        if let Some(gs) = exclude_set
928            && let Ok(rel) = path.strip_prefix(base)
929            && gs.is_match(rel)
930        {
931            continue;
932        }
933
934        if path.is_dir() {
935            if path
936                .file_name()
937                .and_then(|n| n.to_str())
938                .is_some_and(|n| n.starts_with('.'))
939            {
940                continue;
941            }
942            collect_yaml_files(&path, base, exclude_set, files, visited)?;
943        } else if matches!(
944            path.extension().and_then(|e| e.to_str()),
945            Some("yml" | "yaml")
946        ) {
947            files.push(path);
948        }
949    }
950    Ok(())
951}
952
953/// Two-pass directory lint: collect and read every file once to build a
954/// directory-global rule index, then lint each file against it so
955/// cross-reference checks see rules defined in sibling files.
956fn lint_directory_impl(
957    dir: &Path,
958    config: Option<&LintConfig>,
959) -> crate::error::Result<Vec<FileLintResult>> {
960    let exclude_set = config.and_then(LintConfig::build_exclude_set);
961    let mut files = Vec::new();
962    let mut visited = HashSet::new();
963    collect_yaml_files(dir, dir, exclude_set.as_ref(), &mut files, &mut visited)?;
964
965    // Read each file once and index every referenceable rule across the tree.
966    let mut index = RuleIndex::new(true);
967    let mut contents: Vec<(std::path::PathBuf, std::result::Result<String, String>)> =
968        Vec::with_capacity(files.len());
969    for path in files {
970        match std::fs::read_to_string(&path) {
971            Ok(text) => {
972                index.add_text(&text);
973                contents.push((path, Ok(text)));
974            }
975            Err(e) => contents.push((path, Err(format!("error reading file: {e}")))),
976        }
977    }
978
979    let mut results = Vec::with_capacity(contents.len());
980    for (path, content) in contents {
981        match content {
982            Ok(text) => {
983                let warnings = match config {
984                    Some(cfg) => {
985                        let w = lint_yaml_str_indexed(&text, &cfg.tag_namespaces, Some(&index));
986                        apply_suppressions(w, cfg, &parse_inline_suppressions(&text))
987                    }
988                    None => lint_yaml_str_indexed(&text, &[], Some(&index)),
989                };
990                results.push(FileLintResult { path, warnings });
991            }
992            Err(msg) => results.push(FileLintResult {
993                path,
994                warnings: vec![err(LintRule::FileReadError, msg, "/")],
995            }),
996        }
997    }
998    Ok(results)
999}
1000
1001/// Lint all `.yml`/`.yaml` files in a directory recursively.
1002pub fn lint_yaml_directory(dir: &Path) -> crate::error::Result<Vec<FileLintResult>> {
1003    lint_directory_impl(dir, None)
1004}
1005
1006// =============================================================================
1007// Lint configuration & suppression
1008// =============================================================================
1009
/// Configuration for lint rule suppression and severity overrides.
#[derive(Debug, Clone, Default, Serialize)]
pub struct LintConfig {
    /// Rule names whose findings are dropped entirely. Names are compared
    /// against the `Display` rendering of a `LintRule` (e.g. `missing_title`).
    pub disabled_rules: HashSet<String>,
    /// Per-rule severity replacements, keyed by the same rule names.
    pub severity_overrides: HashMap<String, Severity>,
    /// Glob patterns for paths to skip during directory linting; they are
    /// matched against paths relative to the directory being linted.
    pub exclude_patterns: Vec<String>,
    /// Extra tag namespaces recognised in addition to the built-in set.
    pub tag_namespaces: Vec<String>,
}
1019
/// On-disk shape of a lint config file, as deserialized before validation.
///
/// Every key is optional and defaults to an empty collection.
/// `LintConfig::load` converts this into a [`LintConfig`].
#[derive(Debug, Deserialize)]
struct RawLintConfig {
    /// Names of rules to disable.
    #[serde(default)]
    disabled_rules: Vec<String>,
    /// Rule name -> severity label; labels are validated by `LintConfig::load`.
    #[serde(default)]
    severity_overrides: HashMap<String, String>,
    /// Exclude glob patterns (becomes `LintConfig::exclude_patterns`).
    #[serde(default)]
    exclude: Vec<String>,
    /// Additional tag namespaces; lowercased by `LintConfig::load`.
    #[serde(default)]
    tag_namespaces: Vec<String>,
}
1031
/// Drop repeated entries from `items` in place, keeping only the first
/// occurrence of each value and leaving the relative order untouched.
///
/// Used so that merged `exclude_patterns` / `tag_namespaces` stay stable and
/// do not repeat a value supplied by both the config file and a CLI flag.
fn dedup_preserving_order(items: &mut Vec<String>) {
    let mut already_seen: HashSet<String> = HashSet::new();
    items.retain(|candidate| {
        if already_seen.contains(candidate) {
            false
        } else {
            already_seen.insert(candidate.clone());
            true
        }
    });
}
1039
1040impl LintConfig {
1041    pub fn load(path: &Path) -> crate::error::Result<Self> {
1042        let content = std::fs::read_to_string(path)?;
1043        let raw: RawLintConfig = yaml_serde::from_str(&content)?;
1044
1045        let disabled_rules: HashSet<String> = raw.disabled_rules.into_iter().collect();
1046        let mut severity_overrides = HashMap::new();
1047        for (rule, sev_str) in &raw.severity_overrides {
1048            let sev = match sev_str.as_str() {
1049                "error" => Severity::Error,
1050                "warning" => Severity::Warning,
1051                "info" => Severity::Info,
1052                "hint" => Severity::Hint,
1053                other => {
1054                    return Err(crate::error::SigmaParserError::InvalidRule(format!(
1055                        "invalid severity '{other}' for rule '{rule}' in lint config"
1056                    )));
1057                }
1058            };
1059            severity_overrides.insert(rule.clone(), sev);
1060        }
1061
1062        let mut exclude_patterns = raw.exclude;
1063        dedup_preserving_order(&mut exclude_patterns);
1064
1065        let mut tag_namespaces: Vec<String> = raw
1066            .tag_namespaces
1067            .into_iter()
1068            .map(|s| s.to_lowercase())
1069            .collect();
1070        dedup_preserving_order(&mut tag_namespaces);
1071
1072        Ok(LintConfig {
1073            disabled_rules,
1074            severity_overrides,
1075            exclude_patterns,
1076            tag_namespaces,
1077        })
1078    }
1079
1080    pub fn find_in_ancestors(start_path: &Path) -> Option<std::path::PathBuf> {
1081        let dir = if start_path.is_file() {
1082            start_path.parent()?
1083        } else {
1084            start_path
1085        };
1086
1087        let mut current = dir;
1088        loop {
1089            let candidate = current.join(".rsigma-lint.yml");
1090            if candidate.is_file() {
1091                return Some(candidate);
1092            }
1093            let candidate_yaml = current.join(".rsigma-lint.yaml");
1094            if candidate_yaml.is_file() {
1095                return Some(candidate_yaml);
1096            }
1097            current = current.parent()?;
1098        }
1099    }
1100
1101    pub fn merge(&mut self, other: &LintConfig) {
1102        self.disabled_rules
1103            .extend(other.disabled_rules.iter().cloned());
1104        for (rule, sev) in &other.severity_overrides {
1105            self.severity_overrides.insert(rule.clone(), *sev);
1106        }
1107        self.exclude_patterns
1108            .extend(other.exclude_patterns.iter().cloned());
1109        dedup_preserving_order(&mut self.exclude_patterns);
1110        self.tag_namespaces
1111            .extend(other.tag_namespaces.iter().cloned());
1112        dedup_preserving_order(&mut self.tag_namespaces);
1113    }
1114
1115    pub fn is_disabled(&self, rule: &LintRule) -> bool {
1116        self.disabled_rules.contains(&rule.to_string())
1117    }
1118
1119    pub fn build_exclude_set(&self) -> Option<globset::GlobSet> {
1120        if self.exclude_patterns.is_empty() {
1121            return None;
1122        }
1123        let mut builder = globset::GlobSetBuilder::new();
1124        for pat in &self.exclude_patterns {
1125            if let Ok(glob) = globset::GlobBuilder::new(pat)
1126                .literal_separator(false)
1127                .build()
1128            {
1129                builder.add(glob);
1130            }
1131        }
1132        builder.build().ok()
1133    }
1134}
1135
1136// =============================================================================
1137// Inline suppression comments
1138// =============================================================================
1139
/// Suppression directives collected from `# rsigma-disable…` comments in a
/// YAML source text (see `parse_inline_suppressions`).
#[derive(Debug, Clone, Default)]
pub struct InlineSuppressions {
    /// Set by a bare `# rsigma-disable` comment: every finding is suppressed.
    pub disable_all: bool,
    /// Rule names listed after `# rsigma-disable`; suppressed for the whole text.
    pub file_disabled: HashSet<String>,
    /// Suppressions from `# rsigma-disable-next-line`, keyed by the zero-based
    /// index of the line following the comment. `None` suppresses every rule
    /// on that line; `Some(set)` suppresses only the named rules.
    pub line_disabled: HashMap<u32, Option<HashSet<String>>>,
}
1146
1147pub fn parse_inline_suppressions(text: &str) -> InlineSuppressions {
1148    let mut result = InlineSuppressions::default();
1149
1150    for (i, line) in text.lines().enumerate() {
1151        let trimmed = line.trim();
1152
1153        let comment = if let Some(pos) = find_yaml_comment(trimmed) {
1154            trimmed[pos + 1..].trim()
1155        } else {
1156            continue;
1157        };
1158
1159        if let Some(rest) = comment.strip_prefix("rsigma-disable-next-line") {
1160            let rest = rest.trim();
1161            let next_line = (i + 1) as u32;
1162            if rest.is_empty() {
1163                result.line_disabled.insert(next_line, None);
1164            } else {
1165                let rules: HashSet<String> = rest
1166                    .split(',')
1167                    .map(|s| s.trim().to_string())
1168                    .filter(|s| !s.is_empty())
1169                    .collect();
1170                if !rules.is_empty() {
1171                    result
1172                        .line_disabled
1173                        .entry(next_line)
1174                        .and_modify(|existing| {
1175                            if let Some(existing_set) = existing {
1176                                existing_set.extend(rules.iter().cloned());
1177                            }
1178                        })
1179                        .or_insert(Some(rules));
1180                }
1181            }
1182        } else if let Some(rest) = comment.strip_prefix("rsigma-disable") {
1183            let rest = rest.trim();
1184            if rest.is_empty() {
1185                result.disable_all = true;
1186            } else {
1187                for rule in rest.split(',') {
1188                    let rule = rule.trim();
1189                    if !rule.is_empty() {
1190                        result.file_disabled.insert(rule.to_string());
1191                    }
1192                }
1193            }
1194        }
1195    }
1196
1197    result
1198}
1199
/// Locate the start of a YAML comment within a single line.
///
/// Returns the byte offset of the `#` that begins the comment, or `None`
/// when the line carries no comment. The scan follows the YAML rules that
/// matter for one-line scalars:
///
/// * a `#` only starts a comment at the beginning of the line or when it is
///   preceded by whitespace, so `http://host/#fragment` is not a comment;
/// * a quote only opens a quoted scalar at the start of a token (line start,
///   after whitespace, or after one of `[`, `{`, `,`, `:`), so the apostrophe
///   in `don't` does not hide a trailing comment;
/// * inside single quotes `''` is an escaped quote, and inside double quotes
///   a backslash escapes the following character.
fn find_yaml_comment(line: &str) -> Option<usize> {
    #[derive(Clone, Copy)]
    enum Scalar {
        Plain,
        Single,
        Double,
    }

    let mut state = Scalar::Plain;
    let mut prev: Option<char> = None;
    let mut chars = line.char_indices().peekable();

    while let Some((idx, ch)) = chars.next() {
        match state {
            Scalar::Single => {
                if ch == '\'' {
                    if matches!(chars.peek(), Some(&(_, '\''))) {
                        // `''` is an escaped quote: consume the pair, stay quoted.
                        chars.next();
                    } else {
                        state = Scalar::Plain;
                    }
                }
            }
            Scalar::Double => match ch {
                '\\' => {
                    // Skip the escaped character so `\"` does not close the scalar.
                    chars.next();
                }
                '"' => state = Scalar::Plain,
                _ => {}
            },
            Scalar::Plain => {
                let after_space = match prev {
                    None => true,
                    Some(p) => p.is_whitespace(),
                };
                let token_start = after_space || matches!(prev, Some('[' | '{' | ',' | ':'));
                match ch {
                    '\'' if token_start => state = Scalar::Single,
                    '"' if token_start => state = Scalar::Double,
                    '#' if after_space => return Some(idx),
                    _ => {}
                }
            }
        }
        prev = Some(ch);
    }
    None
}
1213
1214impl InlineSuppressions {
1215    pub fn is_suppressed(&self, warning: &LintWarning) -> bool {
1216        if self.disable_all {
1217            return true;
1218        }
1219
1220        let rule_name = warning.rule.to_string();
1221        if self.file_disabled.contains(&rule_name) {
1222            return true;
1223        }
1224
1225        if let Some(span) = &warning.span
1226            && let Some(line_rules) = self.line_disabled.get(&span.start_line)
1227        {
1228            return match line_rules {
1229                None => true,
1230                Some(rules) => rules.contains(&rule_name),
1231            };
1232        }
1233
1234        false
1235    }
1236}
1237
1238// =============================================================================
1239// Suppression filtering
1240// =============================================================================
1241
1242pub fn apply_suppressions(
1243    warnings: Vec<LintWarning>,
1244    config: &LintConfig,
1245    inline: &InlineSuppressions,
1246) -> Vec<LintWarning> {
1247    warnings
1248        .into_iter()
1249        .filter(|w| !config.is_disabled(&w.rule))
1250        .filter(|w| !inline.is_suppressed(w))
1251        .map(|mut w| {
1252            let rule_name = w.rule.to_string();
1253            if let Some(sev) = config.severity_overrides.get(&rule_name) {
1254                w.severity = *sev;
1255            }
1256            w
1257        })
1258        .collect()
1259}
1260
1261pub fn lint_yaml_str_with_config(text: &str, config: &LintConfig) -> Vec<LintWarning> {
1262    let warnings = lint_yaml_str_ext(text, &config.tag_namespaces);
1263    let inline = parse_inline_suppressions(text);
1264    apply_suppressions(warnings, config, &inline)
1265}
1266
1267pub fn lint_yaml_file_with_config(
1268    path: &Path,
1269    config: &LintConfig,
1270) -> crate::error::Result<FileLintResult> {
1271    let content = std::fs::read_to_string(path)?;
1272    let warnings = lint_yaml_str_with_config(&content, config);
1273    Ok(FileLintResult {
1274        path: path.to_path_buf(),
1275        warnings,
1276    })
1277}
1278
1279pub fn lint_yaml_directory_with_config(
1280    dir: &Path,
1281    config: &LintConfig,
1282) -> crate::error::Result<Vec<FileLintResult>> {
1283    lint_directory_impl(dir, Some(config))
1284}
1285
1286// =============================================================================
1287// Tests
1288// =============================================================================
1289
1290#[cfg(test)]
1291mod tests {
1292    use super::*;
1293
1294    fn yaml_value(yaml: &str) -> Value {
1295        yaml_serde::from_str(yaml).unwrap()
1296    }
1297
1298    fn lint(yaml: &str) -> Vec<LintWarning> {
1299        lint_yaml_value(&yaml_value(yaml))
1300    }
1301
1302    fn has_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
1303        warnings.iter().any(|w| w.rule == rule)
1304    }
1305
1306    fn has_no_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
1307        !has_rule(warnings, rule)
1308    }
1309
1310    #[test]
1311    fn valid_detection_rule_no_errors() {
1312        let w = lint(
1313            r#"
1314title: Test Rule
1315id: 929a690e-bef0-4204-a928-ef5e620d6fcc
1316status: test
1317logsource:
1318    category: process_creation
1319    product: windows
1320detection:
1321    selection:
1322        CommandLine|contains: 'whoami'
1323    condition: selection
1324level: medium
1325tags:
1326    - attack.execution
1327    - attack.t1059
1328"#,
1329        );
1330        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
1331        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
1332    }
1333
1334    #[test]
1335    fn not_a_mapping() {
1336        let v: yaml_serde::Value = yaml_serde::from_str("- item1\n- item2").unwrap();
1337        let w = lint_yaml_value(&v);
1338        assert!(has_rule(&w, LintRule::NotAMapping));
1339    }
1340
1341    #[test]
1342    fn lint_yaml_str_produces_spans() {
1343        let text = r#"title: Test
1344status: invalid_status
1345logsource:
1346    category: test
1347detection:
1348    selection:
1349        field: value
1350    condition: selection
1351level: medium
1352"#;
1353        let warnings = lint_yaml_str(text);
1354        let invalid_status = warnings.iter().find(|w| w.rule == LintRule::InvalidStatus);
1355        assert!(invalid_status.is_some(), "expected InvalidStatus warning");
1356        let span = invalid_status.unwrap().span;
1357        assert!(span.is_some(), "expected span to be resolved");
1358        assert_eq!(span.unwrap().start_line, 1);
1359    }
1360
1361    #[test]
1362    fn yaml_parse_error_uses_correct_rule() {
1363        let text = "title: [unclosed";
1364        let warnings = lint_yaml_str(text);
1365        assert!(has_rule(&warnings, LintRule::YamlParseError));
1366        assert!(has_no_rule(&warnings, LintRule::MissingTitle));
1367    }
1368
1369    #[test]
1370    fn action_global_skipped() {
1371        let w = lint(
1372            r#"
1373action: global
1374title: Global Template
1375logsource:
1376    product: windows
1377"#,
1378        );
1379        assert!(w.is_empty());
1380    }
1381
1382    #[test]
1383    fn action_reset_skipped() {
1384        let w = lint(
1385            r#"
1386action: reset
1387"#,
1388        );
1389        assert!(w.is_empty());
1390    }
1391
1392    #[test]
1393    fn resolve_path_to_span_root() {
1394        let text = "title: Test\nstatus: test\n";
1395        let span = resolve_path_to_span(text, "/");
1396        assert!(span.is_some());
1397        assert_eq!(span.unwrap().start_line, 0);
1398    }
1399
1400    #[test]
1401    fn resolve_path_to_span_top_level_key() {
1402        let text = "title: Test\nstatus: test\nlevel: high\n";
1403        let span = resolve_path_to_span(text, "/status");
1404        assert!(span.is_some());
1405        assert_eq!(span.unwrap().start_line, 1);
1406    }
1407
1408    #[test]
1409    fn resolve_path_to_span_nested_key() {
1410        let text = "title: Test\nlogsource:\n    category: test\n    product: windows\n";
1411        let span = resolve_path_to_span(text, "/logsource/product");
1412        assert!(span.is_some());
1413        assert_eq!(span.unwrap().start_line, 3);
1414    }
1415
1416    #[test]
1417    fn resolve_path_to_span_missing_key() {
1418        let text = "title: Test\nstatus: test\n";
1419        let span = resolve_path_to_span(text, "/nonexistent");
1420        assert!(span.is_none());
1421    }
1422
1423    #[test]
1424    fn multi_doc_yaml_lints_all_documents() {
1425        let text = r#"title: Rule 1
1426logsource:
1427    category: test
1428detection:
1429    selection:
1430        field: value
1431    condition: selection
1432level: medium
1433---
1434title: Rule 2
1435status: bad_status
1436logsource:
1437    category: test
1438detection:
1439    selection:
1440        field: value
1441    condition: selection
1442level: medium
1443"#;
1444        let warnings = lint_yaml_str(text);
1445        assert!(has_rule(&warnings, LintRule::InvalidStatus));
1446    }
1447
1448    #[test]
1449    fn severity_display() {
1450        assert_eq!(format!("{}", Severity::Error), "error");
1451        assert_eq!(format!("{}", Severity::Warning), "warning");
1452        assert_eq!(format!("{}", Severity::Info), "info");
1453        assert_eq!(format!("{}", Severity::Hint), "hint");
1454    }
1455
1456    #[test]
1457    fn file_lint_result_has_errors() {
1458        let result = FileLintResult {
1459            path: std::path::PathBuf::from("test.yml"),
1460            warnings: vec![
1461                warning(LintRule::TitleTooLong, "too long", "/title"),
1462                err(
1463                    LintRule::MissingCondition,
1464                    "missing",
1465                    "/detection/condition",
1466                ),
1467            ],
1468        };
1469        assert!(result.has_errors());
1470        assert_eq!(result.error_count(), 1);
1471        assert_eq!(result.warning_count(), 1);
1472    }
1473
1474    #[test]
1475    fn file_lint_result_no_errors() {
1476        let result = FileLintResult {
1477            path: std::path::PathBuf::from("test.yml"),
1478            warnings: vec![warning(LintRule::TitleTooLong, "too long", "/title")],
1479        };
1480        assert!(!result.has_errors());
1481        assert_eq!(result.error_count(), 0);
1482        assert_eq!(result.warning_count(), 1);
1483    }
1484
1485    #[test]
1486    fn file_lint_result_empty() {
1487        let result = FileLintResult {
1488            path: std::path::PathBuf::from("test.yml"),
1489            warnings: vec![],
1490        };
1491        assert!(!result.has_errors());
1492        assert_eq!(result.error_count(), 0);
1493        assert_eq!(result.warning_count(), 0);
1494    }
1495
1496    #[test]
1497    fn lint_warning_display() {
1498        let w = err(
1499            LintRule::MissingTitle,
1500            "missing required field 'title'",
1501            "/title",
1502        );
1503        let display = format!("{w}");
1504        assert!(display.contains("error"));
1505        assert!(display.contains("missing_title"));
1506        assert!(display.contains("/title"));
1507    }
1508
1509    #[test]
1510    fn file_lint_result_info_count() {
1511        let result = FileLintResult {
1512            path: std::path::PathBuf::from("test.yml"),
1513            warnings: vec![
1514                info(LintRule::MissingDescription, "missing desc", "/description"),
1515                info(LintRule::MissingAuthor, "missing author", "/author"),
1516                warning(LintRule::TitleTooLong, "too long", "/title"),
1517            ],
1518        };
1519        assert_eq!(result.info_count(), 2);
1520        assert_eq!(result.warning_count(), 1);
1521        assert_eq!(result.error_count(), 0);
1522        assert!(!result.has_errors());
1523    }
1524
1525    #[test]
1526    fn parse_inline_disable_all() {
1527        let text = "# rsigma-disable\ntitle: Test\n";
1528        let sup = parse_inline_suppressions(text);
1529        assert!(sup.disable_all);
1530    }
1531
1532    #[test]
1533    fn parse_inline_disable_specific_rules() {
1534        let text = "# rsigma-disable missing_description, missing_author\ntitle: Test\n";
1535        let sup = parse_inline_suppressions(text);
1536        assert!(!sup.disable_all);
1537        assert!(sup.file_disabled.contains("missing_description"));
1538        assert!(sup.file_disabled.contains("missing_author"));
1539    }
1540
1541    #[test]
1542    fn parse_inline_disable_next_line_all() {
1543        let text = "# rsigma-disable-next-line\ntitle: Test\n";
1544        let sup = parse_inline_suppressions(text);
1545        assert!(!sup.disable_all);
1546        assert!(sup.line_disabled.contains_key(&1));
1547        assert!(sup.line_disabled[&1].is_none());
1548    }
1549
1550    #[test]
1551    fn parse_inline_disable_next_line_specific() {
1552        let text = "title: Test\n# rsigma-disable-next-line missing_level\nlevel: medium\n";
1553        let sup = parse_inline_suppressions(text);
1554        assert!(sup.line_disabled.contains_key(&2));
1555        let rules = sup.line_disabled[&2].as_ref().unwrap();
1556        assert!(rules.contains("missing_level"));
1557    }
1558
1559    #[test]
1560    fn parse_inline_no_comments() {
1561        let text = "title: Test\nstatus: test\n";
1562        let sup = parse_inline_suppressions(text);
1563        assert!(!sup.disable_all);
1564        assert!(sup.file_disabled.is_empty());
1565        assert!(sup.line_disabled.is_empty());
1566    }
1567
1568    #[test]
1569    fn parse_inline_comment_in_quoted_string() {
1570        let text = "description: 'no # rsigma-disable here'\ntitle: Test\n";
1571        let sup = parse_inline_suppressions(text);
1572        assert!(!sup.disable_all);
1573        assert!(sup.file_disabled.is_empty());
1574    }
1575
1576    #[test]
1577    fn apply_suppressions_disables_rule() {
1578        let warnings = vec![
1579            info(LintRule::MissingDescription, "desc", "/description"),
1580            info(LintRule::MissingAuthor, "author", "/author"),
1581            warning(LintRule::TitleTooLong, "title", "/title"),
1582        ];
1583        let mut config = LintConfig::default();
1584        config
1585            .disabled_rules
1586            .insert("missing_description".to_string());
1587        let inline = InlineSuppressions::default();
1588
1589        let result = apply_suppressions(warnings, &config, &inline);
1590        assert_eq!(result.len(), 2);
1591        assert!(
1592            result
1593                .iter()
1594                .all(|w| w.rule != LintRule::MissingDescription)
1595        );
1596    }
1597
1598    #[test]
1599    fn apply_suppressions_severity_override() {
1600        let warnings = vec![warning(LintRule::TitleTooLong, "title too long", "/title")];
1601        let mut config = LintConfig::default();
1602        config
1603            .severity_overrides
1604            .insert("title_too_long".to_string(), Severity::Info);
1605        let inline = InlineSuppressions::default();
1606
1607        let result = apply_suppressions(warnings, &config, &inline);
1608        assert_eq!(result.len(), 1);
1609        assert_eq!(result[0].severity, Severity::Info);
1610    }
1611
1612    #[test]
1613    fn apply_suppressions_inline_file_disable() {
1614        let warnings = vec![
1615            info(LintRule::MissingDescription, "desc", "/description"),
1616            info(LintRule::MissingAuthor, "author", "/author"),
1617        ];
1618        let config = LintConfig::default();
1619        let mut inline = InlineSuppressions::default();
1620        inline.file_disabled.insert("missing_author".to_string());
1621
1622        let result = apply_suppressions(warnings, &config, &inline);
1623        assert_eq!(result.len(), 1);
1624        assert_eq!(result[0].rule, LintRule::MissingDescription);
1625    }
1626
1627    #[test]
1628    fn apply_suppressions_inline_disable_all() {
1629        let warnings = vec![
1630            err(LintRule::MissingTitle, "title", "/title"),
1631            warning(LintRule::TitleTooLong, "long", "/title"),
1632        ];
1633        let config = LintConfig::default();
1634        let inline = InlineSuppressions {
1635            disable_all: true,
1636            ..Default::default()
1637        };
1638
1639        let result = apply_suppressions(warnings, &config, &inline);
1640        assert!(result.is_empty());
1641    }
1642
1643    #[test]
1644    fn apply_suppressions_inline_next_line() {
1645        let mut w1 = warning(LintRule::TitleTooLong, "long", "/title");
1646        w1.span = Some(Span {
1647            start_line: 5,
1648            start_col: 0,
1649            end_line: 5,
1650            end_col: 10,
1651        });
1652        let mut w2 = err(LintRule::InvalidStatus, "bad", "/status");
1653        w2.span = Some(Span {
1654            start_line: 6,
1655            start_col: 0,
1656            end_line: 6,
1657            end_col: 10,
1658        });
1659
1660        let config = LintConfig::default();
1661        let mut inline = InlineSuppressions::default();
1662        inline.line_disabled.insert(5, None);
1663
1664        let result = apply_suppressions(vec![w1, w2], &config, &inline);
1665        assert_eq!(result.len(), 1);
1666        assert_eq!(result[0].rule, LintRule::InvalidStatus);
1667    }
1668
1669    #[test]
1670    fn lint_with_config_disables_rules() {
1671        let text = r#"title: Test
1672logsource:
1673    category: test
1674detection:
1675    selection:
1676        field: value
1677    condition: selection
1678level: medium
1679"#;
1680        let mut config = LintConfig::default();
1681        config
1682            .disabled_rules
1683            .insert("missing_description".to_string());
1684        config.disabled_rules.insert("missing_author".to_string());
1685
1686        let warnings = lint_yaml_str_with_config(text, &config);
1687        assert!(
1688            !warnings
1689                .iter()
1690                .any(|w| w.rule == LintRule::MissingDescription)
1691        );
1692        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
1693    }
1694
1695    #[test]
1696    fn lint_with_inline_disable_next_line() {
1697        let text = r#"title: Test
1698# rsigma-disable-next-line missing_level
1699logsource:
1700    category: test
1701detection:
1702    selection:
1703        field: value
1704    condition: selection
1705"#;
1706        let config = LintConfig::default();
1707        let warnings = lint_yaml_str_with_config(text, &config);
1708        assert!(warnings.iter().any(|w| w.rule == LintRule::MissingLevel));
1709    }
1710
1711    #[test]
1712    fn lint_with_inline_file_disable() {
1713        let text = r#"# rsigma-disable missing_description, missing_author
1714title: Test
1715logsource:
1716    category: test
1717detection:
1718    selection:
1719        field: value
1720    condition: selection
1721level: medium
1722"#;
1723        let config = LintConfig::default();
1724        let warnings = lint_yaml_str_with_config(text, &config);
1725        assert!(
1726            !warnings
1727                .iter()
1728                .any(|w| w.rule == LintRule::MissingDescription)
1729        );
1730        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
1731    }
1732
1733    #[test]
1734    fn lint_with_inline_disable_all() {
1735        let text = r#"# rsigma-disable
1736title: Test
1737status: invalid_status
1738logsource:
1739    category: test
1740detection:
1741    selection:
1742        field: value
1743    condition: selection
1744"#;
1745        let config = LintConfig::default();
1746        let warnings = lint_yaml_str_with_config(text, &config);
1747        assert!(warnings.is_empty());
1748    }
1749
1750    #[test]
1751    fn lint_config_merge() {
1752        let mut base = LintConfig::default();
1753        base.disabled_rules.insert("rule_a".to_string());
1754        base.severity_overrides
1755            .insert("rule_b".to_string(), Severity::Info);
1756
1757        let other = LintConfig {
1758            disabled_rules: ["rule_c".to_string()].into_iter().collect(),
1759            severity_overrides: [("rule_d".to_string(), Severity::Hint)]
1760                .into_iter()
1761                .collect(),
1762            exclude_patterns: vec!["test/**".to_string()],
1763            tag_namespaces: vec!["myns".to_string()],
1764        };
1765
1766        base.merge(&other);
1767        assert!(base.disabled_rules.contains("rule_a"));
1768        assert!(base.disabled_rules.contains("rule_c"));
1769        assert_eq!(base.severity_overrides.get("rule_b"), Some(&Severity::Info));
1770        assert_eq!(base.severity_overrides.get("rule_d"), Some(&Severity::Hint));
1771        assert_eq!(base.exclude_patterns, vec!["test/**".to_string()]);
1772        assert!(base.tag_namespaces.contains(&"myns".to_string()));
1773    }
1774
/// Entries that appear on both sides of a `merge` end up in the list fields
/// only once, with the receiver's entries first.
#[test]
fn lint_config_merge_dedups_lists() {
    // Small helper so the fixtures read as plain string lists.
    let owned = |items: &[&str]| -> Vec<String> {
        items.iter().map(|item| String::from(*item)).collect()
    };

    let mut merged = LintConfig {
        exclude_patterns: owned(&["config/**", "shared/**"]),
        tag_namespaces: owned(&["myorg", "shared"]),
        ..Default::default()
    };
    // "shared/**" and "shared" deliberately repeat entries of `merged`.
    let incoming = LintConfig {
        exclude_patterns: owned(&["shared/**", "extra/**"]),
        tag_namespaces: owned(&["shared", "internal"]),
        ..Default::default()
    };

    merged.merge(&incoming);

    assert_eq!(
        merged.exclude_patterns,
        ["config/**", "shared/**", "extra/**"]
    );
    assert_eq!(merged.tag_namespaces, ["myorg", "shared", "internal"]);
}
1808
/// Loading a config file collapses repeated `exclude` entries and
/// lowercases `tag_namespaces` before de-duplicating them.
#[test]
fn lint_config_load_dedups_and_normalises() {
    use std::io::Write as _;

    let contents = r#"
exclude:
  - "config/**"
  - "config/**"
tag_namespaces:
  - MyOrg
  - myorg
  - internal
"#;
    let mut file = tempfile::NamedTempFile::with_suffix(".yml").unwrap();
    file.write_all(contents.as_bytes()).unwrap();
    let loaded = LintConfig::load(file.path()).unwrap();

    assert_eq!(loaded.exclude_patterns, ["config/**"]);
    // "MyOrg" becomes "myorg", which then merges with the literal "myorg".
    assert_eq!(loaded.tag_namespaces, ["myorg", "internal"]);
}
1831
/// `is_disabled` is true only for rules whose name is in `disabled_rules`.
#[test]
fn lint_config_is_disabled() {
    let mut cfg = LintConfig::default();
    cfg.disabled_rules.insert(String::from("missing_title"));

    let listed = cfg.is_disabled(&LintRule::MissingTitle);
    assert!(listed);

    let unlisted = cfg.is_disabled(&LintRule::EmptyTitle);
    assert!(!unlisted);
}
1839
/// `find_yaml_comment` returns the position of a `#` that starts a comment
/// and returns `None` when the `#` sits inside a quoted value or is absent.
#[test]
fn find_yaml_comment_basic() {
    let cases = [
        ("# comment", Some(0)),
        ("key: value # comment", Some(11)),
        ("key: 'value # not comment'", None),
        ("key: \"value # not comment\"", None),
        ("key: value", None),
    ];

    for (line, expected) in cases {
        assert_eq!(find_yaml_comment(line), expected);
    }
}
1848
/// A rule without a `detection` section is reported as `MissingDetection`,
/// and that finding carries no attached fix.
#[test]
fn no_fix_for_unfixable_rule() {
    let source = r#"
title: Test
logsource:
    category: test
"#;
    let findings = lint(source);
    assert!(has_rule(&findings, LintRule::MissingDetection));

    // Look at the first MissingDetection finding only, as the lookup is by rule.
    let offers_fix = findings
        .iter()
        .find(|finding| finding.rule == LintRule::MissingDetection)
        .is_some_and(|finding| finding.fix.is_some());
    assert!(!offers_fix);
}
1865
/// `LintConfig::load` reads `disabled_rules` and `exclude` from a YAML file,
/// keeping the exclude patterns in file order.
#[test]
fn lint_config_exclude_from_yaml() {
    let yaml = r#"
disabled_rules:
  - missing_description
exclude:
  - "config/**"
  - "**/unsupported/**"
"#;
    // Use a uniquely named temp file, as the other config-loading tests do.
    // A fixed name in the shared temp directory can collide between
    // concurrent test runs, and would be left behind if `load` panicked
    // before the manual cleanup. `NamedTempFile` is removed when dropped.
    let mut tmp = tempfile::NamedTempFile::with_suffix(".yml").unwrap();
    std::io::Write::write_all(&mut tmp, yaml.as_bytes()).unwrap();
    let config = LintConfig::load(tmp.path()).unwrap();

    assert!(config.disabled_rules.contains("missing_description"));
    assert_eq!(config.exclude_patterns.len(), 2);
    assert_eq!(config.exclude_patterns[0], "config/**");
    assert_eq!(config.exclude_patterns[1], "**/unsupported/**");
}
1885
/// A default config yields no exclude set at all.
#[test]
fn lint_config_build_exclude_set_empty() {
    assert!(LintConfig::default().build_exclude_set().is_none());
}
1891
/// The set built from `config/**` matches paths under `config/` at any
/// depth and does not match paths elsewhere.
#[test]
fn lint_config_build_exclude_set_matches() {
    let cfg = LintConfig {
        exclude_patterns: vec![String::from("config/**")],
        ..Default::default()
    };
    let globs = cfg.build_exclude_set().expect("should build");

    let cases = [
        ("config/data_mapping/foo.yaml", true),
        ("config/nested/deep/bar.yml", true),
        ("rules/windows/test.yml", false),
    ];
    for (path, excluded) in cases {
        assert_eq!(globs.is_match(path), excluded);
    }
}
1903
/// A correlation with `sigma-version: 3` that references, by name, a base
/// rule with `sigma-version: 2` in the same file is flagged as a version
/// mismatch.
#[test]
fn cross_ref_version_mismatch_within_file() {
    let document = r#"
title: Base Rule
name: base_rule
sigma-version: 2
logsource:
    category: test
detection:
    selection:
        EventID: 1
    condition: selection
---
title: Brute Force
sigma-version: 3
correlation:
    type: event_count
    rules:
        - base_rule
    group-by:
        - SourceIP
    timespan: 5m
    condition:
        gte: 10
"#;
    let findings = lint_yaml_str(document);

    assert!(has_rule(&findings, LintRule::SigmaVersionMismatch));
    // Linting a single file does not provide a complete rule index, so
    // unknown_rule_reference must stay silent here.
    assert!(has_no_rule(&findings, LintRule::UnknownRuleReference));
}
1936
/// When the base rule and the correlation referencing it both declare
/// `sigma-version: 3`, no version mismatch is reported.
#[test]
fn cross_ref_matching_version_no_mismatch() {
    let document = r#"
title: Base Rule
name: base_rule
sigma-version: 3
logsource:
    category: test
detection:
    selection:
        EventID: 1
    condition: selection
---
title: Brute Force
sigma-version: 3
correlation:
    type: event_count
    rules:
        - base_rule
    group-by:
        - SourceIP
    timespan: 5m
    condition:
        gte: 10
"#;
    let findings = lint_yaml_str(document);
    assert!(has_no_rule(&findings, LintRule::SigmaVersionMismatch));
}
1967
/// `unknown_rule_reference` stays silent when one document is linted on its
/// own, and fires when the same document is linted as part of a directory.
#[test]
fn cross_ref_unknown_only_with_complete_index() {
    let correlation = r#"
title: Brute Force
correlation:
    type: event_count
    rules:
        - nonexistent_rule
    group-by:
        - SourceIP
    timespan: 5m
    condition:
        gte: 10
"#;

    // Single file: the referenced rule could be defined in another file, so
    // the reference is out of scope and must not be flagged.
    let single_file = lint_yaml_str(correlation);
    assert!(has_no_rule(&single_file, LintRule::UnknownRuleReference));

    // Directory: the index is complete, so the dangling reference is flagged.
    let dir = tempfile::tempdir().unwrap();
    let target = dir.path().join("corr.yml");
    std::fs::write(target, correlation).unwrap();

    let per_file = lint_yaml_directory(dir.path()).unwrap();
    let mut flagged = false;
    for warning in per_file.iter().flat_map(|r| &r.warnings) {
        if warning.rule == LintRule::UnknownRuleReference {
            flagged = true;
            break;
        }
    }
    assert!(flagged);
}
2000
/// With the base rule and the correlation in separate files of one
/// directory, the reference resolves: the major-version mismatch is flagged
/// and the reference is not reported as unknown.
#[test]
fn cross_ref_resolves_across_files() {
    let fixtures = [
        (
            "base.yml",
            r#"
title: Base Rule
name: base_rule
sigma-version: 2
logsource:
    category: test
detection:
    selection:
        EventID: 1
    condition: selection
"#,
        ),
        (
            "corr.yml",
            r#"
title: Brute Force
sigma-version: 3
correlation:
    type: event_count
    rules:
        - base_rule
    group-by:
        - SourceIP
    timespan: 5m
    condition:
        gte: 10
"#,
        ),
    ];

    let dir = tempfile::tempdir().unwrap();
    for (file_name, contents) in fixtures {
        std::fs::write(dir.path().join(file_name), contents).unwrap();
    }

    let per_file = lint_yaml_directory(dir.path()).unwrap();

    // One pass over every warning of every file, noting which rules fired.
    let mut saw_mismatch = false;
    let mut saw_unknown = false;
    for warning in per_file.iter().flat_map(|r| &r.warnings) {
        saw_mismatch |= warning.rule == LintRule::SigmaVersionMismatch;
        saw_unknown |= warning.rule == LintRule::UnknownRuleReference;
    }
    assert!(saw_mismatch);
    assert!(!saw_unknown);
}
2043
/// Directory linting reports a non-rule YAML file under `config/` by
/// default, and drops it entirely once `config/**` is excluded, while the
/// real rule file is still linted.
#[test]
fn lint_directory_with_excludes() {
    let root = tempfile::tempdir().unwrap();
    let rules_dir = root.path().join("rules");
    let config_dir = root.path().join("config");
    for dir in [&rules_dir, &config_dir] {
        std::fs::create_dir_all(dir).unwrap();
    }

    let good_rule = r#"
title: Good Rule
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
level: medium
"#;
    std::fs::write(rules_dir.join("good.yml"), good_rule).unwrap();

    let mapping = r#"
Title: Logon
Channel: Security
EventID: 4624
"#;
    std::fs::write(config_dir.join("mapping.yaml"), mapping).unwrap();

    // Baseline: with no excludes, the config file is linted and warned about.
    let baseline = lint_yaml_directory_with_config(root.path(), &LintConfig::default()).unwrap();
    let config_has_warnings = baseline
        .iter()
        .filter(|r| r.path.to_string_lossy().contains("config"))
        .flat_map(|r| &r.warnings)
        .next()
        .is_some();
    assert!(
        config_has_warnings,
        "config file should produce warnings without excludes"
    );

    // With the exclude pattern, no result refers to the config file.
    let excluding = LintConfig {
        exclude_patterns: vec![String::from("config/**")],
        ..Default::default()
    };
    let filtered = lint_yaml_directory_with_config(root.path(), &excluding).unwrap();
    let config_still_present = filtered
        .iter()
        .any(|r| r.path.to_string_lossy().contains("config"));
    assert!(!config_still_present, "config file should be excluded");

    // The rule file is still reported exactly once.
    let good_rule_hits = filtered
        .iter()
        .filter(|r| r.path.to_string_lossy().contains("good.yml"))
        .count();
    assert_eq!(good_rule_hits, 1);
}
2106
/// Every key name listed here must have an entry in `KEY_CACHE`.
#[test]
fn all_lint_keys_are_cached() {
    const EXPECTED_KEYS: &[&str] = &[
        "action",
        "author",
        "condition",
        "correlation",
        "date",
        "description",
        "detection",
        "field",
        "filter",
        "generate",
        "group-by",
        "id",
        "level",
        "logsource",
        "modified",
        "name",
        "rules",
        "selection",
        "status",
        "tags",
        "taxonomy",
        "timeframe",
        "timespan",
        "title",
        "type",
    ];

    EXPECTED_KEYS.iter().for_each(|key| {
        assert!(KEY_CACHE.contains_key(key), "key not cached: {key}");
    });
}
2140
/// A tag in a custom namespace triggers `unknown_tag_namespace` by default
/// and stops doing so once that namespace is listed in `tag_namespaces`.
#[test]
fn extra_tag_namespace_suppresses_warning() {
    let rule = r#"title: Test
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: medium
tags:
    - myorg.custom_tag
"#;

    // Default configuration: "myorg" is not a known namespace.
    let default_findings = lint_yaml_str(rule);
    assert!(has_rule(&default_findings, LintRule::UnknownTagNamespace));

    // Configuration that declares "myorg": the finding disappears.
    let allow_myorg = LintConfig {
        tag_namespaces: vec![String::from("myorg")],
        ..Default::default()
    };
    let configured_findings = lint_yaml_str_with_config(rule, &allow_myorg);
    assert!(has_no_rule(
        &configured_findings,
        LintRule::UnknownTagNamespace
    ));
}
2166
/// `tag_namespaces` listed in a config file are available on the loaded
/// `LintConfig`.
#[test]
fn extra_tag_namespace_from_config_file() {
    use std::io::Write as _;

    let contents = r#"
tag_namespaces:
  - myorg
  - internal
"#;
    let mut file = tempfile::NamedTempFile::with_suffix(".yml").unwrap();
    file.write_all(contents.as_bytes()).unwrap();
    let loaded = LintConfig::load(file.path()).unwrap();

    for namespace in ["myorg", "internal"] {
        assert!(loaded.tag_namespaces.iter().any(|ns| ns == namespace));
    }
}
2181}