Skip to main content

rsigma_parser/
lint.rs

1//! Built-in linter for Sigma rules, correlations, and filters.
2//!
3//! Validates raw `serde_yaml::Value` documents against the Sigma specification
4//! v2.1.0 constraints — catching metadata issues that the parser silently
5//! ignores (invalid enums, date formats, tag patterns, etc.).
6//!
7//! # Usage
8//!
9//! ```rust
10//! use rsigma_parser::lint::{lint_yaml_value, Severity};
11//!
12//! let yaml = "title: Test\nlogsource:\n  category: test\ndetection:\n  sel:\n    field: value\n  condition: sel\n";
13//! let value: serde_yaml::Value = serde_yaml::from_str(yaml).unwrap();
14//! let warnings = lint_yaml_value(&value);
15//! for w in &warnings {
16//!     if w.severity == Severity::Error {
17//!         eprintln!("{}", w.message);
18//!     }
19//! }
20//! ```
21
22use std::collections::{HashMap, HashSet};
23use std::fmt;
24use std::path::Path;
25use std::sync::LazyLock;
26
27use serde::{Deserialize, Serialize};
28use serde_yaml::Value;
29
30// =============================================================================
31// Public types
32// =============================================================================
33
34/// Severity of a lint finding.
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
36pub enum Severity {
37    /// Spec violation — the rule is invalid.
38    Error,
39    /// Best-practice issue — the rule works but is not spec-ideal.
40    Warning,
41    /// Informational suggestion — soft best-practice hint (e.g. missing author).
42    Info,
43    /// Subtle hint — lowest severity, for stylistic suggestions.
44    Hint,
45}
46
47impl fmt::Display for Severity {
48    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49        match self {
50            Severity::Error => write!(f, "error"),
51            Severity::Warning => write!(f, "warning"),
52            Severity::Info => write!(f, "info"),
53            Severity::Hint => write!(f, "hint"),
54        }
55    }
56}
57
58/// Identifies which lint rule fired.
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
60pub enum LintRule {
61    // ── Infrastructure / parse errors ────────────────────────────────────
62    YamlParseError,
63    NotAMapping,
64    FileReadError,
65    SchemaViolation,
66
67    // ── Shared (all document types) ──────────────────────────────────────
68    MissingTitle,
69    EmptyTitle,
70    TitleTooLong,
71    MissingDescription,
72    MissingAuthor,
73    InvalidId,
74    InvalidStatus,
75    MissingLevel,
76    InvalidLevel,
77    InvalidDate,
78    InvalidModified,
79    ModifiedBeforeDate,
80    DescriptionTooLong,
81    NameTooLong,
82    TaxonomyTooLong,
83    NonLowercaseKey,
84
85    // ── Detection rules ──────────────────────────────────────────────────
86    MissingLogsource,
87    MissingDetection,
88    MissingCondition,
89    EmptyDetection,
90    InvalidRelatedType,
91    InvalidRelatedId,
92    RelatedMissingRequired,
93    DeprecatedWithoutRelated,
94    InvalidTag,
95    UnknownTagNamespace,
96    DuplicateTags,
97    DuplicateReferences,
98    DuplicateFields,
99    FalsepositiveTooShort,
100    ScopeTooShort,
101    LogsourceValueNotLowercase,
102    ConditionReferencesUnknown,
103    DeprecatedAggregationSyntax,
104
105    // ── Correlation rules ────────────────────────────────────────────────
106    MissingCorrelation,
107    MissingCorrelationType,
108    InvalidCorrelationType,
109    MissingCorrelationRules,
110    EmptyCorrelationRules,
111    MissingCorrelationTimespan,
112    InvalidTimespanFormat,
113    MissingGroupBy,
114    MissingCorrelationCondition,
115    MissingConditionField,
116    InvalidConditionOperator,
117    ConditionValueNotNumeric,
118    GenerateNotBoolean,
119
120    // ── Filter rules ─────────────────────────────────────────────────────
121    MissingFilter,
122    MissingFilterRules,
123    EmptyFilterRules,
124    MissingFilterSelection,
125    MissingFilterCondition,
126    FilterHasLevel,
127    FilterHasStatus,
128    MissingFilterLogsource,
129
130    // ── Detection logic (cross-cutting) ──────────────────────────────────
131    NullInValueList,
132    SingleValueAllModifier,
133    AllWithRe,
134    IncompatibleModifiers,
135    EmptyValueList,
136    WildcardOnlyValue,
137    UnknownKey,
138}
139
140impl fmt::Display for LintRule {
141    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
142        let s = match self {
143            LintRule::YamlParseError => "yaml_parse_error",
144            LintRule::NotAMapping => "not_a_mapping",
145            LintRule::FileReadError => "file_read_error",
146            LintRule::SchemaViolation => "schema_violation",
147            LintRule::MissingTitle => "missing_title",
148            LintRule::EmptyTitle => "empty_title",
149            LintRule::TitleTooLong => "title_too_long",
150            LintRule::MissingDescription => "missing_description",
151            LintRule::MissingAuthor => "missing_author",
152            LintRule::InvalidId => "invalid_id",
153            LintRule::InvalidStatus => "invalid_status",
154            LintRule::MissingLevel => "missing_level",
155            LintRule::InvalidLevel => "invalid_level",
156            LintRule::InvalidDate => "invalid_date",
157            LintRule::InvalidModified => "invalid_modified",
158            LintRule::ModifiedBeforeDate => "modified_before_date",
159            LintRule::DescriptionTooLong => "description_too_long",
160            LintRule::NameTooLong => "name_too_long",
161            LintRule::TaxonomyTooLong => "taxonomy_too_long",
162            LintRule::NonLowercaseKey => "non_lowercase_key",
163            LintRule::MissingLogsource => "missing_logsource",
164            LintRule::MissingDetection => "missing_detection",
165            LintRule::MissingCondition => "missing_condition",
166            LintRule::EmptyDetection => "empty_detection",
167            LintRule::InvalidRelatedType => "invalid_related_type",
168            LintRule::InvalidRelatedId => "invalid_related_id",
169            LintRule::RelatedMissingRequired => "related_missing_required",
170            LintRule::DeprecatedWithoutRelated => "deprecated_without_related",
171            LintRule::InvalidTag => "invalid_tag",
172            LintRule::UnknownTagNamespace => "unknown_tag_namespace",
173            LintRule::DuplicateTags => "duplicate_tags",
174            LintRule::DuplicateReferences => "duplicate_references",
175            LintRule::DuplicateFields => "duplicate_fields",
176            LintRule::FalsepositiveTooShort => "falsepositive_too_short",
177            LintRule::ScopeTooShort => "scope_too_short",
178            LintRule::LogsourceValueNotLowercase => "logsource_value_not_lowercase",
179            LintRule::ConditionReferencesUnknown => "condition_references_unknown",
180            LintRule::DeprecatedAggregationSyntax => "deprecated_aggregation_syntax",
181            LintRule::MissingCorrelation => "missing_correlation",
182            LintRule::MissingCorrelationType => "missing_correlation_type",
183            LintRule::InvalidCorrelationType => "invalid_correlation_type",
184            LintRule::MissingCorrelationRules => "missing_correlation_rules",
185            LintRule::EmptyCorrelationRules => "empty_correlation_rules",
186            LintRule::MissingCorrelationTimespan => "missing_correlation_timespan",
187            LintRule::InvalidTimespanFormat => "invalid_timespan_format",
188            LintRule::MissingGroupBy => "missing_group_by",
189            LintRule::MissingCorrelationCondition => "missing_correlation_condition",
190            LintRule::MissingConditionField => "missing_condition_field",
191            LintRule::InvalidConditionOperator => "invalid_condition_operator",
192            LintRule::ConditionValueNotNumeric => "condition_value_not_numeric",
193            LintRule::GenerateNotBoolean => "generate_not_boolean",
194            LintRule::MissingFilter => "missing_filter",
195            LintRule::MissingFilterRules => "missing_filter_rules",
196            LintRule::EmptyFilterRules => "empty_filter_rules",
197            LintRule::MissingFilterSelection => "missing_filter_selection",
198            LintRule::MissingFilterCondition => "missing_filter_condition",
199            LintRule::FilterHasLevel => "filter_has_level",
200            LintRule::FilterHasStatus => "filter_has_status",
201            LintRule::MissingFilterLogsource => "missing_filter_logsource",
202            LintRule::NullInValueList => "null_in_value_list",
203            LintRule::SingleValueAllModifier => "single_value_all_modifier",
204            LintRule::AllWithRe => "all_with_re",
205            LintRule::IncompatibleModifiers => "incompatible_modifiers",
206            LintRule::EmptyValueList => "empty_value_list",
207            LintRule::WildcardOnlyValue => "wildcard_only_value",
208            LintRule::UnknownKey => "unknown_key",
209        };
210        write!(f, "{s}")
211    }
212}
213
214/// A source span (line/column, both 0-indexed).
215///
216/// Used by the LSP layer to avoid re-resolving JSON-pointer paths to
217/// source positions. When the lint is produced from raw `serde_yaml::Value`
218/// (which has no source positions), `span` will be `None`.
219#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
220pub struct Span {
221    /// 0-indexed start line.
222    pub start_line: u32,
223    /// 0-indexed start column.
224    pub start_col: u32,
225    /// 0-indexed end line.
226    pub end_line: u32,
227    /// 0-indexed end column.
228    pub end_col: u32,
229}
230
231// =============================================================================
232// Auto-fix types
233// =============================================================================
234
235/// Whether a fix is safe to apply automatically or needs manual review.
236#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
237pub enum FixDisposition {
238    /// No semantic change — safe to apply without review.
239    Safe,
240    /// May change meaning — should be reviewed before applying.
241    Unsafe,
242}
243
244/// A single patch operation within a [`Fix`].
245///
246/// Each variant describes a format-preserving edit to a YAML document.
247/// Paths are JSON-pointer-style strings (e.g. `"/status"`, `"/tags/2"`)
248/// matching the `LintWarning::path` convention.
249///
250/// These are intentionally yamlpath/yamlpatch-agnostic so that
251/// `rsigma-parser` carries no dependency on those crates. The consumer
252/// (CLI or LSP) converts these to concrete patch operations at apply time.
253#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
254pub enum FixPatch {
255    /// Replace the value at `path` with `new_value`.
256    ReplaceValue { path: String, new_value: String },
257    /// Rename the YAML key targeted by `path`.
258    ReplaceKey { path: String, new_key: String },
259    /// Remove the node at `path` entirely.
260    Remove { path: String },
261}
262
263/// A suggested fix for a lint finding.
264///
265/// Attached to a [`LintWarning`] when the issue can be corrected
266/// automatically. Contains one or more [`FixPatch`] operations that,
267/// applied sequentially, resolve the finding.
268#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
269pub struct Fix {
270    /// Short human-readable description (e.g. "rename 'Status' to 'status'").
271    pub title: String,
272    /// Whether the fix is safe to apply without review.
273    pub disposition: FixDisposition,
274    /// Ordered patch operations to apply.
275    pub patches: Vec<FixPatch>,
276}
277
278/// A single lint finding.
279#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
280pub struct LintWarning {
281    /// Which lint rule fired.
282    pub rule: LintRule,
283    /// Error or warning.
284    pub severity: Severity,
285    /// Human-readable message.
286    pub message: String,
287    /// JSON-pointer-style location, e.g. `"/status"`, `"/tags/2"`.
288    pub path: String,
289    /// Optional source span. `None` when linting `serde_yaml::Value` (no
290    /// source positions available). Populated by `lint_yaml_str` which
291    /// can resolve paths against the raw text.
292    pub span: Option<Span>,
293    /// Optional auto-fix. `None` when the finding cannot be corrected
294    /// automatically.
295    pub fix: Option<Fix>,
296}
297
298impl fmt::Display for LintWarning {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        write!(
301            f,
302            "{}[{}]: {}\n    --> {}",
303            self.severity, self.rule, self.message, self.path
304        )
305    }
306}
307
308/// Result of linting a single file (may contain multiple YAML documents).
309#[derive(Debug, Clone, Serialize)]
310pub struct FileLintResult {
311    pub path: std::path::PathBuf,
312    pub warnings: Vec<LintWarning>,
313}
314
315impl FileLintResult {
316    pub fn has_errors(&self) -> bool {
317        self.warnings.iter().any(|w| w.severity == Severity::Error)
318    }
319
320    pub fn error_count(&self) -> usize {
321        self.warnings
322            .iter()
323            .filter(|w| w.severity == Severity::Error)
324            .count()
325    }
326
327    pub fn warning_count(&self) -> usize {
328        self.warnings
329            .iter()
330            .filter(|w| w.severity == Severity::Warning)
331            .count()
332    }
333
334    pub fn info_count(&self) -> usize {
335        self.warnings
336            .iter()
337            .filter(|w| w.severity == Severity::Info)
338            .count()
339    }
340
341    pub fn hint_count(&self) -> usize {
342        self.warnings
343            .iter()
344            .filter(|w| w.severity == Severity::Hint)
345            .count()
346    }
347}
348
349// =============================================================================
350// Helpers
351// =============================================================================
352
353/// Pre-cached `Value::String` keys to avoid per-call allocations when
354/// looking up fields in `serde_yaml::Mapping`.
355static KEY_CACHE: LazyLock<HashMap<&'static str, Value>> = LazyLock::new(|| {
356    [
357        "action",
358        "author",
359        "category",
360        "condition",
361        "correlation",
362        "date",
363        "description",
364        "detection",
365        "falsepositives",
366        "field",
367        "fields",
368        "filter",
369        "generate",
370        "group-by",
371        "id",
372        "level",
373        "logsource",
374        "modified",
375        "name",
376        "product",
377        "references",
378        "related",
379        "rules",
380        "scope",
381        "selection",
382        "service",
383        "status",
384        "tags",
385        "taxonomy",
386        "timeframe",
387        "timespan",
388        "title",
389        "type",
390    ]
391    .into_iter()
392    .map(|n| (n, Value::String(n.into())))
393    .collect()
394});
395
396fn key(s: &str) -> &'static Value {
397    KEY_CACHE
398        .get(s)
399        .unwrap_or_else(|| panic!("lint key not pre-cached: \"{s}\" — add it to KEY_CACHE"))
400}
401
402fn get_str<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a str> {
403    m.get(key(k)).and_then(|v| v.as_str())
404}
405
406fn get_mapping<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a serde_yaml::Mapping> {
407    m.get(key(k)).and_then(|v| v.as_mapping())
408}
409
410fn get_seq<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a serde_yaml::Sequence> {
411    m.get(key(k)).and_then(|v| v.as_sequence())
412}
413
414fn warn(
415    rule: LintRule,
416    severity: Severity,
417    message: impl Into<String>,
418    path: impl Into<String>,
419) -> LintWarning {
420    LintWarning {
421        rule,
422        severity,
423        message: message.into(),
424        path: path.into(),
425        span: None,
426        fix: None,
427    }
428}
429
430fn err(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
431    warn(rule, Severity::Error, message, path)
432}
433
434fn warning(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
435    warn(rule, Severity::Warning, message, path)
436}
437
438fn info(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
439    warn(rule, Severity::Info, message, path)
440}
441
442fn safe_fix(title: impl Into<String>, patches: Vec<FixPatch>) -> Option<Fix> {
443    Some(Fix {
444        title: title.into(),
445        disposition: FixDisposition::Safe,
446        patches,
447    })
448}
449
450/// Find the closest match for `input` among `candidates` using edit distance.
451fn closest_match<'a>(input: &str, candidates: &[&'a str], max_distance: usize) -> Option<&'a str> {
452    candidates
453        .iter()
454        .filter(|c| edit_distance(input, c) <= max_distance)
455        .min_by_key(|c| edit_distance(input, c))
456        .copied()
457}
458
459/// Validate a date string matches YYYY-MM-DD with correct day-of-month.
460fn is_valid_date(s: &str) -> bool {
461    if s.len() != 10 {
462        return false;
463    }
464    let bytes = s.as_bytes();
465    if bytes[4] != b'-' || bytes[7] != b'-' {
466        return false;
467    }
468    let year_ok = bytes[0..4].iter().all(|b| b.is_ascii_digit());
469    let year: u16 = s[0..4].parse().unwrap_or(0);
470    let month: u8 = s[5..7].parse().unwrap_or(0);
471    let day: u8 = s[8..10].parse().unwrap_or(0);
472    if !year_ok || !(1..=12).contains(&month) || day == 0 {
473        return false;
474    }
475    let is_leap = (year.is_multiple_of(4) && !year.is_multiple_of(100)) || year.is_multiple_of(400);
476    let max_day = match month {
477        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
478        4 | 6 | 9 | 11 => 30,
479        2 => {
480            if is_leap {
481                29
482            } else {
483                28
484            }
485        }
486        _ => return false,
487    };
488    day <= max_day
489}
490
491/// Extract a date string from a YAML value, handling serde_yaml auto-parsing.
492///
493/// `serde_yaml` sometimes deserialises `YYYY-MM-DD` as a tagged/non-string
494/// type. This helper coerces such values back to a trimmed string.
495fn extract_date_string(raw: &Value) -> Option<String> {
496    raw.as_str().map(|s| s.to_string()).or_else(|| {
497        serde_yaml::to_string(raw)
498            .ok()
499            .map(|s| s.trim().to_string())
500    })
501}
502
503/// Validate a UUID string (any version, hyphenated form).
504fn is_valid_uuid(s: &str) -> bool {
505    if s.len() != 36 {
506        return false;
507    }
508    let parts: Vec<&str> = s.split('-').collect();
509    if parts.len() != 5 {
510        return false;
511    }
512    let expected_lens = [8, 4, 4, 4, 12];
513    parts
514        .iter()
515        .zip(expected_lens.iter())
516        .all(|(part, &len)| part.len() == len && part.chars().all(|c| c.is_ascii_hexdigit()))
517}
518
519/// Check if a logsource value is lowercase with valid chars.
520fn is_valid_logsource_value(s: &str) -> bool {
521    !s.is_empty()
522        && s.chars().all(|c| {
523            c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '.' || c == '-'
524        })
525}
526
527/// Known tag namespaces from the spec.
528const KNOWN_TAG_NAMESPACES: &[&str] =
529    &["attack", "car", "cve", "d3fend", "detection", "stp", "tlp"];
530
531/// Valid status values.
532const VALID_STATUSES: &[&str] = &[
533    "stable",
534    "test",
535    "experimental",
536    "deprecated",
537    "unsupported",
538];
539
540/// Valid level values.
541const VALID_LEVELS: &[&str] = &["informational", "low", "medium", "high", "critical"];
542
543/// Valid related types.
544const VALID_RELATED_TYPES: &[&str] = &["derived", "obsolete", "merged", "renamed", "similar"];
545
546/// Valid correlation types.
547const VALID_CORRELATION_TYPES: &[&str] = &[
548    "event_count",
549    "value_count",
550    "temporal",
551    "temporal_ordered",
552    "value_sum",
553    "value_avg",
554    "value_percentile",
555    "value_median",
556];
557
558/// Valid condition operators.
559const VALID_CONDITION_OPERATORS: &[&str] = &["gt", "gte", "lt", "lte", "eq", "neq"];
560
561/// Correlation types that require a condition section.
562const TYPES_REQUIRING_CONDITION: &[&str] = &[
563    "event_count",
564    "value_count",
565    "value_sum",
566    "value_avg",
567    "value_percentile",
568];
569
570/// Correlation types that require condition.field.
571const TYPES_REQUIRING_FIELD: &[&str] =
572    &["value_count", "value_sum", "value_avg", "value_percentile"];
573
574/// Known top-level keys shared across all Sigma document types.
575const KNOWN_KEYS_SHARED: &[&str] = &[
576    "title",
577    "id",
578    "name",
579    "status",
580    "description",
581    "author",
582    "date",
583    "modified",
584    "related",
585    "taxonomy",
586    "action",
587    "license",
588    "references",
589    "tags",
590];
591
592/// Extra top-level keys valid for detection rules.
593const KNOWN_KEYS_DETECTION: &[&str] = &[
594    "logsource",
595    "detection",
596    "fields",
597    "falsepositives",
598    "level",
599    "scope",
600    "custom_attributes",
601];
602
603/// Extra top-level keys valid for correlation rules.
604const KNOWN_KEYS_CORRELATION: &[&str] = &[
605    "correlation",
606    "level",
607    "generate",
608    "falsepositives",
609    "custom_attributes",
610];
611
612/// Extra top-level keys valid for filter rules.
613const KNOWN_KEYS_FILTER: &[&str] = &["logsource", "filter"];
614
615/// Tag pattern: `^[a-z0-9_-]+\.[a-z0-9._-]+$`
616fn is_valid_tag(s: &str) -> bool {
617    let parts: Vec<&str> = s.splitn(2, '.').collect();
618    if parts.len() != 2 {
619        return false;
620    }
621    let ns_ok = !parts[0].is_empty()
622        && parts[0]
623            .chars()
624            .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '-');
625    let rest_ok = !parts[1].is_empty()
626        && parts[1].chars().all(|c| {
627            c.is_ascii_lowercase() || c.is_ascii_digit() || c == '.' || c == '_' || c == '-'
628        });
629    ns_ok && rest_ok
630}
631
632// =============================================================================
633// Document type detection
634// =============================================================================
635
636#[derive(Debug, Clone, Copy, PartialEq, Eq)]
637enum DocType {
638    Detection,
639    Correlation,
640    Filter,
641}
642
643impl DocType {
644    fn known_keys(&self) -> &'static [&'static str] {
645        match self {
646            DocType::Detection => KNOWN_KEYS_DETECTION,
647            DocType::Correlation => KNOWN_KEYS_CORRELATION,
648            DocType::Filter => KNOWN_KEYS_FILTER,
649        }
650    }
651}
652
653fn detect_doc_type(m: &serde_yaml::Mapping) -> DocType {
654    if m.contains_key(key("correlation")) {
655        DocType::Correlation
656    } else if m.contains_key(key("filter")) {
657        DocType::Filter
658    } else {
659        DocType::Detection
660    }
661}
662
663/// Returns `true` if this document is a collection action fragment
664/// (`action: global`, `action: reset`, `action: repeat`) that should be
665/// skipped during linting.
666fn is_action_fragment(m: &serde_yaml::Mapping) -> bool {
667    matches!(get_str(m, "action"), Some("global" | "reset" | "repeat"))
668}
669
670// =============================================================================
671// Shared lint checks
672// =============================================================================
673
674fn lint_shared(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
675    // ── title ────────────────────────────────────────────────────────────
676    match get_str(m, "title") {
677        None => warnings.push(err(
678            LintRule::MissingTitle,
679            "missing required field 'title'",
680            "/title",
681        )),
682        Some(t) if t.trim().is_empty() => {
683            warnings.push(err(
684                LintRule::EmptyTitle,
685                "title must not be empty",
686                "/title",
687            ));
688        }
689        Some(t) if t.len() > 256 => {
690            warnings.push(warning(
691                LintRule::TitleTooLong,
692                format!("title is {} characters, maximum is 256", t.len()),
693                "/title",
694            ));
695        }
696        _ => {}
697    }
698
699    // ── id ───────────────────────────────────────────────────────────────
700    if let Some(id) = get_str(m, "id")
701        && !is_valid_uuid(id)
702    {
703        warnings.push(warning(
704            LintRule::InvalidId,
705            format!("id \"{id}\" is not a valid UUID"),
706            "/id",
707        ));
708    }
709
710    // ── status ───────────────────────────────────────────────────────────
711    if let Some(status) = get_str(m, "status")
712        && !VALID_STATUSES.contains(&status)
713    {
714        let fix = closest_match(status, VALID_STATUSES, 3).map(|closest| Fix {
715            title: format!("replace '{status}' with '{closest}'"),
716            disposition: FixDisposition::Safe,
717            patches: vec![FixPatch::ReplaceValue {
718                path: "/status".into(),
719                new_value: closest.into(),
720            }],
721        });
722        warnings.push(LintWarning {
723            rule: LintRule::InvalidStatus,
724            severity: Severity::Error,
725            message: format!(
726                "invalid status \"{status}\", expected one of: {}",
727                VALID_STATUSES.join(", ")
728            ),
729            path: "/status".into(),
730            span: None,
731            fix,
732        });
733    }
734
735    // ── level ────────────────────────────────────────────────────────────
736    if let Some(level) = get_str(m, "level")
737        && !VALID_LEVELS.contains(&level)
738    {
739        let fix = closest_match(level, VALID_LEVELS, 3).map(|closest| Fix {
740            title: format!("replace '{level}' with '{closest}'"),
741            disposition: FixDisposition::Safe,
742            patches: vec![FixPatch::ReplaceValue {
743                path: "/level".into(),
744                new_value: closest.into(),
745            }],
746        });
747        warnings.push(LintWarning {
748            rule: LintRule::InvalidLevel,
749            severity: Severity::Error,
750            message: format!(
751                "invalid level \"{level}\", expected one of: {}",
752                VALID_LEVELS.join(", ")
753            ),
754            path: "/level".into(),
755            span: None,
756            fix,
757        });
758    }
759
760    // ── date ─────────────────────────────────────────────────────────────
761    let date_string = m.get(key("date")).and_then(extract_date_string);
762    if let Some(d) = &date_string
763        && !is_valid_date(d)
764    {
765        warnings.push(err(
766            LintRule::InvalidDate,
767            format!("invalid date \"{d}\", expected YYYY-MM-DD"),
768            "/date",
769        ));
770    }
771
772    // ── modified ─────────────────────────────────────────────────────────
773    let modified_string = m.get(key("modified")).and_then(extract_date_string);
774    if let Some(d) = &modified_string
775        && !is_valid_date(d)
776    {
777        warnings.push(err(
778            LintRule::InvalidModified,
779            format!("invalid modified date \"{d}\", expected YYYY-MM-DD"),
780            "/modified",
781        ));
782    }
783
784    // ── modified >= date ─────────────────────────────────────────────────
785    if let (Some(date_val), Some(mod_val)) = (&date_string, &modified_string)
786        && is_valid_date(date_val)
787        && is_valid_date(mod_val)
788        && mod_val.as_str() < date_val.as_str()
789    {
790        warnings.push(warning(
791            LintRule::ModifiedBeforeDate,
792            format!("modified date \"{mod_val}\" is before creation date \"{date_val}\""),
793            "/modified",
794        ));
795    }
796
797    // ── description (missing) ──────────────────────────────────────────
798    if !m.contains_key(key("description")) {
799        warnings.push(info(
800            LintRule::MissingDescription,
801            "missing recommended field 'description'",
802            "/description",
803        ));
804    }
805
806    // ── author (missing) ─────────────────────────────────────────────
807    if !m.contains_key(key("author")) {
808        warnings.push(info(
809            LintRule::MissingAuthor,
810            "missing recommended field 'author'",
811            "/author",
812        ));
813    }
814
815    // ── description (too long) ───────────────────────────────────────
816    if let Some(desc) = get_str(m, "description")
817        && desc.len() > 65535
818    {
819        warnings.push(warning(
820            LintRule::DescriptionTooLong,
821            format!("description is {} characters, maximum is 65535", desc.len()),
822            "/description",
823        ));
824    }
825
826    // ── name ─────────────────────────────────────────────────────────────
827    if let Some(name) = get_str(m, "name")
828        && name.len() > 256
829    {
830        warnings.push(warning(
831            LintRule::NameTooLong,
832            format!("name is {} characters, maximum is 256", name.len()),
833            "/name",
834        ));
835    }
836
837    // ── taxonomy ─────────────────────────────────────────────────────────
838    if let Some(tax) = get_str(m, "taxonomy")
839        && tax.len() > 256
840    {
841        warnings.push(warning(
842            LintRule::TaxonomyTooLong,
843            format!("taxonomy is {} characters, maximum is 256", tax.len()),
844            "/taxonomy",
845        ));
846    }
847
848    // ── lowercase keys ───────────────────────────────────────────────────
849    for k in m.keys() {
850        if let Some(ks) = k.as_str()
851            && ks != ks.to_ascii_lowercase()
852        {
853            let lower = ks.to_ascii_lowercase();
854            let mut w = warning(
855                LintRule::NonLowercaseKey,
856                format!("key \"{ks}\" should be lowercase"),
857                format!("/{ks}"),
858            );
859            w.fix = safe_fix(
860                format!("rename '{ks}' to '{lower}'"),
861                vec![FixPatch::ReplaceKey {
862                    path: format!("/{ks}"),
863                    new_key: lower,
864                }],
865            );
866            warnings.push(w);
867        }
868    }
869}
870
871// =============================================================================
872// Detection rule lint checks
873// =============================================================================
874
875fn lint_detection_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
876    // ── level ─────────────────────────────────────────────────────────────
877    if !m.contains_key(key("level")) {
878        warnings.push(warning(
879            LintRule::MissingLevel,
880            "missing recommended field 'level'",
881            "/level",
882        ));
883    }
884
885    // ── logsource ────────────────────────────────────────────────────────
886    if !m.contains_key(key("logsource")) {
887        warnings.push(err(
888            LintRule::MissingLogsource,
889            "missing required field 'logsource'",
890            "/logsource",
891        ));
892    } else {
893        lint_logsource(m, warnings);
894    }
895
896    // ── detection ────────────────────────────────────────────────────────
897    if let Some(det_val) = m.get(key("detection")) {
898        if let Some(det) = det_val.as_mapping() {
899            // Collect detection identifier names (excluding condition/timeframe)
900            let det_keys: HashSet<&str> = det
901                .keys()
902                .filter_map(|k| k.as_str())
903                .filter(|k| *k != "condition" && *k != "timeframe")
904                .collect();
905
906            if !det.contains_key(key("condition")) {
907                warnings.push(err(
908                    LintRule::MissingCondition,
909                    "detection section is missing required 'condition'",
910                    "/detection/condition",
911                ));
912            } else if let Some(cond_str) = get_str(det, "condition") {
913                if has_deprecated_aggregation(cond_str) {
914                    warnings.push(warning(
915                        LintRule::DeprecatedAggregationSyntax,
916                        "condition uses deprecated Sigma v1.x aggregation syntax \
917                         (| count/min/max/avg/sum/near); use a correlation rule instead",
918                        "/detection/condition",
919                    ));
920                } else {
921                    for ident in extract_condition_identifiers(cond_str) {
922                        if !det_keys.contains(ident.as_str()) {
923                            warnings.push(err(
924                                LintRule::ConditionReferencesUnknown,
925                                format!(
926                                    "condition references '{ident}' but no such detection identifier exists"
927                                ),
928                                "/detection/condition",
929                            ));
930                        }
931                    }
932                }
933            }
934
935            if det_keys.is_empty() {
936                warnings.push(warning(
937                    LintRule::EmptyDetection,
938                    "detection section has no named search identifiers",
939                    "/detection",
940                ));
941            }
942
943            // Detection logic checks
944            lint_detection_logic(det, warnings);
945        }
946    } else {
947        warnings.push(err(
948            LintRule::MissingDetection,
949            "missing required field 'detection'",
950            "/detection",
951        ));
952    }
953
954    // ── related ──────────────────────────────────────────────────────────
955    if let Some(related) = get_seq(m, "related") {
956        for (i, item) in related.iter().enumerate() {
957            let path_prefix = format!("/related/{i}");
958            if let Some(item_map) = item.as_mapping() {
959                let has_id = item_map.contains_key(key("id"));
960                let has_type = item_map.contains_key(key("type"));
961
962                if !has_id || !has_type {
963                    warnings.push(err(
964                        LintRule::RelatedMissingRequired,
965                        "related entry must have both 'id' and 'type'",
966                        &path_prefix,
967                    ));
968                }
969
970                if let Some(id) = get_str(item_map, "id")
971                    && !is_valid_uuid(id)
972                {
973                    warnings.push(warning(
974                        LintRule::InvalidRelatedId,
975                        format!("related id \"{id}\" is not a valid UUID"),
976                        format!("{path_prefix}/id"),
977                    ));
978                }
979
980                if let Some(type_val) = get_str(item_map, "type")
981                    && !VALID_RELATED_TYPES.contains(&type_val)
982                {
983                    warnings.push(err(
984                        LintRule::InvalidRelatedType,
985                        format!(
986                            "invalid related type \"{type_val}\", expected one of: {}",
987                            VALID_RELATED_TYPES.join(", ")
988                        ),
989                        format!("{path_prefix}/type"),
990                    ));
991                }
992            }
993        }
994    }
995
996    // ── deprecated + related consistency ─────────────────────────────────
997    if get_str(m, "status") == Some("deprecated") {
998        let has_related = get_seq(m, "related")
999            .map(|seq| !seq.is_empty())
1000            .unwrap_or(false);
1001        if !has_related {
1002            warnings.push(warning(
1003                LintRule::DeprecatedWithoutRelated,
1004                "deprecated rule should have a 'related' entry linking to its replacement",
1005                "/status",
1006            ));
1007        }
1008    }
1009
1010    // ── tags ─────────────────────────────────────────────────────────────
1011    if let Some(tags) = get_seq(m, "tags") {
1012        let mut seen_tags: HashSet<String> = HashSet::new();
1013        for (i, tag_val) in tags.iter().enumerate() {
1014            if let Some(tag) = tag_val.as_str() {
1015                if !is_valid_tag(tag) {
1016                    warnings.push(warning(
1017                        LintRule::InvalidTag,
1018                        format!(
1019                            "tag \"{tag}\" does not match required pattern (lowercase, dotted namespace)"
1020                        ),
1021                        format!("/tags/{i}"),
1022                    ));
1023                } else {
1024                    // Check known namespace
1025                    if let Some(ns) = tag.split('.').next()
1026                        && !KNOWN_TAG_NAMESPACES.contains(&ns)
1027                    {
1028                        warnings.push(warning(
1029                            LintRule::UnknownTagNamespace,
1030                            format!(
1031                                "unknown tag namespace \"{ns}\", known namespaces: {}",
1032                                KNOWN_TAG_NAMESPACES.join(", ")
1033                            ),
1034                            format!("/tags/{i}"),
1035                        ));
1036                    }
1037                }
1038
1039                if !seen_tags.insert(tag.to_string()) {
1040                    let mut w = warning(
1041                        LintRule::DuplicateTags,
1042                        format!("duplicate tag \"{tag}\""),
1043                        format!("/tags/{i}"),
1044                    );
1045                    w.fix = safe_fix(
1046                        format!("remove duplicate tag '{tag}'"),
1047                        vec![FixPatch::Remove {
1048                            path: format!("/tags/{i}"),
1049                        }],
1050                    );
1051                    warnings.push(w);
1052                }
1053            }
1054        }
1055    }
1056
1057    // ── references (unique) ──────────────────────────────────────────────
1058    if let Some(refs) = get_seq(m, "references") {
1059        let mut seen: HashSet<String> = HashSet::new();
1060        for (i, r) in refs.iter().enumerate() {
1061            if let Some(s) = r.as_str()
1062                && !seen.insert(s.to_string())
1063            {
1064                let mut w = warning(
1065                    LintRule::DuplicateReferences,
1066                    format!("duplicate reference \"{s}\""),
1067                    format!("/references/{i}"),
1068                );
1069                w.fix = safe_fix(
1070                    "remove duplicate reference",
1071                    vec![FixPatch::Remove {
1072                        path: format!("/references/{i}"),
1073                    }],
1074                );
1075                warnings.push(w);
1076            }
1077        }
1078    }
1079
1080    // ── fields (unique) ──────────────────────────────────────────────────
1081    if let Some(fields) = get_seq(m, "fields") {
1082        let mut seen: HashSet<String> = HashSet::new();
1083        for (i, f) in fields.iter().enumerate() {
1084            if let Some(s) = f.as_str()
1085                && !seen.insert(s.to_string())
1086            {
1087                let mut w = warning(
1088                    LintRule::DuplicateFields,
1089                    format!("duplicate field \"{s}\""),
1090                    format!("/fields/{i}"),
1091                );
1092                w.fix = safe_fix(
1093                    "remove duplicate field",
1094                    vec![FixPatch::Remove {
1095                        path: format!("/fields/{i}"),
1096                    }],
1097                );
1098                warnings.push(w);
1099            }
1100        }
1101    }
1102
1103    // ── falsepositives (minLength 2) ─────────────────────────────────────
1104    if let Some(fps) = get_seq(m, "falsepositives") {
1105        for (i, fp) in fps.iter().enumerate() {
1106            if let Some(s) = fp.as_str()
1107                && s.len() < 2
1108            {
1109                warnings.push(warning(
1110                    LintRule::FalsepositiveTooShort,
1111                    format!("falsepositive entry \"{s}\" must be at least 2 characters"),
1112                    format!("/falsepositives/{i}"),
1113                ));
1114            }
1115        }
1116    }
1117
1118    // ── scope (minLength 2) ──────────────────────────────────────────────
1119    if let Some(scope) = get_seq(m, "scope") {
1120        for (i, s_val) in scope.iter().enumerate() {
1121            if let Some(s) = s_val.as_str()
1122                && s.len() < 2
1123            {
1124                warnings.push(warning(
1125                    LintRule::ScopeTooShort,
1126                    format!("scope entry \"{s}\" must be at least 2 characters"),
1127                    format!("/scope/{i}"),
1128                ));
1129            }
1130        }
1131    }
1132}
1133
1134fn lint_logsource(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1135    if let Some(ls) = get_mapping(m, "logsource") {
1136        for field in &["category", "product", "service"] {
1137            if let Some(val) = get_str(ls, field)
1138                && !is_valid_logsource_value(val)
1139            {
1140                let lower = val.to_ascii_lowercase();
1141                let mut w = warning(
1142                    LintRule::LogsourceValueNotLowercase,
1143                    format!("logsource {field} \"{val}\" should be lowercase (a-z, 0-9, _, ., -)"),
1144                    format!("/logsource/{field}"),
1145                );
1146                w.fix = safe_fix(
1147                    format!("lowercase '{val}' to '{lower}'"),
1148                    vec![FixPatch::ReplaceValue {
1149                        path: format!("/logsource/{field}"),
1150                        new_value: lower,
1151                    }],
1152                );
1153                warnings.push(w);
1154            }
1155        }
1156    }
1157}
1158
1159/// Extract bare identifiers from a condition expression (excluding keywords
1160/// and wildcard patterns) so we can check they exist in the detection section.
1161fn extract_condition_identifiers(condition: &str) -> Vec<String> {
1162    const KEYWORDS: &[&str] = &["and", "or", "not", "of", "all", "them"];
1163    condition
1164        .split(|c: char| !c.is_alphanumeric() && c != '_' && c != '*')
1165        .filter(|s| !s.is_empty())
1166        .filter(|s| !KEYWORDS.contains(s))
1167        .filter(|s| !s.chars().all(|c| c.is_ascii_digit()))
1168        .filter(|s| !s.contains('*'))
1169        .map(|s| s.to_string())
1170        .collect()
1171}
1172
1173/// Detect deprecated Sigma v1.x pipe-aggregation syntax in a condition string.
1174///
1175/// Patterns like `selection | count(User) by SourceIP > 5` use a pipe followed
1176/// by an aggregation keyword. These were replaced by correlation rules in v2.x.
1177fn has_deprecated_aggregation(condition: &str) -> bool {
1178    let pipe_pos = match condition.find('|') {
1179        Some(p) => p,
1180        None => return false,
1181    };
1182    let after_pipe = condition[pipe_pos + 1..].trim_start();
1183    let agg_keyword = after_pipe
1184        .split(|c: char| !c.is_ascii_alphanumeric() && c != '_')
1185        .next()
1186        .unwrap_or("");
1187    matches!(
1188        agg_keyword,
1189        "count" | "min" | "max" | "avg" | "sum" | "near"
1190    )
1191}
1192
1193/// Checks detection logic: null in value lists, single-value |all, empty value lists.
1194fn lint_detection_logic(det: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1195    for (det_key, det_val) in det {
1196        let det_key_str = det_key.as_str().unwrap_or("");
1197        if det_key_str == "condition" || det_key_str == "timeframe" {
1198            continue;
1199        }
1200
1201        lint_detection_value(det_val, det_key_str, warnings);
1202    }
1203}
1204
1205fn lint_detection_value(value: &Value, det_name: &str, warnings: &mut Vec<LintWarning>) {
1206    match value {
1207        Value::Mapping(m) => {
1208            for (field_key, field_val) in m {
1209                let field_key_str = field_key.as_str().unwrap_or("");
1210
1211                // Check |all combined with |re (regex alternation makes |all misleading)
1212                if field_key_str.contains("|all") && field_key_str.contains("|re") {
1213                    let new_key = field_key_str.replace("|all", "");
1214                    let mut w = warning(
1215                        LintRule::AllWithRe,
1216                        format!(
1217                            "'{field_key_str}' in '{det_name}' combines |all with |re; \
1218                             regex alternation (|) already handles multi-match — \
1219                             |all is redundant or misleading here"
1220                        ),
1221                        format!("/detection/{det_name}/{field_key_str}"),
1222                    );
1223                    w.fix = safe_fix(
1224                        format!("remove |all from '{field_key_str}'"),
1225                        vec![FixPatch::ReplaceKey {
1226                            path: format!("/detection/{det_name}/{field_key_str}"),
1227                            new_key,
1228                        }],
1229                    );
1230                    warnings.push(w);
1231                }
1232
1233                // Check |all with single value
1234                if field_key_str.contains("|all") {
1235                    let needs_fix = if let Value::Sequence(seq) = field_val {
1236                        seq.len() <= 1
1237                    } else {
1238                        true
1239                    };
1240                    if needs_fix {
1241                        let new_key = field_key_str.replace("|all", "");
1242                        let count = if let Value::Sequence(seq) = field_val {
1243                            seq.len().to_string()
1244                        } else {
1245                            "a single".into()
1246                        };
1247                        let mut w = warning(
1248                            LintRule::SingleValueAllModifier,
1249                            format!(
1250                                "'{field_key_str}' in '{det_name}' uses |all modifier with {count} value(s); |all requires multiple values"
1251                            ),
1252                            format!("/detection/{det_name}/{field_key_str}"),
1253                        );
1254                        w.fix = safe_fix(
1255                            format!("remove |all from '{field_key_str}'"),
1256                            vec![FixPatch::ReplaceKey {
1257                                path: format!("/detection/{det_name}/{field_key_str}"),
1258                                new_key,
1259                            }],
1260                        );
1261                        warnings.push(w);
1262                    }
1263                }
1264
1265                // Check for incompatible modifier combinations
1266                if let Some(msg) = check_modifier_compatibility(field_key_str) {
1267                    warnings.push(warning(
1268                        LintRule::IncompatibleModifiers,
1269                        format!("'{field_key_str}' in '{det_name}': {msg}"),
1270                        format!("/detection/{det_name}/{field_key_str}"),
1271                    ));
1272                }
1273
1274                // Check null in value list and empty value list
1275                if let Value::Sequence(seq) = field_val {
1276                    if seq.is_empty() {
1277                        warnings.push(warning(
1278                            LintRule::EmptyValueList,
1279                            format!("'{field_key_str}' in '{det_name}' has an empty value list"),
1280                            format!("/detection/{det_name}/{field_key_str}"),
1281                        ));
1282                    } else {
1283                        let has_null = seq.iter().any(|v| v.is_null());
1284                        let has_non_null = seq.iter().any(|v| !v.is_null());
1285                        if has_null && has_non_null {
1286                            warnings.push(warning(
1287                                LintRule::NullInValueList,
1288                                format!(
1289                                    "'{field_key_str}' in '{det_name}' mixes null with other values; null should be in its own selection"
1290                                ),
1291                                format!("/detection/{det_name}/{field_key_str}"),
1292                            ));
1293                        }
1294                    }
1295                }
1296
1297                // Check wildcard-only value: field: '*' usually means field|exists
1298                let base_field = field_key_str.split('|').next().unwrap_or(field_key_str);
1299                let is_wildcard_only = match field_val {
1300                    Value::String(s) => s == "*",
1301                    Value::Sequence(seq) => seq.len() == 1 && seq[0].as_str() == Some("*"),
1302                    _ => false,
1303                };
1304                if is_wildcard_only && !field_key_str.contains("|re") {
1305                    let new_key = format!("{base_field}|exists");
1306                    let mut w = warning(
1307                        LintRule::WildcardOnlyValue,
1308                        format!(
1309                            "'{field_key_str}' in '{det_name}' uses a lone wildcard '*'; \
1310                             consider '{base_field}|exists: true' instead"
1311                        ),
1312                        format!("/detection/{det_name}/{field_key_str}"),
1313                    );
1314                    w.fix = safe_fix(
1315                        format!("replace with '{new_key}: true'"),
1316                        vec![
1317                            FixPatch::ReplaceKey {
1318                                path: format!("/detection/{det_name}/{field_key_str}"),
1319                                new_key,
1320                            },
1321                            FixPatch::ReplaceValue {
1322                                path: format!("/detection/{det_name}/{base_field}|exists"),
1323                                new_value: "true".into(),
1324                            },
1325                        ],
1326                    );
1327                    warnings.push(w);
1328                }
1329            }
1330        }
1331        Value::Sequence(seq) => {
1332            // List of maps (OR-linked) or keyword list
1333            for item in seq {
1334                if item.is_mapping() {
1335                    lint_detection_value(item, det_name, warnings);
1336                }
1337            }
1338        }
1339        _ => {}
1340    }
1341}
1342
1343// =============================================================================
1344// Correlation rule lint checks
1345// =============================================================================
1346
1347fn lint_correlation_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1348    let Some(corr_val) = m.get(key("correlation")) else {
1349        warnings.push(err(
1350            LintRule::MissingCorrelation,
1351            "missing required field 'correlation'",
1352            "/correlation",
1353        ));
1354        return;
1355    };
1356
1357    let Some(corr) = corr_val.as_mapping() else {
1358        warnings.push(err(
1359            LintRule::MissingCorrelation,
1360            "'correlation' must be a mapping",
1361            "/correlation",
1362        ));
1363        return;
1364    };
1365
1366    // ── type ─────────────────────────────────────────────────────────────
1367    let corr_type = get_str(corr, "type");
1368    match corr_type {
1369        None => {
1370            warnings.push(err(
1371                LintRule::MissingCorrelationType,
1372                "missing required field 'correlation.type'",
1373                "/correlation/type",
1374            ));
1375        }
1376        Some(t) if !VALID_CORRELATION_TYPES.contains(&t) => {
1377            warnings.push(err(
1378                LintRule::InvalidCorrelationType,
1379                format!(
1380                    "invalid correlation type \"{t}\", expected one of: {}",
1381                    VALID_CORRELATION_TYPES.join(", ")
1382                ),
1383                "/correlation/type",
1384            ));
1385        }
1386        _ => {}
1387    }
1388
1389    // ── rules ────────────────────────────────────────────────────────────
1390    if let Some(rules) = corr.get(key("rules")) {
1391        if let Some(seq) = rules.as_sequence()
1392            && seq.is_empty()
1393        {
1394            warnings.push(warning(
1395                LintRule::EmptyCorrelationRules,
1396                "correlation.rules should not be empty",
1397                "/correlation/rules",
1398            ));
1399        }
1400    } else {
1401        warnings.push(err(
1402            LintRule::MissingCorrelationRules,
1403            "missing required field 'correlation.rules'",
1404            "/correlation/rules",
1405        ));
1406    }
1407
1408    // ── timespan ─────────────────────────────────────────────────────────
1409    if let Some(ts) = get_str(corr, "timespan").or_else(|| get_str(corr, "timeframe")) {
1410        if !is_valid_timespan(ts) {
1411            warnings.push(err(
1412                LintRule::InvalidTimespanFormat,
1413                format!(
1414                    "invalid timespan \"{ts}\", expected format like 5m, 1h, 30s, 7d, 1w, 1M, 1y"
1415                ),
1416                "/correlation/timespan",
1417            ));
1418        }
1419    } else {
1420        warnings.push(err(
1421            LintRule::MissingCorrelationTimespan,
1422            "missing required field 'correlation.timespan'",
1423            "/correlation/timespan",
1424        ));
1425    }
1426
1427    // ── Conditional requirements per correlation type ─────────────────────
1428    if let Some(ct) = corr_type {
1429        // group-by is required for all correlation types
1430        if !corr.contains_key(key("group-by")) {
1431            warnings.push(err(
1432                LintRule::MissingGroupBy,
1433                format!("{ct} correlation requires 'group-by'"),
1434                "/correlation/group-by",
1435            ));
1436        }
1437
1438        // condition required for non-temporal types
1439        if TYPES_REQUIRING_CONDITION.contains(&ct) {
1440            if let Some(cond_val) = corr.get(key("condition")) {
1441                if let Some(cond_map) = cond_val.as_mapping() {
1442                    lint_correlation_condition(cond_map, ct, warnings);
1443                }
1444            } else {
1445                warnings.push(err(
1446                    LintRule::MissingCorrelationCondition,
1447                    format!("{ct} correlation requires a 'condition'"),
1448                    "/correlation/condition",
1449                ));
1450            }
1451        }
1452    }
1453
1454    // ── generate (document root per schema; nested under `correlation` is legacy) ──
1455    for (path, val) in [
1456        ("/generate", m.get(key("generate"))),
1457        ("/correlation/generate", corr.get(key("generate"))),
1458    ] {
1459        if let Some(gen_val) = val
1460            && !gen_val.is_bool()
1461        {
1462            warnings.push(err(
1463                LintRule::GenerateNotBoolean,
1464                "'generate' must be a boolean (true/false)",
1465                path,
1466            ));
1467        }
1468    }
1469}
1470
1471fn lint_correlation_condition(
1472    cond: &serde_yaml::Mapping,
1473    corr_type: &str,
1474    warnings: &mut Vec<LintWarning>,
1475) {
1476    // Check condition.field requirement
1477    if TYPES_REQUIRING_FIELD.contains(&corr_type) && !cond.contains_key(key("field")) {
1478        warnings.push(err(
1479            LintRule::MissingConditionField,
1480            format!("{corr_type} correlation condition requires 'field'"),
1481            "/correlation/condition/field",
1482        ));
1483    }
1484
1485    // Validate operator keys and numeric values
1486    for (k, v) in cond {
1487        let ks = k.as_str().unwrap_or("");
1488        if ks == "field" {
1489            continue;
1490        }
1491        if !VALID_CONDITION_OPERATORS.contains(&ks) {
1492            warnings.push(err(
1493                LintRule::InvalidConditionOperator,
1494                format!(
1495                    "invalid condition operator \"{ks}\", expected one of: {}",
1496                    VALID_CONDITION_OPERATORS.join(", ")
1497                ),
1498                format!("/correlation/condition/{ks}"),
1499            ));
1500        } else if !v.is_i64() && !v.is_u64() && !v.is_f64() {
1501            warnings.push(err(
1502                LintRule::ConditionValueNotNumeric,
1503                format!("condition operator '{ks}' requires a numeric value"),
1504                format!("/correlation/condition/{ks}"),
1505            ));
1506        }
1507    }
1508}
1509
1510/// Check field modifier compatibility and return a diagnostic message if
1511/// the combination is invalid.
1512///
1513/// Modifier categories (at most one from each exclusive group):
1514/// - **String match**: contains, startswith, endswith
1515/// - **Pattern match**: re, cidr (incompatible with string-match modifiers)
1516/// - **Numeric comparison**: gt, gte, lt, lte, neq
1517/// - **Existence**: exists (standalone, incompatible with everything except all/cased)
1518/// - **Regex flags**: i, m, s (require re)
1519fn check_modifier_compatibility(field_key: &str) -> Option<String> {
1520    let parts: Vec<&str> = field_key.split('|').collect();
1521    if parts.len() < 2 {
1522        return None;
1523    }
1524    let modifiers = &parts[1..];
1525
1526    let string_match: &[&str] = &["contains", "startswith", "endswith"];
1527    let pattern_match: &[&str] = &["re", "cidr"];
1528    let numeric_compare: &[&str] = &["gt", "gte", "lt", "lte", "neq"];
1529    let regex_flags: &[&str] = &["i", "ignorecase", "m", "multiline", "s", "dotall"];
1530
1531    let has_string = modifiers
1532        .iter()
1533        .filter(|m| string_match.contains(m))
1534        .count();
1535    let has_pattern: Vec<&&str> = modifiers
1536        .iter()
1537        .filter(|m| pattern_match.contains(m))
1538        .collect();
1539    let has_numeric = modifiers.iter().any(|m| numeric_compare.contains(m));
1540    let has_exists = modifiers.contains(&"exists");
1541    let has_re = modifiers.contains(&"re");
1542    let has_regex_flags = modifiers.iter().any(|m| regex_flags.contains(m));
1543
1544    // Multiple string-match modifiers are mutually exclusive
1545    if has_string > 1 {
1546        return Some(
1547            "multiple string-match modifiers (contains, startswith, endswith) \
1548             are mutually exclusive"
1549                .to_string(),
1550        );
1551    }
1552
1553    // Pattern-match (re, cidr) is incompatible with string-match modifiers
1554    if !has_pattern.is_empty() && has_string > 0 {
1555        return Some(format!(
1556            "pattern modifier '{}' is incompatible with string-match modifiers \
1557             (contains, startswith, endswith)",
1558            has_pattern
1559                .iter()
1560                .map(|m| **m)
1561                .collect::<Vec<_>>()
1562                .join(", ")
1563        ));
1564    }
1565
1566    // Numeric comparison is incompatible with string-match and pattern modifiers
1567    if has_numeric && (has_string > 0 || !has_pattern.is_empty()) {
1568        return Some(
1569            "numeric comparison modifiers (gt, gte, lt, lte, neq) are incompatible \
1570             with string-match and pattern modifiers"
1571                .to_string(),
1572        );
1573    }
1574
1575    // exists is standalone
1576    if has_exists && modifiers.len() > 1 {
1577        let others: Vec<&&str> = modifiers
1578            .iter()
1579            .filter(|m| **m != "exists" && **m != "all" && **m != "cased")
1580            .collect();
1581        if !others.is_empty() {
1582            return Some(format!(
1583                "'exists' modifier is incompatible with: {}",
1584                others.iter().map(|m| **m).collect::<Vec<_>>().join(", ")
1585            ));
1586        }
1587    }
1588
1589    // Regex flags require re
1590    if has_regex_flags && !has_re {
1591        return Some("regex flag modifiers (i, m, s) require the 're' modifier".to_string());
1592    }
1593
1594    None
1595}
1596
1597fn is_valid_timespan(s: &str) -> bool {
1598    if s.is_empty() {
1599        return false;
1600    }
1601    let last = s.as_bytes()[s.len() - 1];
1602    // s=second, m=minute, h=hour, d=day, w=week, M=month, y=year
1603    if !matches!(last, b's' | b'm' | b'h' | b'd' | b'w' | b'M' | b'y') {
1604        return false;
1605    }
1606    let num_part = &s[..s.len() - 1];
1607    !num_part.is_empty() && num_part.chars().all(|c| c.is_ascii_digit())
1608}
1609
1610// =============================================================================
1611// Filter rule lint checks
1612// =============================================================================
1613
1614fn lint_filter_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1615    // ── filter section ───────────────────────────────────────────────────
1616    let Some(filter_val) = m.get(key("filter")) else {
1617        warnings.push(err(
1618            LintRule::MissingFilter,
1619            "missing required field 'filter'",
1620            "/filter",
1621        ));
1622        return;
1623    };
1624
1625    let Some(filter) = filter_val.as_mapping() else {
1626        warnings.push(err(
1627            LintRule::MissingFilter,
1628            "'filter' must be a mapping",
1629            "/filter",
1630        ));
1631        return;
1632    };
1633
1634    // ── filter.rules ─────────────────────────────────────────────────────
1635    if let Some(rules_val) = filter.get(key("rules")) {
1636        if let Some(seq) = rules_val.as_sequence()
1637            && seq.is_empty()
1638        {
1639            warnings.push(warning(
1640                LintRule::EmptyFilterRules,
1641                "filter.rules should have at least one entry",
1642                "/filter/rules",
1643            ));
1644        }
1645    } else {
1646        warnings.push(err(
1647            LintRule::MissingFilterRules,
1648            "missing required field 'filter.rules'",
1649            "/filter/rules",
1650        ));
1651    }
1652
1653    // ── filter.selection ─────────────────────────────────────────────────
1654    if !filter.contains_key(key("selection")) {
1655        warnings.push(err(
1656            LintRule::MissingFilterSelection,
1657            "missing required field 'filter.selection'",
1658            "/filter/selection",
1659        ));
1660    }
1661
1662    // ── filter.condition ─────────────────────────────────────────────────
1663    if !filter.contains_key(key("condition")) {
1664        warnings.push(err(
1665            LintRule::MissingFilterCondition,
1666            "missing required field 'filter.condition'",
1667            "/filter/condition",
1668        ));
1669    }
1670
1671    // ── logsource required for filters ───────────────────────────────────
1672    if !m.contains_key(key("logsource")) {
1673        warnings.push(err(
1674            LintRule::MissingFilterLogsource,
1675            "missing required field 'logsource' for filter rule",
1676            "/logsource",
1677        ));
1678    } else {
1679        lint_logsource(m, warnings);
1680    }
1681
1682    // ── Filters should NOT have level or status ──────────────────────────
1683    if m.contains_key(key("level")) {
1684        let mut w = warning(
1685            LintRule::FilterHasLevel,
1686            "filter rules should not have a 'level' field",
1687            "/level",
1688        );
1689        w.fix = safe_fix(
1690            "remove 'level' from filter rule",
1691            vec![FixPatch::Remove {
1692                path: "/level".into(),
1693            }],
1694        );
1695        warnings.push(w);
1696    }
1697
1698    if m.contains_key(key("status")) {
1699        let mut w = warning(
1700            LintRule::FilterHasStatus,
1701            "filter rules should not have a 'status' field",
1702            "/status",
1703        );
1704        w.fix = safe_fix(
1705            "remove 'status' from filter rule",
1706            vec![FixPatch::Remove {
1707                path: "/status".into(),
1708            }],
1709        );
1710        warnings.push(w);
1711    }
1712}
1713
1714// =============================================================================
1715// Public API
1716// =============================================================================
1717
1718/// Levenshtein edit distance between two strings.
1719fn edit_distance(a: &str, b: &str) -> usize {
1720    let (a_len, b_len) = (a.len(), b.len());
1721    if a_len == 0 {
1722        return b_len;
1723    }
1724    if b_len == 0 {
1725        return a_len;
1726    }
1727    let mut prev: Vec<usize> = (0..=b_len).collect();
1728    let mut curr = vec![0; b_len + 1];
1729    for (i, ca) in a.bytes().enumerate() {
1730        curr[0] = i + 1;
1731        for (j, cb) in b.bytes().enumerate() {
1732            let cost = if ca == cb { 0 } else { 1 };
1733            curr[j + 1] = (prev[j] + cost).min(prev[j + 1] + 1).min(curr[j] + 1);
1734        }
1735        std::mem::swap(&mut prev, &mut curr);
1736    }
1737    prev[b_len]
1738}
1739
1740/// Maximum edit distance to consider an unknown key a likely typo of a known key.
1741const TYPO_MAX_EDIT_DISTANCE: usize = 2;
1742
1743/// Check for unknown top-level keys that are likely typos of known keys.
1744///
1745/// The Sigma specification v2.1.0 explicitly allows arbitrary custom top-level
1746/// fields, so unknown keys are not errors. However, when an unknown key is
1747/// within a small edit distance of a known key it is likely a typo and we
1748/// surface an informational hint.
1749fn lint_unknown_keys(m: &serde_yaml::Mapping, doc_type: DocType, warnings: &mut Vec<LintWarning>) {
1750    let type_keys = doc_type.known_keys();
1751    let all_known: Vec<&str> = KNOWN_KEYS_SHARED
1752        .iter()
1753        .chain(type_keys.iter())
1754        .copied()
1755        .collect();
1756
1757    for k in m.keys() {
1758        let Some(ks) = k.as_str() else { continue };
1759        if KNOWN_KEYS_SHARED.contains(&ks) || type_keys.contains(&ks) {
1760            continue;
1761        }
1762        // Only warn when the key looks like a typo of a known key.
1763        if let Some(closest) = all_known
1764            .iter()
1765            .filter(|known| edit_distance(ks, known) <= TYPO_MAX_EDIT_DISTANCE)
1766            .min_by_key(|known| edit_distance(ks, known))
1767        {
1768            let mut w = info(
1769                LintRule::UnknownKey,
1770                format!("unknown top-level key \"{ks}\"; did you mean \"{closest}\"?"),
1771                format!("/{ks}"),
1772            );
1773            w.fix = safe_fix(
1774                format!("rename '{ks}' to '{closest}'"),
1775                vec![FixPatch::ReplaceKey {
1776                    path: format!("/{ks}"),
1777                    new_key: closest.to_string(),
1778                }],
1779            );
1780            warnings.push(w);
1781        }
1782    }
1783}
1784
1785/// Lint a single YAML document value.
1786///
1787/// Auto-detects document type (detection / correlation / filter) and runs
1788/// the appropriate checks. Returns all findings.
1789pub fn lint_yaml_value(value: &Value) -> Vec<LintWarning> {
1790    let Some(m) = value.as_mapping() else {
1791        return vec![err(
1792            LintRule::NotAMapping,
1793            "document is not a YAML mapping",
1794            "/",
1795        )];
1796    };
1797
1798    // Skip collection action fragments
1799    if is_action_fragment(m) {
1800        return Vec::new();
1801    }
1802
1803    let mut warnings = Vec::new();
1804
1805    // Run shared checks
1806    lint_shared(m, &mut warnings);
1807
1808    // Run type-specific checks
1809    let doc_type = detect_doc_type(m);
1810    match doc_type {
1811        DocType::Detection => lint_detection_rule(m, &mut warnings),
1812        DocType::Correlation => lint_correlation_rule(m, &mut warnings),
1813        DocType::Filter => lint_filter_rule(m, &mut warnings),
1814    }
1815
1816    // Check for unknown top-level keys
1817    lint_unknown_keys(m, doc_type, &mut warnings);
1818
1819    warnings
1820}
1821
1822/// Lint a raw YAML string, returning warnings with resolved source spans.
1823///
1824/// Unlike [`lint_yaml_value`], this function takes the raw text and resolves
1825/// JSON-pointer paths to `(line, col)` spans. This is the preferred entry
1826/// point for the LSP server.
1827pub fn lint_yaml_str(text: &str) -> Vec<LintWarning> {
1828    let mut all_warnings = Vec::new();
1829
1830    for doc in serde_yaml::Deserializer::from_str(text) {
1831        let value: Value = match Value::deserialize(doc) {
1832            Ok(v) => v,
1833            Err(e) => {
1834                let mut w = err(
1835                    LintRule::YamlParseError,
1836                    format!("YAML parse error: {e}"),
1837                    "/",
1838                );
1839                // serde_yaml can give us a location
1840                if let Some(loc) = e.location() {
1841                    w.span = Some(Span {
1842                        start_line: loc.line().saturating_sub(1) as u32,
1843                        start_col: loc.column() as u32,
1844                        end_line: loc.line().saturating_sub(1) as u32,
1845                        end_col: loc.column() as u32 + 1,
1846                    });
1847                }
1848                all_warnings.push(w);
1849                // A parse error leaves the YAML stream in an undefined state;
1850                // the deserializer iterator may never terminate on malformed
1851                // input, so we must stop iterating to avoid infinite loops and
1852                // unbounded memory growth.
1853                break;
1854            }
1855        };
1856
1857        let warnings = lint_yaml_value(&value);
1858        // Resolve spans for each warning
1859        for mut w in warnings {
1860            w.span = resolve_path_to_span(text, &w.path);
1861            all_warnings.push(w);
1862        }
1863    }
1864
1865    all_warnings
1866}
1867
1868/// Resolve a JSON-pointer path to a `Span` by scanning the YAML text.
1869///
1870/// Returns `None` if the path cannot be resolved.
1871fn resolve_path_to_span(text: &str, path: &str) -> Option<Span> {
1872    if path == "/" || path.is_empty() {
1873        // Root — first non-empty line
1874        for (i, line) in text.lines().enumerate() {
1875            let trimmed = line.trim();
1876            if !trimmed.is_empty() && !trimmed.starts_with('#') && trimmed != "---" {
1877                return Some(Span {
1878                    start_line: i as u32,
1879                    start_col: 0,
1880                    end_line: i as u32,
1881                    end_col: line.len() as u32,
1882                });
1883            }
1884        }
1885        return None;
1886    }
1887
1888    let segments: Vec<&str> = path.strip_prefix('/').unwrap_or(path).split('/').collect();
1889
1890    if segments.is_empty() {
1891        return None;
1892    }
1893
1894    let lines: Vec<&str> = text.lines().collect();
1895    let mut current_indent: i32 = -1;
1896    let mut search_start = 0usize;
1897    let mut last_matched_line: Option<usize> = None;
1898
1899    for segment in &segments {
1900        let array_index: Option<usize> = segment.parse().ok();
1901        let mut found = false;
1902
1903        let mut line_num = search_start;
1904        while line_num < lines.len() {
1905            let line = lines[line_num];
1906            let trimmed = line.trim();
1907            if trimmed.is_empty() || trimmed.starts_with('#') {
1908                line_num += 1;
1909                continue;
1910            }
1911
1912            let indent = (line.len() - trimmed.len()) as i32;
1913
1914            if indent <= current_indent && found {
1915                break;
1916            }
1917            if indent <= current_indent {
1918                line_num += 1;
1919                continue;
1920            }
1921
1922            if let Some(idx) = array_index {
1923                if trimmed.starts_with("- ") && indent > current_indent {
1924                    let mut count = 0usize;
1925                    for (offset, sl) in lines[search_start..].iter().enumerate() {
1926                        let scan = search_start + offset;
1927                        let st = sl.trim();
1928                        if st.is_empty() || st.starts_with('#') {
1929                            continue;
1930                        }
1931                        let si = (sl.len() - st.len()) as i32;
1932                        if si == indent && st.starts_with("- ") {
1933                            if count == idx {
1934                                last_matched_line = Some(scan);
1935                                search_start = scan + 1;
1936                                current_indent = indent;
1937                                found = true;
1938                                break;
1939                            }
1940                            count += 1;
1941                        }
1942                        if si < indent && count > 0 {
1943                            break;
1944                        }
1945                    }
1946                    break;
1947                }
1948            } else {
1949                let key_pattern = format!("{segment}:");
1950                if trimmed.starts_with(&key_pattern) || trimmed == *segment {
1951                    last_matched_line = Some(line_num);
1952                    search_start = line_num + 1;
1953                    current_indent = indent;
1954                    found = true;
1955                    break;
1956                }
1957            }
1958
1959            line_num += 1;
1960        }
1961
1962        if !found && last_matched_line.is_none() {
1963            break;
1964        }
1965    }
1966
1967    last_matched_line.map(|line_num| {
1968        let line = lines[line_num];
1969        Span {
1970            start_line: line_num as u32,
1971            start_col: 0,
1972            end_line: line_num as u32,
1973            end_col: line.len() as u32,
1974        }
1975    })
1976}
1977
1978/// Lint all YAML documents in a file.
1979///
1980/// Handles multi-document YAML (separated by `---`). Collection action
1981/// fragments (`action: global/reset/repeat`) are skipped. Warnings include
1982/// resolved source spans (delegates to [`lint_yaml_str`]).
1983pub fn lint_yaml_file(path: &Path) -> crate::error::Result<FileLintResult> {
1984    let content = std::fs::read_to_string(path)?;
1985    let warnings = lint_yaml_str(&content);
1986    Ok(FileLintResult {
1987        path: path.to_path_buf(),
1988        warnings,
1989    })
1990}
1991
1992/// Lint all `.yml`/`.yaml` files in a directory recursively.
1993///
1994/// Skips hidden directories (starting with `.`) and tracks visited
1995/// canonical paths to avoid infinite loops from symlink cycles.
1996pub fn lint_yaml_directory(dir: &Path) -> crate::error::Result<Vec<FileLintResult>> {
1997    let mut results = Vec::new();
1998    let mut visited = HashSet::new();
1999
2000    fn walk(
2001        dir: &Path,
2002        results: &mut Vec<FileLintResult>,
2003        visited: &mut HashSet<std::path::PathBuf>,
2004    ) -> crate::error::Result<()> {
2005        // Resolve symlinks and canonicalize for cycle detection
2006        let canonical = match dir.canonicalize() {
2007            Ok(p) => p,
2008            Err(_) => return Ok(()),
2009        };
2010        if !visited.insert(canonical) {
2011            // Already visited this directory — symlink cycle
2012            return Ok(());
2013        }
2014
2015        let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
2016        entries.sort_by_key(|e| e.path());
2017
2018        for entry in entries {
2019            let path = entry.path();
2020
2021            // Skip hidden directories (e.g. .git)
2022            if path.is_dir() {
2023                if path
2024                    .file_name()
2025                    .and_then(|n| n.to_str())
2026                    .is_some_and(|n| n.starts_with('.'))
2027                {
2028                    continue;
2029                }
2030                walk(&path, results, visited)?;
2031            } else if matches!(
2032                path.extension().and_then(|e| e.to_str()),
2033                Some("yml" | "yaml")
2034            ) {
2035                match crate::lint::lint_yaml_file(&path) {
2036                    Ok(file_result) => results.push(file_result),
2037                    Err(e) => {
2038                        results.push(FileLintResult {
2039                            path: path.clone(),
2040                            warnings: vec![err(
2041                                LintRule::FileReadError,
2042                                format!("error reading file: {e}"),
2043                                "/",
2044                            )],
2045                        });
2046                    }
2047                }
2048            }
2049        }
2050        Ok(())
2051    }
2052
2053    walk(dir, &mut results, &mut visited)?;
2054    Ok(results)
2055}
2056
2057// =============================================================================
2058// Lint configuration & suppression
2059// =============================================================================
2060
2061/// Configuration for lint rule suppression and severity overrides.
2062///
2063/// Can be loaded from a `.rsigma-lint.yml` config file, merged with CLI
2064/// `--disable` flags, and combined with inline `# rsigma-disable` comments.
2065#[derive(Debug, Clone, Default, Serialize)]
2066pub struct LintConfig {
2067    /// Rule names to suppress entirely (e.g. `"missing_description"`).
2068    pub disabled_rules: HashSet<String>,
2069    /// Override the default severity of a rule (e.g. `title_too_long -> Info`).
2070    pub severity_overrides: HashMap<String, Severity>,
2071    /// Glob patterns for paths to exclude from directory linting.
2072    /// Matched against relative paths from the lint root (e.g. `"config/**"`).
2073    pub exclude_patterns: Vec<String>,
2074}
2075
2076/// Raw YAML shape for `.rsigma-lint.yml`.
2077#[derive(Debug, Deserialize)]
2078struct RawLintConfig {
2079    #[serde(default)]
2080    disabled_rules: Vec<String>,
2081    #[serde(default)]
2082    severity_overrides: HashMap<String, String>,
2083    #[serde(default)]
2084    exclude: Vec<String>,
2085}
2086
2087impl LintConfig {
2088    /// Load a `LintConfig` from a `.rsigma-lint.yml` file.
2089    pub fn load(path: &Path) -> crate::error::Result<Self> {
2090        let content = std::fs::read_to_string(path)?;
2091        let raw: RawLintConfig = serde_yaml::from_str(&content)?;
2092
2093        let disabled_rules: HashSet<String> = raw.disabled_rules.into_iter().collect();
2094        let mut severity_overrides = HashMap::new();
2095        for (rule, sev_str) in &raw.severity_overrides {
2096            let sev = match sev_str.as_str() {
2097                "error" => Severity::Error,
2098                "warning" => Severity::Warning,
2099                "info" => Severity::Info,
2100                "hint" => Severity::Hint,
2101                other => {
2102                    return Err(crate::error::SigmaParserError::InvalidRule(format!(
2103                        "invalid severity '{other}' for rule '{rule}' in lint config"
2104                    )));
2105                }
2106            };
2107            severity_overrides.insert(rule.clone(), sev);
2108        }
2109
2110        Ok(LintConfig {
2111            disabled_rules,
2112            severity_overrides,
2113            exclude_patterns: raw.exclude,
2114        })
2115    }
2116
2117    /// Walk up from `start_path` to find the nearest `.rsigma-lint.yml`.
2118    ///
2119    /// Checks `start_path` itself (if a directory) or its parent, then
2120    /// ancestors until the filesystem root.
2121    pub fn find_in_ancestors(start_path: &Path) -> Option<std::path::PathBuf> {
2122        let dir = if start_path.is_file() {
2123            start_path.parent()?
2124        } else {
2125            start_path
2126        };
2127
2128        let mut current = dir;
2129        loop {
2130            let candidate = current.join(".rsigma-lint.yml");
2131            if candidate.is_file() {
2132                return Some(candidate);
2133            }
2134            // Also try .yaml extension
2135            let candidate_yaml = current.join(".rsigma-lint.yaml");
2136            if candidate_yaml.is_file() {
2137                return Some(candidate_yaml);
2138            }
2139            current = current.parent()?;
2140        }
2141    }
2142
2143    /// Merge another config into this one (e.g. CLI `--disable` into file config).
2144    pub fn merge(&mut self, other: &LintConfig) {
2145        self.disabled_rules
2146            .extend(other.disabled_rules.iter().cloned());
2147        for (rule, sev) in &other.severity_overrides {
2148            self.severity_overrides.insert(rule.clone(), *sev);
2149        }
2150        self.exclude_patterns
2151            .extend(other.exclude_patterns.iter().cloned());
2152    }
2153
2154    /// Check if a rule is disabled.
2155    pub fn is_disabled(&self, rule: &LintRule) -> bool {
2156        self.disabled_rules.contains(&rule.to_string())
2157    }
2158
2159    /// Build a compiled [`globset::GlobSet`] from the exclude patterns.
2160    ///
2161    /// Returns `None` if there are no patterns. Invalid patterns are silently
2162    /// skipped (they will have been validated at config load time in practice).
2163    pub fn build_exclude_set(&self) -> Option<globset::GlobSet> {
2164        if self.exclude_patterns.is_empty() {
2165            return None;
2166        }
2167        let mut builder = globset::GlobSetBuilder::new();
2168        for pat in &self.exclude_patterns {
2169            if let Ok(glob) = globset::GlobBuilder::new(pat)
2170                .literal_separator(false)
2171                .build()
2172            {
2173                builder.add(glob);
2174            }
2175        }
2176        builder.build().ok()
2177    }
2178}
2179
2180// =============================================================================
2181// Inline suppression comments
2182// =============================================================================
2183
2184/// Parsed inline suppression directives from YAML source text.
2185#[derive(Debug, Clone, Default)]
2186pub struct InlineSuppressions {
2187    /// If `true`, all rules are suppressed for the entire file.
2188    pub disable_all: bool,
2189    /// Rules suppressed for the entire file (from `# rsigma-disable rule1, rule2`).
2190    pub file_disabled: HashSet<String>,
2191    /// Rules suppressed for specific lines: `line_number -> set of rule names`.
2192    /// An empty set means all rules are suppressed for that line.
2193    pub line_disabled: HashMap<u32, Option<HashSet<String>>>,
2194}
2195
2196/// Parse `# rsigma-disable` comments from raw YAML text.
2197///
2198/// Supported forms:
2199/// - `# rsigma-disable` — suppress **all** rules for the file
2200/// - `# rsigma-disable rule1, rule2` — suppress specific rules for the file
2201/// - `# rsigma-disable-next-line` — suppress all rules for the next line
2202/// - `# rsigma-disable-next-line rule1, rule2` — suppress specific rules for the next line
2203pub fn parse_inline_suppressions(text: &str) -> InlineSuppressions {
2204    let mut result = InlineSuppressions::default();
2205
2206    for (i, line) in text.lines().enumerate() {
2207        let trimmed = line.trim();
2208
2209        // Look for comment-only lines or trailing comments
2210        let comment = if let Some(pos) = find_yaml_comment(trimmed) {
2211            trimmed[pos + 1..].trim()
2212        } else {
2213            continue;
2214        };
2215
2216        if let Some(rest) = comment.strip_prefix("rsigma-disable-next-line") {
2217            let rest = rest.trim();
2218            let next_line = (i + 1) as u32;
2219            if rest.is_empty() {
2220                // Suppress all rules for next line
2221                result.line_disabled.insert(next_line, None);
2222            } else {
2223                // Suppress specific rules for next line
2224                let rules: HashSet<String> = rest
2225                    .split(',')
2226                    .map(|s| s.trim().to_string())
2227                    .filter(|s| !s.is_empty())
2228                    .collect();
2229                if !rules.is_empty() {
2230                    result
2231                        .line_disabled
2232                        .entry(next_line)
2233                        .and_modify(|existing| {
2234                            if let Some(existing_set) = existing {
2235                                existing_set.extend(rules.iter().cloned());
2236                            }
2237                            // If None (all suppressed), leave as None
2238                        })
2239                        .or_insert(Some(rules));
2240                }
2241            }
2242        } else if let Some(rest) = comment.strip_prefix("rsigma-disable") {
2243            let rest = rest.trim();
2244            if rest.is_empty() {
2245                // Suppress all rules for the entire file
2246                result.disable_all = true;
2247            } else {
2248                // Suppress specific rules for the file
2249                for rule in rest.split(',') {
2250                    let rule = rule.trim();
2251                    if !rule.is_empty() {
2252                        result.file_disabled.insert(rule.to_string());
2253                    }
2254                }
2255            }
2256        }
2257    }
2258
2259    result
2260}
2261
2262/// Find the start of a YAML comment (`#`) that is not inside a quoted string.
2263///
2264/// Returns the byte offset of `#` within the trimmed line, or `None`.
2265fn find_yaml_comment(line: &str) -> Option<usize> {
2266    let mut in_single = false;
2267    let mut in_double = false;
2268    for (i, c) in line.char_indices() {
2269        match c {
2270            '\'' if !in_double => in_single = !in_single,
2271            '"' if !in_single => in_double = !in_double,
2272            '#' if !in_single && !in_double => return Some(i),
2273            _ => {}
2274        }
2275    }
2276    None
2277}
2278
2279impl InlineSuppressions {
2280    /// Check if a warning should be suppressed.
2281    pub fn is_suppressed(&self, warning: &LintWarning) -> bool {
2282        // File-level disable-all
2283        if self.disable_all {
2284            return true;
2285        }
2286
2287        // File-level specific rules
2288        let rule_name = warning.rule.to_string();
2289        if self.file_disabled.contains(&rule_name) {
2290            return true;
2291        }
2292
2293        // Line-level suppression (requires a resolved span)
2294        if let Some(span) = &warning.span
2295            && let Some(line_rules) = self.line_disabled.get(&span.start_line)
2296        {
2297            return match line_rules {
2298                None => true, // All rules suppressed for this line
2299                Some(rules) => rules.contains(&rule_name),
2300            };
2301        }
2302
2303        false
2304    }
2305}
2306
2307// =============================================================================
2308// Suppression filtering
2309// =============================================================================
2310
2311/// Apply suppression from config and inline comments to lint warnings.
2312///
2313/// 1. Removes warnings whose rule is in `config.disabled_rules`.
2314/// 2. Removes warnings suppressed by inline comments.
2315/// 3. Applies `severity_overrides` to remaining warnings.
2316pub fn apply_suppressions(
2317    warnings: Vec<LintWarning>,
2318    config: &LintConfig,
2319    inline: &InlineSuppressions,
2320) -> Vec<LintWarning> {
2321    warnings
2322        .into_iter()
2323        .filter(|w| !config.is_disabled(&w.rule))
2324        .filter(|w| !inline.is_suppressed(w))
2325        .map(|mut w| {
2326            let rule_name = w.rule.to_string();
2327            if let Some(sev) = config.severity_overrides.get(&rule_name) {
2328                w.severity = *sev;
2329            }
2330            w
2331        })
2332        .collect()
2333}
2334
2335/// Lint a raw YAML string with config-based suppression.
2336///
2337/// Combines [`lint_yaml_str`] + [`parse_inline_suppressions`] +
2338/// [`apply_suppressions`] in one call.
2339pub fn lint_yaml_str_with_config(text: &str, config: &LintConfig) -> Vec<LintWarning> {
2340    let warnings = lint_yaml_str(text);
2341    let inline = parse_inline_suppressions(text);
2342    apply_suppressions(warnings, config, &inline)
2343}
2344
2345/// Lint a file with config-based suppression.
2346pub fn lint_yaml_file_with_config(
2347    path: &Path,
2348    config: &LintConfig,
2349) -> crate::error::Result<FileLintResult> {
2350    let content = std::fs::read_to_string(path)?;
2351    let warnings = lint_yaml_str_with_config(&content, config);
2352    Ok(FileLintResult {
2353        path: path.to_path_buf(),
2354        warnings,
2355    })
2356}
2357
2358/// Lint a directory with config-based suppression.
2359///
2360/// Respects `config.exclude_patterns`: glob patterns matched against paths
2361/// relative to `dir` (e.g. `"config/**"` skips `<dir>/config/...`).
2362pub fn lint_yaml_directory_with_config(
2363    dir: &Path,
2364    config: &LintConfig,
2365) -> crate::error::Result<Vec<FileLintResult>> {
2366    let mut results = Vec::new();
2367    let mut visited = HashSet::new();
2368    let exclude_set = config.build_exclude_set();
2369
2370    fn walk(
2371        dir: &Path,
2372        base: &Path,
2373        config: &LintConfig,
2374        exclude_set: &Option<globset::GlobSet>,
2375        results: &mut Vec<FileLintResult>,
2376        visited: &mut HashSet<std::path::PathBuf>,
2377    ) -> crate::error::Result<()> {
2378        let canonical = match dir.canonicalize() {
2379            Ok(p) => p,
2380            Err(_) => return Ok(()),
2381        };
2382        if !visited.insert(canonical) {
2383            return Ok(());
2384        }
2385
2386        let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
2387        entries.sort_by_key(|e| e.path());
2388
2389        for entry in entries {
2390            let path = entry.path();
2391
2392            if let Some(gs) = exclude_set
2393                && let Ok(rel) = path.strip_prefix(base)
2394                && gs.is_match(rel)
2395            {
2396                continue;
2397            }
2398
2399            if path.is_dir() {
2400                if path
2401                    .file_name()
2402                    .and_then(|n| n.to_str())
2403                    .is_some_and(|n| n.starts_with('.'))
2404                {
2405                    continue;
2406                }
2407                walk(&path, base, config, exclude_set, results, visited)?;
2408            } else if matches!(
2409                path.extension().and_then(|e| e.to_str()),
2410                Some("yml" | "yaml")
2411            ) {
2412                match lint_yaml_file_with_config(&path, config) {
2413                    Ok(file_result) => results.push(file_result),
2414                    Err(e) => {
2415                        results.push(FileLintResult {
2416                            path: path.clone(),
2417                            warnings: vec![err(
2418                                LintRule::FileReadError,
2419                                format!("error reading file: {e}"),
2420                                "/",
2421                            )],
2422                        });
2423                    }
2424                }
2425            }
2426        }
2427        Ok(())
2428    }
2429
2430    walk(dir, dir, config, &exclude_set, &mut results, &mut visited)?;
2431    Ok(results)
2432}
2433
2434// =============================================================================
2435// Tests
2436// =============================================================================
2437
2438#[cfg(test)]
2439mod tests {
2440    use super::*;
2441
2442    fn yaml_value(yaml: &str) -> Value {
2443        serde_yaml::from_str(yaml).unwrap()
2444    }
2445
2446    fn lint(yaml: &str) -> Vec<LintWarning> {
2447        lint_yaml_value(&yaml_value(yaml))
2448    }
2449
2450    fn has_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
2451        warnings.iter().any(|w| w.rule == rule)
2452    }
2453
2454    fn has_no_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
2455        !has_rule(warnings, rule)
2456    }
2457
2458    // ── Valid rule produces no errors ────────────────────────────────────
2459
2460    #[test]
2461    fn valid_detection_rule_no_errors() {
2462        let w = lint(
2463            r#"
2464title: Test Rule
2465id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2466status: test
2467logsource:
2468    category: process_creation
2469    product: windows
2470detection:
2471    selection:
2472        CommandLine|contains: 'whoami'
2473    condition: selection
2474level: medium
2475tags:
2476    - attack.execution
2477    - attack.t1059
2478"#,
2479        );
2480        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
2481        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
2482    }
2483
2484    // ── Shared checks ───────────────────────────────────────────────────
2485
2486    #[test]
2487    fn missing_title() {
2488        let w = lint(
2489            r#"
2490logsource:
2491    category: test
2492detection:
2493    selection:
2494        field: value
2495    condition: selection
2496"#,
2497        );
2498        assert!(has_rule(&w, LintRule::MissingTitle));
2499    }
2500
2501    #[test]
2502    fn title_too_long() {
2503        let long_title = "a".repeat(257);
2504        let yaml = format!(
2505            r#"
2506title: '{long_title}'
2507logsource:
2508    category: test
2509detection:
2510    selection:
2511        field: value
2512    condition: selection
2513"#
2514        );
2515        let w = lint(&yaml);
2516        assert!(has_rule(&w, LintRule::TitleTooLong));
2517    }
2518
2519    #[test]
2520    fn invalid_id() {
2521        let w = lint(
2522            r#"
2523title: Test
2524id: not-a-uuid
2525logsource:
2526    category: test
2527detection:
2528    selection:
2529        field: value
2530    condition: selection
2531"#,
2532        );
2533        assert!(has_rule(&w, LintRule::InvalidId));
2534    }
2535
2536    #[test]
2537    fn valid_id_no_warning() {
2538        let w = lint(
2539            r#"
2540title: Test
2541id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2542logsource:
2543    category: test
2544detection:
2545    selection:
2546        field: value
2547    condition: selection
2548"#,
2549        );
2550        assert!(has_no_rule(&w, LintRule::InvalidId));
2551    }
2552
2553    #[test]
2554    fn invalid_status() {
2555        let w = lint(
2556            r#"
2557title: Test
2558status: invalid
2559logsource:
2560    category: test
2561detection:
2562    selection:
2563        field: value
2564    condition: selection
2565"#,
2566        );
2567        assert!(has_rule(&w, LintRule::InvalidStatus));
2568    }
2569
2570    #[test]
2571    fn invalid_level() {
2572        let w = lint(
2573            r#"
2574title: Test
2575level: important
2576logsource:
2577    category: test
2578detection:
2579    selection:
2580        field: value
2581    condition: selection
2582"#,
2583        );
2584        assert!(has_rule(&w, LintRule::InvalidLevel));
2585    }
2586
2587    #[test]
2588    fn invalid_date_format() {
2589        let w = lint(
2590            r#"
2591title: Test
2592date: 'Jan 2025'
2593logsource:
2594    category: test
2595detection:
2596    selection:
2597        field: value
2598    condition: selection
2599"#,
2600        );
2601        assert!(has_rule(&w, LintRule::InvalidDate));
2602    }
2603
2604    #[test]
2605    fn modified_before_date() {
2606        let w = lint(
2607            r#"
2608title: Test
2609date: '2025-06-15'
2610modified: '2025-06-10'
2611logsource:
2612    category: test
2613detection:
2614    selection:
2615        field: value
2616    condition: selection
2617"#,
2618        );
2619        assert!(has_rule(&w, LintRule::ModifiedBeforeDate));
2620    }
2621
2622    #[test]
2623    fn non_lowercase_key() {
2624        let w = lint(
2625            r#"
2626title: Test
2627Status: test
2628logsource:
2629    category: test
2630detection:
2631    selection:
2632        field: value
2633    condition: selection
2634"#,
2635        );
2636        assert!(has_rule(&w, LintRule::NonLowercaseKey));
2637    }
2638
2639    // ── Detection rule checks ───────────────────────────────────────────
2640
2641    #[test]
2642    fn missing_logsource() {
2643        let w = lint(
2644            r#"
2645title: Test
2646detection:
2647    selection:
2648        field: value
2649    condition: selection
2650"#,
2651        );
2652        assert!(has_rule(&w, LintRule::MissingLogsource));
2653    }
2654
2655    #[test]
2656    fn missing_detection() {
2657        let w = lint(
2658            r#"
2659title: Test
2660logsource:
2661    category: test
2662"#,
2663        );
2664        assert!(has_rule(&w, LintRule::MissingDetection));
2665    }
2666
2667    #[test]
2668    fn missing_condition() {
2669        let w = lint(
2670            r#"
2671title: Test
2672logsource:
2673    category: test
2674detection:
2675    selection:
2676        field: value
2677"#,
2678        );
2679        assert!(has_rule(&w, LintRule::MissingCondition));
2680    }
2681
2682    #[test]
2683    fn empty_detection() {
2684        let w = lint(
2685            r#"
2686title: Test
2687logsource:
2688    category: test
2689detection:
2690    condition: selection
2691"#,
2692        );
2693        assert!(has_rule(&w, LintRule::EmptyDetection));
2694    }
2695
2696    #[test]
2697    fn invalid_related_type() {
2698        let w = lint(
2699            r#"
2700title: Test
2701logsource:
2702    category: test
2703detection:
2704    selection:
2705        field: value
2706    condition: selection
2707related:
2708    - id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2709      type: invalid_type
2710"#,
2711        );
2712        assert!(has_rule(&w, LintRule::InvalidRelatedType));
2713    }
2714
2715    #[test]
2716    fn related_missing_required_fields() {
2717        let w = lint(
2718            r#"
2719title: Test
2720logsource:
2721    category: test
2722detection:
2723    selection:
2724        field: value
2725    condition: selection
2726related:
2727    - id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2728"#,
2729        );
2730        assert!(has_rule(&w, LintRule::RelatedMissingRequired));
2731    }
2732
2733    #[test]
2734    fn deprecated_without_related() {
2735        let w = lint(
2736            r#"
2737title: Test
2738status: deprecated
2739logsource:
2740    category: test
2741detection:
2742    selection:
2743        field: value
2744    condition: selection
2745"#,
2746        );
2747        assert!(has_rule(&w, LintRule::DeprecatedWithoutRelated));
2748    }
2749
2750    #[test]
2751    fn invalid_tag_pattern() {
2752        let w = lint(
2753            r#"
2754title: Test
2755logsource:
2756    category: test
2757detection:
2758    selection:
2759        field: value
2760    condition: selection
2761tags:
2762    - 'Invalid Tag'
2763"#,
2764        );
2765        assert!(has_rule(&w, LintRule::InvalidTag));
2766    }
2767
2768    #[test]
2769    fn unknown_tag_namespace() {
2770        let w = lint(
2771            r#"
2772title: Test
2773logsource:
2774    category: test
2775detection:
2776    selection:
2777        field: value
2778    condition: selection
2779tags:
2780    - custom.something
2781"#,
2782        );
2783        assert!(has_rule(&w, LintRule::UnknownTagNamespace));
2784    }
2785
2786    #[test]
2787    fn duplicate_tags() {
2788        let w = lint(
2789            r#"
2790title: Test
2791logsource:
2792    category: test
2793detection:
2794    selection:
2795        field: value
2796    condition: selection
2797tags:
2798    - attack.execution
2799    - attack.execution
2800"#,
2801        );
2802        assert!(has_rule(&w, LintRule::DuplicateTags));
2803    }
2804
2805    #[test]
2806    fn logsource_not_lowercase() {
2807        let w = lint(
2808            r#"
2809title: Test
2810logsource:
2811    category: Process_Creation
2812    product: Windows
2813detection:
2814    selection:
2815        field: value
2816    condition: selection
2817"#,
2818        );
2819        assert!(has_rule(&w, LintRule::LogsourceValueNotLowercase));
2820    }
2821
2822    #[test]
2823    fn single_value_all_modifier() {
2824        let w = lint(
2825            r#"
2826title: Test
2827logsource:
2828    category: test
2829detection:
2830    selection:
2831        CommandLine|contains|all: 'single'
2832    condition: selection
2833"#,
2834        );
2835        assert!(has_rule(&w, LintRule::SingleValueAllModifier));
2836    }
2837
2838    #[test]
2839    fn null_in_value_list() {
2840        let w = lint(
2841            r#"
2842title: Test
2843logsource:
2844    category: test
2845detection:
2846    selection:
2847        FieldA:
2848            - 'value1'
2849            - null
2850    condition: selection
2851"#,
2852        );
2853        assert!(has_rule(&w, LintRule::NullInValueList));
2854    }
2855
2856    // ── Correlation rule checks ─────────────────────────────────────────
2857
2858    #[test]
2859    fn valid_correlation_no_errors() {
2860        let w = lint(
2861            r#"
2862title: Brute Force
2863correlation:
2864    type: event_count
2865    rules:
2866        - 929a690e-bef0-4204-a928-ef5e620d6fcc
2867    group-by:
2868        - User
2869    timespan: 1h
2870    condition:
2871        gte: 100
2872level: high
2873"#,
2874        );
2875        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
2876        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
2877    }
2878
2879    #[test]
2880    fn invalid_correlation_type() {
2881        let w = lint(
2882            r#"
2883title: Test
2884correlation:
2885    type: invalid_type
2886    rules:
2887        - some-rule
2888    timespan: 1h
2889    group-by:
2890        - User
2891"#,
2892        );
2893        assert!(has_rule(&w, LintRule::InvalidCorrelationType));
2894    }
2895
2896    #[test]
2897    fn missing_correlation_timespan() {
2898        let w = lint(
2899            r#"
2900title: Test
2901correlation:
2902    type: event_count
2903    rules:
2904        - some-rule
2905    group-by:
2906        - User
2907    condition:
2908        gte: 10
2909"#,
2910        );
2911        assert!(has_rule(&w, LintRule::MissingCorrelationTimespan));
2912    }
2913
2914    #[test]
2915    fn invalid_timespan_format() {
2916        let w = lint(
2917            r#"
2918title: Test
2919correlation:
2920    type: event_count
2921    rules:
2922        - some-rule
2923    group-by:
2924        - User
2925    timespan: 1hour
2926    condition:
2927        gte: 10
2928"#,
2929        );
2930        assert!(has_rule(&w, LintRule::InvalidTimespanFormat));
2931    }
2932
2933    #[test]
2934    fn missing_group_by() {
2935        let w = lint(
2936            r#"
2937title: Test
2938correlation:
2939    type: event_count
2940    rules:
2941        - some-rule
2942    timespan: 1h
2943    condition:
2944        gte: 10
2945"#,
2946        );
2947        assert!(has_rule(&w, LintRule::MissingGroupBy));
2948    }
2949
2950    #[test]
2951    fn missing_condition_field_for_value_count() {
2952        let w = lint(
2953            r#"
2954title: Test
2955correlation:
2956    type: value_count
2957    rules:
2958        - some-rule
2959    group-by:
2960        - User
2961    timespan: 1h
2962    condition:
2963        gte: 10
2964"#,
2965        );
2966        assert!(has_rule(&w, LintRule::MissingConditionField));
2967    }
2968
2969    #[test]
2970    fn invalid_condition_operator() {
2971        let w = lint(
2972            r#"
2973title: Test
2974correlation:
2975    type: event_count
2976    rules:
2977        - some-rule
2978    group-by:
2979        - User
2980    timespan: 1h
2981    condition:
2982        bigger: 10
2983"#,
2984        );
2985        assert!(has_rule(&w, LintRule::InvalidConditionOperator));
2986    }
2987
2988    #[test]
2989    fn generate_not_boolean() {
2990        let w = lint(
2991            r#"
2992title: Test
2993correlation:
2994    type: event_count
2995    rules:
2996        - some-rule
2997    group-by:
2998        - User
2999    timespan: 1h
3000    condition:
3001        gte: 10
3002    generate: 'yes'
3003"#,
3004        );
3005        assert!(has_rule(&w, LintRule::GenerateNotBoolean));
3006    }
3007
3008    // ── Filter rule checks ──────────────────────────────────────────────
3009
3010    #[test]
3011    fn valid_filter_no_errors() {
3012        let w = lint(
3013            r#"
3014title: Filter Admin
3015logsource:
3016    category: process_creation
3017    product: windows
3018filter:
3019    rules:
3020        - 929a690e-bef0-4204-a928-ef5e620d6fcc
3021    selection:
3022        User|startswith: 'adm_'
3023    condition: selection
3024"#,
3025        );
3026        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
3027        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
3028    }
3029
3030    #[test]
3031    fn missing_filter_rules() {
3032        let w = lint(
3033            r#"
3034title: Test
3035logsource:
3036    category: test
3037filter:
3038    selection:
3039        User: admin
3040    condition: selection
3041"#,
3042        );
3043        assert!(has_rule(&w, LintRule::MissingFilterRules));
3044    }
3045
3046    #[test]
3047    fn missing_filter_selection() {
3048        let w = lint(
3049            r#"
3050title: Test
3051logsource:
3052    category: test
3053filter:
3054    rules:
3055        - some-rule
3056    condition: selection
3057"#,
3058        );
3059        assert!(has_rule(&w, LintRule::MissingFilterSelection));
3060    }
3061
3062    #[test]
3063    fn missing_filter_condition() {
3064        let w = lint(
3065            r#"
3066title: Test
3067logsource:
3068    category: test
3069filter:
3070    rules:
3071        - some-rule
3072    selection:
3073        User: admin
3074"#,
3075        );
3076        assert!(has_rule(&w, LintRule::MissingFilterCondition));
3077    }
3078
3079    #[test]
3080    fn filter_has_level_warning() {
3081        let w = lint(
3082            r#"
3083title: Test
3084logsource:
3085    category: test
3086level: high
3087filter:
3088    rules:
3089        - some-rule
3090    selection:
3091        User: admin
3092    condition: selection
3093"#,
3094        );
3095        assert!(has_rule(&w, LintRule::FilterHasLevel));
3096    }
3097
3098    #[test]
3099    fn filter_has_status_warning() {
3100        let w = lint(
3101            r#"
3102title: Test
3103logsource:
3104    category: test
3105status: test
3106filter:
3107    rules:
3108        - some-rule
3109    selection:
3110        User: admin
3111    condition: selection
3112"#,
3113        );
3114        assert!(has_rule(&w, LintRule::FilterHasStatus));
3115    }
3116
3117    #[test]
3118    fn missing_filter_logsource() {
3119        let w = lint(
3120            r#"
3121title: Test
3122filter:
3123    rules:
3124        - some-rule
3125    selection:
3126        User: admin
3127    condition: selection
3128"#,
3129        );
3130        assert!(has_rule(&w, LintRule::MissingFilterLogsource));
3131    }
3132
3133    // ── Action fragments are skipped ────────────────────────────────────
3134
3135    #[test]
3136    fn action_global_skipped() {
3137        let w = lint(
3138            r#"
3139action: global
3140title: Global Template
3141logsource:
3142    product: windows
3143"#,
3144        );
3145        assert!(w.is_empty());
3146    }
3147
3148    #[test]
3149    fn action_reset_skipped() {
3150        let w = lint(
3151            r#"
3152action: reset
3153"#,
3154        );
3155        assert!(w.is_empty());
3156    }
3157
3158    // ── New checks ──────────────────────────────────────────────────────
3159
3160    #[test]
3161    fn empty_title() {
3162        let w = lint(
3163            r#"
3164title: ''
3165logsource:
3166    category: test
3167detection:
3168    selection:
3169        field: value
3170    condition: selection
3171level: medium
3172"#,
3173        );
3174        assert!(has_rule(&w, LintRule::EmptyTitle));
3175    }
3176
3177    #[test]
3178    fn missing_level() {
3179        let w = lint(
3180            r#"
3181title: Test
3182logsource:
3183    category: test
3184detection:
3185    selection:
3186        field: value
3187    condition: selection
3188"#,
3189        );
3190        assert!(has_rule(&w, LintRule::MissingLevel));
3191    }
3192
3193    #[test]
3194    fn valid_level_no_missing_warning() {
3195        let w = lint(
3196            r#"
3197title: Test
3198logsource:
3199    category: test
3200detection:
3201    selection:
3202        field: value
3203    condition: selection
3204level: medium
3205"#,
3206        );
3207        assert!(has_no_rule(&w, LintRule::MissingLevel));
3208    }
3209
3210    #[test]
3211    fn invalid_date_feb_30() {
3212        assert!(!is_valid_date("2025-02-30"));
3213    }
3214
3215    #[test]
3216    fn invalid_date_apr_31() {
3217        assert!(!is_valid_date("2025-04-31"));
3218    }
3219
3220    #[test]
3221    fn valid_date_feb_28() {
3222        assert!(is_valid_date("2025-02-28"));
3223    }
3224
3225    #[test]
3226    fn valid_date_leap_year_feb_29() {
3227        assert!(is_valid_date("2024-02-29"));
3228    }
3229
3230    #[test]
3231    fn invalid_date_non_leap_feb_29() {
3232        assert!(!is_valid_date("2025-02-29"));
3233    }
3234
3235    #[test]
3236    fn condition_references_unknown() {
3237        let w = lint(
3238            r#"
3239title: Test
3240logsource:
3241    category: test
3242detection:
3243    selection:
3244        field: value
3245    condition: sel_main
3246level: medium
3247"#,
3248        );
3249        assert!(has_rule(&w, LintRule::ConditionReferencesUnknown));
3250    }
3251
3252    #[test]
3253    fn condition_references_valid() {
3254        let w = lint(
3255            r#"
3256title: Test
3257logsource:
3258    category: test
3259detection:
3260    selection:
3261        field: value
3262    condition: selection
3263level: medium
3264"#,
3265        );
3266        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
3267    }
3268
3269    #[test]
3270    fn condition_references_complex_valid() {
3271        let w = lint(
3272            r#"
3273title: Test
3274logsource:
3275    category: test
3276detection:
3277    sel_main:
3278        field: value
3279    filter_fp:
3280        User: admin
3281    condition: sel_main and not filter_fp
3282level: medium
3283"#,
3284        );
3285        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
3286    }
3287
3288    #[test]
3289    fn empty_value_list() {
3290        let w = lint(
3291            r#"
3292title: Test
3293logsource:
3294    category: test
3295detection:
3296    selection:
3297        field: []
3298    condition: selection
3299level: medium
3300"#,
3301        );
3302        assert!(has_rule(&w, LintRule::EmptyValueList));
3303    }
3304
3305    #[test]
3306    fn not_a_mapping() {
3307        let v: serde_yaml::Value = serde_yaml::from_str("- item1\n- item2").unwrap();
3308        let w = lint_yaml_value(&v);
3309        assert!(has_rule(&w, LintRule::NotAMapping));
3310    }
3311
3312    #[test]
3313    fn lint_yaml_str_produces_spans() {
3314        let text = r#"title: Test
3315status: invalid_status
3316logsource:
3317    category: test
3318detection:
3319    selection:
3320        field: value
3321    condition: selection
3322level: medium
3323"#;
3324        let warnings = lint_yaml_str(text);
3325        // InvalidStatus points to /status which exists in the text
3326        let invalid_status = warnings.iter().find(|w| w.rule == LintRule::InvalidStatus);
3327        assert!(invalid_status.is_some(), "expected InvalidStatus warning");
3328        let span = invalid_status.unwrap().span;
3329        assert!(span.is_some(), "expected span to be resolved");
3330        // "status:" is on line 1 (0-indexed)
3331        assert_eq!(span.unwrap().start_line, 1);
3332    }
3333
3334    #[test]
3335    fn yaml_parse_error_uses_correct_rule() {
3336        let text = "title: [unclosed";
3337        let warnings = lint_yaml_str(text);
3338        assert!(has_rule(&warnings, LintRule::YamlParseError));
3339        assert!(has_no_rule(&warnings, LintRule::MissingTitle));
3340    }
3341
3342    // ── Unknown top-level keys ───────────────────────────────────────────
3343
3344    #[test]
3345    fn unknown_key_typo_detected() {
3346        let w = lint(
3347            r#"
3348title: Test
3349desciption: Typo field
3350logsource:
3351    category: test
3352detection:
3353    selection:
3354        field: value
3355    condition: selection
3356level: medium
3357"#,
3358        );
3359        assert!(has_rule(&w, LintRule::UnknownKey));
3360        let unk = w.iter().find(|w| w.rule == LintRule::UnknownKey).unwrap();
3361        assert!(unk.message.contains("desciption"));
3362        assert!(unk.message.contains("description"));
3363        assert_eq!(unk.severity, Severity::Info);
3364    }
3365
3366    #[test]
3367    fn known_keys_no_unknown_warning() {
3368        let w = lint(
3369            r#"
3370title: Test Rule
3371id: 929a690e-bef0-4204-a928-ef5e620d6fcc
3372status: test
3373description: A valid description
3374author: tester
3375date: '2025-01-01'
3376modified: '2025-06-01'
3377license: MIT
3378logsource:
3379    category: process_creation
3380    product: windows
3381detection:
3382    selection:
3383        CommandLine|contains: 'whoami'
3384    condition: selection
3385level: medium
3386tags:
3387    - attack.execution
3388references:
3389    - https://example.com
3390fields:
3391    - CommandLine
3392falsepositives:
3393    - Legitimate admin
3394"#,
3395        );
3396        assert!(has_no_rule(&w, LintRule::UnknownKey));
3397    }
3398
3399    #[test]
3400    fn custom_fields_allowed_by_spec() {
3401        // The Sigma spec v2.1.0 explicitly allows arbitrary custom top-level
3402        // fields, so keys like "simulation" and "regression_tests_path" that
3403        // are not close to any known key should NOT produce warnings.
3404        let w = lint(
3405            r#"
3406title: Test Rule
3407logsource:
3408    category: test
3409detection:
3410    selection:
3411        field: value
3412    condition: selection
3413level: medium
3414simulation:
3415    action: scan
3416regression_tests_path: tests/
3417custom_metadata: hello
3418"#,
3419        );
3420        assert!(has_no_rule(&w, LintRule::UnknownKey));
3421    }
3422
3423    #[test]
3424    fn unknown_key_typo_correlation() {
3425        // "lvel" is edit-distance 1 from "level"
3426        let w = lint(
3427            r#"
3428title: Correlation Test
3429name: test_correlation
3430correlation:
3431    type: event_count
3432    rules:
3433        - rule1
3434    group-by:
3435        - src_ip
3436    timespan: 5m
3437    condition:
3438        gte: 10
3439lvel: high
3440"#,
3441        );
3442        assert!(has_rule(&w, LintRule::UnknownKey));
3443        let unk = w.iter().find(|w| w.rule == LintRule::UnknownKey).unwrap();
3444        assert!(unk.message.contains("lvel"));
3445        assert!(unk.message.contains("level"));
3446    }
3447
3448    #[test]
3449    fn unknown_key_custom_field_filter() {
3450        // "badkey" is not close to any known key — no warning.
3451        let w = lint(
3452            r#"
3453title: Filter Test
3454logsource:
3455    category: test
3456filter:
3457    rules:
3458        - rule1
3459    selection:
3460        User: admin
3461    condition: selection
3462badkey: foo
3463"#,
3464        );
3465        assert!(has_no_rule(&w, LintRule::UnknownKey));
3466    }
3467
3468    // ── Wildcard-only value ──────────────────────────────────────────────
3469
3470    #[test]
3471    fn wildcard_only_value_string() {
3472        let w = lint(
3473            r#"
3474title: Test
3475logsource:
3476    category: test
3477detection:
3478    selection:
3479        TargetFilename: '*'
3480    condition: selection
3481level: medium
3482"#,
3483        );
3484        assert!(has_rule(&w, LintRule::WildcardOnlyValue));
3485    }
3486
3487    #[test]
3488    fn wildcard_only_value_list() {
3489        let w = lint(
3490            r#"
3491title: Test
3492logsource:
3493    category: test
3494detection:
3495    selection:
3496        TargetFilename:
3497            - '*'
3498    condition: selection
3499level: medium
3500"#,
3501        );
3502        assert!(has_rule(&w, LintRule::WildcardOnlyValue));
3503    }
3504
3505    #[test]
3506    fn wildcard_with_other_values_no_warning() {
3507        let w = lint(
3508            r#"
3509title: Test
3510logsource:
3511    category: test
3512detection:
3513    selection:
3514        TargetFilename:
3515            - '*temp*'
3516            - '*cache*'
3517    condition: selection
3518level: medium
3519"#,
3520        );
3521        assert!(has_no_rule(&w, LintRule::WildcardOnlyValue));
3522    }
3523
3524    #[test]
3525    fn wildcard_regex_no_warning() {
3526        let w = lint(
3527            r#"
3528title: Test
3529logsource:
3530    category: test
3531detection:
3532    selection:
3533        TargetFilename|re: '*'
3534    condition: selection
3535level: medium
3536"#,
3537        );
3538        assert!(has_no_rule(&w, LintRule::WildcardOnlyValue));
3539    }
3540
3541    // ── resolve_path_to_span tests ───────────────────────────────────────
3542
3543    #[test]
3544    fn resolve_path_to_span_root() {
3545        let text = "title: Test\nstatus: test\n";
3546        let span = resolve_path_to_span(text, "/");
3547        assert!(span.is_some());
3548        assert_eq!(span.unwrap().start_line, 0);
3549    }
3550
3551    #[test]
3552    fn resolve_path_to_span_top_level_key() {
3553        let text = "title: Test\nstatus: test\nlevel: high\n";
3554        let span = resolve_path_to_span(text, "/status");
3555        assert!(span.is_some());
3556        assert_eq!(span.unwrap().start_line, 1);
3557    }
3558
3559    #[test]
3560    fn resolve_path_to_span_nested_key() {
3561        let text = "title: Test\nlogsource:\n    category: test\n    product: windows\n";
3562        let span = resolve_path_to_span(text, "/logsource/product");
3563        assert!(span.is_some());
3564        assert_eq!(span.unwrap().start_line, 3);
3565    }
3566
3567    #[test]
3568    fn resolve_path_to_span_missing_key() {
3569        let text = "title: Test\nstatus: test\n";
3570        let span = resolve_path_to_span(text, "/nonexistent");
3571        assert!(span.is_none());
3572    }
3573
3574    // ── Multi-document YAML ──────────────────────────────────────────────
3575
3576    #[test]
3577    fn multi_doc_yaml_lints_all_documents() {
3578        let text = r#"title: Rule 1
3579logsource:
3580    category: test
3581detection:
3582    selection:
3583        field: value
3584    condition: selection
3585level: medium
3586---
3587title: Rule 2
3588status: bad_status
3589logsource:
3590    category: test
3591detection:
3592    selection:
3593        field: value
3594    condition: selection
3595level: medium
3596"#;
3597        let warnings = lint_yaml_str(text);
3598        // Second doc has InvalidStatus
3599        assert!(has_rule(&warnings, LintRule::InvalidStatus));
3600    }
3601
3602    // ── is_valid_timespan edge cases ─────────────────────────────────────
3603
3604    #[test]
3605    fn timespan_zero_seconds() {
3606        assert!(is_valid_timespan("0s"));
3607    }
3608
3609    #[test]
3610    fn timespan_no_digits() {
3611        assert!(!is_valid_timespan("s"));
3612    }
3613
3614    #[test]
3615    fn timespan_no_unit() {
3616        assert!(!is_valid_timespan("123"));
3617    }
3618
3619    #[test]
3620    fn timespan_invalid_unit() {
3621        assert!(!is_valid_timespan("5x"));
3622    }
3623
3624    #[test]
3625    fn timespan_valid_variants() {
3626        assert!(is_valid_timespan("30s"));
3627        assert!(is_valid_timespan("5m"));
3628        assert!(is_valid_timespan("1h"));
3629        assert!(is_valid_timespan("7d"));
3630        assert!(is_valid_timespan("1w"));
3631        assert!(is_valid_timespan("1M"));
3632        assert!(is_valid_timespan("1y"));
3633    }
3634
3635    // ── FileLintResult methods ───────────────────────────────────────────
3636
3637    #[test]
3638    fn file_lint_result_has_errors() {
3639        let result = FileLintResult {
3640            path: std::path::PathBuf::from("test.yml"),
3641            warnings: vec![
3642                warning(LintRule::TitleTooLong, "too long", "/title"),
3643                err(
3644                    LintRule::MissingCondition,
3645                    "missing",
3646                    "/detection/condition",
3647                ),
3648            ],
3649        };
3650        assert!(result.has_errors());
3651        assert_eq!(result.error_count(), 1);
3652        assert_eq!(result.warning_count(), 1);
3653    }
3654
3655    #[test]
3656    fn file_lint_result_no_errors() {
3657        let result = FileLintResult {
3658            path: std::path::PathBuf::from("test.yml"),
3659            warnings: vec![warning(LintRule::TitleTooLong, "too long", "/title")],
3660        };
3661        assert!(!result.has_errors());
3662        assert_eq!(result.error_count(), 0);
3663        assert_eq!(result.warning_count(), 1);
3664    }
3665
3666    #[test]
3667    fn file_lint_result_empty() {
3668        let result = FileLintResult {
3669            path: std::path::PathBuf::from("test.yml"),
3670            warnings: vec![],
3671        };
3672        assert!(!result.has_errors());
3673        assert_eq!(result.error_count(), 0);
3674        assert_eq!(result.warning_count(), 0);
3675    }
3676
3677    // ── LintWarning Display impl ─────────────────────────────────────────
3678
3679    #[test]
3680    fn lint_warning_display() {
3681        let w = err(
3682            LintRule::MissingTitle,
3683            "missing required field 'title'",
3684            "/title",
3685        );
3686        let display = format!("{w}");
3687        assert!(display.contains("error"));
3688        assert!(display.contains("missing_title"));
3689        assert!(display.contains("/title"));
3690    }
3691
3692    // ── New checks: missing description / author / all+re ────────────────
3693
3694    #[test]
3695    fn missing_description_info() {
3696        let w = lint(
3697            r#"
3698title: Test
3699logsource:
3700    category: test
3701detection:
3702    selection:
3703        field: value
3704    condition: selection
3705level: medium
3706"#,
3707        );
3708        assert!(has_rule(&w, LintRule::MissingDescription));
3709        let md = w
3710            .iter()
3711            .find(|w| w.rule == LintRule::MissingDescription)
3712            .unwrap();
3713        assert_eq!(md.severity, Severity::Info);
3714    }
3715
3716    #[test]
3717    fn has_description_no_info() {
3718        let w = lint(
3719            r#"
3720title: Test
3721description: A fine description
3722logsource:
3723    category: test
3724detection:
3725    selection:
3726        field: value
3727    condition: selection
3728level: medium
3729"#,
3730        );
3731        assert!(has_no_rule(&w, LintRule::MissingDescription));
3732    }
3733
3734    #[test]
3735    fn missing_author_info() {
3736        let w = lint(
3737            r#"
3738title: Test
3739logsource:
3740    category: test
3741detection:
3742    selection:
3743        field: value
3744    condition: selection
3745level: medium
3746"#,
3747        );
3748        assert!(has_rule(&w, LintRule::MissingAuthor));
3749        let ma = w
3750            .iter()
3751            .find(|w| w.rule == LintRule::MissingAuthor)
3752            .unwrap();
3753        assert_eq!(ma.severity, Severity::Info);
3754    }
3755
3756    #[test]
3757    fn has_author_no_info() {
3758        let w = lint(
3759            r#"
3760title: Test
3761author: tester
3762logsource:
3763    category: test
3764detection:
3765    selection:
3766        field: value
3767    condition: selection
3768level: medium
3769"#,
3770        );
3771        assert!(has_no_rule(&w, LintRule::MissingAuthor));
3772    }
3773
3774    #[test]
3775    fn all_with_re_warning() {
3776        let w = lint(
3777            r#"
3778title: Test
3779logsource:
3780    category: test
3781detection:
3782    selection:
3783        CommandLine|all|re:
3784            - '(?i)whoami'
3785            - '(?i)net user'
3786    condition: selection
3787level: medium
3788"#,
3789        );
3790        assert!(has_rule(&w, LintRule::AllWithRe));
3791    }
3792
3793    #[test]
3794    fn all_without_re_no_all_with_re() {
3795        let w = lint(
3796            r#"
3797title: Test
3798logsource:
3799    category: test
3800detection:
3801    selection:
3802        CommandLine|contains|all:
3803            - 'whoami'
3804            - 'net user'
3805    condition: selection
3806level: medium
3807"#,
3808        );
3809        assert!(has_no_rule(&w, LintRule::AllWithRe));
3810    }
3811
3812    #[test]
3813    fn re_without_all_no_all_with_re() {
3814        let w = lint(
3815            r#"
3816title: Test
3817logsource:
3818    category: test
3819detection:
3820    selection:
3821        CommandLine|re: '(?i)whoami|net user'
3822    condition: selection
3823level: medium
3824"#,
3825        );
3826        assert!(has_no_rule(&w, LintRule::AllWithRe));
3827    }
3828
3829    // ── Modifier compatibility checks ────────────────────────────────────
3830
3831    #[test]
3832    fn incompatible_contains_startswith() {
3833        let w = lint(
3834            r#"
3835title: Test
3836logsource:
3837    category: test
3838detection:
3839    selection:
3840        Field|contains|startswith: 'test'
3841    condition: selection
3842level: medium
3843"#,
3844        );
3845        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3846    }
3847
3848    #[test]
3849    fn incompatible_endswith_startswith() {
3850        let w = lint(
3851            r#"
3852title: Test
3853logsource:
3854    category: test
3855detection:
3856    selection:
3857        Field|endswith|startswith: 'test'
3858    condition: selection
3859level: medium
3860"#,
3861        );
3862        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3863    }
3864
3865    #[test]
3866    fn incompatible_contains_endswith() {
3867        let w = lint(
3868            r#"
3869title: Test
3870logsource:
3871    category: test
3872detection:
3873    selection:
3874        Field|contains|endswith: 'test'
3875    condition: selection
3876level: medium
3877"#,
3878        );
3879        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3880    }
3881
3882    #[test]
3883    fn incompatible_re_with_contains() {
3884        let w = lint(
3885            r#"
3886title: Test
3887logsource:
3888    category: test
3889detection:
3890    selection:
3891        Field|re|contains: '.*test.*'
3892    condition: selection
3893level: medium
3894"#,
3895        );
3896        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3897    }
3898
3899    #[test]
3900    fn incompatible_cidr_with_startswith() {
3901        let w = lint(
3902            r#"
3903title: Test
3904logsource:
3905    category: test
3906detection:
3907    selection:
3908        Field|cidr|startswith: '192.168.0.0/16'
3909    condition: selection
3910level: medium
3911"#,
3912        );
3913        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3914    }
3915
3916    #[test]
3917    fn incompatible_exists_with_contains() {
3918        let w = lint(
3919            r#"
3920title: Test
3921logsource:
3922    category: test
3923detection:
3924    selection:
3925        Field|exists|contains: true
3926    condition: selection
3927level: medium
3928"#,
3929        );
3930        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3931    }
3932
3933    #[test]
3934    fn incompatible_gt_with_contains() {
3935        let w = lint(
3936            r#"
3937title: Test
3938logsource:
3939    category: test
3940detection:
3941    selection:
3942        Field|gt|contains: 100
3943    condition: selection
3944level: medium
3945"#,
3946        );
3947        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3948    }
3949
3950    #[test]
3951    fn incompatible_regex_flags_without_re() {
3952        let w = lint(
3953            r#"
3954title: Test
3955logsource:
3956    category: test
3957detection:
3958    selection:
3959        Field|i|m: 'test'
3960    condition: selection
3961level: medium
3962"#,
3963        );
3964        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3965    }
3966
3967    #[test]
3968    fn compatible_re_with_regex_flags() {
3969        let w = lint(
3970            r#"
3971title: Test
3972logsource:
3973    category: test
3974detection:
3975    selection:
3976        Field|re|i|m|s: '(?i)test'
3977    condition: selection
3978level: medium
3979"#,
3980        );
3981        assert!(has_no_rule(&w, LintRule::IncompatibleModifiers));
3982    }
3983
3984    #[test]
3985    fn compatible_contains_all() {
3986        let w = lint(
3987            r#"
3988title: Test
3989logsource:
3990    category: test
3991detection:
3992    selection:
3993        Field|contains|all:
3994            - 'val1'
3995            - 'val2'
3996    condition: selection
3997level: medium
3998"#,
3999        );
4000        assert!(has_no_rule(&w, LintRule::IncompatibleModifiers));
4001    }
4002
4003    #[test]
4004    fn compatible_base64offset_contains() {
4005        let w = lint(
4006            r#"
4007title: Test
4008logsource:
4009    category: test
4010detection:
4011    selection:
4012        Field|base64offset|contains: 'test'
4013    condition: selection
4014level: medium
4015"#,
4016        );
4017        assert!(has_no_rule(&w, LintRule::IncompatibleModifiers));
4018    }
4019
4020    #[test]
4021    fn compatible_wide_base64() {
4022        let w = lint(
4023            r#"
4024title: Test
4025logsource:
4026    category: test
4027detection:
4028    selection:
4029        Field|wide|base64: 'test'
4030    condition: selection
4031level: medium
4032"#,
4033        );
4034        assert!(has_no_rule(&w, LintRule::IncompatibleModifiers));
4035    }
4036
4037    // ── Info/Hint severity levels ────────────────────────────────────────
4038
4039    #[test]
4040    fn severity_display() {
4041        assert_eq!(format!("{}", Severity::Error), "error");
4042        assert_eq!(format!("{}", Severity::Warning), "warning");
4043        assert_eq!(format!("{}", Severity::Info), "info");
4044        assert_eq!(format!("{}", Severity::Hint), "hint");
4045    }
4046
4047    #[test]
4048    fn file_lint_result_info_count() {
4049        let result = FileLintResult {
4050            path: std::path::PathBuf::from("test.yml"),
4051            warnings: vec![
4052                info(LintRule::MissingDescription, "missing desc", "/description"),
4053                info(LintRule::MissingAuthor, "missing author", "/author"),
4054                warning(LintRule::TitleTooLong, "too long", "/title"),
4055            ],
4056        };
4057        assert_eq!(result.info_count(), 2);
4058        assert_eq!(result.warning_count(), 1);
4059        assert_eq!(result.error_count(), 0);
4060        assert!(!result.has_errors());
4061    }
4062
4063    // ── Inline suppression parsing ───────────────────────────────────────
4064
4065    #[test]
4066    fn parse_inline_disable_all() {
4067        let text = "# rsigma-disable\ntitle: Test\n";
4068        let sup = parse_inline_suppressions(text);
4069        assert!(sup.disable_all);
4070    }
4071
4072    #[test]
4073    fn parse_inline_disable_specific_rules() {
4074        let text = "# rsigma-disable missing_description, missing_author\ntitle: Test\n";
4075        let sup = parse_inline_suppressions(text);
4076        assert!(!sup.disable_all);
4077        assert!(sup.file_disabled.contains("missing_description"));
4078        assert!(sup.file_disabled.contains("missing_author"));
4079    }
4080
4081    #[test]
4082    fn parse_inline_disable_next_line_all() {
4083        let text = "# rsigma-disable-next-line\ntitle: Test\n";
4084        let sup = parse_inline_suppressions(text);
4085        assert!(!sup.disable_all);
4086        // Line 0 has the comment, line 1 is "title: Test"
4087        assert!(sup.line_disabled.contains_key(&1));
4088        assert!(sup.line_disabled[&1].is_none()); // None means all rules
4089    }
4090
4091    #[test]
4092    fn parse_inline_disable_next_line_specific() {
4093        let text = "title: Test\n# rsigma-disable-next-line missing_level\nlevel: medium\n";
4094        let sup = parse_inline_suppressions(text);
4095        // Comment on line 1, suppresses line 2
4096        assert!(sup.line_disabled.contains_key(&2));
4097        let rules = sup.line_disabled[&2].as_ref().unwrap();
4098        assert!(rules.contains("missing_level"));
4099    }
4100
4101    #[test]
4102    fn parse_inline_no_comments() {
4103        let text = "title: Test\nstatus: test\n";
4104        let sup = parse_inline_suppressions(text);
4105        assert!(!sup.disable_all);
4106        assert!(sup.file_disabled.is_empty());
4107        assert!(sup.line_disabled.is_empty());
4108    }
4109
4110    #[test]
4111    fn parse_inline_comment_in_quoted_string() {
4112        // The '#' is inside a quoted string — should NOT be treated as a comment
4113        let text = "description: 'no # rsigma-disable here'\ntitle: Test\n";
4114        let sup = parse_inline_suppressions(text);
4115        assert!(!sup.disable_all);
4116        assert!(sup.file_disabled.is_empty());
4117    }
4118
4119    // ── Suppression filtering ────────────────────────────────────────────
4120
4121    #[test]
4122    fn apply_suppressions_disables_rule() {
4123        let warnings = vec![
4124            info(LintRule::MissingDescription, "desc", "/description"),
4125            info(LintRule::MissingAuthor, "author", "/author"),
4126            warning(LintRule::TitleTooLong, "title", "/title"),
4127        ];
4128        let mut config = LintConfig::default();
4129        config
4130            .disabled_rules
4131            .insert("missing_description".to_string());
4132        let inline = InlineSuppressions::default();
4133
4134        let result = apply_suppressions(warnings, &config, &inline);
4135        assert_eq!(result.len(), 2);
4136        assert!(
4137            result
4138                .iter()
4139                .all(|w| w.rule != LintRule::MissingDescription)
4140        );
4141    }
4142
4143    #[test]
4144    fn apply_suppressions_severity_override() {
4145        let warnings = vec![warning(LintRule::TitleTooLong, "title too long", "/title")];
4146        let mut config = LintConfig::default();
4147        config
4148            .severity_overrides
4149            .insert("title_too_long".to_string(), Severity::Info);
4150        let inline = InlineSuppressions::default();
4151
4152        let result = apply_suppressions(warnings, &config, &inline);
4153        assert_eq!(result.len(), 1);
4154        assert_eq!(result[0].severity, Severity::Info);
4155    }
4156
4157    #[test]
4158    fn apply_suppressions_inline_file_disable() {
4159        let warnings = vec![
4160            info(LintRule::MissingDescription, "desc", "/description"),
4161            info(LintRule::MissingAuthor, "author", "/author"),
4162        ];
4163        let config = LintConfig::default();
4164        let mut inline = InlineSuppressions::default();
4165        inline.file_disabled.insert("missing_author".to_string());
4166
4167        let result = apply_suppressions(warnings, &config, &inline);
4168        assert_eq!(result.len(), 1);
4169        assert_eq!(result[0].rule, LintRule::MissingDescription);
4170    }
4171
4172    #[test]
4173    fn apply_suppressions_inline_disable_all() {
4174        let warnings = vec![
4175            err(LintRule::MissingTitle, "title", "/title"),
4176            warning(LintRule::TitleTooLong, "long", "/title"),
4177        ];
4178        let config = LintConfig::default();
4179        let inline = InlineSuppressions {
4180            disable_all: true,
4181            ..Default::default()
4182        };
4183
4184        let result = apply_suppressions(warnings, &config, &inline);
4185        assert!(result.is_empty());
4186    }
4187
4188    #[test]
4189    fn apply_suppressions_inline_next_line() {
4190        let mut w1 = warning(LintRule::TitleTooLong, "long", "/title");
4191        w1.span = Some(Span {
4192            start_line: 5,
4193            start_col: 0,
4194            end_line: 5,
4195            end_col: 10,
4196        });
4197        let mut w2 = err(LintRule::InvalidStatus, "bad", "/status");
4198        w2.span = Some(Span {
4199            start_line: 6,
4200            start_col: 0,
4201            end_line: 6,
4202            end_col: 10,
4203        });
4204
4205        let config = LintConfig::default();
4206        let mut inline = InlineSuppressions::default();
4207        // Suppress all rules on line 5
4208        inline.line_disabled.insert(5, None);
4209
4210        let result = apply_suppressions(vec![w1, w2], &config, &inline);
4211        assert_eq!(result.len(), 1);
4212        assert_eq!(result[0].rule, LintRule::InvalidStatus);
4213    }
4214
4215    // ── lint_yaml_str_with_config integration ────────────────────────────
4216
4217    #[test]
4218    fn lint_with_config_disables_rules() {
4219        let text = r#"title: Test
4220logsource:
4221    category: test
4222detection:
4223    selection:
4224        field: value
4225    condition: selection
4226level: medium
4227"#;
4228        let mut config = LintConfig::default();
4229        config
4230            .disabled_rules
4231            .insert("missing_description".to_string());
4232        config.disabled_rules.insert("missing_author".to_string());
4233
4234        let warnings = lint_yaml_str_with_config(text, &config);
4235        assert!(
4236            !warnings
4237                .iter()
4238                .any(|w| w.rule == LintRule::MissingDescription)
4239        );
4240        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
4241    }
4242
4243    #[test]
4244    fn lint_with_inline_disable_next_line() {
4245        let text = r#"title: Test
4246# rsigma-disable-next-line missing_level
4247logsource:
4248    category: test
4249detection:
4250    selection:
4251        field: value
4252    condition: selection
4253"#;
4254        // Note: missing_level is on the logsource line... actually we need to think about
4255        // where the warning span resolves to. The warning for missing_level has path /level,
4256        // and won't have a span matching line 2. Let's use a config-based suppression
4257        // instead for this test.
4258        let config = LintConfig::default();
4259        let warnings = lint_yaml_str_with_config(text, &config);
4260        // This test verifies that inline parsing doesn't break normal linting
4261        assert!(warnings.iter().any(|w| w.rule == LintRule::MissingLevel));
4262    }
4263
4264    #[test]
4265    fn lint_with_inline_file_disable() {
4266        let text = r#"# rsigma-disable missing_description, missing_author
4267title: Test
4268logsource:
4269    category: test
4270detection:
4271    selection:
4272        field: value
4273    condition: selection
4274level: medium
4275"#;
4276        let config = LintConfig::default();
4277        let warnings = lint_yaml_str_with_config(text, &config);
4278        assert!(
4279            !warnings
4280                .iter()
4281                .any(|w| w.rule == LintRule::MissingDescription)
4282        );
4283        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
4284    }
4285
4286    #[test]
4287    fn lint_with_inline_disable_all() {
4288        let text = r#"# rsigma-disable
4289title: Test
4290status: invalid_status
4291logsource:
4292    category: test
4293detection:
4294    selection:
4295        field: value
4296    condition: selection
4297"#;
4298        let config = LintConfig::default();
4299        let warnings = lint_yaml_str_with_config(text, &config);
4300        assert!(warnings.is_empty());
4301    }
4302
4303    // ── LintConfig ───────────────────────────────────────────────────────
4304
4305    #[test]
4306    fn lint_config_merge() {
4307        let mut base = LintConfig::default();
4308        base.disabled_rules.insert("rule_a".to_string());
4309        base.severity_overrides
4310            .insert("rule_b".to_string(), Severity::Info);
4311
4312        let other = LintConfig {
4313            disabled_rules: ["rule_c".to_string()].into_iter().collect(),
4314            severity_overrides: [("rule_d".to_string(), Severity::Hint)]
4315                .into_iter()
4316                .collect(),
4317            exclude_patterns: vec!["test/**".to_string()],
4318        };
4319
4320        base.merge(&other);
4321        assert!(base.disabled_rules.contains("rule_a"));
4322        assert!(base.disabled_rules.contains("rule_c"));
4323        assert_eq!(base.severity_overrides.get("rule_b"), Some(&Severity::Info));
4324        assert_eq!(base.severity_overrides.get("rule_d"), Some(&Severity::Hint));
4325        assert_eq!(base.exclude_patterns, vec!["test/**".to_string()]);
4326    }
4327
4328    #[test]
4329    fn lint_config_is_disabled() {
4330        let mut config = LintConfig::default();
4331        config.disabled_rules.insert("missing_title".to_string());
4332        assert!(config.is_disabled(&LintRule::MissingTitle));
4333        assert!(!config.is_disabled(&LintRule::EmptyTitle));
4334    }
4335
4336    #[test]
4337    fn find_yaml_comment_basic() {
4338        assert_eq!(find_yaml_comment("# comment"), Some(0));
4339        assert_eq!(find_yaml_comment("key: value # comment"), Some(11));
4340        assert_eq!(find_yaml_comment("key: 'value # not comment'"), None);
4341        assert_eq!(find_yaml_comment("key: \"value # not comment\""), None);
4342        assert_eq!(find_yaml_comment("key: value"), None);
4343    }
4344
4345    // ── Fix generation tests ─────────────────────────────────────────────
4346
4347    fn find_fix(warnings: &[LintWarning], rule: LintRule) -> Option<&Fix> {
4348        warnings
4349            .iter()
4350            .find(|w| w.rule == rule)
4351            .and_then(|w| w.fix.as_ref())
4352    }
4353
4354    fn fix_summary(fix: &Fix) -> String {
4355        use std::fmt::Write;
4356        let mut s = String::new();
4357        writeln!(s, "title: {}", fix.title).unwrap();
4358        writeln!(s, "disposition: {:?}", fix.disposition).unwrap();
4359        for (i, p) in fix.patches.iter().enumerate() {
4360            match p {
4361                FixPatch::ReplaceValue { path, new_value } => {
4362                    writeln!(s, "patch[{i}]: ReplaceValue {path} -> {new_value}").unwrap();
4363                }
4364                FixPatch::ReplaceKey { path, new_key } => {
4365                    writeln!(s, "patch[{i}]: ReplaceKey {path} -> {new_key}").unwrap();
4366                }
4367                FixPatch::Remove { path } => {
4368                    writeln!(s, "patch[{i}]: Remove {path}").unwrap();
4369                }
4370            }
4371        }
4372        s
4373    }
4374
4375    #[test]
4376    fn fix_invalid_status() {
4377        let w = lint(
4378            r#"
4379title: Test
4380status: expreimental
4381logsource:
4382    category: test
4383detection:
4384    sel:
4385        field: value
4386    condition: sel
4387"#,
4388        );
4389        let fix = find_fix(&w, LintRule::InvalidStatus).expect("should have fix");
4390        insta::assert_snapshot!(fix_summary(fix), @r"
4391        title: replace 'expreimental' with 'experimental'
4392        disposition: Safe
4393        patch[0]: ReplaceValue /status -> experimental
4394        ");
4395    }
4396
4397    #[test]
4398    fn fix_invalid_level() {
4399        let w = lint(
4400            r#"
4401title: Test
4402level: hgih
4403logsource:
4404    category: test
4405detection:
4406    sel:
4407        field: value
4408    condition: sel
4409"#,
4410        );
4411        let fix = find_fix(&w, LintRule::InvalidLevel).expect("should have fix");
4412        insta::assert_snapshot!(fix_summary(fix), @r"
4413        title: replace 'hgih' with 'high'
4414        disposition: Safe
4415        patch[0]: ReplaceValue /level -> high
4416        ");
4417    }
4418
4419    #[test]
4420    fn fix_non_lowercase_key() {
4421        let w = lint(
4422            r#"
4423title: Test
4424Status: test
4425logsource:
4426    category: test
4427detection:
4428    sel:
4429        field: value
4430    condition: sel
4431"#,
4432        );
4433        let fix = find_fix(&w, LintRule::NonLowercaseKey).expect("should have fix");
4434        insta::assert_snapshot!(fix_summary(fix), @r"
4435        title: rename 'Status' to 'status'
4436        disposition: Safe
4437        patch[0]: ReplaceKey /Status -> status
4438        ");
4439    }
4440
4441    #[test]
4442    fn fix_logsource_value_not_lowercase() {
4443        let w = lint(
4444            r#"
4445title: Test
4446logsource:
4447    category: Test
4448detection:
4449    sel:
4450        field: value
4451    condition: sel
4452"#,
4453        );
4454        let fix = find_fix(&w, LintRule::LogsourceValueNotLowercase).expect("should have fix");
4455        insta::assert_snapshot!(fix_summary(fix), @r"
4456        title: lowercase 'Test' to 'test'
4457        disposition: Safe
4458        patch[0]: ReplaceValue /logsource/category -> test
4459        ");
4460    }
4461
4462    #[test]
4463    fn fix_unknown_key_typo() {
4464        let w = lint(
4465            r#"
4466title: Test
4467desciption: Typo field
4468logsource:
4469    category: test
4470detection:
4471    sel:
4472        field: value
4473    condition: sel
4474level: medium
4475"#,
4476        );
4477        let fix = find_fix(&w, LintRule::UnknownKey).expect("should have fix");
4478        insta::assert_snapshot!(fix_summary(fix), @r"
4479        title: rename 'desciption' to 'description'
4480        disposition: Safe
4481        patch[0]: ReplaceKey /desciption -> description
4482        ");
4483    }
4484
4485    #[test]
4486    fn fix_duplicate_tags() {
4487        let w = lint(
4488            r#"
4489title: Test
4490status: test
4491tags:
4492    - attack.execution
4493    - attack.execution
4494logsource:
4495    category: test
4496detection:
4497    sel:
4498        field: value
4499    condition: sel
4500"#,
4501        );
4502        let fix = find_fix(&w, LintRule::DuplicateTags).expect("should have fix");
4503        insta::assert_snapshot!(fix_summary(fix), @r"
4504        title: remove duplicate tag 'attack.execution'
4505        disposition: Safe
4506        patch[0]: Remove /tags/1
4507        ");
4508    }
4509
4510    #[test]
4511    fn fix_duplicate_references() {
4512        let w = lint(
4513            r#"
4514title: Test
4515references:
4516    - https://example.com
4517    - https://example.com
4518logsource:
4519    category: test
4520detection:
4521    sel:
4522        field: value
4523    condition: sel
4524"#,
4525        );
4526        let fix = find_fix(&w, LintRule::DuplicateReferences).expect("should have fix");
4527        insta::assert_snapshot!(fix_summary(fix), @r"
4528        title: remove duplicate reference
4529        disposition: Safe
4530        patch[0]: Remove /references/1
4531        ");
4532    }
4533
4534    #[test]
4535    fn fix_duplicate_fields() {
4536        let w = lint(
4537            r#"
4538title: Test
4539fields:
4540    - CommandLine
4541    - CommandLine
4542logsource:
4543    category: test
4544detection:
4545    sel:
4546        field: value
4547    condition: sel
4548"#,
4549        );
4550        let fix = find_fix(&w, LintRule::DuplicateFields).expect("should have fix");
4551        insta::assert_snapshot!(fix_summary(fix), @r"
4552        title: remove duplicate field
4553        disposition: Safe
4554        patch[0]: Remove /fields/1
4555        ");
4556    }
4557
4558    #[test]
4559    fn fix_all_with_re() {
4560        let w = lint(
4561            r#"
4562title: Test
4563logsource:
4564    category: test
4565detection:
4566    sel:
4567        Cmd|all|re:
4568            - foo.*
4569            - bar.*
4570    condition: sel
4571"#,
4572        );
4573        let fix = find_fix(&w, LintRule::AllWithRe).expect("should have fix");
4574        insta::assert_snapshot!(fix_summary(fix), @r"
4575        title: remove |all from 'Cmd|all|re'
4576        disposition: Safe
4577        patch[0]: ReplaceKey /detection/sel/Cmd|all|re -> Cmd|re
4578        ");
4579    }
4580
4581    #[test]
4582    fn fix_single_value_all_modifier() {
4583        let w = lint(
4584            r#"
4585title: Test
4586logsource:
4587    category: test
4588detection:
4589    sel:
4590        Cmd|all|contains:
4591            - only_one
4592    condition: sel
4593"#,
4594        );
4595        let fix = find_fix(&w, LintRule::SingleValueAllModifier).expect("should have fix");
4596        insta::assert_snapshot!(fix_summary(fix), @r"
4597        title: remove |all from 'Cmd|all|contains'
4598        disposition: Safe
4599        patch[0]: ReplaceKey /detection/sel/Cmd|all|contains -> Cmd|contains
4600        ");
4601    }
4602
4603    #[test]
4604    fn fix_wildcard_only_value() {
4605        let w = lint(
4606            r#"
4607title: Test
4608logsource:
4609    category: test
4610detection:
4611    sel:
4612        CommandLine: '*'
4613    condition: sel
4614"#,
4615        );
4616        let fix = find_fix(&w, LintRule::WildcardOnlyValue).expect("should have fix");
4617        insta::assert_snapshot!(fix_summary(fix), @r"
4618        title: replace with 'CommandLine|exists: true'
4619        disposition: Safe
4620        patch[0]: ReplaceKey /detection/sel/CommandLine -> CommandLine|exists
4621        patch[1]: ReplaceValue /detection/sel/CommandLine|exists -> true
4622        ");
4623    }
4624
4625    #[test]
4626    fn fix_filter_has_level() {
4627        let w = lint(
4628            r#"
4629title: Test
4630logsource:
4631    category: test
4632level: high
4633filter:
4634    rules:
4635        - rule1
4636    selection:
4637        User: admin
4638    condition: selection
4639"#,
4640        );
4641        let fix = find_fix(&w, LintRule::FilterHasLevel).expect("should have fix");
4642        insta::assert_snapshot!(fix_summary(fix), @r"
4643        title: remove 'level' from filter rule
4644        disposition: Safe
4645        patch[0]: Remove /level
4646        ");
4647    }
4648
4649    #[test]
4650    fn fix_filter_has_status() {
4651        let w = lint(
4652            r#"
4653title: Test
4654logsource:
4655    category: test
4656status: test
4657filter:
4658    rules:
4659        - rule1
4660    selection:
4661        User: admin
4662    condition: selection
4663"#,
4664        );
4665        let fix = find_fix(&w, LintRule::FilterHasStatus).expect("should have fix");
4666        insta::assert_snapshot!(fix_summary(fix), @r"
4667        title: remove 'status' from filter rule
4668        disposition: Safe
4669        patch[0]: Remove /status
4670        ");
4671    }
4672
4673    #[test]
4674    fn no_fix_for_unfixable_rule() {
4675        let w = lint(
4676            r#"
4677title: Test
4678logsource:
4679    category: test
4680"#,
4681        );
4682        assert!(has_rule(&w, LintRule::MissingDetection));
4683        assert!(find_fix(&w, LintRule::MissingDetection).is_none());
4684    }
4685
4686    #[test]
4687    fn no_fix_for_far_invalid_status() {
4688        let w = lint(
4689            r#"
4690title: Test
4691status: totallyinvalidxyz
4692logsource:
4693    category: test
4694detection:
4695    sel:
4696        field: value
4697    condition: sel
4698"#,
4699        );
4700        assert!(has_rule(&w, LintRule::InvalidStatus));
4701        assert!(
4702            find_fix(&w, LintRule::InvalidStatus).is_none(),
4703            "no fix when edit distance is too large"
4704        );
4705    }
4706
4707    // ── Deprecated aggregation syntax ───────────────────────────────────
4708
4709    #[test]
4710    fn deprecated_aggregation_count() {
4711        let w = lint(
4712            r#"
4713title: Test
4714logsource:
4715    category: test
4716detection:
4717    selection:
4718        EventID: 4625
4719    condition: selection | count(TargetUserName) by IpAddress > 5
4720level: medium
4721"#,
4722        );
4723        assert!(has_rule(&w, LintRule::DeprecatedAggregationSyntax));
4724        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
4725        let dag = w
4726            .iter()
4727            .find(|w| w.rule == LintRule::DeprecatedAggregationSyntax)
4728            .unwrap();
4729        assert_eq!(dag.severity, Severity::Warning);
4730    }
4731
4732    #[test]
4733    fn deprecated_aggregation_near() {
4734        let w = lint(
4735            r#"
4736title: Test
4737logsource:
4738    category: test
4739detection:
4740    selection:
4741        EventID: 1
4742    condition: selection | near(field) by host
4743level: medium
4744"#,
4745        );
4746        assert!(has_rule(&w, LintRule::DeprecatedAggregationSyntax));
4747        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
4748    }
4749
4750    #[test]
4751    fn no_deprecated_aggregation_for_normal_condition() {
4752        let w = lint(
4753            r#"
4754title: Test
4755logsource:
4756    category: test
4757detection:
4758    selection:
4759        field: value
4760    condition: selection
4761level: medium
4762"#,
4763        );
4764        assert!(has_no_rule(&w, LintRule::DeprecatedAggregationSyntax));
4765    }
4766
4767    #[test]
4768    fn no_deprecated_aggregation_for_pipe_in_field_modifier() {
4769        let w = lint(
4770            r#"
4771title: Test
4772logsource:
4773    category: test
4774detection:
4775    selection:
4776        field|contains: value
4777    condition: selection
4778level: medium
4779"#,
4780        );
4781        assert!(has_no_rule(&w, LintRule::DeprecatedAggregationSyntax));
4782        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
4783    }
4784
4785    #[test]
4786    fn has_deprecated_aggregation_function() {
4787        assert!(has_deprecated_aggregation(
4788            "selection | count(User) by SourceIP > 5"
4789        ));
4790        assert!(has_deprecated_aggregation(
4791            "selection |  sum(Amount) by Account > 1000"
4792        ));
4793        assert!(has_deprecated_aggregation(
4794            "selection | near(field) by host"
4795        ));
4796        assert!(has_deprecated_aggregation(
4797            "selection | min(score) by host > 0"
4798        ));
4799        assert!(has_deprecated_aggregation(
4800            "selection | max(score) by host > 100"
4801        ));
4802        assert!(has_deprecated_aggregation(
4803            "selection | avg(score) by host > 50"
4804        ));
4805        assert!(!has_deprecated_aggregation("selection and not filter"));
4806        assert!(!has_deprecated_aggregation("1 of selection*"));
4807        assert!(!has_deprecated_aggregation("all of them"));
4808    }
4809
4810    // ── Exclude patterns ────────────────────────────────────────────────
4811
4812    #[test]
4813    fn lint_config_exclude_from_yaml() {
4814        let yaml = r#"
4815disabled_rules:
4816  - missing_description
4817exclude:
4818  - "config/**"
4819  - "**/unsupported/**"
4820"#;
4821        let tmp = std::env::temp_dir().join("rsigma_test_exclude.yml");
4822        std::fs::write(&tmp, yaml).unwrap();
4823        let config = LintConfig::load(&tmp).unwrap();
4824        std::fs::remove_file(&tmp).ok();
4825
4826        assert!(config.disabled_rules.contains("missing_description"));
4827        assert_eq!(config.exclude_patterns.len(), 2);
4828        assert_eq!(config.exclude_patterns[0], "config/**");
4829        assert_eq!(config.exclude_patterns[1], "**/unsupported/**");
4830    }
4831
4832    #[test]
4833    fn lint_config_build_exclude_set_empty() {
4834        let config = LintConfig::default();
4835        assert!(config.build_exclude_set().is_none());
4836    }
4837
4838    #[test]
4839    fn lint_config_build_exclude_set_matches() {
4840        let config = LintConfig {
4841            exclude_patterns: vec!["config/**".to_string()],
4842            ..Default::default()
4843        };
4844        let gs = config.build_exclude_set().expect("should build");
4845        assert!(gs.is_match("config/data_mapping/foo.yaml"));
4846        assert!(gs.is_match("config/nested/deep/bar.yml"));
4847        assert!(!gs.is_match("rules/windows/test.yml"));
4848    }
4849
4850    #[test]
4851    fn lint_directory_with_excludes() {
4852        let tmp = tempfile::tempdir().unwrap();
4853        let rules_dir = tmp.path().join("rules");
4854        let config_dir = tmp.path().join("config");
4855        std::fs::create_dir_all(&rules_dir).unwrap();
4856        std::fs::create_dir_all(&config_dir).unwrap();
4857
4858        // Valid rule
4859        std::fs::write(
4860            rules_dir.join("good.yml"),
4861            r#"
4862title: Good Rule
4863logsource:
4864    category: test
4865detection:
4866    sel:
4867        field: value
4868    condition: sel
4869level: medium
4870"#,
4871        )
4872        .unwrap();
4873
4874        // Non-rule config file (would produce errors if linted)
4875        std::fs::write(
4876            config_dir.join("mapping.yaml"),
4877            r#"
4878Title: Logon
4879Channel: Security
4880EventID: 4624
4881"#,
4882        )
4883        .unwrap();
4884
4885        // Without excludes: config file produces errors
4886        let no_exclude = LintConfig::default();
4887        let results = lint_yaml_directory_with_config(tmp.path(), &no_exclude).unwrap();
4888        let config_warnings: Vec<_> = results
4889            .iter()
4890            .filter(|r| r.path.to_string_lossy().contains("config"))
4891            .flat_map(|r| &r.warnings)
4892            .collect();
4893        assert!(
4894            !config_warnings.is_empty(),
4895            "config file should produce warnings without excludes"
4896        );
4897
4898        // With excludes: config file is skipped
4899        let with_exclude = LintConfig {
4900            exclude_patterns: vec!["config/**".to_string()],
4901            ..Default::default()
4902        };
4903        let results = lint_yaml_directory_with_config(tmp.path(), &with_exclude).unwrap();
4904        let config_results: Vec<_> = results
4905            .iter()
4906            .filter(|r| r.path.to_string_lossy().contains("config"))
4907            .collect();
4908        assert!(config_results.is_empty(), "config file should be excluded");
4909
4910        // The valid rule should still be linted
4911        let rule_results: Vec<_> = results
4912            .iter()
4913            .filter(|r| r.path.to_string_lossy().contains("good.yml"))
4914            .collect();
4915        assert_eq!(rule_results.len(), 1);
4916    }
4917}