Skip to main content

rsigma_parser/
lint.rs

1//! Built-in linter for Sigma rules, correlations, and filters.
2//!
3//! Validates raw `serde_yaml::Value` documents against the Sigma specification
4//! v2.1.0 constraints — catching metadata issues that the parser silently
5//! ignores (invalid enums, date formats, tag patterns, etc.).
6//!
7//! # Usage
8//!
9//! ```rust
10//! use rsigma_parser::lint::{lint_yaml_value, Severity};
11//!
12//! let yaml = "title: Test\nlogsource:\n  category: test\ndetection:\n  sel:\n    field: value\n  condition: sel\n";
13//! let value: serde_yaml::Value = serde_yaml::from_str(yaml).unwrap();
14//! let warnings = lint_yaml_value(&value);
15//! for w in &warnings {
16//!     if w.severity == Severity::Error {
17//!         eprintln!("{}", w.message);
18//!     }
19//! }
20//! ```
21
22use std::collections::{HashMap, HashSet};
23use std::fmt;
24use std::path::Path;
25use std::sync::LazyLock;
26
27use serde::{Deserialize, Serialize};
28use serde_yaml::Value;
29
30// =============================================================================
31// Public types
32// =============================================================================
33
/// Severity of a lint finding.
///
/// The `Display` impl renders each variant as its lowercase name
/// (`error`, `warning`, `info`, `hint`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
pub enum Severity {
    /// Spec violation — the rule is invalid.
    Error,
    /// Best-practice issue — the rule works but is not spec-ideal.
    Warning,
    /// Informational suggestion — soft best-practice hint (e.g. missing author).
    Info,
    /// Subtle hint — lowest severity, for stylistic suggestions.
    Hint,
}
46
47impl fmt::Display for Severity {
48    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49        match self {
50            Severity::Error => write!(f, "error"),
51            Severity::Warning => write!(f, "warning"),
52            Severity::Info => write!(f, "info"),
53            Severity::Hint => write!(f, "hint"),
54        }
55    }
56}
57
/// Identifies which lint rule fired.
///
/// Variants are grouped by the kind of document or construct they apply to.
/// The `Display` impl renders each variant as its snake_case identifier
/// (e.g. `MissingTitle` → `missing_title`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
pub enum LintRule {
    // ── Infrastructure / parse errors ────────────────────────────────────
    YamlParseError,
    NotAMapping,
    FileReadError,
    SchemaViolation,

    // ── Shared (all document types) ──────────────────────────────────────
    MissingTitle,
    EmptyTitle,
    TitleTooLong,
    MissingDescription,
    MissingAuthor,
    InvalidId,
    InvalidStatus,
    MissingLevel,
    InvalidLevel,
    InvalidDate,
    InvalidModified,
    ModifiedBeforeDate,
    DescriptionTooLong,
    NameTooLong,
    TaxonomyTooLong,
    NonLowercaseKey,

    // ── Detection rules ──────────────────────────────────────────────────
    MissingLogsource,
    MissingDetection,
    MissingCondition,
    EmptyDetection,
    InvalidRelatedType,
    InvalidRelatedId,
    RelatedMissingRequired,
    DeprecatedWithoutRelated,
    InvalidTag,
    UnknownTagNamespace,
    DuplicateTags,
    DuplicateReferences,
    DuplicateFields,
    FalsepositiveTooShort,
    ScopeTooShort,
    LogsourceValueNotLowercase,
    ConditionReferencesUnknown,
    DeprecatedAggregationSyntax,

    // ── Correlation rules ────────────────────────────────────────────────
    MissingCorrelation,
    MissingCorrelationType,
    InvalidCorrelationType,
    MissingCorrelationRules,
    EmptyCorrelationRules,
    MissingCorrelationTimespan,
    InvalidTimespanFormat,
    MissingGroupBy,
    MissingCorrelationCondition,
    MissingConditionField,
    InvalidConditionOperator,
    ConditionValueNotNumeric,
    GenerateNotBoolean,

    // ── Filter rules ─────────────────────────────────────────────────────
    MissingFilter,
    MissingFilterRules,
    EmptyFilterRules,
    MissingFilterSelection,
    MissingFilterCondition,
    FilterHasLevel,
    FilterHasStatus,
    MissingFilterLogsource,

    // ── Detection logic (cross-cutting) ──────────────────────────────────
    NullInValueList,
    SingleValueAllModifier,
    AllWithRe,
    IncompatibleModifiers,
    EmptyValueList,
    WildcardOnlyValue,
    UnknownKey,
}
139
140impl fmt::Display for LintRule {
141    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
142        let s = match self {
143            LintRule::YamlParseError => "yaml_parse_error",
144            LintRule::NotAMapping => "not_a_mapping",
145            LintRule::FileReadError => "file_read_error",
146            LintRule::SchemaViolation => "schema_violation",
147            LintRule::MissingTitle => "missing_title",
148            LintRule::EmptyTitle => "empty_title",
149            LintRule::TitleTooLong => "title_too_long",
150            LintRule::MissingDescription => "missing_description",
151            LintRule::MissingAuthor => "missing_author",
152            LintRule::InvalidId => "invalid_id",
153            LintRule::InvalidStatus => "invalid_status",
154            LintRule::MissingLevel => "missing_level",
155            LintRule::InvalidLevel => "invalid_level",
156            LintRule::InvalidDate => "invalid_date",
157            LintRule::InvalidModified => "invalid_modified",
158            LintRule::ModifiedBeforeDate => "modified_before_date",
159            LintRule::DescriptionTooLong => "description_too_long",
160            LintRule::NameTooLong => "name_too_long",
161            LintRule::TaxonomyTooLong => "taxonomy_too_long",
162            LintRule::NonLowercaseKey => "non_lowercase_key",
163            LintRule::MissingLogsource => "missing_logsource",
164            LintRule::MissingDetection => "missing_detection",
165            LintRule::MissingCondition => "missing_condition",
166            LintRule::EmptyDetection => "empty_detection",
167            LintRule::InvalidRelatedType => "invalid_related_type",
168            LintRule::InvalidRelatedId => "invalid_related_id",
169            LintRule::RelatedMissingRequired => "related_missing_required",
170            LintRule::DeprecatedWithoutRelated => "deprecated_without_related",
171            LintRule::InvalidTag => "invalid_tag",
172            LintRule::UnknownTagNamespace => "unknown_tag_namespace",
173            LintRule::DuplicateTags => "duplicate_tags",
174            LintRule::DuplicateReferences => "duplicate_references",
175            LintRule::DuplicateFields => "duplicate_fields",
176            LintRule::FalsepositiveTooShort => "falsepositive_too_short",
177            LintRule::ScopeTooShort => "scope_too_short",
178            LintRule::LogsourceValueNotLowercase => "logsource_value_not_lowercase",
179            LintRule::ConditionReferencesUnknown => "condition_references_unknown",
180            LintRule::DeprecatedAggregationSyntax => "deprecated_aggregation_syntax",
181            LintRule::MissingCorrelation => "missing_correlation",
182            LintRule::MissingCorrelationType => "missing_correlation_type",
183            LintRule::InvalidCorrelationType => "invalid_correlation_type",
184            LintRule::MissingCorrelationRules => "missing_correlation_rules",
185            LintRule::EmptyCorrelationRules => "empty_correlation_rules",
186            LintRule::MissingCorrelationTimespan => "missing_correlation_timespan",
187            LintRule::InvalidTimespanFormat => "invalid_timespan_format",
188            LintRule::MissingGroupBy => "missing_group_by",
189            LintRule::MissingCorrelationCondition => "missing_correlation_condition",
190            LintRule::MissingConditionField => "missing_condition_field",
191            LintRule::InvalidConditionOperator => "invalid_condition_operator",
192            LintRule::ConditionValueNotNumeric => "condition_value_not_numeric",
193            LintRule::GenerateNotBoolean => "generate_not_boolean",
194            LintRule::MissingFilter => "missing_filter",
195            LintRule::MissingFilterRules => "missing_filter_rules",
196            LintRule::EmptyFilterRules => "empty_filter_rules",
197            LintRule::MissingFilterSelection => "missing_filter_selection",
198            LintRule::MissingFilterCondition => "missing_filter_condition",
199            LintRule::FilterHasLevel => "filter_has_level",
200            LintRule::FilterHasStatus => "filter_has_status",
201            LintRule::MissingFilterLogsource => "missing_filter_logsource",
202            LintRule::NullInValueList => "null_in_value_list",
203            LintRule::SingleValueAllModifier => "single_value_all_modifier",
204            LintRule::AllWithRe => "all_with_re",
205            LintRule::IncompatibleModifiers => "incompatible_modifiers",
206            LintRule::EmptyValueList => "empty_value_list",
207            LintRule::WildcardOnlyValue => "wildcard_only_value",
208            LintRule::UnknownKey => "unknown_key",
209        };
210        write!(f, "{s}")
211    }
212}
213
/// A source span (line/column, both 0-indexed).
///
/// Used by the LSP layer to avoid re-resolving JSON-pointer paths to
/// source positions. When the lint is produced from raw `serde_yaml::Value`
/// (which has no source positions), [`LintWarning::span`] will be `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub struct Span {
    /// 0-indexed start line.
    pub start_line: u32,
    /// 0-indexed start column.
    pub start_col: u32,
    /// 0-indexed end line.
    pub end_line: u32,
    /// 0-indexed end column.
    pub end_col: u32,
}
230
231// =============================================================================
232// Auto-fix types
233// =============================================================================
234
/// Whether a fix is safe to apply automatically or needs manual review.
///
/// Stored on every [`Fix`] as its `disposition` field.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum FixDisposition {
    /// No semantic change — safe to apply without review.
    Safe,
    /// May change meaning — should be reviewed before applying.
    Unsafe,
}
243
/// A single patch operation within a [`Fix`].
///
/// Each variant describes a format-preserving edit to a YAML document.
/// Paths are JSON-pointer-style strings (e.g. `"/status"`, `"/tags/2"`)
/// matching the `LintWarning::path` convention.
///
/// These are intentionally yamlpath/yamlpatch-agnostic so that
/// `rsigma-parser` carries no dependency on those crates. The consumer
/// (CLI or LSP) converts these to concrete patch operations at apply time.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum FixPatch {
    /// Replace the value at `path` with `new_value`.
    ///
    /// `new_value` is carried as plain text.
    ReplaceValue { path: String, new_value: String },
    /// Rename the YAML key targeted by `path` to `new_key`.
    ReplaceKey { path: String, new_key: String },
    /// Remove the node at `path` entirely.
    Remove { path: String },
}
262
/// A suggested fix for a lint finding.
///
/// Attached to a [`LintWarning`] (via its `fix` field) when the issue can be
/// corrected automatically. Contains one or more [`FixPatch`] operations that,
/// applied sequentially, resolve the finding.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Fix {
    /// Short human-readable description (e.g. "rename 'Status' to 'status'").
    pub title: String,
    /// Whether the fix is safe to apply without review.
    pub disposition: FixDisposition,
    /// Ordered patch operations to apply.
    pub patches: Vec<FixPatch>,
}
277
/// A single lint finding.
///
/// The `Display` impl prints `severity[rule]: message` followed by an
/// indented `--> path` line; `span` and `fix` are not part of that output.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct LintWarning {
    /// Which lint rule fired.
    pub rule: LintRule,
    /// Severity of the finding (error, warning, info, or hint).
    pub severity: Severity,
    /// Human-readable message.
    pub message: String,
    /// JSON-pointer-style location, e.g. `"/status"`, `"/tags/2"`.
    pub path: String,
    /// Optional source span. `None` when linting `serde_yaml::Value` (no
    /// source positions available). Populated by `lint_yaml_str` which
    /// can resolve paths against the raw text.
    pub span: Option<Span>,
    /// Optional auto-fix. `None` when the finding cannot be corrected
    /// automatically.
    pub fix: Option<Fix>,
}
297
298impl fmt::Display for LintWarning {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        write!(
301            f,
302            "{}[{}]: {}\n    --> {}",
303            self.severity, self.rule, self.message, self.path
304        )
305    }
306}
307
/// Result of linting a single file (may contain multiple YAML documents).
#[derive(Debug, Clone, Serialize)]
pub struct FileLintResult {
    /// Path of the file these findings belong to.
    pub path: std::path::PathBuf,
    /// Every finding reported for the file, at any severity.
    pub warnings: Vec<LintWarning>,
}
314
315impl FileLintResult {
316    pub fn has_errors(&self) -> bool {
317        self.warnings.iter().any(|w| w.severity == Severity::Error)
318    }
319
320    pub fn error_count(&self) -> usize {
321        self.warnings
322            .iter()
323            .filter(|w| w.severity == Severity::Error)
324            .count()
325    }
326
327    pub fn warning_count(&self) -> usize {
328        self.warnings
329            .iter()
330            .filter(|w| w.severity == Severity::Warning)
331            .count()
332    }
333
334    pub fn info_count(&self) -> usize {
335        self.warnings
336            .iter()
337            .filter(|w| w.severity == Severity::Info)
338            .count()
339    }
340
341    pub fn hint_count(&self) -> usize {
342        self.warnings
343            .iter()
344            .filter(|w| w.severity == Severity::Hint)
345            .count()
346    }
347}
348
349// =============================================================================
350// Helpers
351// =============================================================================
352
353/// Pre-cached `Value::String` keys to avoid per-call allocations when
354/// looking up fields in `serde_yaml::Mapping`.
355static KEY_CACHE: LazyLock<HashMap<&'static str, Value>> = LazyLock::new(|| {
356    [
357        "action",
358        "author",
359        "category",
360        "condition",
361        "correlation",
362        "date",
363        "description",
364        "detection",
365        "falsepositives",
366        "field",
367        "fields",
368        "filter",
369        "generate",
370        "group-by",
371        "id",
372        "level",
373        "logsource",
374        "modified",
375        "name",
376        "product",
377        "references",
378        "related",
379        "rules",
380        "scope",
381        "selection",
382        "service",
383        "status",
384        "tags",
385        "taxonomy",
386        "timeframe",
387        "timespan",
388        "title",
389        "type",
390    ]
391    .into_iter()
392    .map(|n| (n, Value::String(n.into())))
393    .collect()
394});
395
396fn key(s: &str) -> &'static Value {
397    KEY_CACHE
398        .get(s)
399        .unwrap_or_else(|| panic!("lint key not pre-cached: \"{s}\" — add it to KEY_CACHE"))
400}
401
402fn get_str<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a str> {
403    m.get(key(k)).and_then(|v| v.as_str())
404}
405
406fn get_mapping<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a serde_yaml::Mapping> {
407    m.get(key(k)).and_then(|v| v.as_mapping())
408}
409
410fn get_seq<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a serde_yaml::Sequence> {
411    m.get(key(k)).and_then(|v| v.as_sequence())
412}
413
414fn warn(
415    rule: LintRule,
416    severity: Severity,
417    message: impl Into<String>,
418    path: impl Into<String>,
419) -> LintWarning {
420    LintWarning {
421        rule,
422        severity,
423        message: message.into(),
424        path: path.into(),
425        span: None,
426        fix: None,
427    }
428}
429
/// Shorthand for [`warn`] with [`Severity::Error`].
fn err(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
    warn(rule, Severity::Error, message, path)
}
433
/// Shorthand for [`warn`] with [`Severity::Warning`].
fn warning(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
    warn(rule, Severity::Warning, message, path)
}
437
/// Shorthand for [`warn`] with [`Severity::Info`].
fn info(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
    warn(rule, Severity::Info, message, path)
}
441
442fn safe_fix(title: impl Into<String>, patches: Vec<FixPatch>) -> Option<Fix> {
443    Some(Fix {
444        title: title.into(),
445        disposition: FixDisposition::Safe,
446        patches,
447    })
448}
449
450/// Find the closest match for `input` among `candidates` using edit distance.
451fn closest_match<'a>(input: &str, candidates: &[&'a str], max_distance: usize) -> Option<&'a str> {
452    candidates
453        .iter()
454        .filter(|c| edit_distance(input, c) <= max_distance)
455        .min_by_key(|c| edit_distance(input, c))
456        .copied()
457}
458
/// Validate that `s` is a calendar date written exactly as `YYYY-MM-DD`.
///
/// All eight digit positions must be ASCII digits, the separators must be
/// hyphens, the month must be 1–12, and the day must exist in that month
/// (February has 29 days only in Gregorian leap years).
///
/// Digits are checked byte-by-byte instead of via `str::parse`, because the
/// integer parser accepts a leading `+` and would let `"2024-+1-+5"` through.
fn is_valid_date(s: &str) -> bool {
    // The slice pattern enforces the 10-byte length and both hyphen positions.
    let &[y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1] = s.as_bytes() else {
        return false;
    };
    let digits = [y0, y1, y2, y3, m0, m1, d0, d1];
    if !digits.iter().all(u8::is_ascii_digit) {
        return false;
    }

    // Every byte is a digit here, so `d - b'0'` cannot underflow and the
    // largest value (9999) fits in a u16.
    let number = |ds: &[u8]| {
        ds.iter()
            .fold(0u16, |acc, d| acc * 10 + u16::from(d - b'0'))
    };
    let year = number(&digits[..4]);
    let month = number(&digits[4..6]);
    let day = number(&digits[6..]);

    let is_leap = (year.is_multiple_of(4) && !year.is_multiple_of(100)) || year.is_multiple_of(400);
    let max_day = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap => 29,
        2 => 28,
        // Month 00 or 13..=99.
        _ => return false,
    };
    (1..=max_day).contains(&day)
}
490
491/// Extract a date string from a YAML value, handling serde_yaml auto-parsing.
492///
493/// `serde_yaml` sometimes deserialises `YYYY-MM-DD` as a tagged/non-string
494/// type. This helper coerces such values back to a trimmed string.
495fn extract_date_string(raw: &Value) -> Option<String> {
496    raw.as_str().map(|s| s.to_string()).or_else(|| {
497        serde_yaml::to_string(raw)
498            .ok()
499            .map(|s| s.trim().to_string())
500    })
501}
502
/// Validate a UUID string in hyphenated 8-4-4-4-12 form (any version).
///
/// The string must be exactly 36 bytes, with `-` at offsets 8, 13, 18 and 23
/// and an ASCII hex digit (either case) everywhere else.
fn is_valid_uuid(s: &str) -> bool {
    const HYPHEN_OFFSETS: [usize; 4] = [8, 13, 18, 23];

    let bytes = s.as_bytes();
    if bytes.len() != 36 {
        return false;
    }
    bytes.iter().enumerate().all(|(offset, byte)| {
        if HYPHEN_OFFSETS.contains(&offset) {
            *byte == b'-'
        } else {
            byte.is_ascii_hexdigit()
        }
    })
}
518
/// Check that a logsource value is non-empty and uses only the characters
/// `a-z`, `0-9`, `_`, `.` and `-`.
fn is_valid_logsource_value(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    s.chars()
        .all(|c| matches!(c, 'a'..='z' | '0'..='9' | '_' | '.' | '-'))
}
526
/// Known tag namespaces from the spec.
const KNOWN_TAG_NAMESPACES: &[&str] =
    &["attack", "car", "cve", "d3fend", "detection", "stp", "tlp"];

/// Valid status values.
const VALID_STATUSES: &[&str] = &[
    "stable",
    "test",
    "experimental",
    "deprecated",
    "unsupported",
];

/// Valid level values.
const VALID_LEVELS: &[&str] = &["informational", "low", "medium", "high", "critical"];

/// Valid related types.
const VALID_RELATED_TYPES: &[&str] = &[
    "correlation",
    "derived",
    "obsolete",
    "merged",
    "renamed",
    "similar",
];

/// Valid correlation types.
const VALID_CORRELATION_TYPES: &[&str] = &[
    "event_count",
    "value_count",
    "temporal",
    "temporal_ordered",
    "value_sum",
    "value_avg",
    "value_percentile",
    "value_median",
];

/// Valid condition operators.
const VALID_CONDITION_OPERATORS: &[&str] = &["gt", "gte", "lt", "lte", "eq", "neq"];

/// Correlation types that require a condition section.
///
/// A subset of [`VALID_CORRELATION_TYPES`]: `temporal`, `temporal_ordered`
/// and `value_median` are not listed.
const TYPES_REQUIRING_CONDITION: &[&str] = &[
    "event_count",
    "value_count",
    "value_sum",
    "value_avg",
    "value_percentile",
];

/// Correlation types that require condition.field.
///
/// Same as [`TYPES_REQUIRING_CONDITION`] minus `event_count`.
const TYPES_REQUIRING_FIELD: &[&str] =
    &["value_count", "value_sum", "value_avg", "value_percentile"];

/// Known top-level keys shared across all Sigma document types.
const KNOWN_KEYS_SHARED: &[&str] = &[
    "title",
    "id",
    "name",
    "status",
    "description",
    "author",
    "date",
    "modified",
    "related",
    "taxonomy",
    "action",
    "license",
    "references",
    "tags",
];

/// Extra top-level keys valid for detection rules.
const KNOWN_KEYS_DETECTION: &[&str] = &[
    "logsource",
    "detection",
    "fields",
    "falsepositives",
    "level",
    "scope",
    "custom_attributes",
];

/// Extra top-level keys valid for correlation rules.
///
/// `license` and `related` also appear in [`KNOWN_KEYS_SHARED`].
const KNOWN_KEYS_CORRELATION: &[&str] = &[
    "correlation",
    "custom_attributes",
    "falsepositives",
    "fields",
    "generate",
    "level",
    "license",
    "related",
    "scope",
];

/// Extra top-level keys valid for filter rules.
///
/// `license`, `references`, `related`, `tags` and `taxonomy` also appear in
/// [`KNOWN_KEYS_SHARED`].
const KNOWN_KEYS_FILTER: &[&str] = &[
    "custom_attributes",
    "falsepositives",
    "fields",
    "filter",
    "level",
    "license",
    "logsource",
    "references",
    "related",
    "scope",
    "tags",
    "taxonomy",
];
638
/// Check a tag against the pattern `^[a-z0-9_-]+\.[a-z0-9._-]+$`.
///
/// The text before the first `.` is the namespace and may not itself contain
/// a dot; everything after it may contain further dots.
fn is_valid_tag(s: &str) -> bool {
    /// Characters allowed on both sides of the first dot.
    fn is_word_char(c: char) -> bool {
        c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '-'
    }

    let Some((namespace, remainder)) = s.split_once('.') else {
        return false;
    };
    if namespace.is_empty() || remainder.is_empty() {
        return false;
    }
    namespace.chars().all(is_word_char) && remainder.chars().all(|c| c == '.' || is_word_char(c))
}
655
656// =============================================================================
657// Document type detection
658// =============================================================================
659
/// Kind of Sigma document being linted, as decided by `detect_doc_type`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DocType {
    /// Plain detection rule — the fallback when neither a `correlation` nor a
    /// `filter` key is present.
    Detection,
    /// Document with a top-level `correlation` key.
    Correlation,
    /// Document with a top-level `filter` key (and no `correlation` key).
    Filter,
}
666
667impl DocType {
668    fn known_keys(&self) -> &'static [&'static str] {
669        match self {
670            DocType::Detection => KNOWN_KEYS_DETECTION,
671            DocType::Correlation => KNOWN_KEYS_CORRELATION,
672            DocType::Filter => KNOWN_KEYS_FILTER,
673        }
674    }
675}
676
677fn detect_doc_type(m: &serde_yaml::Mapping) -> DocType {
678    if m.contains_key(key("correlation")) {
679        DocType::Correlation
680    } else if m.contains_key(key("filter")) {
681        DocType::Filter
682    } else {
683        DocType::Detection
684    }
685}
686
687/// Returns `true` if this document is a collection action fragment
688/// (`action: global`, `action: reset`, `action: repeat`) that should be
689/// skipped during linting.
690fn is_action_fragment(m: &serde_yaml::Mapping) -> bool {
691    matches!(get_str(m, "action"), Some("global" | "reset" | "repeat"))
692}
693
694// =============================================================================
695// Shared lint checks
696// =============================================================================
697
698fn lint_shared(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
699    // ── title ────────────────────────────────────────────────────────────
700    match get_str(m, "title") {
701        None => warnings.push(err(
702            LintRule::MissingTitle,
703            "missing required field 'title'",
704            "/title",
705        )),
706        Some(t) if t.trim().is_empty() => {
707            warnings.push(err(
708                LintRule::EmptyTitle,
709                "title must not be empty",
710                "/title",
711            ));
712        }
713        Some(t) if t.len() > 256 => {
714            warnings.push(warning(
715                LintRule::TitleTooLong,
716                format!("title is {} characters, maximum is 256", t.len()),
717                "/title",
718            ));
719        }
720        _ => {}
721    }
722
723    // ── id ───────────────────────────────────────────────────────────────
724    if let Some(id) = get_str(m, "id")
725        && !is_valid_uuid(id)
726    {
727        warnings.push(warning(
728            LintRule::InvalidId,
729            format!("id \"{id}\" is not a valid UUID"),
730            "/id",
731        ));
732    }
733
734    // ── status ───────────────────────────────────────────────────────────
735    if let Some(status) = get_str(m, "status")
736        && !VALID_STATUSES.contains(&status)
737    {
738        let fix = closest_match(status, VALID_STATUSES, 3).map(|closest| Fix {
739            title: format!("replace '{status}' with '{closest}'"),
740            disposition: FixDisposition::Safe,
741            patches: vec![FixPatch::ReplaceValue {
742                path: "/status".into(),
743                new_value: closest.into(),
744            }],
745        });
746        warnings.push(LintWarning {
747            rule: LintRule::InvalidStatus,
748            severity: Severity::Error,
749            message: format!(
750                "invalid status \"{status}\", expected one of: {}",
751                VALID_STATUSES.join(", ")
752            ),
753            path: "/status".into(),
754            span: None,
755            fix,
756        });
757    }
758
759    // ── level ────────────────────────────────────────────────────────────
760    if let Some(level) = get_str(m, "level")
761        && !VALID_LEVELS.contains(&level)
762    {
763        let fix = closest_match(level, VALID_LEVELS, 3).map(|closest| Fix {
764            title: format!("replace '{level}' with '{closest}'"),
765            disposition: FixDisposition::Safe,
766            patches: vec![FixPatch::ReplaceValue {
767                path: "/level".into(),
768                new_value: closest.into(),
769            }],
770        });
771        warnings.push(LintWarning {
772            rule: LintRule::InvalidLevel,
773            severity: Severity::Error,
774            message: format!(
775                "invalid level \"{level}\", expected one of: {}",
776                VALID_LEVELS.join(", ")
777            ),
778            path: "/level".into(),
779            span: None,
780            fix,
781        });
782    }
783
784    // ── date ─────────────────────────────────────────────────────────────
785    let date_string = m.get(key("date")).and_then(extract_date_string);
786    if let Some(d) = &date_string
787        && !is_valid_date(d)
788    {
789        warnings.push(err(
790            LintRule::InvalidDate,
791            format!("invalid date \"{d}\", expected YYYY-MM-DD"),
792            "/date",
793        ));
794    }
795
796    // ── modified ─────────────────────────────────────────────────────────
797    let modified_string = m.get(key("modified")).and_then(extract_date_string);
798    if let Some(d) = &modified_string
799        && !is_valid_date(d)
800    {
801        warnings.push(err(
802            LintRule::InvalidModified,
803            format!("invalid modified date \"{d}\", expected YYYY-MM-DD"),
804            "/modified",
805        ));
806    }
807
808    // ── modified >= date ─────────────────────────────────────────────────
809    if let (Some(date_val), Some(mod_val)) = (&date_string, &modified_string)
810        && is_valid_date(date_val)
811        && is_valid_date(mod_val)
812        && mod_val.as_str() < date_val.as_str()
813    {
814        warnings.push(warning(
815            LintRule::ModifiedBeforeDate,
816            format!("modified date \"{mod_val}\" is before creation date \"{date_val}\""),
817            "/modified",
818        ));
819    }
820
821    // ── description (missing) ──────────────────────────────────────────
822    if !m.contains_key(key("description")) {
823        warnings.push(info(
824            LintRule::MissingDescription,
825            "missing recommended field 'description'",
826            "/description",
827        ));
828    }
829
830    // ── author (missing) ─────────────────────────────────────────────
831    if !m.contains_key(key("author")) {
832        warnings.push(info(
833            LintRule::MissingAuthor,
834            "missing recommended field 'author'",
835            "/author",
836        ));
837    }
838
839    // ── description (too long) ───────────────────────────────────────
840    if let Some(desc) = get_str(m, "description")
841        && desc.len() > 65535
842    {
843        warnings.push(warning(
844            LintRule::DescriptionTooLong,
845            format!("description is {} characters, maximum is 65535", desc.len()),
846            "/description",
847        ));
848    }
849
850    // ── name ─────────────────────────────────────────────────────────────
851    if let Some(name) = get_str(m, "name")
852        && name.len() > 256
853    {
854        warnings.push(warning(
855            LintRule::NameTooLong,
856            format!("name is {} characters, maximum is 256", name.len()),
857            "/name",
858        ));
859    }
860
861    // ── taxonomy ─────────────────────────────────────────────────────────
862    if let Some(tax) = get_str(m, "taxonomy")
863        && tax.len() > 256
864    {
865        warnings.push(warning(
866            LintRule::TaxonomyTooLong,
867            format!("taxonomy is {} characters, maximum is 256", tax.len()),
868            "/taxonomy",
869        ));
870    }
871
872    // ── lowercase keys ───────────────────────────────────────────────────
873    for k in m.keys() {
874        if let Some(ks) = k.as_str()
875            && ks != ks.to_ascii_lowercase()
876        {
877            let lower = ks.to_ascii_lowercase();
878            let mut w = warning(
879                LintRule::NonLowercaseKey,
880                format!("key \"{ks}\" should be lowercase"),
881                format!("/{ks}"),
882            );
883            w.fix = safe_fix(
884                format!("rename '{ks}' to '{lower}'"),
885                vec![FixPatch::ReplaceKey {
886                    path: format!("/{ks}"),
887                    new_key: lower,
888                }],
889            );
890            warnings.push(w);
891        }
892    }
893}
894
895// =============================================================================
896// Detection rule lint checks
897// =============================================================================
898
899fn lint_detection_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
900    // ── level ─────────────────────────────────────────────────────────────
901    if !m.contains_key(key("level")) {
902        warnings.push(warning(
903            LintRule::MissingLevel,
904            "missing recommended field 'level'",
905            "/level",
906        ));
907    }
908
909    // ── logsource ────────────────────────────────────────────────────────
910    if !m.contains_key(key("logsource")) {
911        warnings.push(err(
912            LintRule::MissingLogsource,
913            "missing required field 'logsource'",
914            "/logsource",
915        ));
916    } else {
917        lint_logsource(m, warnings);
918    }
919
920    // ── detection ────────────────────────────────────────────────────────
921    if let Some(det_val) = m.get(key("detection")) {
922        if let Some(det) = det_val.as_mapping() {
923            // Collect detection identifier names (excluding condition/timeframe)
924            let det_keys: HashSet<&str> = det
925                .keys()
926                .filter_map(|k| k.as_str())
927                .filter(|k| *k != "condition" && *k != "timeframe")
928                .collect();
929
930            if !det.contains_key(key("condition")) {
931                warnings.push(err(
932                    LintRule::MissingCondition,
933                    "detection section is missing required 'condition'",
934                    "/detection/condition",
935                ));
936            } else if let Some(cond_str) = get_str(det, "condition") {
937                if has_deprecated_aggregation(cond_str) {
938                    warnings.push(warning(
939                        LintRule::DeprecatedAggregationSyntax,
940                        "condition uses deprecated Sigma v1.x aggregation syntax \
941                         (| count/min/max/avg/sum/near); use a correlation rule instead",
942                        "/detection/condition",
943                    ));
944                } else {
945                    for ident in extract_condition_identifiers(cond_str) {
946                        if !det_keys.contains(ident.as_str()) {
947                            warnings.push(err(
948                                LintRule::ConditionReferencesUnknown,
949                                format!(
950                                    "condition references '{ident}' but no such detection identifier exists"
951                                ),
952                                "/detection/condition",
953                            ));
954                        }
955                    }
956                }
957            }
958
959            if det_keys.is_empty() {
960                warnings.push(warning(
961                    LintRule::EmptyDetection,
962                    "detection section has no named search identifiers",
963                    "/detection",
964                ));
965            }
966
967            // Detection logic checks
968            lint_detection_logic(det, warnings);
969        }
970    } else {
971        warnings.push(err(
972            LintRule::MissingDetection,
973            "missing required field 'detection'",
974            "/detection",
975        ));
976    }
977
978    // ── related ──────────────────────────────────────────────────────────
979    if let Some(related) = get_seq(m, "related") {
980        for (i, item) in related.iter().enumerate() {
981            let path_prefix = format!("/related/{i}");
982            if let Some(item_map) = item.as_mapping() {
983                let has_id = item_map.contains_key(key("id"));
984                let has_type = item_map.contains_key(key("type"));
985
986                if !has_id || !has_type {
987                    warnings.push(err(
988                        LintRule::RelatedMissingRequired,
989                        "related entry must have both 'id' and 'type'",
990                        &path_prefix,
991                    ));
992                }
993
994                if let Some(id) = get_str(item_map, "id")
995                    && !is_valid_uuid(id)
996                {
997                    warnings.push(warning(
998                        LintRule::InvalidRelatedId,
999                        format!("related id \"{id}\" is not a valid UUID"),
1000                        format!("{path_prefix}/id"),
1001                    ));
1002                }
1003
1004                if let Some(type_val) = get_str(item_map, "type")
1005                    && !VALID_RELATED_TYPES.contains(&type_val)
1006                {
1007                    warnings.push(err(
1008                        LintRule::InvalidRelatedType,
1009                        format!(
1010                            "invalid related type \"{type_val}\", expected one of: {}",
1011                            VALID_RELATED_TYPES.join(", ")
1012                        ),
1013                        format!("{path_prefix}/type"),
1014                    ));
1015                }
1016            }
1017        }
1018    }
1019
1020    // ── deprecated + related consistency ─────────────────────────────────
1021    if get_str(m, "status") == Some("deprecated") {
1022        let has_related = get_seq(m, "related")
1023            .map(|seq| !seq.is_empty())
1024            .unwrap_or(false);
1025        if !has_related {
1026            warnings.push(warning(
1027                LintRule::DeprecatedWithoutRelated,
1028                "deprecated rule should have a 'related' entry linking to its replacement",
1029                "/status",
1030            ));
1031        }
1032    }
1033
1034    // ── tags ─────────────────────────────────────────────────────────────
1035    if let Some(tags) = get_seq(m, "tags") {
1036        let mut seen_tags: HashSet<String> = HashSet::new();
1037        for (i, tag_val) in tags.iter().enumerate() {
1038            if let Some(tag) = tag_val.as_str() {
1039                if !is_valid_tag(tag) {
1040                    warnings.push(warning(
1041                        LintRule::InvalidTag,
1042                        format!(
1043                            "tag \"{tag}\" does not match required pattern (lowercase, dotted namespace)"
1044                        ),
1045                        format!("/tags/{i}"),
1046                    ));
1047                } else {
1048                    // Check known namespace
1049                    if let Some(ns) = tag.split('.').next()
1050                        && !KNOWN_TAG_NAMESPACES.contains(&ns)
1051                    {
1052                        warnings.push(warning(
1053                            LintRule::UnknownTagNamespace,
1054                            format!(
1055                                "unknown tag namespace \"{ns}\", known namespaces: {}",
1056                                KNOWN_TAG_NAMESPACES.join(", ")
1057                            ),
1058                            format!("/tags/{i}"),
1059                        ));
1060                    }
1061                }
1062
1063                if !seen_tags.insert(tag.to_string()) {
1064                    let mut w = warning(
1065                        LintRule::DuplicateTags,
1066                        format!("duplicate tag \"{tag}\""),
1067                        format!("/tags/{i}"),
1068                    );
1069                    w.fix = safe_fix(
1070                        format!("remove duplicate tag '{tag}'"),
1071                        vec![FixPatch::Remove {
1072                            path: format!("/tags/{i}"),
1073                        }],
1074                    );
1075                    warnings.push(w);
1076                }
1077            }
1078        }
1079    }
1080
1081    // ── references (unique) ──────────────────────────────────────────────
1082    if let Some(refs) = get_seq(m, "references") {
1083        let mut seen: HashSet<String> = HashSet::new();
1084        for (i, r) in refs.iter().enumerate() {
1085            if let Some(s) = r.as_str()
1086                && !seen.insert(s.to_string())
1087            {
1088                let mut w = warning(
1089                    LintRule::DuplicateReferences,
1090                    format!("duplicate reference \"{s}\""),
1091                    format!("/references/{i}"),
1092                );
1093                w.fix = safe_fix(
1094                    "remove duplicate reference",
1095                    vec![FixPatch::Remove {
1096                        path: format!("/references/{i}"),
1097                    }],
1098                );
1099                warnings.push(w);
1100            }
1101        }
1102    }
1103
1104    // ── fields (unique) ──────────────────────────────────────────────────
1105    if let Some(fields) = get_seq(m, "fields") {
1106        let mut seen: HashSet<String> = HashSet::new();
1107        for (i, f) in fields.iter().enumerate() {
1108            if let Some(s) = f.as_str()
1109                && !seen.insert(s.to_string())
1110            {
1111                let mut w = warning(
1112                    LintRule::DuplicateFields,
1113                    format!("duplicate field \"{s}\""),
1114                    format!("/fields/{i}"),
1115                );
1116                w.fix = safe_fix(
1117                    "remove duplicate field",
1118                    vec![FixPatch::Remove {
1119                        path: format!("/fields/{i}"),
1120                    }],
1121                );
1122                warnings.push(w);
1123            }
1124        }
1125    }
1126
1127    // ── falsepositives (minLength 2) ─────────────────────────────────────
1128    if let Some(fps) = get_seq(m, "falsepositives") {
1129        for (i, fp) in fps.iter().enumerate() {
1130            if let Some(s) = fp.as_str()
1131                && s.len() < 2
1132            {
1133                warnings.push(warning(
1134                    LintRule::FalsepositiveTooShort,
1135                    format!("falsepositive entry \"{s}\" must be at least 2 characters"),
1136                    format!("/falsepositives/{i}"),
1137                ));
1138            }
1139        }
1140    }
1141
1142    // ── scope (minLength 2) ──────────────────────────────────────────────
1143    if let Some(scope) = get_seq(m, "scope") {
1144        for (i, s_val) in scope.iter().enumerate() {
1145            if let Some(s) = s_val.as_str()
1146                && s.len() < 2
1147            {
1148                warnings.push(warning(
1149                    LintRule::ScopeTooShort,
1150                    format!("scope entry \"{s}\" must be at least 2 characters"),
1151                    format!("/scope/{i}"),
1152                ));
1153            }
1154        }
1155    }
1156}
1157
1158fn lint_logsource(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1159    if let Some(ls) = get_mapping(m, "logsource") {
1160        for field in &["category", "product", "service"] {
1161            if let Some(val) = get_str(ls, field)
1162                && !is_valid_logsource_value(val)
1163            {
1164                let lower = val.to_ascii_lowercase();
1165                let mut w = warning(
1166                    LintRule::LogsourceValueNotLowercase,
1167                    format!("logsource {field} \"{val}\" should be lowercase (a-z, 0-9, _, ., -)"),
1168                    format!("/logsource/{field}"),
1169                );
1170                w.fix = safe_fix(
1171                    format!("lowercase '{val}' to '{lower}'"),
1172                    vec![FixPatch::ReplaceValue {
1173                        path: format!("/logsource/{field}"),
1174                        new_value: lower,
1175                    }],
1176                );
1177                warnings.push(w);
1178            }
1179        }
1180    }
1181}
1182
/// Collects the plain search-identifier names referenced by a condition
/// expression, so the caller can verify each one is defined in `detection`.
///
/// The condition is tokenized on every character that is not alphanumeric,
/// `_` or `*`. Tokens are dropped when they are condition keywords
/// (case-sensitive), consist only of ASCII digits (the `1` in `1 of ...`),
/// or contain `*` (wildcard patterns). Remaining tokens are returned in
/// order of appearance, duplicates included.
fn extract_condition_identifiers(condition: &str) -> Vec<String> {
    const KEYWORDS: &[&str] = &["and", "or", "not", "of", "all", "them"];

    let is_separator = |ch: char| !(ch.is_alphanumeric() || ch == '_' || ch == '*');

    let mut identifiers = Vec::new();
    for token in condition.split(is_separator) {
        if token.is_empty() || KEYWORDS.contains(&token) {
            continue;
        }
        let is_number = token.chars().all(|ch| ch.is_ascii_digit());
        let is_wildcard = token.contains('*');
        if is_number || is_wildcard {
            continue;
        }
        identifiers.push(token.to_string());
    }
    identifiers
}
1196
/// Reports whether a condition string uses the deprecated Sigma v1.x
/// pipe-aggregation form, e.g. `selection | count(User) by SourceIP > 5`.
///
/// Only the text after the *first* `|` is examined: leading whitespace is
/// skipped and the following run of ASCII alphanumerics/underscores must be
/// exactly one of `count`, `min`, `max`, `avg`, `sum` or `near`.
fn has_deprecated_aggregation(condition: &str) -> bool {
    const AGGREGATIONS: [&str; 6] = ["count", "min", "max", "avg", "sum", "near"];

    let Some((_, tail)) = condition.split_once('|') else {
        return false;
    };

    let tail = tail.trim_start();
    // The keyword ends at the first character that cannot be part of a word.
    let word_end = tail
        .find(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
        .unwrap_or(tail.len());
    let word = &tail[..word_end];

    AGGREGATIONS.contains(&word)
}
1216
1217/// Checks detection logic: null in value lists, single-value |all, empty value lists.
1218fn lint_detection_logic(det: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1219    for (det_key, det_val) in det {
1220        let det_key_str = det_key.as_str().unwrap_or("");
1221        if det_key_str == "condition" || det_key_str == "timeframe" {
1222            continue;
1223        }
1224
1225        lint_detection_value(det_val, det_key_str, warnings);
1226    }
1227}
1228
1229fn lint_detection_value(value: &Value, det_name: &str, warnings: &mut Vec<LintWarning>) {
1230    match value {
1231        Value::Mapping(m) => {
1232            for (field_key, field_val) in m {
1233                let field_key_str = field_key.as_str().unwrap_or("");
1234
1235                // Check |all combined with |re (regex alternation makes |all misleading)
1236                if field_key_str.contains("|all") && field_key_str.contains("|re") {
1237                    let new_key = field_key_str.replace("|all", "");
1238                    let mut w = warning(
1239                        LintRule::AllWithRe,
1240                        format!(
1241                            "'{field_key_str}' in '{det_name}' combines |all with |re; \
1242                             regex alternation (|) already handles multi-match — \
1243                             |all is redundant or misleading here"
1244                        ),
1245                        format!("/detection/{det_name}/{field_key_str}"),
1246                    );
1247                    w.fix = safe_fix(
1248                        format!("remove |all from '{field_key_str}'"),
1249                        vec![FixPatch::ReplaceKey {
1250                            path: format!("/detection/{det_name}/{field_key_str}"),
1251                            new_key,
1252                        }],
1253                    );
1254                    warnings.push(w);
1255                }
1256
1257                // Check |all with single value
1258                if field_key_str.contains("|all") {
1259                    let needs_fix = if let Value::Sequence(seq) = field_val {
1260                        seq.len() <= 1
1261                    } else {
1262                        true
1263                    };
1264                    if needs_fix {
1265                        let new_key = field_key_str.replace("|all", "");
1266                        let count = if let Value::Sequence(seq) = field_val {
1267                            seq.len().to_string()
1268                        } else {
1269                            "a single".into()
1270                        };
1271                        let mut w = warning(
1272                            LintRule::SingleValueAllModifier,
1273                            format!(
1274                                "'{field_key_str}' in '{det_name}' uses |all modifier with {count} value(s); |all requires multiple values"
1275                            ),
1276                            format!("/detection/{det_name}/{field_key_str}"),
1277                        );
1278                        w.fix = safe_fix(
1279                            format!("remove |all from '{field_key_str}'"),
1280                            vec![FixPatch::ReplaceKey {
1281                                path: format!("/detection/{det_name}/{field_key_str}"),
1282                                new_key,
1283                            }],
1284                        );
1285                        warnings.push(w);
1286                    }
1287                }
1288
1289                // Check for incompatible modifier combinations
1290                if let Some(msg) = check_modifier_compatibility(field_key_str) {
1291                    warnings.push(warning(
1292                        LintRule::IncompatibleModifiers,
1293                        format!("'{field_key_str}' in '{det_name}': {msg}"),
1294                        format!("/detection/{det_name}/{field_key_str}"),
1295                    ));
1296                }
1297
1298                // Check null in value list and empty value list
1299                if let Value::Sequence(seq) = field_val {
1300                    if seq.is_empty() {
1301                        warnings.push(warning(
1302                            LintRule::EmptyValueList,
1303                            format!("'{field_key_str}' in '{det_name}' has an empty value list"),
1304                            format!("/detection/{det_name}/{field_key_str}"),
1305                        ));
1306                    } else {
1307                        let has_null = seq.iter().any(|v| v.is_null());
1308                        let has_non_null = seq.iter().any(|v| !v.is_null());
1309                        if has_null && has_non_null {
1310                            warnings.push(warning(
1311                                LintRule::NullInValueList,
1312                                format!(
1313                                    "'{field_key_str}' in '{det_name}' mixes null with other values; null should be in its own selection"
1314                                ),
1315                                format!("/detection/{det_name}/{field_key_str}"),
1316                            ));
1317                        }
1318                    }
1319                }
1320
1321                // Check wildcard-only value: field: '*' usually means field|exists
1322                let base_field = field_key_str.split('|').next().unwrap_or(field_key_str);
1323                let is_wildcard_only = match field_val {
1324                    Value::String(s) => s == "*",
1325                    Value::Sequence(seq) => seq.len() == 1 && seq[0].as_str() == Some("*"),
1326                    _ => false,
1327                };
1328                if is_wildcard_only && !field_key_str.contains("|re") {
1329                    let new_key = format!("{base_field}|exists");
1330                    let mut w = warning(
1331                        LintRule::WildcardOnlyValue,
1332                        format!(
1333                            "'{field_key_str}' in '{det_name}' uses a lone wildcard '*'; \
1334                             consider '{base_field}|exists: true' instead"
1335                        ),
1336                        format!("/detection/{det_name}/{field_key_str}"),
1337                    );
1338                    w.fix = safe_fix(
1339                        format!("replace with '{new_key}: true'"),
1340                        vec![
1341                            FixPatch::ReplaceKey {
1342                                path: format!("/detection/{det_name}/{field_key_str}"),
1343                                new_key,
1344                            },
1345                            FixPatch::ReplaceValue {
1346                                path: format!("/detection/{det_name}/{base_field}|exists"),
1347                                new_value: "true".into(),
1348                            },
1349                        ],
1350                    );
1351                    warnings.push(w);
1352                }
1353            }
1354        }
1355        Value::Sequence(seq) => {
1356            // List of maps (OR-linked) or keyword list
1357            for item in seq {
1358                if item.is_mapping() {
1359                    lint_detection_value(item, det_name, warnings);
1360                }
1361            }
1362        }
1363        _ => {}
1364    }
1365}
1366
1367// =============================================================================
1368// Correlation rule lint checks
1369// =============================================================================
1370
1371fn lint_correlation_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1372    let Some(corr_val) = m.get(key("correlation")) else {
1373        warnings.push(err(
1374            LintRule::MissingCorrelation,
1375            "missing required field 'correlation'",
1376            "/correlation",
1377        ));
1378        return;
1379    };
1380
1381    let Some(corr) = corr_val.as_mapping() else {
1382        warnings.push(err(
1383            LintRule::MissingCorrelation,
1384            "'correlation' must be a mapping",
1385            "/correlation",
1386        ));
1387        return;
1388    };
1389
1390    // ── type ─────────────────────────────────────────────────────────────
1391    let corr_type = get_str(corr, "type");
1392    match corr_type {
1393        None => {
1394            warnings.push(err(
1395                LintRule::MissingCorrelationType,
1396                "missing required field 'correlation.type'",
1397                "/correlation/type",
1398            ));
1399        }
1400        Some(t) if !VALID_CORRELATION_TYPES.contains(&t) => {
1401            warnings.push(err(
1402                LintRule::InvalidCorrelationType,
1403                format!(
1404                    "invalid correlation type \"{t}\", expected one of: {}",
1405                    VALID_CORRELATION_TYPES.join(", ")
1406                ),
1407                "/correlation/type",
1408            ));
1409        }
1410        _ => {}
1411    }
1412
1413    // ── rules ────────────────────────────────────────────────────────────
1414    if let Some(rules) = corr.get(key("rules")) {
1415        if let Some(seq) = rules.as_sequence()
1416            && seq.is_empty()
1417        {
1418            warnings.push(warning(
1419                LintRule::EmptyCorrelationRules,
1420                "correlation.rules should not be empty",
1421                "/correlation/rules",
1422            ));
1423        }
1424    } else {
1425        warnings.push(err(
1426            LintRule::MissingCorrelationRules,
1427            "missing required field 'correlation.rules'",
1428            "/correlation/rules",
1429        ));
1430    }
1431
1432    // ── timespan ─────────────────────────────────────────────────────────
1433    if let Some(ts) = get_str(corr, "timespan").or_else(|| get_str(corr, "timeframe")) {
1434        if !is_valid_timespan(ts) {
1435            warnings.push(err(
1436                LintRule::InvalidTimespanFormat,
1437                format!(
1438                    "invalid timespan \"{ts}\", expected format like 5m, 1h, 30s, 7d, 1w, 1M, 1y"
1439                ),
1440                "/correlation/timespan",
1441            ));
1442        }
1443    } else {
1444        warnings.push(err(
1445            LintRule::MissingCorrelationTimespan,
1446            "missing required field 'correlation.timespan'",
1447            "/correlation/timespan",
1448        ));
1449    }
1450
1451    // ── Conditional requirements per correlation type ─────────────────────
1452    if let Some(ct) = corr_type {
1453        // group-by is required for all correlation types
1454        if !corr.contains_key(key("group-by")) {
1455            warnings.push(err(
1456                LintRule::MissingGroupBy,
1457                format!("{ct} correlation requires 'group-by'"),
1458                "/correlation/group-by",
1459            ));
1460        }
1461
1462        // condition required for non-temporal types
1463        if TYPES_REQUIRING_CONDITION.contains(&ct) {
1464            if let Some(cond_val) = corr.get(key("condition")) {
1465                if let Some(cond_map) = cond_val.as_mapping() {
1466                    lint_correlation_condition(cond_map, ct, warnings);
1467                }
1468            } else {
1469                warnings.push(err(
1470                    LintRule::MissingCorrelationCondition,
1471                    format!("{ct} correlation requires a 'condition'"),
1472                    "/correlation/condition",
1473                ));
1474            }
1475        }
1476    }
1477
1478    // ── generate (document root per schema; nested under `correlation` is legacy) ──
1479    for (path, val) in [
1480        ("/generate", m.get(key("generate"))),
1481        ("/correlation/generate", corr.get(key("generate"))),
1482    ] {
1483        if let Some(gen_val) = val
1484            && !gen_val.is_bool()
1485        {
1486            warnings.push(err(
1487                LintRule::GenerateNotBoolean,
1488                "'generate' must be a boolean (true/false)",
1489                path,
1490            ));
1491        }
1492    }
1493}
1494
1495fn lint_correlation_condition(
1496    cond: &serde_yaml::Mapping,
1497    corr_type: &str,
1498    warnings: &mut Vec<LintWarning>,
1499) {
1500    // Check condition.field requirement
1501    if TYPES_REQUIRING_FIELD.contains(&corr_type) && !cond.contains_key(key("field")) {
1502        warnings.push(err(
1503            LintRule::MissingConditionField,
1504            format!("{corr_type} correlation condition requires 'field'"),
1505            "/correlation/condition/field",
1506        ));
1507    }
1508
1509    // Validate operator keys and numeric values
1510    for (k, v) in cond {
1511        let ks = k.as_str().unwrap_or("");
1512        if ks == "field" {
1513            continue;
1514        }
1515        if !VALID_CONDITION_OPERATORS.contains(&ks) {
1516            warnings.push(err(
1517                LintRule::InvalidConditionOperator,
1518                format!(
1519                    "invalid condition operator \"{ks}\", expected one of: {}",
1520                    VALID_CONDITION_OPERATORS.join(", ")
1521                ),
1522                format!("/correlation/condition/{ks}"),
1523            ));
1524        } else if !v.is_i64() && !v.is_u64() && !v.is_f64() {
1525            warnings.push(err(
1526                LintRule::ConditionValueNotNumeric,
1527                format!("condition operator '{ks}' requires a numeric value"),
1528                format!("/correlation/condition/{ks}"),
1529            ));
1530        }
1531    }
1532}
1533
/// Check field modifier compatibility and return a diagnostic message if
/// the combination is invalid.
///
/// `field_key` is a detection key such as `field|contains|all`; everything
/// after the first `|` is treated as a modifier. Returns `None` when the key
/// has no modifiers or no rule below is violated, otherwise the message of
/// the first violated rule.
///
/// Modifier categories (at most one from each exclusive group):
/// - **String match**: contains, startswith, endswith
/// - **Pattern match**: re, cidr (incompatible with string-match modifiers)
/// - **Numeric comparison**: gt, gte, lt, lte, neq
/// - **Existence**: exists (standalone, incompatible with everything except all/cased)
/// - **Regex flags**: i, m, s and their long forms (require re)
fn check_modifier_compatibility(field_key: &str) -> Option<String> {
    let parts: Vec<&str> = field_key.split('|').collect();
    if parts.len() < 2 {
        return None;
    }
    let modifiers = &parts[1..];

    let string_match: &[&str] = &["contains", "startswith", "endswith"];
    let pattern_match: &[&str] = &["re", "cidr"];
    let numeric_compare: &[&str] = &["gt", "gte", "lt", "lte", "neq"];
    let regex_flags: &[&str] = &["i", "ignorecase", "m", "multiline", "s", "dotall"];

    let string_count = modifiers
        .iter()
        .filter(|m| string_match.contains(m))
        .count();
    let pattern_mods: Vec<&str> = modifiers
        .iter()
        .copied()
        .filter(|m| pattern_match.contains(m))
        .collect();
    let numeric_count = modifiers
        .iter()
        .filter(|m| numeric_compare.contains(m))
        .count();
    let has_exists = modifiers.contains(&"exists");
    let has_re = modifiers.contains(&"re");
    let has_regex_flags = modifiers.iter().any(|m| regex_flags.contains(m));

    // Multiple string-match modifiers are mutually exclusive
    if string_count > 1 {
        return Some(
            "multiple string-match modifiers (contains, startswith, endswith) \
             are mutually exclusive"
                .to_string(),
        );
    }

    // Pattern-match (re, cidr) is incompatible with string-match modifiers
    if !pattern_mods.is_empty() && string_count > 0 {
        return Some(format!(
            "pattern modifier '{}' is incompatible with string-match modifiers \
             (contains, startswith, endswith)",
            pattern_mods.join(", ")
        ));
    }

    // Numeric comparison is incompatible with string-match and pattern modifiers
    if numeric_count > 0 && (string_count > 0 || !pattern_mods.is_empty()) {
        return Some(
            "numeric comparison modifiers (gt, gte, lt, lte, neq) are incompatible \
             with string-match and pattern modifiers"
                .to_string(),
        );
    }

    // exists is standalone (only all/cased may accompany it)
    if has_exists {
        let others: Vec<&str> = modifiers
            .iter()
            .copied()
            .filter(|m| !matches!(*m, "exists" | "all" | "cased"))
            .collect();
        if !others.is_empty() {
            return Some(format!(
                "'exists' modifier is incompatible with: {}",
                others.join(", ")
            ));
        }
    }

    // Regex flags require re
    if has_regex_flags && !has_re {
        return Some("regex flag modifiers (i, m, s) require the 're' modifier".to_string());
    }

    // The remaining "at most one per group" rules. They are checked last so
    // that every combination rejected by an earlier rule keeps its message.
    if pattern_mods.len() > 1 {
        return Some("multiple pattern modifiers (re, cidr) are mutually exclusive".to_string());
    }
    if numeric_count > 1 {
        return Some(
            "multiple numeric comparison modifiers (gt, gte, lt, lte, neq) \
             are mutually exclusive"
                .to_string(),
        );
    }

    None
}
1620
/// Returns `true` when `s` is one or more ASCII digits followed by exactly
/// one unit letter: `s` (second), `m` (minute), `h` (hour), `d` (day),
/// `w` (week), `M` (month) or `y` (year). Unit letters are case-sensitive.
fn is_valid_timespan(s: &str) -> bool {
    let is_unit = |ch: char| matches!(ch, 's' | 'm' | 'h' | 'd' | 'w' | 'M' | 'y');

    // Peel off the trailing unit; whatever remains must be the amount.
    match s.strip_suffix(is_unit) {
        Some(amount) => !amount.is_empty() && amount.bytes().all(|b| b.is_ascii_digit()),
        None => false,
    }
}
1633
1634// =============================================================================
1635// Filter rule lint checks
1636// =============================================================================
1637
1638fn lint_filter_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1639    // ── filter section ───────────────────────────────────────────────────
1640    let Some(filter_val) = m.get(key("filter")) else {
1641        warnings.push(err(
1642            LintRule::MissingFilter,
1643            "missing required field 'filter'",
1644            "/filter",
1645        ));
1646        return;
1647    };
1648
1649    let Some(filter) = filter_val.as_mapping() else {
1650        warnings.push(err(
1651            LintRule::MissingFilter,
1652            "'filter' must be a mapping",
1653            "/filter",
1654        ));
1655        return;
1656    };
1657
1658    // ── filter.rules ─────────────────────────────────────────────────────
1659    // `rules` is optional: omitted means "apply to all rules".
1660    // Valid forms: a sequence of rule IDs/names, the string "any", or omitted.
1661    if let Some(rules_val) = filter.get(key("rules")) {
1662        match rules_val {
1663            serde_yaml::Value::Sequence(_) => {}
1664            serde_yaml::Value::String(s) if s.eq_ignore_ascii_case("any") => {}
1665            serde_yaml::Value::String(_) => {}
1666            _ => {
1667                warnings.push(err(
1668                    LintRule::MissingFilterRules,
1669                    "filter.rules must be a sequence of rule IDs, a single rule ID string, or 'any'",
1670                    "/filter/rules",
1671                ));
1672            }
1673        }
1674    }
1675
1676    // ── filter.selection ─────────────────────────────────────────────────
1677    if !filter.contains_key(key("selection")) {
1678        warnings.push(err(
1679            LintRule::MissingFilterSelection,
1680            "missing required field 'filter.selection'",
1681            "/filter/selection",
1682        ));
1683    }
1684
1685    // ── filter.condition ─────────────────────────────────────────────────
1686    if !filter.contains_key(key("condition")) {
1687        warnings.push(err(
1688            LintRule::MissingFilterCondition,
1689            "missing required field 'filter.condition'",
1690            "/filter/condition",
1691        ));
1692    }
1693
1694    // ── logsource required for filters ───────────────────────────────────
1695    if !m.contains_key(key("logsource")) {
1696        warnings.push(err(
1697            LintRule::MissingFilterLogsource,
1698            "missing required field 'logsource' for filter rule",
1699            "/logsource",
1700        ));
1701    } else {
1702        lint_logsource(m, warnings);
1703    }
1704
1705    // ── Filters should NOT have level or status ──────────────────────────
1706    if m.contains_key(key("level")) {
1707        let mut w = warning(
1708            LintRule::FilterHasLevel,
1709            "filter rules should not have a 'level' field",
1710            "/level",
1711        );
1712        w.fix = safe_fix(
1713            "remove 'level' from filter rule",
1714            vec![FixPatch::Remove {
1715                path: "/level".into(),
1716            }],
1717        );
1718        warnings.push(w);
1719    }
1720
1721    if m.contains_key(key("status")) {
1722        let mut w = warning(
1723            LintRule::FilterHasStatus,
1724            "filter rules should not have a 'status' field",
1725            "/status",
1726        );
1727        w.fix = safe_fix(
1728            "remove 'status' from filter rule",
1729            vec![FixPatch::Remove {
1730                path: "/status".into(),
1731            }],
1732        );
1733        warnings.push(w);
1734    }
1735}
1736
1737// =============================================================================
1738// Public API
1739// =============================================================================
1740
/// Levenshtein edit distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `a` into `b`. It is measured in
/// Unicode scalar values (`char`s), not UTF-8 bytes, so a multi-byte character
/// such as `é` counts as one edit rather than several.
///
/// Uses the two-row dynamic-programming formulation, so memory is
/// proportional to the length of `b`.
fn edit_distance(a: &str, b: &str) -> usize {
    // `b` is walked once per character of `a`, so decode it a single time.
    let b_chars: Vec<char> = b.chars().collect();
    let b_len = b_chars.len();
    if b_len == 0 {
        return a.chars().count();
    }

    // `prev[j]` is the distance between the processed prefix of `a` and the
    // first `j` characters of `b`; it starts as the row for the empty prefix.
    let mut prev: Vec<usize> = (0..=b_len).collect();
    let mut curr = vec![0; b_len + 1];
    for (i, ca) in a.chars().enumerate() {
        curr[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let cost = usize::from(ca != cb);
            curr[j + 1] = (prev[j] + cost) // substitution (or match)
                .min(prev[j + 1] + 1) // deletion from `a`
                .min(curr[j] + 1); // insertion into `a`
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    // After the final swap `prev` holds the last completed row. When `a` is
    // empty the loop never runs and this is simply `b_len`.
    prev[b_len]
}
1762
/// Maximum edit distance to consider an unknown key a likely typo of a known key.
///
/// The bound is inclusive: `lint_unknown_keys` suggests a known key when its
/// `edit_distance` to the unknown key is `<=` this value.
const TYPO_MAX_EDIT_DISTANCE: usize = 2;
1765
1766/// Check for unknown top-level keys that are likely typos of known keys.
1767///
1768/// The Sigma specification v2.1.0 explicitly allows arbitrary custom top-level
1769/// fields, so unknown keys are not errors. However, when an unknown key is
1770/// within a small edit distance of a known key it is likely a typo and we
1771/// surface an informational hint.
1772fn lint_unknown_keys(m: &serde_yaml::Mapping, doc_type: DocType, warnings: &mut Vec<LintWarning>) {
1773    let type_keys = doc_type.known_keys();
1774    let all_known: Vec<&str> = KNOWN_KEYS_SHARED
1775        .iter()
1776        .chain(type_keys.iter())
1777        .copied()
1778        .collect();
1779
1780    for k in m.keys() {
1781        let Some(ks) = k.as_str() else { continue };
1782        if KNOWN_KEYS_SHARED.contains(&ks) || type_keys.contains(&ks) {
1783            continue;
1784        }
1785        // Only warn when the key looks like a typo of a known key.
1786        if let Some(closest) = all_known
1787            .iter()
1788            .filter(|known| edit_distance(ks, known) <= TYPO_MAX_EDIT_DISTANCE)
1789            .min_by_key(|known| edit_distance(ks, known))
1790        {
1791            let mut w = info(
1792                LintRule::UnknownKey,
1793                format!("unknown top-level key \"{ks}\"; did you mean \"{closest}\"?"),
1794                format!("/{ks}"),
1795            );
1796            w.fix = safe_fix(
1797                format!("rename '{ks}' to '{closest}'"),
1798                vec![FixPatch::ReplaceKey {
1799                    path: format!("/{ks}"),
1800                    new_key: closest.to_string(),
1801                }],
1802            );
1803            warnings.push(w);
1804        }
1805    }
1806}
1807
1808/// Lint a single YAML document value.
1809///
1810/// Auto-detects document type (detection / correlation / filter) and runs
1811/// the appropriate checks. Returns all findings.
1812pub fn lint_yaml_value(value: &Value) -> Vec<LintWarning> {
1813    let Some(m) = value.as_mapping() else {
1814        return vec![err(
1815            LintRule::NotAMapping,
1816            "document is not a YAML mapping",
1817            "/",
1818        )];
1819    };
1820
1821    // Skip collection action fragments
1822    if is_action_fragment(m) {
1823        return Vec::new();
1824    }
1825
1826    let mut warnings = Vec::new();
1827
1828    // Run shared checks
1829    lint_shared(m, &mut warnings);
1830
1831    // Run type-specific checks
1832    let doc_type = detect_doc_type(m);
1833    match doc_type {
1834        DocType::Detection => lint_detection_rule(m, &mut warnings),
1835        DocType::Correlation => lint_correlation_rule(m, &mut warnings),
1836        DocType::Filter => lint_filter_rule(m, &mut warnings),
1837    }
1838
1839    // Check for unknown top-level keys
1840    lint_unknown_keys(m, doc_type, &mut warnings);
1841
1842    warnings
1843}
1844
1845/// Lint a raw YAML string, returning warnings with resolved source spans.
1846///
1847/// Unlike [`lint_yaml_value`], this function takes the raw text and resolves
1848/// JSON-pointer paths to `(line, col)` spans. This is the preferred entry
1849/// point for the LSP server.
1850pub fn lint_yaml_str(text: &str) -> Vec<LintWarning> {
1851    let mut all_warnings = Vec::new();
1852
1853    for doc in serde_yaml::Deserializer::from_str(text) {
1854        let value: Value = match Value::deserialize(doc) {
1855            Ok(v) => v,
1856            Err(e) => {
1857                let mut w = err(
1858                    LintRule::YamlParseError,
1859                    format!("YAML parse error: {e}"),
1860                    "/",
1861                );
1862                // serde_yaml can give us a location
1863                if let Some(loc) = e.location() {
1864                    w.span = Some(Span {
1865                        start_line: loc.line().saturating_sub(1) as u32,
1866                        start_col: loc.column() as u32,
1867                        end_line: loc.line().saturating_sub(1) as u32,
1868                        end_col: loc.column() as u32 + 1,
1869                    });
1870                }
1871                all_warnings.push(w);
1872                // A parse error leaves the YAML stream in an undefined state;
1873                // the deserializer iterator may never terminate on malformed
1874                // input, so we must stop iterating to avoid infinite loops and
1875                // unbounded memory growth.
1876                break;
1877            }
1878        };
1879
1880        let warnings = lint_yaml_value(&value);
1881        // Resolve spans for each warning
1882        for mut w in warnings {
1883            w.span = resolve_path_to_span(text, &w.path);
1884            all_warnings.push(w);
1885        }
1886    }
1887
1888    all_warnings
1889}
1890
1891/// Resolve a JSON-pointer path to a `Span` by scanning the YAML text.
1892///
1893/// Returns `None` if the path cannot be resolved.
1894fn resolve_path_to_span(text: &str, path: &str) -> Option<Span> {
1895    if path == "/" || path.is_empty() {
1896        // Root — first non-empty line
1897        for (i, line) in text.lines().enumerate() {
1898            let trimmed = line.trim();
1899            if !trimmed.is_empty() && !trimmed.starts_with('#') && trimmed != "---" {
1900                return Some(Span {
1901                    start_line: i as u32,
1902                    start_col: 0,
1903                    end_line: i as u32,
1904                    end_col: line.len() as u32,
1905                });
1906            }
1907        }
1908        return None;
1909    }
1910
1911    let segments: Vec<&str> = path.strip_prefix('/').unwrap_or(path).split('/').collect();
1912
1913    if segments.is_empty() {
1914        return None;
1915    }
1916
1917    let lines: Vec<&str> = text.lines().collect();
1918    let mut current_indent: i32 = -1;
1919    let mut search_start = 0usize;
1920    let mut last_matched_line: Option<usize> = None;
1921
1922    for segment in &segments {
1923        let array_index: Option<usize> = segment.parse().ok();
1924        let mut found = false;
1925
1926        let mut line_num = search_start;
1927        while line_num < lines.len() {
1928            let line = lines[line_num];
1929            let trimmed = line.trim();
1930            if trimmed.is_empty() || trimmed.starts_with('#') {
1931                line_num += 1;
1932                continue;
1933            }
1934
1935            let indent = (line.len() - trimmed.len()) as i32;
1936
1937            if indent <= current_indent && found {
1938                break;
1939            }
1940            if indent <= current_indent {
1941                line_num += 1;
1942                continue;
1943            }
1944
1945            if let Some(idx) = array_index {
1946                if trimmed.starts_with("- ") && indent > current_indent {
1947                    let mut count = 0usize;
1948                    for (offset, sl) in lines[search_start..].iter().enumerate() {
1949                        let scan = search_start + offset;
1950                        let st = sl.trim();
1951                        if st.is_empty() || st.starts_with('#') {
1952                            continue;
1953                        }
1954                        let si = (sl.len() - st.len()) as i32;
1955                        if si == indent && st.starts_with("- ") {
1956                            if count == idx {
1957                                last_matched_line = Some(scan);
1958                                search_start = scan + 1;
1959                                current_indent = indent;
1960                                found = true;
1961                                break;
1962                            }
1963                            count += 1;
1964                        }
1965                        if si < indent && count > 0 {
1966                            break;
1967                        }
1968                    }
1969                    break;
1970                }
1971            } else {
1972                let key_pattern = format!("{segment}:");
1973                if trimmed.starts_with(&key_pattern) || trimmed == *segment {
1974                    last_matched_line = Some(line_num);
1975                    search_start = line_num + 1;
1976                    current_indent = indent;
1977                    found = true;
1978                    break;
1979                }
1980            }
1981
1982            line_num += 1;
1983        }
1984
1985        if !found && last_matched_line.is_none() {
1986            break;
1987        }
1988    }
1989
1990    last_matched_line.map(|line_num| {
1991        let line = lines[line_num];
1992        Span {
1993            start_line: line_num as u32,
1994            start_col: 0,
1995            end_line: line_num as u32,
1996            end_col: line.len() as u32,
1997        }
1998    })
1999}
2000
2001/// Lint all YAML documents in a file.
2002///
2003/// Handles multi-document YAML (separated by `---`). Collection action
2004/// fragments (`action: global/reset/repeat`) are skipped. Warnings include
2005/// resolved source spans (delegates to [`lint_yaml_str`]).
2006pub fn lint_yaml_file(path: &Path) -> crate::error::Result<FileLintResult> {
2007    let content = std::fs::read_to_string(path)?;
2008    let warnings = lint_yaml_str(&content);
2009    Ok(FileLintResult {
2010        path: path.to_path_buf(),
2011        warnings,
2012    })
2013}
2014
2015/// Lint all `.yml`/`.yaml` files in a directory recursively.
2016///
2017/// Skips hidden directories (starting with `.`) and tracks visited
2018/// canonical paths to avoid infinite loops from symlink cycles.
2019pub fn lint_yaml_directory(dir: &Path) -> crate::error::Result<Vec<FileLintResult>> {
2020    let mut results = Vec::new();
2021    let mut visited = HashSet::new();
2022
2023    fn walk(
2024        dir: &Path,
2025        results: &mut Vec<FileLintResult>,
2026        visited: &mut HashSet<std::path::PathBuf>,
2027    ) -> crate::error::Result<()> {
2028        // Resolve symlinks and canonicalize for cycle detection
2029        let canonical = match dir.canonicalize() {
2030            Ok(p) => p,
2031            Err(_) => return Ok(()),
2032        };
2033        if !visited.insert(canonical) {
2034            // Already visited this directory — symlink cycle
2035            return Ok(());
2036        }
2037
2038        let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
2039        entries.sort_by_key(|e| e.path());
2040
2041        for entry in entries {
2042            let path = entry.path();
2043
2044            // Skip hidden directories (e.g. .git)
2045            if path.is_dir() {
2046                if path
2047                    .file_name()
2048                    .and_then(|n| n.to_str())
2049                    .is_some_and(|n| n.starts_with('.'))
2050                {
2051                    continue;
2052                }
2053                walk(&path, results, visited)?;
2054            } else if matches!(
2055                path.extension().and_then(|e| e.to_str()),
2056                Some("yml" | "yaml")
2057            ) {
2058                match crate::lint::lint_yaml_file(&path) {
2059                    Ok(file_result) => results.push(file_result),
2060                    Err(e) => {
2061                        results.push(FileLintResult {
2062                            path: path.clone(),
2063                            warnings: vec![err(
2064                                LintRule::FileReadError,
2065                                format!("error reading file: {e}"),
2066                                "/",
2067                            )],
2068                        });
2069                    }
2070                }
2071            }
2072        }
2073        Ok(())
2074    }
2075
2076    walk(dir, &mut results, &mut visited)?;
2077    Ok(results)
2078}
2079
2080// =============================================================================
2081// Lint configuration & suppression
2082// =============================================================================
2083
/// Configuration for lint rule suppression and severity overrides.
///
/// Can be loaded from a `.rsigma-lint.yml` config file (see
/// [`LintConfig::load`]), merged with CLI `--disable` flags (see
/// [`LintConfig::merge`]), and combined with inline `# rsigma-disable`
/// comments.
#[derive(Debug, Clone, Default, Serialize)]
pub struct LintConfig {
    /// Rule names to suppress entirely (e.g. `"missing_description"`).
    /// Compared against the `Display` form of a `LintRule`.
    pub disabled_rules: HashSet<String>,
    /// Override the default severity of a rule (e.g. `title_too_long -> Info`).
    pub severity_overrides: HashMap<String, Severity>,
    /// Glob patterns for paths to exclude from directory linting.
    /// Matched against relative paths from the lint root (e.g. `"config/**"`).
    pub exclude_patterns: Vec<String>,
}
2098
/// Raw YAML shape for `.rsigma-lint.yml`.
///
/// Every key is optional; a missing key deserializes to an empty collection.
#[derive(Debug, Deserialize)]
struct RawLintConfig {
    /// Names of rules to disable.
    #[serde(default)]
    disabled_rules: Vec<String>,
    /// Rule name -> severity name. `LintConfig::load` accepts `error`,
    /// `warning`, `info` and `hint` and rejects anything else.
    #[serde(default)]
    severity_overrides: HashMap<String, String>,
    /// Glob patterns; becomes `LintConfig::exclude_patterns`.
    #[serde(default)]
    exclude: Vec<String>,
}
2109
2110impl LintConfig {
2111    /// Load a `LintConfig` from a `.rsigma-lint.yml` file.
2112    pub fn load(path: &Path) -> crate::error::Result<Self> {
2113        let content = std::fs::read_to_string(path)?;
2114        let raw: RawLintConfig = serde_yaml::from_str(&content)?;
2115
2116        let disabled_rules: HashSet<String> = raw.disabled_rules.into_iter().collect();
2117        let mut severity_overrides = HashMap::new();
2118        for (rule, sev_str) in &raw.severity_overrides {
2119            let sev = match sev_str.as_str() {
2120                "error" => Severity::Error,
2121                "warning" => Severity::Warning,
2122                "info" => Severity::Info,
2123                "hint" => Severity::Hint,
2124                other => {
2125                    return Err(crate::error::SigmaParserError::InvalidRule(format!(
2126                        "invalid severity '{other}' for rule '{rule}' in lint config"
2127                    )));
2128                }
2129            };
2130            severity_overrides.insert(rule.clone(), sev);
2131        }
2132
2133        Ok(LintConfig {
2134            disabled_rules,
2135            severity_overrides,
2136            exclude_patterns: raw.exclude,
2137        })
2138    }
2139
2140    /// Walk up from `start_path` to find the nearest `.rsigma-lint.yml`.
2141    ///
2142    /// Checks `start_path` itself (if a directory) or its parent, then
2143    /// ancestors until the filesystem root.
2144    pub fn find_in_ancestors(start_path: &Path) -> Option<std::path::PathBuf> {
2145        let dir = if start_path.is_file() {
2146            start_path.parent()?
2147        } else {
2148            start_path
2149        };
2150
2151        let mut current = dir;
2152        loop {
2153            let candidate = current.join(".rsigma-lint.yml");
2154            if candidate.is_file() {
2155                return Some(candidate);
2156            }
2157            // Also try .yaml extension
2158            let candidate_yaml = current.join(".rsigma-lint.yaml");
2159            if candidate_yaml.is_file() {
2160                return Some(candidate_yaml);
2161            }
2162            current = current.parent()?;
2163        }
2164    }
2165
2166    /// Merge another config into this one (e.g. CLI `--disable` into file config).
2167    pub fn merge(&mut self, other: &LintConfig) {
2168        self.disabled_rules
2169            .extend(other.disabled_rules.iter().cloned());
2170        for (rule, sev) in &other.severity_overrides {
2171            self.severity_overrides.insert(rule.clone(), *sev);
2172        }
2173        self.exclude_patterns
2174            .extend(other.exclude_patterns.iter().cloned());
2175    }
2176
2177    /// Check if a rule is disabled.
2178    pub fn is_disabled(&self, rule: &LintRule) -> bool {
2179        self.disabled_rules.contains(&rule.to_string())
2180    }
2181
2182    /// Build a compiled [`globset::GlobSet`] from the exclude patterns.
2183    ///
2184    /// Returns `None` if there are no patterns. Invalid patterns are silently
2185    /// skipped (they will have been validated at config load time in practice).
2186    pub fn build_exclude_set(&self) -> Option<globset::GlobSet> {
2187        if self.exclude_patterns.is_empty() {
2188            return None;
2189        }
2190        let mut builder = globset::GlobSetBuilder::new();
2191        for pat in &self.exclude_patterns {
2192            if let Ok(glob) = globset::GlobBuilder::new(pat)
2193                .literal_separator(false)
2194                .build()
2195            {
2196                builder.add(glob);
2197            }
2198        }
2199        builder.build().ok()
2200    }
2201}
2202
2203// =============================================================================
2204// Inline suppression comments
2205// =============================================================================
2206
/// Parsed inline suppression directives from YAML source text.
#[derive(Debug, Clone, Default)]
pub struct InlineSuppressions {
    /// If `true`, all rules are suppressed for the entire file.
    pub disable_all: bool,
    /// Rules suppressed for the entire file (from `# rsigma-disable rule1, rule2`).
    pub file_disabled: HashSet<String>,
    /// Rules suppressed for specific lines: `line_number -> set of rule names`.
    /// `None` means all rules are suppressed for that line; `Some(set)`
    /// suppresses only the rules named in `set`. Line numbers are 0-based and
    /// are compared against a warning's `span.start_line`.
    pub line_disabled: HashMap<u32, Option<HashSet<String>>>,
}
2218
2219/// Parse `# rsigma-disable` comments from raw YAML text.
2220///
2221/// Supported forms:
2222/// - `# rsigma-disable` — suppress **all** rules for the file
2223/// - `# rsigma-disable rule1, rule2` — suppress specific rules for the file
2224/// - `# rsigma-disable-next-line` — suppress all rules for the next line
2225/// - `# rsigma-disable-next-line rule1, rule2` — suppress specific rules for the next line
2226pub fn parse_inline_suppressions(text: &str) -> InlineSuppressions {
2227    let mut result = InlineSuppressions::default();
2228
2229    for (i, line) in text.lines().enumerate() {
2230        let trimmed = line.trim();
2231
2232        // Look for comment-only lines or trailing comments
2233        let comment = if let Some(pos) = find_yaml_comment(trimmed) {
2234            trimmed[pos + 1..].trim()
2235        } else {
2236            continue;
2237        };
2238
2239        if let Some(rest) = comment.strip_prefix("rsigma-disable-next-line") {
2240            let rest = rest.trim();
2241            let next_line = (i + 1) as u32;
2242            if rest.is_empty() {
2243                // Suppress all rules for next line
2244                result.line_disabled.insert(next_line, None);
2245            } else {
2246                // Suppress specific rules for next line
2247                let rules: HashSet<String> = rest
2248                    .split(',')
2249                    .map(|s| s.trim().to_string())
2250                    .filter(|s| !s.is_empty())
2251                    .collect();
2252                if !rules.is_empty() {
2253                    result
2254                        .line_disabled
2255                        .entry(next_line)
2256                        .and_modify(|existing| {
2257                            if let Some(existing_set) = existing {
2258                                existing_set.extend(rules.iter().cloned());
2259                            }
2260                            // If None (all suppressed), leave as None
2261                        })
2262                        .or_insert(Some(rules));
2263                }
2264            }
2265        } else if let Some(rest) = comment.strip_prefix("rsigma-disable") {
2266            let rest = rest.trim();
2267            if rest.is_empty() {
2268                // Suppress all rules for the entire file
2269                result.disable_all = true;
2270            } else {
2271                // Suppress specific rules for the file
2272                for rule in rest.split(',') {
2273                    let rule = rule.trim();
2274                    if !rule.is_empty() {
2275                        result.file_disabled.insert(rule.to_string());
2276                    }
2277                }
2278            }
2279        }
2280    }
2281
2282    result
2283}
2284
/// Find the start of a YAML comment (`#`) that is not inside a quoted string.
///
/// A `#` only starts a comment when it is the first character of the line or
/// is preceded by whitespace, so values such as `http://host/#fragment` are
/// not mistaken for comments. Inside a double-quoted string a backslash
/// escapes the following character, so `\"` does not end the string.
///
/// Quote tracking is a simple toggle: an apostrophe or double quote anywhere
/// outside the other kind of quote flips the state, including one that
/// appears in the middle of an unquoted value.
///
/// Returns the byte offset of `#` within the trimmed line, or `None`.
fn find_yaml_comment(line: &str) -> Option<usize> {
    let mut in_single = false;
    let mut in_double = false;
    // Set when the previous character was a backslash inside double quotes.
    let mut escaped = false;
    let mut prev: Option<char> = None;

    for (i, c) in line.char_indices() {
        if escaped {
            // This character is consumed by the preceding backslash.
            escaped = false;
        } else {
            match c {
                '\\' if in_double => escaped = true,
                '\'' if !in_double => in_single = !in_single,
                '"' if !in_single => in_double = !in_double,
                '#' if !in_single
                    && !in_double
                    && prev.map_or(true, char::is_whitespace) =>
                {
                    return Some(i);
                }
                _ => {}
            }
        }
        prev = Some(c);
    }
    None
}
2301
2302impl InlineSuppressions {
2303    /// Check if a warning should be suppressed.
2304    pub fn is_suppressed(&self, warning: &LintWarning) -> bool {
2305        // File-level disable-all
2306        if self.disable_all {
2307            return true;
2308        }
2309
2310        // File-level specific rules
2311        let rule_name = warning.rule.to_string();
2312        if self.file_disabled.contains(&rule_name) {
2313            return true;
2314        }
2315
2316        // Line-level suppression (requires a resolved span)
2317        if let Some(span) = &warning.span
2318            && let Some(line_rules) = self.line_disabled.get(&span.start_line)
2319        {
2320            return match line_rules {
2321                None => true, // All rules suppressed for this line
2322                Some(rules) => rules.contains(&rule_name),
2323            };
2324        }
2325
2326        false
2327    }
2328}
2329
2330// =============================================================================
2331// Suppression filtering
2332// =============================================================================
2333
2334/// Apply suppression from config and inline comments to lint warnings.
2335///
2336/// 1. Removes warnings whose rule is in `config.disabled_rules`.
2337/// 2. Removes warnings suppressed by inline comments.
2338/// 3. Applies `severity_overrides` to remaining warnings.
2339pub fn apply_suppressions(
2340    warnings: Vec<LintWarning>,
2341    config: &LintConfig,
2342    inline: &InlineSuppressions,
2343) -> Vec<LintWarning> {
2344    warnings
2345        .into_iter()
2346        .filter(|w| !config.is_disabled(&w.rule))
2347        .filter(|w| !inline.is_suppressed(w))
2348        .map(|mut w| {
2349            let rule_name = w.rule.to_string();
2350            if let Some(sev) = config.severity_overrides.get(&rule_name) {
2351                w.severity = *sev;
2352            }
2353            w
2354        })
2355        .collect()
2356}
2357
2358/// Lint a raw YAML string with config-based suppression.
2359///
2360/// Combines [`lint_yaml_str`] + [`parse_inline_suppressions`] +
2361/// [`apply_suppressions`] in one call.
2362pub fn lint_yaml_str_with_config(text: &str, config: &LintConfig) -> Vec<LintWarning> {
2363    let warnings = lint_yaml_str(text);
2364    let inline = parse_inline_suppressions(text);
2365    apply_suppressions(warnings, config, &inline)
2366}
2367
2368/// Lint a file with config-based suppression.
2369pub fn lint_yaml_file_with_config(
2370    path: &Path,
2371    config: &LintConfig,
2372) -> crate::error::Result<FileLintResult> {
2373    let content = std::fs::read_to_string(path)?;
2374    let warnings = lint_yaml_str_with_config(&content, config);
2375    Ok(FileLintResult {
2376        path: path.to_path_buf(),
2377        warnings,
2378    })
2379}
2380
2381/// Lint a directory with config-based suppression.
2382///
2383/// Respects `config.exclude_patterns`: glob patterns matched against paths
2384/// relative to `dir` (e.g. `"config/**"` skips `<dir>/config/...`).
2385pub fn lint_yaml_directory_with_config(
2386    dir: &Path,
2387    config: &LintConfig,
2388) -> crate::error::Result<Vec<FileLintResult>> {
2389    let mut results = Vec::new();
2390    let mut visited = HashSet::new();
2391    let exclude_set = config.build_exclude_set();
2392
2393    fn walk(
2394        dir: &Path,
2395        base: &Path,
2396        config: &LintConfig,
2397        exclude_set: &Option<globset::GlobSet>,
2398        results: &mut Vec<FileLintResult>,
2399        visited: &mut HashSet<std::path::PathBuf>,
2400    ) -> crate::error::Result<()> {
2401        let canonical = match dir.canonicalize() {
2402            Ok(p) => p,
2403            Err(_) => return Ok(()),
2404        };
2405        if !visited.insert(canonical) {
2406            return Ok(());
2407        }
2408
2409        let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
2410        entries.sort_by_key(|e| e.path());
2411
2412        for entry in entries {
2413            let path = entry.path();
2414
2415            if let Some(gs) = exclude_set
2416                && let Ok(rel) = path.strip_prefix(base)
2417                && gs.is_match(rel)
2418            {
2419                continue;
2420            }
2421
2422            if path.is_dir() {
2423                if path
2424                    .file_name()
2425                    .and_then(|n| n.to_str())
2426                    .is_some_and(|n| n.starts_with('.'))
2427                {
2428                    continue;
2429                }
2430                walk(&path, base, config, exclude_set, results, visited)?;
2431            } else if matches!(
2432                path.extension().and_then(|e| e.to_str()),
2433                Some("yml" | "yaml")
2434            ) {
2435                match lint_yaml_file_with_config(&path, config) {
2436                    Ok(file_result) => results.push(file_result),
2437                    Err(e) => {
2438                        results.push(FileLintResult {
2439                            path: path.clone(),
2440                            warnings: vec![err(
2441                                LintRule::FileReadError,
2442                                format!("error reading file: {e}"),
2443                                "/",
2444                            )],
2445                        });
2446                    }
2447                }
2448            }
2449        }
2450        Ok(())
2451    }
2452
2453    walk(dir, dir, config, &exclude_set, &mut results, &mut visited)?;
2454    Ok(results)
2455}
2456
2457// =============================================================================
2458// Tests
2459// =============================================================================
2460
2461#[cfg(test)]
2462mod tests {
2463    use super::*;
2464
2465    fn yaml_value(yaml: &str) -> Value {
2466        serde_yaml::from_str(yaml).unwrap()
2467    }
2468
2469    fn lint(yaml: &str) -> Vec<LintWarning> {
2470        lint_yaml_value(&yaml_value(yaml))
2471    }
2472
2473    fn has_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
2474        warnings.iter().any(|w| w.rule == rule)
2475    }
2476
2477    fn has_no_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
2478        !has_rule(warnings, rule)
2479    }
2480
2481    // ── Valid rule produces no errors ────────────────────────────────────
2482
2483    #[test]
2484    fn valid_detection_rule_no_errors() {
2485        let w = lint(
2486            r#"
2487title: Test Rule
2488id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2489status: test
2490logsource:
2491    category: process_creation
2492    product: windows
2493detection:
2494    selection:
2495        CommandLine|contains: 'whoami'
2496    condition: selection
2497level: medium
2498tags:
2499    - attack.execution
2500    - attack.t1059
2501"#,
2502        );
2503        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
2504        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
2505    }
2506
2507    // ── Shared checks ───────────────────────────────────────────────────
2508
2509    #[test]
2510    fn missing_title() {
2511        let w = lint(
2512            r#"
2513logsource:
2514    category: test
2515detection:
2516    selection:
2517        field: value
2518    condition: selection
2519"#,
2520        );
2521        assert!(has_rule(&w, LintRule::MissingTitle));
2522    }
2523
2524    #[test]
2525    fn title_too_long() {
2526        let long_title = "a".repeat(257);
2527        let yaml = format!(
2528            r#"
2529title: '{long_title}'
2530logsource:
2531    category: test
2532detection:
2533    selection:
2534        field: value
2535    condition: selection
2536"#
2537        );
2538        let w = lint(&yaml);
2539        assert!(has_rule(&w, LintRule::TitleTooLong));
2540    }
2541
2542    #[test]
2543    fn invalid_id() {
2544        let w = lint(
2545            r#"
2546title: Test
2547id: not-a-uuid
2548logsource:
2549    category: test
2550detection:
2551    selection:
2552        field: value
2553    condition: selection
2554"#,
2555        );
2556        assert!(has_rule(&w, LintRule::InvalidId));
2557    }
2558
2559    #[test]
2560    fn valid_id_no_warning() {
2561        let w = lint(
2562            r#"
2563title: Test
2564id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2565logsource:
2566    category: test
2567detection:
2568    selection:
2569        field: value
2570    condition: selection
2571"#,
2572        );
2573        assert!(has_no_rule(&w, LintRule::InvalidId));
2574    }
2575
2576    #[test]
2577    fn invalid_status() {
2578        let w = lint(
2579            r#"
2580title: Test
2581status: invalid
2582logsource:
2583    category: test
2584detection:
2585    selection:
2586        field: value
2587    condition: selection
2588"#,
2589        );
2590        assert!(has_rule(&w, LintRule::InvalidStatus));
2591    }
2592
2593    #[test]
2594    fn invalid_level() {
2595        let w = lint(
2596            r#"
2597title: Test
2598level: important
2599logsource:
2600    category: test
2601detection:
2602    selection:
2603        field: value
2604    condition: selection
2605"#,
2606        );
2607        assert!(has_rule(&w, LintRule::InvalidLevel));
2608    }
2609
2610    #[test]
2611    fn invalid_date_format() {
2612        let w = lint(
2613            r#"
2614title: Test
2615date: 'Jan 2025'
2616logsource:
2617    category: test
2618detection:
2619    selection:
2620        field: value
2621    condition: selection
2622"#,
2623        );
2624        assert!(has_rule(&w, LintRule::InvalidDate));
2625    }
2626
2627    #[test]
2628    fn modified_before_date() {
2629        let w = lint(
2630            r#"
2631title: Test
2632date: '2025-06-15'
2633modified: '2025-06-10'
2634logsource:
2635    category: test
2636detection:
2637    selection:
2638        field: value
2639    condition: selection
2640"#,
2641        );
2642        assert!(has_rule(&w, LintRule::ModifiedBeforeDate));
2643    }
2644
2645    #[test]
2646    fn non_lowercase_key() {
2647        let w = lint(
2648            r#"
2649title: Test
2650Status: test
2651logsource:
2652    category: test
2653detection:
2654    selection:
2655        field: value
2656    condition: selection
2657"#,
2658        );
2659        assert!(has_rule(&w, LintRule::NonLowercaseKey));
2660    }
2661
2662    // ── Detection rule checks ───────────────────────────────────────────
2663
2664    #[test]
2665    fn missing_logsource() {
2666        let w = lint(
2667            r#"
2668title: Test
2669detection:
2670    selection:
2671        field: value
2672    condition: selection
2673"#,
2674        );
2675        assert!(has_rule(&w, LintRule::MissingLogsource));
2676    }
2677
2678    #[test]
2679    fn missing_detection() {
2680        let w = lint(
2681            r#"
2682title: Test
2683logsource:
2684    category: test
2685"#,
2686        );
2687        assert!(has_rule(&w, LintRule::MissingDetection));
2688    }
2689
2690    #[test]
2691    fn missing_condition() {
2692        let w = lint(
2693            r#"
2694title: Test
2695logsource:
2696    category: test
2697detection:
2698    selection:
2699        field: value
2700"#,
2701        );
2702        assert!(has_rule(&w, LintRule::MissingCondition));
2703    }
2704
2705    #[test]
2706    fn empty_detection() {
2707        let w = lint(
2708            r#"
2709title: Test
2710logsource:
2711    category: test
2712detection:
2713    condition: selection
2714"#,
2715        );
2716        assert!(has_rule(&w, LintRule::EmptyDetection));
2717    }
2718
2719    #[test]
2720    fn invalid_related_type() {
2721        let w = lint(
2722            r#"
2723title: Test
2724logsource:
2725    category: test
2726detection:
2727    selection:
2728        field: value
2729    condition: selection
2730related:
2731    - id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2732      type: invalid_type
2733"#,
2734        );
2735        assert!(has_rule(&w, LintRule::InvalidRelatedType));
2736    }
2737
2738    #[test]
2739    fn related_missing_required_fields() {
2740        let w = lint(
2741            r#"
2742title: Test
2743logsource:
2744    category: test
2745detection:
2746    selection:
2747        field: value
2748    condition: selection
2749related:
2750    - id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2751"#,
2752        );
2753        assert!(has_rule(&w, LintRule::RelatedMissingRequired));
2754    }
2755
2756    #[test]
2757    fn deprecated_without_related() {
2758        let w = lint(
2759            r#"
2760title: Test
2761status: deprecated
2762logsource:
2763    category: test
2764detection:
2765    selection:
2766        field: value
2767    condition: selection
2768"#,
2769        );
2770        assert!(has_rule(&w, LintRule::DeprecatedWithoutRelated));
2771    }
2772
2773    #[test]
2774    fn invalid_tag_pattern() {
2775        let w = lint(
2776            r#"
2777title: Test
2778logsource:
2779    category: test
2780detection:
2781    selection:
2782        field: value
2783    condition: selection
2784tags:
2785    - 'Invalid Tag'
2786"#,
2787        );
2788        assert!(has_rule(&w, LintRule::InvalidTag));
2789    }
2790
2791    #[test]
2792    fn unknown_tag_namespace() {
2793        let w = lint(
2794            r#"
2795title: Test
2796logsource:
2797    category: test
2798detection:
2799    selection:
2800        field: value
2801    condition: selection
2802tags:
2803    - custom.something
2804"#,
2805        );
2806        assert!(has_rule(&w, LintRule::UnknownTagNamespace));
2807    }
2808
2809    #[test]
2810    fn duplicate_tags() {
2811        let w = lint(
2812            r#"
2813title: Test
2814logsource:
2815    category: test
2816detection:
2817    selection:
2818        field: value
2819    condition: selection
2820tags:
2821    - attack.execution
2822    - attack.execution
2823"#,
2824        );
2825        assert!(has_rule(&w, LintRule::DuplicateTags));
2826    }
2827
2828    #[test]
2829    fn logsource_not_lowercase() {
2830        let w = lint(
2831            r#"
2832title: Test
2833logsource:
2834    category: Process_Creation
2835    product: Windows
2836detection:
2837    selection:
2838        field: value
2839    condition: selection
2840"#,
2841        );
2842        assert!(has_rule(&w, LintRule::LogsourceValueNotLowercase));
2843    }
2844
2845    #[test]
2846    fn single_value_all_modifier() {
2847        let w = lint(
2848            r#"
2849title: Test
2850logsource:
2851    category: test
2852detection:
2853    selection:
2854        CommandLine|contains|all: 'single'
2855    condition: selection
2856"#,
2857        );
2858        assert!(has_rule(&w, LintRule::SingleValueAllModifier));
2859    }
2860
2861    #[test]
2862    fn null_in_value_list() {
2863        let w = lint(
2864            r#"
2865title: Test
2866logsource:
2867    category: test
2868detection:
2869    selection:
2870        FieldA:
2871            - 'value1'
2872            - null
2873    condition: selection
2874"#,
2875        );
2876        assert!(has_rule(&w, LintRule::NullInValueList));
2877    }
2878
2879    // ── Correlation rule checks ─────────────────────────────────────────
2880
2881    #[test]
2882    fn valid_correlation_no_errors() {
2883        let w = lint(
2884            r#"
2885title: Brute Force
2886correlation:
2887    type: event_count
2888    rules:
2889        - 929a690e-bef0-4204-a928-ef5e620d6fcc
2890    group-by:
2891        - User
2892    timespan: 1h
2893    condition:
2894        gte: 100
2895level: high
2896"#,
2897        );
2898        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
2899        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
2900    }
2901
2902    #[test]
2903    fn invalid_correlation_type() {
2904        let w = lint(
2905            r#"
2906title: Test
2907correlation:
2908    type: invalid_type
2909    rules:
2910        - some-rule
2911    timespan: 1h
2912    group-by:
2913        - User
2914"#,
2915        );
2916        assert!(has_rule(&w, LintRule::InvalidCorrelationType));
2917    }
2918
2919    #[test]
2920    fn missing_correlation_timespan() {
2921        let w = lint(
2922            r#"
2923title: Test
2924correlation:
2925    type: event_count
2926    rules:
2927        - some-rule
2928    group-by:
2929        - User
2930    condition:
2931        gte: 10
2932"#,
2933        );
2934        assert!(has_rule(&w, LintRule::MissingCorrelationTimespan));
2935    }
2936
2937    #[test]
2938    fn invalid_timespan_format() {
2939        let w = lint(
2940            r#"
2941title: Test
2942correlation:
2943    type: event_count
2944    rules:
2945        - some-rule
2946    group-by:
2947        - User
2948    timespan: 1hour
2949    condition:
2950        gte: 10
2951"#,
2952        );
2953        assert!(has_rule(&w, LintRule::InvalidTimespanFormat));
2954    }
2955
2956    #[test]
2957    fn missing_group_by() {
2958        let w = lint(
2959            r#"
2960title: Test
2961correlation:
2962    type: event_count
2963    rules:
2964        - some-rule
2965    timespan: 1h
2966    condition:
2967        gte: 10
2968"#,
2969        );
2970        assert!(has_rule(&w, LintRule::MissingGroupBy));
2971    }
2972
2973    #[test]
2974    fn missing_condition_field_for_value_count() {
2975        let w = lint(
2976            r#"
2977title: Test
2978correlation:
2979    type: value_count
2980    rules:
2981        - some-rule
2982    group-by:
2983        - User
2984    timespan: 1h
2985    condition:
2986        gte: 10
2987"#,
2988        );
2989        assert!(has_rule(&w, LintRule::MissingConditionField));
2990    }
2991
2992    #[test]
2993    fn invalid_condition_operator() {
2994        let w = lint(
2995            r#"
2996title: Test
2997correlation:
2998    type: event_count
2999    rules:
3000        - some-rule
3001    group-by:
3002        - User
3003    timespan: 1h
3004    condition:
3005        bigger: 10
3006"#,
3007        );
3008        assert!(has_rule(&w, LintRule::InvalidConditionOperator));
3009    }
3010
3011    #[test]
3012    fn generate_not_boolean() {
3013        let w = lint(
3014            r#"
3015title: Test
3016correlation:
3017    type: event_count
3018    rules:
3019        - some-rule
3020    group-by:
3021        - User
3022    timespan: 1h
3023    condition:
3024        gte: 10
3025    generate: 'yes'
3026"#,
3027        );
3028        assert!(has_rule(&w, LintRule::GenerateNotBoolean));
3029    }
3030
3031    // ── Filter rule checks ──────────────────────────────────────────────
3032
3033    #[test]
3034    fn valid_filter_no_errors() {
3035        let w = lint(
3036            r#"
3037title: Filter Admin
3038logsource:
3039    category: process_creation
3040    product: windows
3041filter:
3042    rules:
3043        - 929a690e-bef0-4204-a928-ef5e620d6fcc
3044    selection:
3045        User|startswith: 'adm_'
3046    condition: selection
3047"#,
3048        );
3049        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
3050        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
3051    }
3052
3053    #[test]
3054    fn filter_without_rules_is_valid() {
3055        let w = lint(
3056            r#"
3057title: Test
3058logsource:
3059    category: test
3060filter:
3061    selection:
3062        User: admin
3063    condition: selection
3064"#,
3065        );
3066        assert!(!has_rule(&w, LintRule::MissingFilterRules));
3067    }
3068
3069    #[test]
3070    fn filter_rules_invalid_type() {
3071        let w = lint(
3072            r#"
3073title: Test
3074logsource:
3075    category: test
3076filter:
3077    rules: 123
3078    selection:
3079        User: admin
3080    condition: selection
3081"#,
3082        );
3083        assert!(has_rule(&w, LintRule::MissingFilterRules));
3084    }
3085
3086    #[test]
3087    fn filter_rules_any_string_is_valid() {
3088        let w = lint(
3089            r#"
3090title: Test
3091logsource:
3092    category: test
3093filter:
3094    rules: any
3095    selection:
3096        User: admin
3097    condition: selection
3098"#,
3099        );
3100        assert!(!has_rule(&w, LintRule::MissingFilterRules));
3101    }
3102
3103    #[test]
3104    fn filter_rules_empty_sequence_is_valid() {
3105        let w = lint(
3106            r#"
3107title: Test
3108logsource:
3109    category: test
3110filter:
3111    rules: []
3112    selection:
3113        User: admin
3114    condition: selection
3115"#,
3116        );
3117        assert!(!has_rule(&w, LintRule::EmptyFilterRules));
3118    }
3119
3120    #[test]
3121    fn missing_filter_selection() {
3122        let w = lint(
3123            r#"
3124title: Test
3125logsource:
3126    category: test
3127filter:
3128    rules:
3129        - some-rule
3130    condition: selection
3131"#,
3132        );
3133        assert!(has_rule(&w, LintRule::MissingFilterSelection));
3134    }
3135
3136    #[test]
3137    fn missing_filter_condition() {
3138        let w = lint(
3139            r#"
3140title: Test
3141logsource:
3142    category: test
3143filter:
3144    rules:
3145        - some-rule
3146    selection:
3147        User: admin
3148"#,
3149        );
3150        assert!(has_rule(&w, LintRule::MissingFilterCondition));
3151    }
3152
3153    #[test]
3154    fn filter_has_level_warning() {
3155        let w = lint(
3156            r#"
3157title: Test
3158logsource:
3159    category: test
3160level: high
3161filter:
3162    rules:
3163        - some-rule
3164    selection:
3165        User: admin
3166    condition: selection
3167"#,
3168        );
3169        assert!(has_rule(&w, LintRule::FilterHasLevel));
3170    }
3171
3172    #[test]
3173    fn filter_has_status_warning() {
3174        let w = lint(
3175            r#"
3176title: Test
3177logsource:
3178    category: test
3179status: test
3180filter:
3181    rules:
3182        - some-rule
3183    selection:
3184        User: admin
3185    condition: selection
3186"#,
3187        );
3188        assert!(has_rule(&w, LintRule::FilterHasStatus));
3189    }
3190
3191    #[test]
3192    fn missing_filter_logsource() {
3193        let w = lint(
3194            r#"
3195title: Test
3196filter:
3197    rules:
3198        - some-rule
3199    selection:
3200        User: admin
3201    condition: selection
3202"#,
3203        );
3204        assert!(has_rule(&w, LintRule::MissingFilterLogsource));
3205    }
3206
3207    // ── Action fragments are skipped ────────────────────────────────────
3208
3209    #[test]
3210    fn action_global_skipped() {
3211        let w = lint(
3212            r#"
3213action: global
3214title: Global Template
3215logsource:
3216    product: windows
3217"#,
3218        );
3219        assert!(w.is_empty());
3220    }
3221
3222    #[test]
3223    fn action_reset_skipped() {
3224        let w = lint(
3225            r#"
3226action: reset
3227"#,
3228        );
3229        assert!(w.is_empty());
3230    }
3231
3232    // ── New checks ──────────────────────────────────────────────────────
3233
3234    #[test]
3235    fn empty_title() {
3236        let w = lint(
3237            r#"
3238title: ''
3239logsource:
3240    category: test
3241detection:
3242    selection:
3243        field: value
3244    condition: selection
3245level: medium
3246"#,
3247        );
3248        assert!(has_rule(&w, LintRule::EmptyTitle));
3249    }
3250
3251    #[test]
3252    fn missing_level() {
3253        let w = lint(
3254            r#"
3255title: Test
3256logsource:
3257    category: test
3258detection:
3259    selection:
3260        field: value
3261    condition: selection
3262"#,
3263        );
3264        assert!(has_rule(&w, LintRule::MissingLevel));
3265    }
3266
3267    #[test]
3268    fn valid_level_no_missing_warning() {
3269        let w = lint(
3270            r#"
3271title: Test
3272logsource:
3273    category: test
3274detection:
3275    selection:
3276        field: value
3277    condition: selection
3278level: medium
3279"#,
3280        );
3281        assert!(has_no_rule(&w, LintRule::MissingLevel));
3282    }
3283
3284    #[test]
3285    fn invalid_date_feb_30() {
3286        assert!(!is_valid_date("2025-02-30"));
3287    }
3288
3289    #[test]
3290    fn invalid_date_apr_31() {
3291        assert!(!is_valid_date("2025-04-31"));
3292    }
3293
3294    #[test]
3295    fn valid_date_feb_28() {
3296        assert!(is_valid_date("2025-02-28"));
3297    }
3298
3299    #[test]
3300    fn valid_date_leap_year_feb_29() {
3301        assert!(is_valid_date("2024-02-29"));
3302    }
3303
3304    #[test]
3305    fn invalid_date_non_leap_feb_29() {
3306        assert!(!is_valid_date("2025-02-29"));
3307    }
3308
3309    #[test]
3310    fn condition_references_unknown() {
3311        let w = lint(
3312            r#"
3313title: Test
3314logsource:
3315    category: test
3316detection:
3317    selection:
3318        field: value
3319    condition: sel_main
3320level: medium
3321"#,
3322        );
3323        assert!(has_rule(&w, LintRule::ConditionReferencesUnknown));
3324    }
3325
3326    #[test]
3327    fn condition_references_valid() {
3328        let w = lint(
3329            r#"
3330title: Test
3331logsource:
3332    category: test
3333detection:
3334    selection:
3335        field: value
3336    condition: selection
3337level: medium
3338"#,
3339        );
3340        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
3341    }
3342
3343    #[test]
3344    fn condition_references_complex_valid() {
3345        let w = lint(
3346            r#"
3347title: Test
3348logsource:
3349    category: test
3350detection:
3351    sel_main:
3352        field: value
3353    filter_fp:
3354        User: admin
3355    condition: sel_main and not filter_fp
3356level: medium
3357"#,
3358        );
3359        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
3360    }
3361
3362    #[test]
3363    fn empty_value_list() {
3364        let w = lint(
3365            r#"
3366title: Test
3367logsource:
3368    category: test
3369detection:
3370    selection:
3371        field: []
3372    condition: selection
3373level: medium
3374"#,
3375        );
3376        assert!(has_rule(&w, LintRule::EmptyValueList));
3377    }
3378
3379    #[test]
3380    fn not_a_mapping() {
3381        let v: serde_yaml::Value = serde_yaml::from_str("- item1\n- item2").unwrap();
3382        let w = lint_yaml_value(&v);
3383        assert!(has_rule(&w, LintRule::NotAMapping));
3384    }
3385
3386    #[test]
3387    fn lint_yaml_str_produces_spans() {
3388        let text = r#"title: Test
3389status: invalid_status
3390logsource:
3391    category: test
3392detection:
3393    selection:
3394        field: value
3395    condition: selection
3396level: medium
3397"#;
3398        let warnings = lint_yaml_str(text);
3399        // InvalidStatus points to /status which exists in the text
3400        let invalid_status = warnings.iter().find(|w| w.rule == LintRule::InvalidStatus);
3401        assert!(invalid_status.is_some(), "expected InvalidStatus warning");
3402        let span = invalid_status.unwrap().span;
3403        assert!(span.is_some(), "expected span to be resolved");
3404        // "status:" is on line 1 (0-indexed)
3405        assert_eq!(span.unwrap().start_line, 1);
3406    }
3407
3408    #[test]
3409    fn yaml_parse_error_uses_correct_rule() {
3410        let text = "title: [unclosed";
3411        let warnings = lint_yaml_str(text);
3412        assert!(has_rule(&warnings, LintRule::YamlParseError));
3413        assert!(has_no_rule(&warnings, LintRule::MissingTitle));
3414    }
3415
3416    // ── Unknown top-level keys ───────────────────────────────────────────
3417
3418    #[test]
3419    fn unknown_key_typo_detected() {
3420        let w = lint(
3421            r#"
3422title: Test
3423desciption: Typo field
3424logsource:
3425    category: test
3426detection:
3427    selection:
3428        field: value
3429    condition: selection
3430level: medium
3431"#,
3432        );
3433        assert!(has_rule(&w, LintRule::UnknownKey));
3434        let unk = w.iter().find(|w| w.rule == LintRule::UnknownKey).unwrap();
3435        assert!(unk.message.contains("desciption"));
3436        assert!(unk.message.contains("description"));
3437        assert_eq!(unk.severity, Severity::Info);
3438    }
3439
3440    #[test]
3441    fn known_keys_no_unknown_warning() {
3442        let w = lint(
3443            r#"
3444title: Test Rule
3445id: 929a690e-bef0-4204-a928-ef5e620d6fcc
3446status: test
3447description: A valid description
3448author: tester
3449date: '2025-01-01'
3450modified: '2025-06-01'
3451license: MIT
3452logsource:
3453    category: process_creation
3454    product: windows
3455detection:
3456    selection:
3457        CommandLine|contains: 'whoami'
3458    condition: selection
3459level: medium
3460tags:
3461    - attack.execution
3462references:
3463    - https://example.com
3464fields:
3465    - CommandLine
3466falsepositives:
3467    - Legitimate admin
3468"#,
3469        );
3470        assert!(has_no_rule(&w, LintRule::UnknownKey));
3471    }
3472
3473    #[test]
3474    fn custom_fields_allowed_by_spec() {
3475        // The Sigma spec v2.1.0 explicitly allows arbitrary custom top-level
3476        // fields, so keys like "simulation" and "regression_tests_path" that
3477        // are not close to any known key should NOT produce warnings.
3478        let w = lint(
3479            r#"
3480title: Test Rule
3481logsource:
3482    category: test
3483detection:
3484    selection:
3485        field: value
3486    condition: selection
3487level: medium
3488simulation:
3489    action: scan
3490regression_tests_path: tests/
3491custom_metadata: hello
3492"#,
3493        );
3494        assert!(has_no_rule(&w, LintRule::UnknownKey));
3495    }
3496
3497    #[test]
3498    fn unknown_key_typo_correlation() {
3499        // "lvel" is edit-distance 1 from "level"
3500        let w = lint(
3501            r#"
3502title: Correlation Test
3503name: test_correlation
3504correlation:
3505    type: event_count
3506    rules:
3507        - rule1
3508    group-by:
3509        - src_ip
3510    timespan: 5m
3511    condition:
3512        gte: 10
3513lvel: high
3514"#,
3515        );
3516        assert!(has_rule(&w, LintRule::UnknownKey));
3517        let unk = w.iter().find(|w| w.rule == LintRule::UnknownKey).unwrap();
3518        assert!(unk.message.contains("lvel"));
3519        assert!(unk.message.contains("level"));
3520    }
3521
3522    #[test]
3523    fn unknown_key_custom_field_filter() {
3524        // "badkey" is not close to any known key — no warning.
3525        let w = lint(
3526            r#"
3527title: Filter Test
3528logsource:
3529    category: test
3530filter:
3531    rules:
3532        - rule1
3533    selection:
3534        User: admin
3535    condition: selection
3536badkey: foo
3537"#,
3538        );
3539        assert!(has_no_rule(&w, LintRule::UnknownKey));
3540    }
3541
3542    // ── Wildcard-only value ──────────────────────────────────────────────
3543
3544    #[test]
3545    fn wildcard_only_value_string() {
3546        let w = lint(
3547            r#"
3548title: Test
3549logsource:
3550    category: test
3551detection:
3552    selection:
3553        TargetFilename: '*'
3554    condition: selection
3555level: medium
3556"#,
3557        );
3558        assert!(has_rule(&w, LintRule::WildcardOnlyValue));
3559    }
3560
3561    #[test]
3562    fn wildcard_only_value_list() {
3563        let w = lint(
3564            r#"
3565title: Test
3566logsource:
3567    category: test
3568detection:
3569    selection:
3570        TargetFilename:
3571            - '*'
3572    condition: selection
3573level: medium
3574"#,
3575        );
3576        assert!(has_rule(&w, LintRule::WildcardOnlyValue));
3577    }
3578
3579    #[test]
3580    fn wildcard_with_other_values_no_warning() {
3581        let w = lint(
3582            r#"
3583title: Test
3584logsource:
3585    category: test
3586detection:
3587    selection:
3588        TargetFilename:
3589            - '*temp*'
3590            - '*cache*'
3591    condition: selection
3592level: medium
3593"#,
3594        );
3595        assert!(has_no_rule(&w, LintRule::WildcardOnlyValue));
3596    }
3597
3598    #[test]
3599    fn wildcard_regex_no_warning() {
3600        let w = lint(
3601            r#"
3602title: Test
3603logsource:
3604    category: test
3605detection:
3606    selection:
3607        TargetFilename|re: '*'
3608    condition: selection
3609level: medium
3610"#,
3611        );
3612        assert!(has_no_rule(&w, LintRule::WildcardOnlyValue));
3613    }
3614
3615    // ── resolve_path_to_span tests ───────────────────────────────────────
3616
3617    #[test]
3618    fn resolve_path_to_span_root() {
3619        let text = "title: Test\nstatus: test\n";
3620        let span = resolve_path_to_span(text, "/");
3621        assert!(span.is_some());
3622        assert_eq!(span.unwrap().start_line, 0);
3623    }
3624
3625    #[test]
3626    fn resolve_path_to_span_top_level_key() {
3627        let text = "title: Test\nstatus: test\nlevel: high\n";
3628        let span = resolve_path_to_span(text, "/status");
3629        assert!(span.is_some());
3630        assert_eq!(span.unwrap().start_line, 1);
3631    }
3632
3633    #[test]
3634    fn resolve_path_to_span_nested_key() {
3635        let text = "title: Test\nlogsource:\n    category: test\n    product: windows\n";
3636        let span = resolve_path_to_span(text, "/logsource/product");
3637        assert!(span.is_some());
3638        assert_eq!(span.unwrap().start_line, 3);
3639    }
3640
3641    #[test]
3642    fn resolve_path_to_span_missing_key() {
3643        let text = "title: Test\nstatus: test\n";
3644        let span = resolve_path_to_span(text, "/nonexistent");
3645        assert!(span.is_none());
3646    }
3647
3648    // ── Multi-document YAML ──────────────────────────────────────────────
3649
3650    #[test]
3651    fn multi_doc_yaml_lints_all_documents() {
3652        let text = r#"title: Rule 1
3653logsource:
3654    category: test
3655detection:
3656    selection:
3657        field: value
3658    condition: selection
3659level: medium
3660---
3661title: Rule 2
3662status: bad_status
3663logsource:
3664    category: test
3665detection:
3666    selection:
3667        field: value
3668    condition: selection
3669level: medium
3670"#;
3671        let warnings = lint_yaml_str(text);
3672        // Second doc has InvalidStatus
3673        assert!(has_rule(&warnings, LintRule::InvalidStatus));
3674    }
3675
3676    // ── is_valid_timespan edge cases ─────────────────────────────────────
3677
3678    #[test]
3679    fn timespan_zero_seconds() {
3680        assert!(is_valid_timespan("0s"));
3681    }
3682
3683    #[test]
3684    fn timespan_no_digits() {
3685        assert!(!is_valid_timespan("s"));
3686    }
3687
3688    #[test]
3689    fn timespan_no_unit() {
3690        assert!(!is_valid_timespan("123"));
3691    }
3692
3693    #[test]
3694    fn timespan_invalid_unit() {
3695        assert!(!is_valid_timespan("5x"));
3696    }
3697
3698    #[test]
3699    fn timespan_valid_variants() {
3700        assert!(is_valid_timespan("30s"));
3701        assert!(is_valid_timespan("5m"));
3702        assert!(is_valid_timespan("1h"));
3703        assert!(is_valid_timespan("7d"));
3704        assert!(is_valid_timespan("1w"));
3705        assert!(is_valid_timespan("1M"));
3706        assert!(is_valid_timespan("1y"));
3707    }
3708
3709    // ── FileLintResult methods ───────────────────────────────────────────
3710
3711    #[test]
3712    fn file_lint_result_has_errors() {
3713        let result = FileLintResult {
3714            path: std::path::PathBuf::from("test.yml"),
3715            warnings: vec![
3716                warning(LintRule::TitleTooLong, "too long", "/title"),
3717                err(
3718                    LintRule::MissingCondition,
3719                    "missing",
3720                    "/detection/condition",
3721                ),
3722            ],
3723        };
3724        assert!(result.has_errors());
3725        assert_eq!(result.error_count(), 1);
3726        assert_eq!(result.warning_count(), 1);
3727    }
3728
3729    #[test]
3730    fn file_lint_result_no_errors() {
3731        let result = FileLintResult {
3732            path: std::path::PathBuf::from("test.yml"),
3733            warnings: vec![warning(LintRule::TitleTooLong, "too long", "/title")],
3734        };
3735        assert!(!result.has_errors());
3736        assert_eq!(result.error_count(), 0);
3737        assert_eq!(result.warning_count(), 1);
3738    }
3739
3740    #[test]
3741    fn file_lint_result_empty() {
3742        let result = FileLintResult {
3743            path: std::path::PathBuf::from("test.yml"),
3744            warnings: vec![],
3745        };
3746        assert!(!result.has_errors());
3747        assert_eq!(result.error_count(), 0);
3748        assert_eq!(result.warning_count(), 0);
3749    }
3750
3751    // ── LintWarning Display impl ─────────────────────────────────────────
3752
3753    #[test]
3754    fn lint_warning_display() {
3755        let w = err(
3756            LintRule::MissingTitle,
3757            "missing required field 'title'",
3758            "/title",
3759        );
3760        let display = format!("{w}");
3761        assert!(display.contains("error"));
3762        assert!(display.contains("missing_title"));
3763        assert!(display.contains("/title"));
3764    }
3765
3766    // ── New checks: missing description / author / all+re ────────────────
3767
3768    #[test]
3769    fn missing_description_info() {
3770        let w = lint(
3771            r#"
3772title: Test
3773logsource:
3774    category: test
3775detection:
3776    selection:
3777        field: value
3778    condition: selection
3779level: medium
3780"#,
3781        );
3782        assert!(has_rule(&w, LintRule::MissingDescription));
3783        let md = w
3784            .iter()
3785            .find(|w| w.rule == LintRule::MissingDescription)
3786            .unwrap();
3787        assert_eq!(md.severity, Severity::Info);
3788    }
3789
3790    #[test]
3791    fn has_description_no_info() {
3792        let w = lint(
3793            r#"
3794title: Test
3795description: A fine description
3796logsource:
3797    category: test
3798detection:
3799    selection:
3800        field: value
3801    condition: selection
3802level: medium
3803"#,
3804        );
3805        assert!(has_no_rule(&w, LintRule::MissingDescription));
3806    }
3807
/// A rule without an `author` key yields `MissingAuthor` at `Info` severity.
#[test]
fn missing_author_info() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::MissingAuthor));

    // Only the first matching finding is inspected.
    let first_severity = findings
        .iter()
        .find(|f| f.rule == LintRule::MissingAuthor)
        .map(|f| f.severity);
    assert_eq!(first_severity, Some(Severity::Info));
}
3829
/// A rule that carries an `author` must not be flagged with `MissingAuthor`.
#[test]
fn has_author_no_info() {
    let rule_yaml = r#"
title: Test
author: tester
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_no_rule(&findings, LintRule::MissingAuthor));
}
3847
/// Combining `|all` with `|re` on one field must be reported as `AllWithRe`.
#[test]
fn all_with_re_warning() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        CommandLine|all|re:
            - '(?i)whoami'
            - '(?i)net user'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::AllWithRe));
}
3866
/// `|contains|all` (no `|re`) must not be reported as `AllWithRe`.
#[test]
fn all_without_re_no_all_with_re() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        CommandLine|contains|all:
            - 'whoami'
            - 'net user'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_no_rule(&findings, LintRule::AllWithRe));
}
3885
/// A plain `|re` modifier (no `|all`) must not be reported as `AllWithRe`.
#[test]
fn re_without_all_no_all_with_re() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        CommandLine|re: '(?i)whoami|net user'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_no_rule(&findings, LintRule::AllWithRe));
}
3902
3903    // ── Modifier compatibility checks ────────────────────────────────────
3904
/// `|contains|startswith` on one field must be reported as `IncompatibleModifiers`.
#[test]
fn incompatible_contains_startswith() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|contains|startswith: 'test'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::IncompatibleModifiers));
}
3921
/// `|endswith|startswith` on one field must be reported as `IncompatibleModifiers`.
#[test]
fn incompatible_endswith_startswith() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|endswith|startswith: 'test'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::IncompatibleModifiers));
}
3938
/// `|contains|endswith` on one field must be reported as `IncompatibleModifiers`.
#[test]
fn incompatible_contains_endswith() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|contains|endswith: 'test'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::IncompatibleModifiers));
}
3955
/// `|re|contains` on one field must be reported as `IncompatibleModifiers`.
#[test]
fn incompatible_re_with_contains() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|re|contains: '.*test.*'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::IncompatibleModifiers));
}
3972
/// `|cidr|startswith` on one field must be reported as `IncompatibleModifiers`.
#[test]
fn incompatible_cidr_with_startswith() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|cidr|startswith: '192.168.0.0/16'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::IncompatibleModifiers));
}
3989
/// `|exists|contains` on one field must be reported as `IncompatibleModifiers`.
#[test]
fn incompatible_exists_with_contains() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|exists|contains: true
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::IncompatibleModifiers));
}
4006
/// `|gt|contains` on one field must be reported as `IncompatibleModifiers`.
#[test]
fn incompatible_gt_with_contains() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|gt|contains: 100
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::IncompatibleModifiers));
}
4023
/// The `|i|m` modifiers used without `|re` must be reported as `IncompatibleModifiers`.
#[test]
fn incompatible_regex_flags_without_re() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|i|m: 'test'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_rule(&findings, LintRule::IncompatibleModifiers));
}
4040
/// `|re` followed by the `|i|m|s` modifiers must not be reported as `IncompatibleModifiers`.
#[test]
fn compatible_re_with_regex_flags() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|re|i|m|s: '(?i)test'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_no_rule(&findings, LintRule::IncompatibleModifiers));
}
4057
/// `|contains|all` with a value list must not be reported as `IncompatibleModifiers`.
#[test]
fn compatible_contains_all() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|contains|all:
            - 'val1'
            - 'val2'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_no_rule(&findings, LintRule::IncompatibleModifiers));
}
4076
/// `|base64offset|contains` must not be reported as `IncompatibleModifiers`.
#[test]
fn compatible_base64offset_contains() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|base64offset|contains: 'test'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_no_rule(&findings, LintRule::IncompatibleModifiers));
}
4093
/// `|wide|base64` must not be reported as `IncompatibleModifiers`.
#[test]
fn compatible_wide_base64() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        Field|wide|base64: 'test'
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_no_rule(&findings, LintRule::IncompatibleModifiers));
}
4110
4111    // ── Info/Hint severity levels ────────────────────────────────────────
4112
/// Each `Severity` variant displays as its lowercase name.
#[test]
fn severity_display() {
    let expectations = [
        (Severity::Error, "error"),
        (Severity::Warning, "warning"),
        (Severity::Info, "info"),
        (Severity::Hint, "hint"),
    ];
    for (severity, text) in expectations {
        assert_eq!(severity.to_string(), text);
    }
}
4120
/// Two info findings plus one warning: the per-severity counters must reflect exactly that.
#[test]
fn file_lint_result_info_count() {
    let warnings = vec![
        info(LintRule::MissingDescription, "missing desc", "/description"),
        info(LintRule::MissingAuthor, "missing author", "/author"),
        warning(LintRule::TitleTooLong, "too long", "/title"),
    ];
    let path = std::path::PathBuf::from("test.yml");
    let summary = FileLintResult { path, warnings };

    assert_eq!(summary.info_count(), 2);
    assert_eq!(summary.warning_count(), 1);
    assert_eq!(summary.error_count(), 0);
    assert!(!summary.has_errors());
}
4136
4137    // ── Inline suppression parsing ───────────────────────────────────────
4138
/// A bare `# rsigma-disable` comment sets the `disable_all` flag.
#[test]
fn parse_inline_disable_all() {
    let parsed = parse_inline_suppressions("# rsigma-disable\ntitle: Test\n");
    assert!(parsed.disable_all);
}
4145
/// `# rsigma-disable a, b` records each listed rule in `file_disabled` without
/// setting `disable_all`.
#[test]
fn parse_inline_disable_specific_rules() {
    let source = "# rsigma-disable missing_description, missing_author\ntitle: Test\n";
    let parsed = parse_inline_suppressions(source);

    assert!(!parsed.disable_all);
    for rule_name in ["missing_description", "missing_author"] {
        assert!(parsed.file_disabled.contains(rule_name));
    }
}
4154
/// A bare `# rsigma-disable-next-line` suppresses every rule on the following line.
#[test]
fn parse_inline_disable_next_line_all() {
    let parsed = parse_inline_suppressions("# rsigma-disable-next-line\ntitle: Test\n");
    assert!(!parsed.disable_all);

    // The comment is on line 0, so the suppressed line is line 1 ("title: Test").
    // An entry of `None` means "all rules".
    let entry = parsed.line_disabled.get(&1);
    assert!(entry.is_some());
    assert!(matches!(entry, Some(None)));
}
4164
/// `# rsigma-disable-next-line <rule>` records only the named rule for the following line.
#[test]
fn parse_inline_disable_next_line_specific() {
    let source = "title: Test\n# rsigma-disable-next-line missing_level\nlevel: medium\n";
    let parsed = parse_inline_suppressions(source);

    // The comment is on line 1, so the suppression applies to line 2.
    let entry = parsed.line_disabled.get(&2);
    assert!(entry.is_some());

    let names = entry.and_then(|slot| slot.as_ref()).unwrap();
    assert!(names.contains("missing_level"));
}
4174
/// Text without any comment produces no suppressions of any kind.
#[test]
fn parse_inline_no_comments() {
    let parsed = parse_inline_suppressions("title: Test\nstatus: test\n");

    assert!(!parsed.disable_all);
    assert!(parsed.file_disabled.is_empty());
    assert!(parsed.line_disabled.is_empty());
}
4183
/// A `#` inside a quoted YAML string is not a comment, so no suppression is recorded.
#[test]
fn parse_inline_comment_in_quoted_string() {
    let source = "description: 'no # rsigma-disable here'\ntitle: Test\n";
    let parsed = parse_inline_suppressions(source);

    assert!(!parsed.disable_all);
    assert!(parsed.file_disabled.is_empty());
}
4192
4193    // ── Suppression filtering ────────────────────────────────────────────
4194
/// A rule listed in `LintConfig::disabled_rules` is dropped; the other findings remain.
#[test]
fn apply_suppressions_disables_rule() {
    let mut config = LintConfig::default();
    config
        .disabled_rules
        .insert(String::from("missing_description"));
    let inline = InlineSuppressions::default();

    let input = vec![
        info(LintRule::MissingDescription, "desc", "/description"),
        info(LintRule::MissingAuthor, "author", "/author"),
        warning(LintRule::TitleTooLong, "title", "/title"),
    ];

    let kept = apply_suppressions(input, &config, &inline);
    assert_eq!(kept.len(), 2);
    assert!(
        !kept
            .iter()
            .any(|w| w.rule == LintRule::MissingDescription)
    );
}
4216
/// A severity override in the config rewrites the severity of the matching finding.
#[test]
fn apply_suppressions_severity_override() {
    let mut config = LintConfig::default();
    config
        .severity_overrides
        .insert(String::from("title_too_long"), Severity::Info);
    let inline = InlineSuppressions::default();

    let input = vec![warning(LintRule::TitleTooLong, "title too long", "/title")];
    let kept = apply_suppressions(input, &config, &inline);

    // Exactly one finding survives, and it carries the overridden severity.
    let severities: Vec<Severity> = kept.iter().map(|w| w.severity).collect();
    assert_eq!(severities, [Severity::Info]);
}
4230
/// A rule named in the inline file-level suppressions is dropped; the other finding remains.
#[test]
fn apply_suppressions_inline_file_disable() {
    let config = LintConfig::default();
    let mut inline = InlineSuppressions::default();
    inline.file_disabled.insert(String::from("missing_author"));

    let input = vec![
        info(LintRule::MissingDescription, "desc", "/description"),
        info(LintRule::MissingAuthor, "author", "/author"),
    ];
    let kept = apply_suppressions(input, &config, &inline);

    // Exactly one finding survives, and it is the description one.
    let surviving: Vec<_> = kept.iter().map(|w| &w.rule).collect();
    assert_eq!(surviving, [&LintRule::MissingDescription]);
}
4245
/// With inline `disable_all` set, every finding is removed, errors included.
#[test]
fn apply_suppressions_inline_disable_all() {
    let inline = InlineSuppressions {
        disable_all: true,
        ..Default::default()
    };
    let config = LintConfig::default();

    let input = vec![
        err(LintRule::MissingTitle, "title", "/title"),
        warning(LintRule::TitleTooLong, "long", "/title"),
    ];
    let kept = apply_suppressions(input, &config, &inline);
    assert_eq!(kept.len(), 0);
}
4261
/// A line-level suppression of all rules removes only the finding whose span
/// starts on that line.
#[test]
fn apply_suppressions_inline_next_line() {
    let on_line_5 = {
        let mut finding = warning(LintRule::TitleTooLong, "long", "/title");
        finding.span = Some(Span {
            start_line: 5,
            start_col: 0,
            end_line: 5,
            end_col: 10,
        });
        finding
    };
    let on_line_6 = {
        let mut finding = err(LintRule::InvalidStatus, "bad", "/status");
        finding.span = Some(Span {
            start_line: 6,
            start_col: 0,
            end_line: 6,
            end_col: 10,
        });
        finding
    };

    let mut inline = InlineSuppressions::default();
    // `None` suppresses all rules on line 5.
    inline.line_disabled.insert(5, None);
    let config = LintConfig::default();

    let kept = apply_suppressions(vec![on_line_5, on_line_6], &config, &inline);
    assert_eq!(kept.len(), 1);
    assert_eq!(kept[0].rule, LintRule::InvalidStatus);
}
4288
4289    // ── lint_yaml_str_with_config integration ────────────────────────────
4290
/// Rules disabled through `LintConfig` do not appear in the output of
/// `lint_yaml_str_with_config`.
#[test]
fn lint_with_config_disables_rules() {
    let mut config = LintConfig::default();
    for rule_name in ["missing_description", "missing_author"] {
        config.disabled_rules.insert(rule_name.to_string());
    }

    let text = r#"title: Test
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: medium
"#;
    let findings = lint_yaml_str_with_config(text, &config);

    assert!(
        findings
            .iter()
            .all(|w| w.rule != LintRule::MissingDescription)
    );
    assert!(findings.iter().all(|w| w.rule != LintRule::MissingAuthor));
}
4316
/// A `rsigma-disable-next-line` directive placed above an unrelated line must not
/// interfere with normal linting.
///
/// The directive names `missing_level` but sits directly above `logsource:`. Per the
/// original author's note, the `MissingLevel` finding is reported at path `/level` and
/// has no span on that line, so it is expected to survive.
#[test]
fn lint_with_inline_disable_next_line() {
    let text = r#"title: Test
# rsigma-disable-next-line missing_level
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
"#;
    let findings = lint_yaml_str_with_config(text, &LintConfig::default());

    // Inline-comment parsing must not break normal linting: the finding is still reported.
    let missing_level_reported = findings
        .iter()
        .any(|w| w.rule == LintRule::MissingLevel);
    assert!(missing_level_reported);
}
4337
/// A leading `# rsigma-disable a, b` comment removes those rules from the lint output.
#[test]
fn lint_with_inline_file_disable() {
    let text = r#"# rsigma-disable missing_description, missing_author
title: Test
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: medium
"#;
    let findings = lint_yaml_str_with_config(text, &LintConfig::default());

    assert!(
        findings
            .iter()
            .all(|w| w.rule != LintRule::MissingDescription)
    );
    assert!(findings.iter().all(|w| w.rule != LintRule::MissingAuthor));
}
4359
/// A leading bare `# rsigma-disable` comment silences every finding, including the
/// one the invalid `status` value would otherwise produce.
#[test]
fn lint_with_inline_disable_all() {
    let text = r#"# rsigma-disable
title: Test
status: invalid_status
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
"#;
    let findings = lint_yaml_str_with_config(text, &LintConfig::default());
    assert!(findings.is_empty());
}
4376
4377    // ── LintConfig ───────────────────────────────────────────────────────
4378
/// `LintConfig::merge` keeps the receiver's entries and adds those of the other config.
#[test]
fn lint_config_merge() {
    let incoming = LintConfig {
        disabled_rules: std::iter::once("rule_c".to_string()).collect(),
        severity_overrides: std::iter::once(("rule_d".to_string(), Severity::Hint)).collect(),
        exclude_patterns: vec!["test/**".to_string()],
    };

    let mut merged = LintConfig::default();
    merged.disabled_rules.insert("rule_a".to_string());
    merged
        .severity_overrides
        .insert("rule_b".to_string(), Severity::Info);

    merged.merge(&incoming);

    for rule_name in ["rule_a", "rule_c"] {
        assert!(merged.disabled_rules.contains(rule_name));
    }
    assert_eq!(
        merged.severity_overrides.get("rule_b"),
        Some(&Severity::Info)
    );
    assert_eq!(
        merged.severity_overrides.get("rule_d"),
        Some(&Severity::Hint)
    );
    assert_eq!(merged.exclude_patterns, vec!["test/**".to_string()]);
}
4401
/// `is_disabled` is true only for rules whose name was inserted into `disabled_rules`.
#[test]
fn lint_config_is_disabled() {
    let mut cfg = LintConfig::default();
    cfg.disabled_rules.insert(String::from("missing_title"));

    let title_disabled = cfg.is_disabled(&LintRule::MissingTitle);
    let empty_title_disabled = cfg.is_disabled(&LintRule::EmptyTitle);
    assert!(title_disabled);
    assert!(!empty_title_disabled);
}
4409
/// `find_yaml_comment` returns the offset of a real comment and `None` when the `#`
/// is inside quotes or absent.
#[test]
fn find_yaml_comment_basic() {
    let cases = [
        ("# comment", Some(0)),
        ("key: value # comment", Some(11)),
        ("key: 'value # not comment'", None),
        ("key: \"value # not comment\"", None),
        ("key: value", None),
    ];
    for (line, expected) in cases {
        assert_eq!(find_yaml_comment(line), expected);
    }
}
4418
4419    // ── Fix generation tests ─────────────────────────────────────────────
4420
4421    fn find_fix(warnings: &[LintWarning], rule: LintRule) -> Option<&Fix> {
4422        warnings
4423            .iter()
4424            .find(|w| w.rule == rule)
4425            .and_then(|w| w.fix.as_ref())
4426    }
4427
4428    fn fix_summary(fix: &Fix) -> String {
4429        use std::fmt::Write;
4430        let mut s = String::new();
4431        writeln!(s, "title: {}", fix.title).unwrap();
4432        writeln!(s, "disposition: {:?}", fix.disposition).unwrap();
4433        for (i, p) in fix.patches.iter().enumerate() {
4434            match p {
4435                FixPatch::ReplaceValue { path, new_value } => {
4436                    writeln!(s, "patch[{i}]: ReplaceValue {path} -> {new_value}").unwrap();
4437                }
4438                FixPatch::ReplaceKey { path, new_key } => {
4439                    writeln!(s, "patch[{i}]: ReplaceKey {path} -> {new_key}").unwrap();
4440                }
4441                FixPatch::Remove { path } => {
4442                    writeln!(s, "patch[{i}]: Remove {path}").unwrap();
4443                }
4444            }
4445        }
4446        s
4447    }
4448
/// The misspelled status `expreimental` gets a safe fix replacing it with `experimental`.
#[test]
fn fix_invalid_status() {
    let rule_yaml = r#"
title: Test
status: expreimental
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::InvalidStatus).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: replace 'expreimental' with 'experimental'
        disposition: Safe
        patch[0]: ReplaceValue /status -> experimental
        ");
}
4470
/// The misspelled level `hgih` gets a safe fix replacing it with `high`.
#[test]
fn fix_invalid_level() {
    let rule_yaml = r#"
title: Test
level: hgih
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::InvalidLevel).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: replace 'hgih' with 'high'
        disposition: Safe
        patch[0]: ReplaceValue /level -> high
        ");
}
4492
/// A capitalised top-level key (`Status`) gets a safe fix renaming it to lowercase.
#[test]
fn fix_non_lowercase_key() {
    let rule_yaml = r#"
title: Test
Status: test
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::NonLowercaseKey).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: rename 'Status' to 'status'
        disposition: Safe
        patch[0]: ReplaceKey /Status -> status
        ");
}
4514
/// A capitalised logsource value (`Test`) gets a safe fix lowercasing it.
#[test]
fn fix_logsource_value_not_lowercase() {
    let rule_yaml = r#"
title: Test
logsource:
    category: Test
detection:
    sel:
        field: value
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested =
        find_fix(&findings, LintRule::LogsourceValueNotLowercase).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: lowercase 'Test' to 'test'
        disposition: Safe
        patch[0]: ReplaceValue /logsource/category -> test
        ");
}
4535
/// The unknown key `desciption` gets a safe fix renaming it to `description`.
#[test]
fn fix_unknown_key_typo() {
    let rule_yaml = r#"
title: Test
desciption: Typo field
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
level: medium
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::UnknownKey).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: rename 'desciption' to 'description'
        disposition: Safe
        patch[0]: ReplaceKey /desciption -> description
        ");
}
4558
/// A repeated tag gets a safe fix removing the second occurrence (`/tags/1`).
#[test]
fn fix_duplicate_tags() {
    let rule_yaml = r#"
title: Test
status: test
tags:
    - attack.execution
    - attack.execution
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::DuplicateTags).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: remove duplicate tag 'attack.execution'
        disposition: Safe
        patch[0]: Remove /tags/1
        ");
}
4583
/// A repeated reference gets a safe fix removing the second occurrence (`/references/1`).
#[test]
fn fix_duplicate_references() {
    let rule_yaml = r#"
title: Test
references:
    - https://example.com
    - https://example.com
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::DuplicateReferences).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: remove duplicate reference
        disposition: Safe
        patch[0]: Remove /references/1
        ");
}
4607
/// A repeated `fields` entry gets a safe fix removing the second occurrence (`/fields/1`).
#[test]
fn fix_duplicate_fields() {
    let rule_yaml = r#"
title: Test
fields:
    - CommandLine
    - CommandLine
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::DuplicateFields).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: remove duplicate field
        disposition: Safe
        patch[0]: Remove /fields/1
        ");
}
4631
/// `Cmd|all|re` gets a safe fix that renames the key to `Cmd|re`.
#[test]
fn fix_all_with_re() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    sel:
        Cmd|all|re:
            - foo.*
            - bar.*
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::AllWithRe).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: remove |all from 'Cmd|all|re'
        disposition: Safe
        patch[0]: ReplaceKey /detection/sel/Cmd|all|re -> Cmd|re
        ");
}
4654
/// `|all` applied to a single-value list gets a safe fix that drops `|all` from the key.
#[test]
fn fix_single_value_all_modifier() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    sel:
        Cmd|all|contains:
            - only_one
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested =
        find_fix(&findings, LintRule::SingleValueAllModifier).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: remove |all from 'Cmd|all|contains'
        disposition: Safe
        patch[0]: ReplaceKey /detection/sel/Cmd|all|contains -> Cmd|contains
        ");
}
4676
/// A value of just `'*'` gets a two-patch safe fix: rename the key to `|exists`, then
/// set the value to `true`.
#[test]
fn fix_wildcard_only_value() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    sel:
        CommandLine: '*'
    condition: sel
"#;
    let findings = lint(rule_yaml);
    let suggested = find_fix(&findings, LintRule::WildcardOnlyValue).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: replace with 'CommandLine|exists: true'
        disposition: Safe
        patch[0]: ReplaceKey /detection/sel/CommandLine -> CommandLine|exists
        patch[1]: ReplaceValue /detection/sel/CommandLine|exists -> true
        ");
}
4698
/// A filter document carrying `level` gets a safe fix removing `/level`.
#[test]
fn fix_filter_has_level() {
    let filter_yaml = r#"
title: Test
logsource:
    category: test
level: high
filter:
    rules:
        - rule1
    selection:
        User: admin
    condition: selection
"#;
    let findings = lint(filter_yaml);
    let suggested = find_fix(&findings, LintRule::FilterHasLevel).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: remove 'level' from filter rule
        disposition: Safe
        patch[0]: Remove /level
        ");
}
4722
/// A filter document carrying `status` gets a safe fix removing `/status`.
#[test]
fn fix_filter_has_status() {
    let filter_yaml = r#"
title: Test
logsource:
    category: test
status: test
filter:
    rules:
        - rule1
    selection:
        User: admin
    condition: selection
"#;
    let findings = lint(filter_yaml);
    let suggested = find_fix(&findings, LintRule::FilterHasStatus).expect("should have fix");
    let summary = fix_summary(suggested);
    insta::assert_snapshot!(summary, @r"
        title: remove 'status' from filter rule
        disposition: Safe
        patch[0]: Remove /status
        ");
}
4746
/// `MissingDetection` is reported for a rule without `detection`, but carries no fix.
#[test]
fn no_fix_for_unfixable_rule() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
"#;
    let findings = lint(rule_yaml);

    assert!(has_rule(&findings, LintRule::MissingDetection));
    let suggested = find_fix(&findings, LintRule::MissingDetection);
    assert!(suggested.is_none());
}
4759
/// An invalid status that is nowhere near a valid value is reported without a fix.
#[test]
fn no_fix_for_far_invalid_status() {
    let rule_yaml = r#"
title: Test
status: totallyinvalidxyz
logsource:
    category: test
detection:
    sel:
        field: value
    condition: sel
"#;
    let findings = lint(rule_yaml);

    assert!(has_rule(&findings, LintRule::InvalidStatus));
    let suggested = find_fix(&findings, LintRule::InvalidStatus);
    assert!(
        suggested.is_none(),
        "no fix when edit distance is too large"
    );
}
4780
4781    // ── Deprecated aggregation syntax ───────────────────────────────────
4782
/// A `| count(...) by ... > N` condition is reported as `DeprecatedAggregationSyntax`
/// at `Warning` severity and does not trigger `ConditionReferencesUnknown`.
#[test]
fn deprecated_aggregation_count() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        EventID: 4625
    condition: selection | count(TargetUserName) by IpAddress > 5
level: medium
"#;
    let findings = lint(rule_yaml);

    assert!(has_rule(&findings, LintRule::DeprecatedAggregationSyntax));
    assert!(has_no_rule(&findings, LintRule::ConditionReferencesUnknown));

    // Only the first matching finding is inspected.
    let first_severity = findings
        .iter()
        .find(|f| f.rule == LintRule::DeprecatedAggregationSyntax)
        .map(|f| f.severity);
    assert_eq!(first_severity, Some(Severity::Warning));
}
4805
/// A `| near(...) by ...` condition is reported as `DeprecatedAggregationSyntax` and
/// does not trigger `ConditionReferencesUnknown`.
#[test]
fn deprecated_aggregation_near() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        EventID: 1
    condition: selection | near(field) by host
level: medium
"#;
    let findings = lint(rule_yaml);

    assert!(has_rule(&findings, LintRule::DeprecatedAggregationSyntax));
    assert!(has_no_rule(&findings, LintRule::ConditionReferencesUnknown));
}
4823
/// A plain `condition: selection` must not be reported as `DeprecatedAggregationSyntax`.
#[test]
fn no_deprecated_aggregation_for_normal_condition() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        field: value
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);
    assert!(has_no_rule(&findings, LintRule::DeprecatedAggregationSyntax));
}
4840
/// A `|` inside a field modifier (`field|contains`) must not be mistaken for the
/// deprecated aggregation pipe, nor cause `ConditionReferencesUnknown`.
#[test]
fn no_deprecated_aggregation_for_pipe_in_field_modifier() {
    let rule_yaml = r#"
title: Test
logsource:
    category: test
detection:
    selection:
        field|contains: value
    condition: selection
level: medium
"#;
    let findings = lint(rule_yaml);

    assert!(has_no_rule(&findings, LintRule::DeprecatedAggregationSyntax));
    assert!(has_no_rule(&findings, LintRule::ConditionReferencesUnknown));
}
4858
/// Exercises `has_deprecated_aggregation` directly: piped aggregation functions are
/// detected, ordinary boolean and quantifier conditions are not.
#[test]
fn has_deprecated_aggregation_function() {
    let deprecated = [
        "selection | count(User) by SourceIP > 5",
        // Two spaces after the pipe are intentional.
        "selection |  sum(Amount) by Account > 1000",
        "selection | near(field) by host",
        "selection | min(score) by host > 0",
        "selection | max(score) by host > 100",
        "selection | avg(score) by host > 50",
    ];
    for condition in deprecated {
        assert!(has_deprecated_aggregation(condition));
    }

    let modern = ["selection and not filter", "1 of selection*", "all of them"];
    for condition in modern {
        assert!(!has_deprecated_aggregation(condition));
    }
}
4883
4884    // ── Exclude patterns ────────────────────────────────────────────────
4885
/// `LintConfig::load` reads both `disabled_rules` and the `exclude` glob list from a
/// YAML config file, keeping the exclude patterns in file order.
///
/// The config is written into a fresh `tempfile::tempdir()` rather than a fixed name in
/// the shared system temp directory. Concurrent test runs therefore cannot clobber each
/// other's file, and the directory is removed when `dir` is dropped, even if an
/// assertion or `unwrap` panics first.
#[test]
fn lint_config_exclude_from_yaml() {
    let yaml = r#"
disabled_rules:
  - missing_description
exclude:
  - "config/**"
  - "**/unsupported/**"
"#;
    let dir = tempfile::tempdir().unwrap();
    // Same file name as before so any extension-based handling in `load` is unchanged.
    let config_path = dir.path().join("rsigma_test_exclude.yml");
    std::fs::write(&config_path, yaml).unwrap();
    let config = LintConfig::load(&config_path).unwrap();

    assert!(config.disabled_rules.contains("missing_description"));
    assert_eq!(config.exclude_patterns.len(), 2);
    assert_eq!(config.exclude_patterns[0], "config/**");
    assert_eq!(config.exclude_patterns[1], "**/unsupported/**");
}
4905
4906    #[test]
4907    fn lint_config_build_exclude_set_empty() {
4908        let config = LintConfig::default();
4909        assert!(config.build_exclude_set().is_none());
4910    }
4911
4912    #[test]
4913    fn lint_config_build_exclude_set_matches() {
4914        let config = LintConfig {
4915            exclude_patterns: vec!["config/**".to_string()],
4916            ..Default::default()
4917        };
4918        let gs = config.build_exclude_set().expect("should build");
4919        assert!(gs.is_match("config/data_mapping/foo.yaml"));
4920        assert!(gs.is_match("config/nested/deep/bar.yml"));
4921        assert!(!gs.is_match("rules/windows/test.yml"));
4922    }
4923
4924    #[test]
4925    fn lint_directory_with_excludes() {
4926        let tmp = tempfile::tempdir().unwrap();
4927        let rules_dir = tmp.path().join("rules");
4928        let config_dir = tmp.path().join("config");
4929        std::fs::create_dir_all(&rules_dir).unwrap();
4930        std::fs::create_dir_all(&config_dir).unwrap();
4931
4932        // Valid rule
4933        std::fs::write(
4934            rules_dir.join("good.yml"),
4935            r#"
4936title: Good Rule
4937logsource:
4938    category: test
4939detection:
4940    sel:
4941        field: value
4942    condition: sel
4943level: medium
4944"#,
4945        )
4946        .unwrap();
4947
4948        // Non-rule config file (would produce errors if linted)
4949        std::fs::write(
4950            config_dir.join("mapping.yaml"),
4951            r#"
4952Title: Logon
4953Channel: Security
4954EventID: 4624
4955"#,
4956        )
4957        .unwrap();
4958
4959        // Without excludes: config file produces errors
4960        let no_exclude = LintConfig::default();
4961        let results = lint_yaml_directory_with_config(tmp.path(), &no_exclude).unwrap();
4962        let config_warnings: Vec<_> = results
4963            .iter()
4964            .filter(|r| r.path.to_string_lossy().contains("config"))
4965            .flat_map(|r| &r.warnings)
4966            .collect();
4967        assert!(
4968            !config_warnings.is_empty(),
4969            "config file should produce warnings without excludes"
4970        );
4971
4972        // With excludes: config file is skipped
4973        let with_exclude = LintConfig {
4974            exclude_patterns: vec!["config/**".to_string()],
4975            ..Default::default()
4976        };
4977        let results = lint_yaml_directory_with_config(tmp.path(), &with_exclude).unwrap();
4978        let config_results: Vec<_> = results
4979            .iter()
4980            .filter(|r| r.path.to_string_lossy().contains("config"))
4981            .collect();
4982        assert!(config_results.is_empty(), "config file should be excluded");
4983
4984        // The valid rule should still be linted
4985        let rule_results: Vec<_> = results
4986            .iter()
4987            .filter(|r| r.path.to_string_lossy().contains("good.yml"))
4988            .collect();
4989        assert_eq!(rule_results.len(), 1);
4990    }
4991}