Skip to main content

rsigma_parser/
lint.rs

1//! Built-in linter for Sigma rules, correlations, and filters.
2//!
3//! Validates raw `serde_yaml::Value` documents against the Sigma specification
4//! v2.1.0 constraints — catching metadata issues that the parser silently
5//! ignores (invalid enums, date formats, tag patterns, etc.).
6//!
7//! # Usage
8//!
9//! ```rust
10//! use rsigma_parser::lint::{lint_yaml_value, Severity};
11//!
12//! let yaml = "title: Test\nlogsource:\n  category: test\ndetection:\n  sel:\n    field: value\n  condition: sel\n";
13//! let value: serde_yaml::Value = serde_yaml::from_str(yaml).unwrap();
14//! let warnings = lint_yaml_value(&value);
15//! for w in &warnings {
16//!     if w.severity == Severity::Error {
17//!         eprintln!("{}", w.message);
18//!     }
19//! }
20//! ```
21
22use std::collections::{HashMap, HashSet};
23use std::fmt;
24use std::path::Path;
25use std::sync::LazyLock;
26
27use serde::{Deserialize, Serialize};
28use serde_yaml::Value;
29
30// =============================================================================
31// Public types
32// =============================================================================
33
34/// Severity of a lint finding.
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
36pub enum Severity {
37    /// Spec violation — the rule is invalid.
38    Error,
39    /// Best-practice issue — the rule works but is not spec-ideal.
40    Warning,
41    /// Informational suggestion — soft best-practice hint (e.g. missing author).
42    Info,
43    /// Subtle hint — lowest severity, for stylistic suggestions.
44    Hint,
45}
46
47impl fmt::Display for Severity {
48    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49        match self {
50            Severity::Error => write!(f, "error"),
51            Severity::Warning => write!(f, "warning"),
52            Severity::Info => write!(f, "info"),
53            Severity::Hint => write!(f, "hint"),
54        }
55    }
56}
57
58/// Identifies which lint rule fired.
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
60pub enum LintRule {
61    // ── Infrastructure / parse errors ────────────────────────────────────
62    YamlParseError,
63    NotAMapping,
64    FileReadError,
65    SchemaViolation,
66
67    // ── Shared (all document types) ──────────────────────────────────────
68    MissingTitle,
69    EmptyTitle,
70    TitleTooLong,
71    MissingDescription,
72    MissingAuthor,
73    InvalidId,
74    InvalidStatus,
75    MissingLevel,
76    InvalidLevel,
77    InvalidDate,
78    InvalidModified,
79    ModifiedBeforeDate,
80    DescriptionTooLong,
81    NameTooLong,
82    TaxonomyTooLong,
83    NonLowercaseKey,
84
85    // ── Detection rules ──────────────────────────────────────────────────
86    MissingLogsource,
87    MissingDetection,
88    MissingCondition,
89    EmptyDetection,
90    InvalidRelatedType,
91    InvalidRelatedId,
92    RelatedMissingRequired,
93    DeprecatedWithoutRelated,
94    InvalidTag,
95    UnknownTagNamespace,
96    DuplicateTags,
97    DuplicateReferences,
98    DuplicateFields,
99    FalsepositiveTooShort,
100    ScopeTooShort,
101    LogsourceValueNotLowercase,
102    ConditionReferencesUnknown,
103
104    // ── Correlation rules ────────────────────────────────────────────────
105    MissingCorrelation,
106    MissingCorrelationType,
107    InvalidCorrelationType,
108    MissingCorrelationRules,
109    EmptyCorrelationRules,
110    MissingCorrelationTimespan,
111    InvalidTimespanFormat,
112    MissingGroupBy,
113    MissingCorrelationCondition,
114    MissingConditionField,
115    InvalidConditionOperator,
116    ConditionValueNotNumeric,
117    GenerateNotBoolean,
118
119    // ── Filter rules ─────────────────────────────────────────────────────
120    MissingFilter,
121    MissingFilterRules,
122    EmptyFilterRules,
123    MissingFilterSelection,
124    MissingFilterCondition,
125    FilterHasLevel,
126    FilterHasStatus,
127    MissingFilterLogsource,
128
129    // ── Detection logic (cross-cutting) ──────────────────────────────────
130    NullInValueList,
131    SingleValueAllModifier,
132    AllWithRe,
133    IncompatibleModifiers,
134    EmptyValueList,
135    WildcardOnlyValue,
136    UnknownKey,
137}
138
139impl fmt::Display for LintRule {
140    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
141        let s = match self {
142            LintRule::YamlParseError => "yaml_parse_error",
143            LintRule::NotAMapping => "not_a_mapping",
144            LintRule::FileReadError => "file_read_error",
145            LintRule::SchemaViolation => "schema_violation",
146            LintRule::MissingTitle => "missing_title",
147            LintRule::EmptyTitle => "empty_title",
148            LintRule::TitleTooLong => "title_too_long",
149            LintRule::MissingDescription => "missing_description",
150            LintRule::MissingAuthor => "missing_author",
151            LintRule::InvalidId => "invalid_id",
152            LintRule::InvalidStatus => "invalid_status",
153            LintRule::MissingLevel => "missing_level",
154            LintRule::InvalidLevel => "invalid_level",
155            LintRule::InvalidDate => "invalid_date",
156            LintRule::InvalidModified => "invalid_modified",
157            LintRule::ModifiedBeforeDate => "modified_before_date",
158            LintRule::DescriptionTooLong => "description_too_long",
159            LintRule::NameTooLong => "name_too_long",
160            LintRule::TaxonomyTooLong => "taxonomy_too_long",
161            LintRule::NonLowercaseKey => "non_lowercase_key",
162            LintRule::MissingLogsource => "missing_logsource",
163            LintRule::MissingDetection => "missing_detection",
164            LintRule::MissingCondition => "missing_condition",
165            LintRule::EmptyDetection => "empty_detection",
166            LintRule::InvalidRelatedType => "invalid_related_type",
167            LintRule::InvalidRelatedId => "invalid_related_id",
168            LintRule::RelatedMissingRequired => "related_missing_required",
169            LintRule::DeprecatedWithoutRelated => "deprecated_without_related",
170            LintRule::InvalidTag => "invalid_tag",
171            LintRule::UnknownTagNamespace => "unknown_tag_namespace",
172            LintRule::DuplicateTags => "duplicate_tags",
173            LintRule::DuplicateReferences => "duplicate_references",
174            LintRule::DuplicateFields => "duplicate_fields",
175            LintRule::FalsepositiveTooShort => "falsepositive_too_short",
176            LintRule::ScopeTooShort => "scope_too_short",
177            LintRule::LogsourceValueNotLowercase => "logsource_value_not_lowercase",
178            LintRule::ConditionReferencesUnknown => "condition_references_unknown",
179            LintRule::MissingCorrelation => "missing_correlation",
180            LintRule::MissingCorrelationType => "missing_correlation_type",
181            LintRule::InvalidCorrelationType => "invalid_correlation_type",
182            LintRule::MissingCorrelationRules => "missing_correlation_rules",
183            LintRule::EmptyCorrelationRules => "empty_correlation_rules",
184            LintRule::MissingCorrelationTimespan => "missing_correlation_timespan",
185            LintRule::InvalidTimespanFormat => "invalid_timespan_format",
186            LintRule::MissingGroupBy => "missing_group_by",
187            LintRule::MissingCorrelationCondition => "missing_correlation_condition",
188            LintRule::MissingConditionField => "missing_condition_field",
189            LintRule::InvalidConditionOperator => "invalid_condition_operator",
190            LintRule::ConditionValueNotNumeric => "condition_value_not_numeric",
191            LintRule::GenerateNotBoolean => "generate_not_boolean",
192            LintRule::MissingFilter => "missing_filter",
193            LintRule::MissingFilterRules => "missing_filter_rules",
194            LintRule::EmptyFilterRules => "empty_filter_rules",
195            LintRule::MissingFilterSelection => "missing_filter_selection",
196            LintRule::MissingFilterCondition => "missing_filter_condition",
197            LintRule::FilterHasLevel => "filter_has_level",
198            LintRule::FilterHasStatus => "filter_has_status",
199            LintRule::MissingFilterLogsource => "missing_filter_logsource",
200            LintRule::NullInValueList => "null_in_value_list",
201            LintRule::SingleValueAllModifier => "single_value_all_modifier",
202            LintRule::AllWithRe => "all_with_re",
203            LintRule::IncompatibleModifiers => "incompatible_modifiers",
204            LintRule::EmptyValueList => "empty_value_list",
205            LintRule::WildcardOnlyValue => "wildcard_only_value",
206            LintRule::UnknownKey => "unknown_key",
207        };
208        write!(f, "{s}")
209    }
210}
211
212/// A source span (line/column, both 0-indexed).
213///
214/// Used by the LSP layer to avoid re-resolving JSON-pointer paths to
215/// source positions. When the lint is produced from raw `serde_yaml::Value`
216/// (which has no source positions), `span` will be `None`.
217#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
218pub struct Span {
219    /// 0-indexed start line.
220    pub start_line: u32,
221    /// 0-indexed start column.
222    pub start_col: u32,
223    /// 0-indexed end line.
224    pub end_line: u32,
225    /// 0-indexed end column.
226    pub end_col: u32,
227}
228
229// =============================================================================
230// Auto-fix types
231// =============================================================================
232
233/// Whether a fix is safe to apply automatically or needs manual review.
234#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
235pub enum FixDisposition {
236    /// No semantic change — safe to apply without review.
237    Safe,
238    /// May change meaning — should be reviewed before applying.
239    Unsafe,
240}
241
242/// A single patch operation within a [`Fix`].
243///
244/// Each variant describes a format-preserving edit to a YAML document.
245/// Paths are JSON-pointer-style strings (e.g. `"/status"`, `"/tags/2"`)
246/// matching the `LintWarning::path` convention.
247///
248/// These are intentionally yamlpath/yamlpatch-agnostic so that
249/// `rsigma-parser` carries no dependency on those crates. The consumer
250/// (CLI or LSP) converts these to concrete patch operations at apply time.
251#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
252pub enum FixPatch {
253    /// Replace the value at `path` with `new_value`.
254    ReplaceValue { path: String, new_value: String },
255    /// Rename the YAML key targeted by `path`.
256    ReplaceKey { path: String, new_key: String },
257    /// Remove the node at `path` entirely.
258    Remove { path: String },
259}
260
261/// A suggested fix for a lint finding.
262///
263/// Attached to a [`LintWarning`] when the issue can be corrected
264/// automatically. Contains one or more [`FixPatch`] operations that,
265/// applied sequentially, resolve the finding.
266#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
267pub struct Fix {
268    /// Short human-readable description (e.g. "rename 'Status' to 'status'").
269    pub title: String,
270    /// Whether the fix is safe to apply without review.
271    pub disposition: FixDisposition,
272    /// Ordered patch operations to apply.
273    pub patches: Vec<FixPatch>,
274}
275
276/// A single lint finding.
277#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
278pub struct LintWarning {
279    /// Which lint rule fired.
280    pub rule: LintRule,
281    /// Error or warning.
282    pub severity: Severity,
283    /// Human-readable message.
284    pub message: String,
285    /// JSON-pointer-style location, e.g. `"/status"`, `"/tags/2"`.
286    pub path: String,
287    /// Optional source span. `None` when linting `serde_yaml::Value` (no
288    /// source positions available). Populated by `lint_yaml_str` which
289    /// can resolve paths against the raw text.
290    pub span: Option<Span>,
291    /// Optional auto-fix. `None` when the finding cannot be corrected
292    /// automatically.
293    pub fix: Option<Fix>,
294}
295
296impl fmt::Display for LintWarning {
297    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
298        write!(
299            f,
300            "{}[{}]: {}\n    --> {}",
301            self.severity, self.rule, self.message, self.path
302        )
303    }
304}
305
306/// Result of linting a single file (may contain multiple YAML documents).
307#[derive(Debug, Clone, Serialize)]
308pub struct FileLintResult {
309    pub path: std::path::PathBuf,
310    pub warnings: Vec<LintWarning>,
311}
312
313impl FileLintResult {
314    pub fn has_errors(&self) -> bool {
315        self.warnings.iter().any(|w| w.severity == Severity::Error)
316    }
317
318    pub fn error_count(&self) -> usize {
319        self.warnings
320            .iter()
321            .filter(|w| w.severity == Severity::Error)
322            .count()
323    }
324
325    pub fn warning_count(&self) -> usize {
326        self.warnings
327            .iter()
328            .filter(|w| w.severity == Severity::Warning)
329            .count()
330    }
331
332    pub fn info_count(&self) -> usize {
333        self.warnings
334            .iter()
335            .filter(|w| w.severity == Severity::Info)
336            .count()
337    }
338
339    pub fn hint_count(&self) -> usize {
340        self.warnings
341            .iter()
342            .filter(|w| w.severity == Severity::Hint)
343            .count()
344    }
345}
346
347// =============================================================================
348// Helpers
349// =============================================================================
350
351/// Pre-cached `Value::String` keys to avoid per-call allocations when
352/// looking up fields in `serde_yaml::Mapping`.
353static KEY_CACHE: LazyLock<HashMap<&'static str, Value>> = LazyLock::new(|| {
354    [
355        "action",
356        "author",
357        "category",
358        "condition",
359        "correlation",
360        "date",
361        "description",
362        "detection",
363        "falsepositives",
364        "field",
365        "fields",
366        "filter",
367        "generate",
368        "group-by",
369        "id",
370        "level",
371        "logsource",
372        "modified",
373        "name",
374        "product",
375        "references",
376        "related",
377        "rules",
378        "scope",
379        "selection",
380        "service",
381        "status",
382        "tags",
383        "taxonomy",
384        "timeframe",
385        "timespan",
386        "title",
387        "type",
388    ]
389    .into_iter()
390    .map(|n| (n, Value::String(n.into())))
391    .collect()
392});
393
394fn key(s: &str) -> &'static Value {
395    KEY_CACHE
396        .get(s)
397        .unwrap_or_else(|| panic!("lint key not pre-cached: \"{s}\" — add it to KEY_CACHE"))
398}
399
400fn get_str<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a str> {
401    m.get(key(k)).and_then(|v| v.as_str())
402}
403
404fn get_mapping<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a serde_yaml::Mapping> {
405    m.get(key(k)).and_then(|v| v.as_mapping())
406}
407
408fn get_seq<'a>(m: &'a serde_yaml::Mapping, k: &str) -> Option<&'a serde_yaml::Sequence> {
409    m.get(key(k)).and_then(|v| v.as_sequence())
410}
411
412fn warn(
413    rule: LintRule,
414    severity: Severity,
415    message: impl Into<String>,
416    path: impl Into<String>,
417) -> LintWarning {
418    LintWarning {
419        rule,
420        severity,
421        message: message.into(),
422        path: path.into(),
423        span: None,
424        fix: None,
425    }
426}
427
428fn err(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
429    warn(rule, Severity::Error, message, path)
430}
431
432fn warning(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
433    warn(rule, Severity::Warning, message, path)
434}
435
436fn info(rule: LintRule, message: impl Into<String>, path: impl Into<String>) -> LintWarning {
437    warn(rule, Severity::Info, message, path)
438}
439
440fn safe_fix(title: impl Into<String>, patches: Vec<FixPatch>) -> Option<Fix> {
441    Some(Fix {
442        title: title.into(),
443        disposition: FixDisposition::Safe,
444        patches,
445    })
446}
447
448/// Find the closest match for `input` among `candidates` using edit distance.
449fn closest_match<'a>(input: &str, candidates: &[&'a str], max_distance: usize) -> Option<&'a str> {
450    candidates
451        .iter()
452        .filter(|c| edit_distance(input, c) <= max_distance)
453        .min_by_key(|c| edit_distance(input, c))
454        .copied()
455}
456
/// Returns `true` if `s` is a calendar date written exactly as
/// `YYYY-MM-DD`.
///
/// The string must be ten bytes long, with `-` at positions 4 and 7 and an
/// ASCII digit at every other position. The month must be 1–12 and the day
/// must exist in that month, with February 29 accepted only in leap years.
fn is_valid_date(s: &str) -> bool {
    let bytes = s.as_bytes();
    if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' {
        return false;
    }

    // Require digits in every non-separator position. Relying on
    // `str::parse` alone is not enough: it accepts a leading '+', so a
    // month or day such as "+1" would otherwise be treated as valid.
    let all_digits = bytes
        .iter()
        .enumerate()
        .all(|(pos, byte)| matches!(pos, 4 | 7) || byte.is_ascii_digit());
    if !all_digits {
        return false;
    }

    // The string is pure ASCII at this point, so byte-offset slicing is safe.
    let (Ok(year), Ok(month), Ok(day)) = (
        s[0..4].parse::<u16>(),
        s[5..7].parse::<u8>(),
        s[8..10].parse::<u8>(),
    ) else {
        return false;
    };

    let is_leap = (year.is_multiple_of(4) && !year.is_multiple_of(100)) || year.is_multiple_of(400);
    let max_day = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap => 29,
        2 => 28,
        // Month 0 and months above 12.
        _ => return false,
    };
    (1..=max_day).contains(&day)
}
488
489/// Extract a date string from a YAML value, handling serde_yaml auto-parsing.
490///
491/// `serde_yaml` sometimes deserialises `YYYY-MM-DD` as a tagged/non-string
492/// type. This helper coerces such values back to a trimmed string.
493fn extract_date_string(raw: &Value) -> Option<String> {
494    raw.as_str().map(|s| s.to_string()).or_else(|| {
495        serde_yaml::to_string(raw)
496            .ok()
497            .map(|s| s.trim().to_string())
498    })
499}
500
/// Returns `true` if `s` is a hyphenated UUID: five groups of hexadecimal
/// digits with lengths 8-4-4-4-12 (36 bytes in total). The UUID version is
/// not checked, and both upper- and lowercase hex digits are accepted.
fn is_valid_uuid(s: &str) -> bool {
    const GROUP_LENS: [usize; 5] = [8, 4, 4, 4, 12];

    if s.len() != 36 {
        return false;
    }

    let mut groups = s.split('-');
    for expected_len in GROUP_LENS {
        match groups.next() {
            Some(group)
                if group.len() == expected_len
                    && group.bytes().all(|b| b.is_ascii_hexdigit()) => {}
            _ => return false,
        }
    }
    // A sixth group means there were too many hyphens.
    groups.next().is_none()
}
516
/// Returns `true` if `s` is non-empty and consists only of ASCII lowercase
/// letters, ASCII digits, `_`, `.` and `-`.
fn is_valid_logsource_value(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    s.bytes()
        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'_' | b'.' | b'-'))
}
524
/// Tag namespaces that the spec defines.
const KNOWN_TAG_NAMESPACES: &[&str] =
    &["attack", "car", "cve", "d3fend", "detection", "stp", "tlp"];

/// Values accepted for the `status` field.
const VALID_STATUSES: &[&str] = &[
    "stable",
    "test",
    "experimental",
    "deprecated",
    "unsupported",
];

/// Values accepted for the `level` field.
const VALID_LEVELS: &[&str] = &["informational", "low", "medium", "high", "critical"];

/// Values accepted for the `type` of a `related` entry.
const VALID_RELATED_TYPES: &[&str] = &["derived", "obsolete", "merged", "renamed", "similar"];

/// Values accepted for a correlation's `type`.
const VALID_CORRELATION_TYPES: &[&str] = &[
    "event_count",
    "value_count",
    "temporal",
    "temporal_ordered",
    "value_sum",
    "value_avg",
    "value_percentile",
    "value_median",
];

/// Operators accepted inside a correlation condition.
const VALID_CONDITION_OPERATORS: &[&str] = &["gt", "gte", "lt", "lte", "eq", "neq"];

/// Correlation types for which a `condition` section is mandatory.
const TYPES_REQUIRING_CONDITION: &[&str] = &[
    "event_count",
    "value_count",
    "value_sum",
    "value_avg",
    "value_percentile",
];

/// Correlation types for which `condition.field` is mandatory.
const TYPES_REQUIRING_FIELD: &[&str] =
    &["value_count", "value_sum", "value_avg", "value_percentile"];

/// Top-level keys recognised in every kind of Sigma document.
const KNOWN_KEYS_SHARED: &[&str] = &[
    "title",
    "id",
    "name",
    "status",
    "description",
    "author",
    "date",
    "modified",
    "related",
    "taxonomy",
    "action",
    "license",
    "references",
    "tags",
];

/// Additional top-level keys recognised in detection rules.
const KNOWN_KEYS_DETECTION: &[&str] = &[
    "logsource",
    "detection",
    "fields",
    "falsepositives",
    "level",
    "scope",
];

/// Additional top-level keys recognised in correlation rules.
const KNOWN_KEYS_CORRELATION: &[&str] = &["correlation", "level", "generate"];

/// Additional top-level keys recognised in filter rules.
const KNOWN_KEYS_FILTER: &[&str] = &["logsource", "filter"];
605
/// Returns `true` if `s` matches the tag pattern
/// `^[a-z0-9_-]+\.[a-z0-9._-]+$`.
///
/// The text before the first `.` is the namespace; the remainder may
/// contain further dots.
fn is_valid_tag(s: &str) -> bool {
    let Some((namespace, remainder)) = s.split_once('.') else {
        return false;
    };

    // Characters allowed on both sides of the first dot.
    let is_base = |b: u8| b.is_ascii_lowercase() || b.is_ascii_digit() || b == b'_' || b == b'-';

    let namespace_ok = !namespace.is_empty() && namespace.bytes().all(is_base);
    let remainder_ok = !remainder.is_empty() && remainder.bytes().all(|b| b == b'.' || is_base(b));
    namespace_ok && remainder_ok
}
622
623// =============================================================================
624// Document type detection
625// =============================================================================
626
/// The kind of Sigma document being linted, as decided by
/// `detect_doc_type`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DocType {
    /// A detection rule; the default when no other kind is recognised.
    Detection,
    /// A document with a top-level `correlation` key.
    Correlation,
    /// A document with a top-level `filter` key.
    Filter,
}
633
634impl DocType {
635    fn known_keys(&self) -> &'static [&'static str] {
636        match self {
637            DocType::Detection => KNOWN_KEYS_DETECTION,
638            DocType::Correlation => KNOWN_KEYS_CORRELATION,
639            DocType::Filter => KNOWN_KEYS_FILTER,
640        }
641    }
642}
643
644fn detect_doc_type(m: &serde_yaml::Mapping) -> DocType {
645    if m.contains_key(key("correlation")) {
646        DocType::Correlation
647    } else if m.contains_key(key("filter")) {
648        DocType::Filter
649    } else {
650        DocType::Detection
651    }
652}
653
654/// Returns `true` if this document is a collection action fragment
655/// (`action: global`, `action: reset`, `action: repeat`) that should be
656/// skipped during linting.
657fn is_action_fragment(m: &serde_yaml::Mapping) -> bool {
658    matches!(get_str(m, "action"), Some("global" | "reset" | "repeat"))
659}
660
661// =============================================================================
662// Shared lint checks
663// =============================================================================
664
665fn lint_shared(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
666    // ── title ────────────────────────────────────────────────────────────
667    match get_str(m, "title") {
668        None => warnings.push(err(
669            LintRule::MissingTitle,
670            "missing required field 'title'",
671            "/title",
672        )),
673        Some(t) if t.trim().is_empty() => {
674            warnings.push(err(
675                LintRule::EmptyTitle,
676                "title must not be empty",
677                "/title",
678            ));
679        }
680        Some(t) if t.len() > 256 => {
681            warnings.push(warning(
682                LintRule::TitleTooLong,
683                format!("title is {} characters, maximum is 256", t.len()),
684                "/title",
685            ));
686        }
687        _ => {}
688    }
689
690    // ── id ───────────────────────────────────────────────────────────────
691    if let Some(id) = get_str(m, "id")
692        && !is_valid_uuid(id)
693    {
694        warnings.push(warning(
695            LintRule::InvalidId,
696            format!("id \"{id}\" is not a valid UUID"),
697            "/id",
698        ));
699    }
700
701    // ── status ───────────────────────────────────────────────────────────
702    if let Some(status) = get_str(m, "status")
703        && !VALID_STATUSES.contains(&status)
704    {
705        let fix = closest_match(status, VALID_STATUSES, 3).map(|closest| Fix {
706            title: format!("replace '{status}' with '{closest}'"),
707            disposition: FixDisposition::Safe,
708            patches: vec![FixPatch::ReplaceValue {
709                path: "/status".into(),
710                new_value: closest.into(),
711            }],
712        });
713        warnings.push(LintWarning {
714            rule: LintRule::InvalidStatus,
715            severity: Severity::Error,
716            message: format!(
717                "invalid status \"{status}\", expected one of: {}",
718                VALID_STATUSES.join(", ")
719            ),
720            path: "/status".into(),
721            span: None,
722            fix,
723        });
724    }
725
726    // ── level ────────────────────────────────────────────────────────────
727    if let Some(level) = get_str(m, "level")
728        && !VALID_LEVELS.contains(&level)
729    {
730        let fix = closest_match(level, VALID_LEVELS, 3).map(|closest| Fix {
731            title: format!("replace '{level}' with '{closest}'"),
732            disposition: FixDisposition::Safe,
733            patches: vec![FixPatch::ReplaceValue {
734                path: "/level".into(),
735                new_value: closest.into(),
736            }],
737        });
738        warnings.push(LintWarning {
739            rule: LintRule::InvalidLevel,
740            severity: Severity::Error,
741            message: format!(
742                "invalid level \"{level}\", expected one of: {}",
743                VALID_LEVELS.join(", ")
744            ),
745            path: "/level".into(),
746            span: None,
747            fix,
748        });
749    }
750
751    // ── date ─────────────────────────────────────────────────────────────
752    let date_string = m.get(key("date")).and_then(extract_date_string);
753    if let Some(d) = &date_string
754        && !is_valid_date(d)
755    {
756        warnings.push(err(
757            LintRule::InvalidDate,
758            format!("invalid date \"{d}\", expected YYYY-MM-DD"),
759            "/date",
760        ));
761    }
762
763    // ── modified ─────────────────────────────────────────────────────────
764    let modified_string = m.get(key("modified")).and_then(extract_date_string);
765    if let Some(d) = &modified_string
766        && !is_valid_date(d)
767    {
768        warnings.push(err(
769            LintRule::InvalidModified,
770            format!("invalid modified date \"{d}\", expected YYYY-MM-DD"),
771            "/modified",
772        ));
773    }
774
775    // ── modified >= date ─────────────────────────────────────────────────
776    if let (Some(date_val), Some(mod_val)) = (&date_string, &modified_string)
777        && is_valid_date(date_val)
778        && is_valid_date(mod_val)
779        && mod_val.as_str() < date_val.as_str()
780    {
781        warnings.push(warning(
782            LintRule::ModifiedBeforeDate,
783            format!("modified date \"{mod_val}\" is before creation date \"{date_val}\""),
784            "/modified",
785        ));
786    }
787
788    // ── description (missing) ──────────────────────────────────────────
789    if !m.contains_key(key("description")) {
790        warnings.push(info(
791            LintRule::MissingDescription,
792            "missing recommended field 'description'",
793            "/description",
794        ));
795    }
796
797    // ── author (missing) ─────────────────────────────────────────────
798    if !m.contains_key(key("author")) {
799        warnings.push(info(
800            LintRule::MissingAuthor,
801            "missing recommended field 'author'",
802            "/author",
803        ));
804    }
805
806    // ── description (too long) ───────────────────────────────────────
807    if let Some(desc) = get_str(m, "description")
808        && desc.len() > 65535
809    {
810        warnings.push(warning(
811            LintRule::DescriptionTooLong,
812            format!("description is {} characters, maximum is 65535", desc.len()),
813            "/description",
814        ));
815    }
816
817    // ── name ─────────────────────────────────────────────────────────────
818    if let Some(name) = get_str(m, "name")
819        && name.len() > 256
820    {
821        warnings.push(warning(
822            LintRule::NameTooLong,
823            format!("name is {} characters, maximum is 256", name.len()),
824            "/name",
825        ));
826    }
827
828    // ── taxonomy ─────────────────────────────────────────────────────────
829    if let Some(tax) = get_str(m, "taxonomy")
830        && tax.len() > 256
831    {
832        warnings.push(warning(
833            LintRule::TaxonomyTooLong,
834            format!("taxonomy is {} characters, maximum is 256", tax.len()),
835            "/taxonomy",
836        ));
837    }
838
839    // ── lowercase keys ───────────────────────────────────────────────────
840    for k in m.keys() {
841        if let Some(ks) = k.as_str()
842            && ks != ks.to_ascii_lowercase()
843        {
844            let lower = ks.to_ascii_lowercase();
845            let mut w = warning(
846                LintRule::NonLowercaseKey,
847                format!("key \"{ks}\" should be lowercase"),
848                format!("/{ks}"),
849            );
850            w.fix = safe_fix(
851                format!("rename '{ks}' to '{lower}'"),
852                vec![FixPatch::ReplaceKey {
853                    path: format!("/{ks}"),
854                    new_key: lower,
855                }],
856            );
857            warnings.push(w);
858        }
859    }
860}
861
862// =============================================================================
863// Detection rule lint checks
864// =============================================================================
865
866fn lint_detection_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
867    // ── level ─────────────────────────────────────────────────────────────
868    if !m.contains_key(key("level")) {
869        warnings.push(warning(
870            LintRule::MissingLevel,
871            "missing recommended field 'level'",
872            "/level",
873        ));
874    }
875
876    // ── logsource ────────────────────────────────────────────────────────
877    if !m.contains_key(key("logsource")) {
878        warnings.push(err(
879            LintRule::MissingLogsource,
880            "missing required field 'logsource'",
881            "/logsource",
882        ));
883    } else {
884        lint_logsource(m, warnings);
885    }
886
887    // ── detection ────────────────────────────────────────────────────────
888    if let Some(det_val) = m.get(key("detection")) {
889        if let Some(det) = det_val.as_mapping() {
890            // Collect detection identifier names (excluding condition/timeframe)
891            let det_keys: HashSet<&str> = det
892                .keys()
893                .filter_map(|k| k.as_str())
894                .filter(|k| *k != "condition" && *k != "timeframe")
895                .collect();
896
897            if !det.contains_key(key("condition")) {
898                warnings.push(err(
899                    LintRule::MissingCondition,
900                    "detection section is missing required 'condition'",
901                    "/detection/condition",
902                ));
903            } else if let Some(cond_str) = get_str(det, "condition") {
904                // Check that condition references existing identifiers
905                for ident in extract_condition_identifiers(cond_str) {
906                    if !det_keys.contains(ident.as_str()) {
907                        warnings.push(err(
908                            LintRule::ConditionReferencesUnknown,
909                            format!(
910                                "condition references '{ident}' but no such detection identifier exists"
911                            ),
912                            "/detection/condition",
913                        ));
914                    }
915                }
916            }
917
918            if det_keys.is_empty() {
919                warnings.push(warning(
920                    LintRule::EmptyDetection,
921                    "detection section has no named search identifiers",
922                    "/detection",
923                ));
924            }
925
926            // Detection logic checks
927            lint_detection_logic(det, warnings);
928        }
929    } else {
930        warnings.push(err(
931            LintRule::MissingDetection,
932            "missing required field 'detection'",
933            "/detection",
934        ));
935    }
936
937    // ── related ──────────────────────────────────────────────────────────
938    if let Some(related) = get_seq(m, "related") {
939        for (i, item) in related.iter().enumerate() {
940            let path_prefix = format!("/related/{i}");
941            if let Some(item_map) = item.as_mapping() {
942                let has_id = item_map.contains_key(key("id"));
943                let has_type = item_map.contains_key(key("type"));
944
945                if !has_id || !has_type {
946                    warnings.push(err(
947                        LintRule::RelatedMissingRequired,
948                        "related entry must have both 'id' and 'type'",
949                        &path_prefix,
950                    ));
951                }
952
953                if let Some(id) = get_str(item_map, "id")
954                    && !is_valid_uuid(id)
955                {
956                    warnings.push(warning(
957                        LintRule::InvalidRelatedId,
958                        format!("related id \"{id}\" is not a valid UUID"),
959                        format!("{path_prefix}/id"),
960                    ));
961                }
962
963                if let Some(type_val) = get_str(item_map, "type")
964                    && !VALID_RELATED_TYPES.contains(&type_val)
965                {
966                    warnings.push(err(
967                        LintRule::InvalidRelatedType,
968                        format!(
969                            "invalid related type \"{type_val}\", expected one of: {}",
970                            VALID_RELATED_TYPES.join(", ")
971                        ),
972                        format!("{path_prefix}/type"),
973                    ));
974                }
975            }
976        }
977    }
978
979    // ── deprecated + related consistency ─────────────────────────────────
980    if get_str(m, "status") == Some("deprecated") {
981        let has_related = get_seq(m, "related")
982            .map(|seq| !seq.is_empty())
983            .unwrap_or(false);
984        if !has_related {
985            warnings.push(warning(
986                LintRule::DeprecatedWithoutRelated,
987                "deprecated rule should have a 'related' entry linking to its replacement",
988                "/status",
989            ));
990        }
991    }
992
993    // ── tags ─────────────────────────────────────────────────────────────
994    if let Some(tags) = get_seq(m, "tags") {
995        let mut seen_tags: HashSet<String> = HashSet::new();
996        for (i, tag_val) in tags.iter().enumerate() {
997            if let Some(tag) = tag_val.as_str() {
998                if !is_valid_tag(tag) {
999                    warnings.push(warning(
1000                        LintRule::InvalidTag,
1001                        format!(
1002                            "tag \"{tag}\" does not match required pattern (lowercase, dotted namespace)"
1003                        ),
1004                        format!("/tags/{i}"),
1005                    ));
1006                } else {
1007                    // Check known namespace
1008                    if let Some(ns) = tag.split('.').next()
1009                        && !KNOWN_TAG_NAMESPACES.contains(&ns)
1010                    {
1011                        warnings.push(warning(
1012                            LintRule::UnknownTagNamespace,
1013                            format!(
1014                                "unknown tag namespace \"{ns}\", known namespaces: {}",
1015                                KNOWN_TAG_NAMESPACES.join(", ")
1016                            ),
1017                            format!("/tags/{i}"),
1018                        ));
1019                    }
1020                }
1021
1022                if !seen_tags.insert(tag.to_string()) {
1023                    let mut w = warning(
1024                        LintRule::DuplicateTags,
1025                        format!("duplicate tag \"{tag}\""),
1026                        format!("/tags/{i}"),
1027                    );
1028                    w.fix = safe_fix(
1029                        format!("remove duplicate tag '{tag}'"),
1030                        vec![FixPatch::Remove {
1031                            path: format!("/tags/{i}"),
1032                        }],
1033                    );
1034                    warnings.push(w);
1035                }
1036            }
1037        }
1038    }
1039
1040    // ── references (unique) ──────────────────────────────────────────────
1041    if let Some(refs) = get_seq(m, "references") {
1042        let mut seen: HashSet<String> = HashSet::new();
1043        for (i, r) in refs.iter().enumerate() {
1044            if let Some(s) = r.as_str()
1045                && !seen.insert(s.to_string())
1046            {
1047                let mut w = warning(
1048                    LintRule::DuplicateReferences,
1049                    format!("duplicate reference \"{s}\""),
1050                    format!("/references/{i}"),
1051                );
1052                w.fix = safe_fix(
1053                    "remove duplicate reference",
1054                    vec![FixPatch::Remove {
1055                        path: format!("/references/{i}"),
1056                    }],
1057                );
1058                warnings.push(w);
1059            }
1060        }
1061    }
1062
1063    // ── fields (unique) ──────────────────────────────────────────────────
1064    if let Some(fields) = get_seq(m, "fields") {
1065        let mut seen: HashSet<String> = HashSet::new();
1066        for (i, f) in fields.iter().enumerate() {
1067            if let Some(s) = f.as_str()
1068                && !seen.insert(s.to_string())
1069            {
1070                let mut w = warning(
1071                    LintRule::DuplicateFields,
1072                    format!("duplicate field \"{s}\""),
1073                    format!("/fields/{i}"),
1074                );
1075                w.fix = safe_fix(
1076                    "remove duplicate field",
1077                    vec![FixPatch::Remove {
1078                        path: format!("/fields/{i}"),
1079                    }],
1080                );
1081                warnings.push(w);
1082            }
1083        }
1084    }
1085
1086    // ── falsepositives (minLength 2) ─────────────────────────────────────
1087    if let Some(fps) = get_seq(m, "falsepositives") {
1088        for (i, fp) in fps.iter().enumerate() {
1089            if let Some(s) = fp.as_str()
1090                && s.len() < 2
1091            {
1092                warnings.push(warning(
1093                    LintRule::FalsepositiveTooShort,
1094                    format!("falsepositive entry \"{s}\" must be at least 2 characters"),
1095                    format!("/falsepositives/{i}"),
1096                ));
1097            }
1098        }
1099    }
1100
1101    // ── scope (minLength 2) ──────────────────────────────────────────────
1102    if let Some(scope) = get_seq(m, "scope") {
1103        for (i, s_val) in scope.iter().enumerate() {
1104            if let Some(s) = s_val.as_str()
1105                && s.len() < 2
1106            {
1107                warnings.push(warning(
1108                    LintRule::ScopeTooShort,
1109                    format!("scope entry \"{s}\" must be at least 2 characters"),
1110                    format!("/scope/{i}"),
1111                ));
1112            }
1113        }
1114    }
1115}
1116
1117fn lint_logsource(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1118    if let Some(ls) = get_mapping(m, "logsource") {
1119        for field in &["category", "product", "service"] {
1120            if let Some(val) = get_str(ls, field)
1121                && !is_valid_logsource_value(val)
1122            {
1123                let lower = val.to_ascii_lowercase();
1124                let mut w = warning(
1125                    LintRule::LogsourceValueNotLowercase,
1126                    format!("logsource {field} \"{val}\" should be lowercase (a-z, 0-9, _, ., -)"),
1127                    format!("/logsource/{field}"),
1128                );
1129                w.fix = safe_fix(
1130                    format!("lowercase '{val}' to '{lower}'"),
1131                    vec![FixPatch::ReplaceValue {
1132                        path: format!("/logsource/{field}"),
1133                        new_value: lower,
1134                    }],
1135                );
1136                warnings.push(w);
1137            }
1138        }
1139    }
1140}
1141
/// Collects the plain identifiers named in a condition expression.
///
/// The expression is tokenised on every character that is not alphanumeric,
/// `_` or `*`. Tokens that are condition keywords, consist solely of ASCII
/// digits, or contain a `*` wildcard are dropped; the remaining tokens are
/// returned in order of appearance (duplicates included) so the caller can
/// check that each one exists in the detection section.
fn extract_condition_identifiers(condition: &str) -> Vec<String> {
    const KEYWORDS: &[&str] = &["and", "or", "not", "of", "all", "them"];

    let is_separator = |c: char| !(c.is_alphanumeric() || c == '_' || c == '*');

    let mut identifiers = Vec::new();
    for token in condition.split(is_separator) {
        let is_number = token.chars().all(|c| c.is_ascii_digit());
        let skip = token.is_empty()
            || KEYWORDS.contains(&token)
            || is_number
            || token.contains('*');
        if !skip {
            identifiers.push(token.to_owned());
        }
    }
    identifiers
}
1155
1156/// Checks detection logic: null in value lists, single-value |all, empty value lists.
1157fn lint_detection_logic(det: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1158    for (det_key, det_val) in det {
1159        let det_key_str = det_key.as_str().unwrap_or("");
1160        if det_key_str == "condition" || det_key_str == "timeframe" {
1161            continue;
1162        }
1163
1164        lint_detection_value(det_val, det_key_str, warnings);
1165    }
1166}
1167
1168fn lint_detection_value(value: &Value, det_name: &str, warnings: &mut Vec<LintWarning>) {
1169    match value {
1170        Value::Mapping(m) => {
1171            for (field_key, field_val) in m {
1172                let field_key_str = field_key.as_str().unwrap_or("");
1173
1174                // Check |all combined with |re (regex alternation makes |all misleading)
1175                if field_key_str.contains("|all") && field_key_str.contains("|re") {
1176                    let new_key = field_key_str.replace("|all", "");
1177                    let mut w = warning(
1178                        LintRule::AllWithRe,
1179                        format!(
1180                            "'{field_key_str}' in '{det_name}' combines |all with |re; \
1181                             regex alternation (|) already handles multi-match — \
1182                             |all is redundant or misleading here"
1183                        ),
1184                        format!("/detection/{det_name}/{field_key_str}"),
1185                    );
1186                    w.fix = safe_fix(
1187                        format!("remove |all from '{field_key_str}'"),
1188                        vec![FixPatch::ReplaceKey {
1189                            path: format!("/detection/{det_name}/{field_key_str}"),
1190                            new_key,
1191                        }],
1192                    );
1193                    warnings.push(w);
1194                }
1195
1196                // Check |all with single value
1197                if field_key_str.contains("|all") {
1198                    let needs_fix = if let Value::Sequence(seq) = field_val {
1199                        seq.len() <= 1
1200                    } else {
1201                        true
1202                    };
1203                    if needs_fix {
1204                        let new_key = field_key_str.replace("|all", "");
1205                        let count = if let Value::Sequence(seq) = field_val {
1206                            seq.len().to_string()
1207                        } else {
1208                            "a single".into()
1209                        };
1210                        let mut w = warning(
1211                            LintRule::SingleValueAllModifier,
1212                            format!(
1213                                "'{field_key_str}' in '{det_name}' uses |all modifier with {count} value(s); |all requires multiple values"
1214                            ),
1215                            format!("/detection/{det_name}/{field_key_str}"),
1216                        );
1217                        w.fix = safe_fix(
1218                            format!("remove |all from '{field_key_str}'"),
1219                            vec![FixPatch::ReplaceKey {
1220                                path: format!("/detection/{det_name}/{field_key_str}"),
1221                                new_key,
1222                            }],
1223                        );
1224                        warnings.push(w);
1225                    }
1226                }
1227
1228                // Check for incompatible modifier combinations
1229                if let Some(msg) = check_modifier_compatibility(field_key_str) {
1230                    warnings.push(warning(
1231                        LintRule::IncompatibleModifiers,
1232                        format!("'{field_key_str}' in '{det_name}': {msg}"),
1233                        format!("/detection/{det_name}/{field_key_str}"),
1234                    ));
1235                }
1236
1237                // Check null in value list and empty value list
1238                if let Value::Sequence(seq) = field_val {
1239                    if seq.is_empty() {
1240                        warnings.push(warning(
1241                            LintRule::EmptyValueList,
1242                            format!("'{field_key_str}' in '{det_name}' has an empty value list"),
1243                            format!("/detection/{det_name}/{field_key_str}"),
1244                        ));
1245                    } else {
1246                        let has_null = seq.iter().any(|v| v.is_null());
1247                        let has_non_null = seq.iter().any(|v| !v.is_null());
1248                        if has_null && has_non_null {
1249                            warnings.push(warning(
1250                                LintRule::NullInValueList,
1251                                format!(
1252                                    "'{field_key_str}' in '{det_name}' mixes null with other values; null should be in its own selection"
1253                                ),
1254                                format!("/detection/{det_name}/{field_key_str}"),
1255                            ));
1256                        }
1257                    }
1258                }
1259
1260                // Check wildcard-only value: field: '*' usually means field|exists
1261                let base_field = field_key_str.split('|').next().unwrap_or(field_key_str);
1262                let is_wildcard_only = match field_val {
1263                    Value::String(s) => s == "*",
1264                    Value::Sequence(seq) => seq.len() == 1 && seq[0].as_str() == Some("*"),
1265                    _ => false,
1266                };
1267                if is_wildcard_only && !field_key_str.contains("|re") {
1268                    let new_key = format!("{base_field}|exists");
1269                    let mut w = warning(
1270                        LintRule::WildcardOnlyValue,
1271                        format!(
1272                            "'{field_key_str}' in '{det_name}' uses a lone wildcard '*'; \
1273                             consider '{base_field}|exists: true' instead"
1274                        ),
1275                        format!("/detection/{det_name}/{field_key_str}"),
1276                    );
1277                    w.fix = safe_fix(
1278                        format!("replace with '{new_key}: true'"),
1279                        vec![
1280                            FixPatch::ReplaceKey {
1281                                path: format!("/detection/{det_name}/{field_key_str}"),
1282                                new_key,
1283                            },
1284                            FixPatch::ReplaceValue {
1285                                path: format!("/detection/{det_name}/{base_field}|exists"),
1286                                new_value: "true".into(),
1287                            },
1288                        ],
1289                    );
1290                    warnings.push(w);
1291                }
1292            }
1293        }
1294        Value::Sequence(seq) => {
1295            // List of maps (OR-linked) or keyword list
1296            for item in seq {
1297                if item.is_mapping() {
1298                    lint_detection_value(item, det_name, warnings);
1299                }
1300            }
1301        }
1302        _ => {}
1303    }
1304}
1305
1306// =============================================================================
1307// Correlation rule lint checks
1308// =============================================================================
1309
1310fn lint_correlation_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1311    let Some(corr_val) = m.get(key("correlation")) else {
1312        warnings.push(err(
1313            LintRule::MissingCorrelation,
1314            "missing required field 'correlation'",
1315            "/correlation",
1316        ));
1317        return;
1318    };
1319
1320    let Some(corr) = corr_val.as_mapping() else {
1321        warnings.push(err(
1322            LintRule::MissingCorrelation,
1323            "'correlation' must be a mapping",
1324            "/correlation",
1325        ));
1326        return;
1327    };
1328
1329    // ── type ─────────────────────────────────────────────────────────────
1330    let corr_type = get_str(corr, "type");
1331    match corr_type {
1332        None => {
1333            warnings.push(err(
1334                LintRule::MissingCorrelationType,
1335                "missing required field 'correlation.type'",
1336                "/correlation/type",
1337            ));
1338        }
1339        Some(t) if !VALID_CORRELATION_TYPES.contains(&t) => {
1340            warnings.push(err(
1341                LintRule::InvalidCorrelationType,
1342                format!(
1343                    "invalid correlation type \"{t}\", expected one of: {}",
1344                    VALID_CORRELATION_TYPES.join(", ")
1345                ),
1346                "/correlation/type",
1347            ));
1348        }
1349        _ => {}
1350    }
1351
1352    // ── rules ────────────────────────────────────────────────────────────
1353    if let Some(rules) = corr.get(key("rules")) {
1354        if let Some(seq) = rules.as_sequence()
1355            && seq.is_empty()
1356        {
1357            warnings.push(warning(
1358                LintRule::EmptyCorrelationRules,
1359                "correlation.rules should not be empty",
1360                "/correlation/rules",
1361            ));
1362        }
1363    } else {
1364        warnings.push(err(
1365            LintRule::MissingCorrelationRules,
1366            "missing required field 'correlation.rules'",
1367            "/correlation/rules",
1368        ));
1369    }
1370
1371    // ── timespan ─────────────────────────────────────────────────────────
1372    if let Some(ts) = get_str(corr, "timespan").or_else(|| get_str(corr, "timeframe")) {
1373        if !is_valid_timespan(ts) {
1374            warnings.push(err(
1375                LintRule::InvalidTimespanFormat,
1376                format!(
1377                    "invalid timespan \"{ts}\", expected format like 5m, 1h, 30s, 7d, 1w, 1M, 1y"
1378                ),
1379                "/correlation/timespan",
1380            ));
1381        }
1382    } else {
1383        warnings.push(err(
1384            LintRule::MissingCorrelationTimespan,
1385            "missing required field 'correlation.timespan'",
1386            "/correlation/timespan",
1387        ));
1388    }
1389
1390    // ── Conditional requirements per correlation type ─────────────────────
1391    if let Some(ct) = corr_type {
1392        // group-by is required for all correlation types
1393        if !corr.contains_key(key("group-by")) {
1394            warnings.push(err(
1395                LintRule::MissingGroupBy,
1396                format!("{ct} correlation requires 'group-by'"),
1397                "/correlation/group-by",
1398            ));
1399        }
1400
1401        // condition required for non-temporal types
1402        if TYPES_REQUIRING_CONDITION.contains(&ct) {
1403            if let Some(cond_val) = corr.get(key("condition")) {
1404                if let Some(cond_map) = cond_val.as_mapping() {
1405                    lint_correlation_condition(cond_map, ct, warnings);
1406                }
1407            } else {
1408                warnings.push(err(
1409                    LintRule::MissingCorrelationCondition,
1410                    format!("{ct} correlation requires a 'condition'"),
1411                    "/correlation/condition",
1412                ));
1413            }
1414        }
1415    }
1416
1417    // ── generate ─────────────────────────────────────────────────────────
1418    if let Some(gen_val) = corr.get(key("generate"))
1419        && !gen_val.is_bool()
1420    {
1421        warnings.push(err(
1422            LintRule::GenerateNotBoolean,
1423            "'generate' must be a boolean (true/false)",
1424            "/correlation/generate",
1425        ));
1426    }
1427}
1428
1429fn lint_correlation_condition(
1430    cond: &serde_yaml::Mapping,
1431    corr_type: &str,
1432    warnings: &mut Vec<LintWarning>,
1433) {
1434    // Check condition.field requirement
1435    if TYPES_REQUIRING_FIELD.contains(&corr_type) && !cond.contains_key(key("field")) {
1436        warnings.push(err(
1437            LintRule::MissingConditionField,
1438            format!("{corr_type} correlation condition requires 'field'"),
1439            "/correlation/condition/field",
1440        ));
1441    }
1442
1443    // Validate operator keys and numeric values
1444    for (k, v) in cond {
1445        let ks = k.as_str().unwrap_or("");
1446        if ks == "field" {
1447            continue;
1448        }
1449        if !VALID_CONDITION_OPERATORS.contains(&ks) {
1450            warnings.push(err(
1451                LintRule::InvalidConditionOperator,
1452                format!(
1453                    "invalid condition operator \"{ks}\", expected one of: {}",
1454                    VALID_CONDITION_OPERATORS.join(", ")
1455                ),
1456                format!("/correlation/condition/{ks}"),
1457            ));
1458        } else if !v.is_i64() && !v.is_u64() && !v.is_f64() {
1459            warnings.push(err(
1460                LintRule::ConditionValueNotNumeric,
1461                format!("condition operator '{ks}' requires a numeric value"),
1462                format!("/correlation/condition/{ks}"),
1463            ));
1464        }
1465    }
1466}
1467
/// Inspects the modifiers of a `field|mod1|mod2…` key and reports the first
/// invalid combination found, or `None` when the key has no modifiers or the
/// combination is acceptable.
///
/// Modifier groups considered:
/// - **String match**: contains, startswith, endswith (at most one)
/// - **Pattern match**: re, cidr (cannot be mixed with string-match modifiers)
/// - **Numeric comparison**: gt, gte, lt, lte, neq (cannot be mixed with
///   string-match or pattern modifiers)
/// - **Existence**: exists (may only be accompanied by all/cased)
/// - **Regex flags**: i, ignorecase, m, multiline, s, dotall (require re)
///
/// The checks run in the order listed above and the first violation wins.
fn check_modifier_compatibility(field_key: &str) -> Option<String> {
    const STRING_MATCH: [&str; 3] = ["contains", "startswith", "endswith"];
    const PATTERN_MATCH: [&str; 2] = ["re", "cidr"];
    const NUMERIC_COMPARE: [&str; 5] = ["gt", "gte", "lt", "lte", "neq"];
    const REGEX_FLAGS: [&str; 6] = ["i", "ignorecase", "m", "multiline", "s", "dotall"];

    // Everything after the first `|` is the modifier chain; no `|` means
    // there is nothing to check.
    let (_, modifier_chain) = field_key.split_once('|')?;
    let modifiers: Vec<&str> = modifier_chain.split('|').collect();

    // Classify every modifier in a single pass.
    let mut string_count = 0usize;
    let mut patterns: Vec<&str> = Vec::new();
    let mut has_numeric = false;
    let mut has_exists = false;
    let mut has_re = false;
    let mut has_regex_flags = false;
    for &modifier in &modifiers {
        if STRING_MATCH.contains(&modifier) {
            string_count += 1;
        }
        if PATTERN_MATCH.contains(&modifier) {
            patterns.push(modifier);
        }
        has_numeric |= NUMERIC_COMPARE.contains(&modifier);
        has_exists |= modifier == "exists";
        has_re |= modifier == "re";
        has_regex_flags |= REGEX_FLAGS.contains(&modifier);
    }

    // Multiple string-match modifiers are mutually exclusive
    if string_count > 1 {
        return Some(String::from(
            "multiple string-match modifiers (contains, startswith, endswith) \
             are mutually exclusive",
        ));
    }

    // Pattern-match (re, cidr) is incompatible with string-match modifiers
    if string_count > 0 && !patterns.is_empty() {
        let listed = patterns.join(", ");
        return Some(format!(
            "pattern modifier '{listed}' is incompatible with string-match modifiers \
             (contains, startswith, endswith)"
        ));
    }

    // Numeric comparison is incompatible with string-match and pattern modifiers
    if has_numeric && (string_count > 0 || !patterns.is_empty()) {
        return Some(String::from(
            "numeric comparison modifiers (gt, gte, lt, lte, neq) are incompatible \
             with string-match and pattern modifiers",
        ));
    }

    // exists is standalone (all/cased are tolerated alongside it)
    if has_exists {
        let others: Vec<&str> = modifiers
            .iter()
            .copied()
            .filter(|m| !matches!(*m, "exists" | "all" | "cased"))
            .collect();
        if !others.is_empty() {
            return Some(format!(
                "'exists' modifier is incompatible with: {}",
                others.join(", ")
            ));
        }
    }

    // Regex flags require re
    if has_regex_flags && !has_re {
        return Some(String::from(
            "regex flag modifiers (i, m, s) require the 're' modifier",
        ));
    }

    None
}
1554
/// Returns `true` when `s` is one or more ASCII digits followed by exactly one
/// unit letter: `s` (second), `m` (minute), `h` (hour), `d` (day), `w` (week),
/// `M` (month) or `y` (year).
fn is_valid_timespan(s: &str) -> bool {
    let mut chars = s.chars();

    // The final character is the unit; an empty string has none.
    let Some(unit) = chars.next_back() else {
        return false;
    };
    if !matches!(unit, 's' | 'm' | 'h' | 'd' | 'w' | 'M' | 'y') {
        return false;
    }

    // Whatever precedes the unit must be a non-empty run of digits.
    let amount = chars.as_str();
    !amount.is_empty() && amount.chars().all(|c| c.is_ascii_digit())
}
1567
1568// =============================================================================
1569// Filter rule lint checks
1570// =============================================================================
1571
1572fn lint_filter_rule(m: &serde_yaml::Mapping, warnings: &mut Vec<LintWarning>) {
1573    // ── filter section ───────────────────────────────────────────────────
1574    let Some(filter_val) = m.get(key("filter")) else {
1575        warnings.push(err(
1576            LintRule::MissingFilter,
1577            "missing required field 'filter'",
1578            "/filter",
1579        ));
1580        return;
1581    };
1582
1583    let Some(filter) = filter_val.as_mapping() else {
1584        warnings.push(err(
1585            LintRule::MissingFilter,
1586            "'filter' must be a mapping",
1587            "/filter",
1588        ));
1589        return;
1590    };
1591
1592    // ── filter.rules ─────────────────────────────────────────────────────
1593    if let Some(rules_val) = filter.get(key("rules")) {
1594        if let Some(seq) = rules_val.as_sequence()
1595            && seq.is_empty()
1596        {
1597            warnings.push(warning(
1598                LintRule::EmptyFilterRules,
1599                "filter.rules should have at least one entry",
1600                "/filter/rules",
1601            ));
1602        }
1603    } else {
1604        warnings.push(err(
1605            LintRule::MissingFilterRules,
1606            "missing required field 'filter.rules'",
1607            "/filter/rules",
1608        ));
1609    }
1610
1611    // ── filter.selection ─────────────────────────────────────────────────
1612    if !filter.contains_key(key("selection")) {
1613        warnings.push(err(
1614            LintRule::MissingFilterSelection,
1615            "missing required field 'filter.selection'",
1616            "/filter/selection",
1617        ));
1618    }
1619
1620    // ── filter.condition ─────────────────────────────────────────────────
1621    if !filter.contains_key(key("condition")) {
1622        warnings.push(err(
1623            LintRule::MissingFilterCondition,
1624            "missing required field 'filter.condition'",
1625            "/filter/condition",
1626        ));
1627    }
1628
1629    // ── logsource required for filters ───────────────────────────────────
1630    if !m.contains_key(key("logsource")) {
1631        warnings.push(err(
1632            LintRule::MissingFilterLogsource,
1633            "missing required field 'logsource' for filter rule",
1634            "/logsource",
1635        ));
1636    } else {
1637        lint_logsource(m, warnings);
1638    }
1639
1640    // ── Filters should NOT have level or status ──────────────────────────
1641    if m.contains_key(key("level")) {
1642        let mut w = warning(
1643            LintRule::FilterHasLevel,
1644            "filter rules should not have a 'level' field",
1645            "/level",
1646        );
1647        w.fix = safe_fix(
1648            "remove 'level' from filter rule",
1649            vec![FixPatch::Remove {
1650                path: "/level".into(),
1651            }],
1652        );
1653        warnings.push(w);
1654    }
1655
1656    if m.contains_key(key("status")) {
1657        let mut w = warning(
1658            LintRule::FilterHasStatus,
1659            "filter rules should not have a 'status' field",
1660            "/status",
1661        );
1662        w.fix = safe_fix(
1663            "remove 'status' from filter rule",
1664            vec![FixPatch::Remove {
1665                path: "/status".into(),
1666            }],
1667        );
1668        warnings.push(w);
1669    }
1670}
1671
1672// =============================================================================
1673// Public API
1674// =============================================================================
1675
/// Levenshtein edit distance between two strings.
///
/// The distance is measured in Unicode scalar values (`char`s) rather than
/// UTF-8 bytes, so replacing one accented or non-Latin character counts as a
/// single edit instead of one edit per encoded byte.
///
/// Only two rows of the dynamic-programming table are kept in memory, so the
/// working set is proportional to the length of `b`.
fn edit_distance(a: &str, b: &str) -> usize {
    let b_chars: Vec<char> = b.chars().collect();
    let b_len = b_chars.len();
    if b_len == 0 {
        return a.chars().count();
    }

    // `prev[j]` holds the distance between the already-processed prefix of `a`
    // and the first `j` characters of `b`; it starts as the distance from "".
    let mut prev: Vec<usize> = (0..=b_len).collect();
    let mut curr = vec![0; b_len + 1];
    for (i, ca) in a.chars().enumerate() {
        curr[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let cost = usize::from(ca != cb);
            // substitution / deletion / insertion, in that order
            curr[j + 1] = (prev[j] + cost).min(prev[j + 1] + 1).min(curr[j] + 1);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    // After the final swap the last computed row lives in `prev`. When `a` is
    // empty the loop never runs and this is simply `b_len`.
    prev[b_len]
}
1697
/// Maximum edit distance to consider an unknown key a likely typo of a known key.
///
/// `lint_unknown_keys` compares the result of `edit_distance` against this
/// threshold: an unknown top-level key is only reported when at least one
/// known key is at most this many edits away.
const TYPO_MAX_EDIT_DISTANCE: usize = 2;
1700
1701/// Check for unknown top-level keys that are likely typos of known keys.
1702///
1703/// The Sigma specification v2.1.0 explicitly allows arbitrary custom top-level
1704/// fields, so unknown keys are not errors. However, when an unknown key is
1705/// within a small edit distance of a known key it is likely a typo and we
1706/// surface an informational hint.
1707fn lint_unknown_keys(m: &serde_yaml::Mapping, doc_type: DocType, warnings: &mut Vec<LintWarning>) {
1708    let type_keys = doc_type.known_keys();
1709    let all_known: Vec<&str> = KNOWN_KEYS_SHARED
1710        .iter()
1711        .chain(type_keys.iter())
1712        .copied()
1713        .collect();
1714
1715    for k in m.keys() {
1716        let Some(ks) = k.as_str() else { continue };
1717        if KNOWN_KEYS_SHARED.contains(&ks) || type_keys.contains(&ks) {
1718            continue;
1719        }
1720        // Only warn when the key looks like a typo of a known key.
1721        if let Some(closest) = all_known
1722            .iter()
1723            .filter(|known| edit_distance(ks, known) <= TYPO_MAX_EDIT_DISTANCE)
1724            .min_by_key(|known| edit_distance(ks, known))
1725        {
1726            let mut w = info(
1727                LintRule::UnknownKey,
1728                format!("unknown top-level key \"{ks}\"; did you mean \"{closest}\"?"),
1729                format!("/{ks}"),
1730            );
1731            w.fix = safe_fix(
1732                format!("rename '{ks}' to '{closest}'"),
1733                vec![FixPatch::ReplaceKey {
1734                    path: format!("/{ks}"),
1735                    new_key: closest.to_string(),
1736                }],
1737            );
1738            warnings.push(w);
1739        }
1740    }
1741}
1742
1743/// Lint a single YAML document value.
1744///
1745/// Auto-detects document type (detection / correlation / filter) and runs
1746/// the appropriate checks. Returns all findings.
1747pub fn lint_yaml_value(value: &Value) -> Vec<LintWarning> {
1748    let Some(m) = value.as_mapping() else {
1749        return vec![err(
1750            LintRule::NotAMapping,
1751            "document is not a YAML mapping",
1752            "/",
1753        )];
1754    };
1755
1756    // Skip collection action fragments
1757    if is_action_fragment(m) {
1758        return Vec::new();
1759    }
1760
1761    let mut warnings = Vec::new();
1762
1763    // Run shared checks
1764    lint_shared(m, &mut warnings);
1765
1766    // Run type-specific checks
1767    let doc_type = detect_doc_type(m);
1768    match doc_type {
1769        DocType::Detection => lint_detection_rule(m, &mut warnings),
1770        DocType::Correlation => lint_correlation_rule(m, &mut warnings),
1771        DocType::Filter => lint_filter_rule(m, &mut warnings),
1772    }
1773
1774    // Check for unknown top-level keys
1775    lint_unknown_keys(m, doc_type, &mut warnings);
1776
1777    warnings
1778}
1779
1780/// Lint a raw YAML string, returning warnings with resolved source spans.
1781///
1782/// Unlike [`lint_yaml_value`], this function takes the raw text and resolves
1783/// JSON-pointer paths to `(line, col)` spans. This is the preferred entry
1784/// point for the LSP server.
1785pub fn lint_yaml_str(text: &str) -> Vec<LintWarning> {
1786    let mut all_warnings = Vec::new();
1787
1788    for doc in serde_yaml::Deserializer::from_str(text) {
1789        let value: Value = match Value::deserialize(doc) {
1790            Ok(v) => v,
1791            Err(e) => {
1792                let mut w = err(
1793                    LintRule::YamlParseError,
1794                    format!("YAML parse error: {e}"),
1795                    "/",
1796                );
1797                // serde_yaml can give us a location
1798                if let Some(loc) = e.location() {
1799                    w.span = Some(Span {
1800                        start_line: loc.line().saturating_sub(1) as u32,
1801                        start_col: loc.column() as u32,
1802                        end_line: loc.line().saturating_sub(1) as u32,
1803                        end_col: loc.column() as u32 + 1,
1804                    });
1805                }
1806                all_warnings.push(w);
1807                // A parse error leaves the YAML stream in an undefined state;
1808                // the deserializer iterator may never terminate on malformed
1809                // input, so we must stop iterating to avoid infinite loops and
1810                // unbounded memory growth.
1811                break;
1812            }
1813        };
1814
1815        let warnings = lint_yaml_value(&value);
1816        // Resolve spans for each warning
1817        for mut w in warnings {
1818            w.span = resolve_path_to_span(text, &w.path);
1819            all_warnings.push(w);
1820        }
1821    }
1822
1823    all_warnings
1824}
1825
1826/// Resolve a JSON-pointer path to a `Span` by scanning the YAML text.
1827///
1828/// Returns `None` if the path cannot be resolved.
1829fn resolve_path_to_span(text: &str, path: &str) -> Option<Span> {
1830    if path == "/" || path.is_empty() {
1831        // Root — first non-empty line
1832        for (i, line) in text.lines().enumerate() {
1833            let trimmed = line.trim();
1834            if !trimmed.is_empty() && !trimmed.starts_with('#') && trimmed != "---" {
1835                return Some(Span {
1836                    start_line: i as u32,
1837                    start_col: 0,
1838                    end_line: i as u32,
1839                    end_col: line.len() as u32,
1840                });
1841            }
1842        }
1843        return None;
1844    }
1845
1846    let segments: Vec<&str> = path.strip_prefix('/').unwrap_or(path).split('/').collect();
1847
1848    if segments.is_empty() {
1849        return None;
1850    }
1851
1852    let lines: Vec<&str> = text.lines().collect();
1853    let mut current_indent: i32 = -1;
1854    let mut search_start = 0usize;
1855    let mut last_matched_line: Option<usize> = None;
1856
1857    for segment in &segments {
1858        let array_index: Option<usize> = segment.parse().ok();
1859        let mut found = false;
1860
1861        let mut line_num = search_start;
1862        while line_num < lines.len() {
1863            let line = lines[line_num];
1864            let trimmed = line.trim();
1865            if trimmed.is_empty() || trimmed.starts_with('#') {
1866                line_num += 1;
1867                continue;
1868            }
1869
1870            let indent = (line.len() - trimmed.len()) as i32;
1871
1872            if indent <= current_indent && found {
1873                break;
1874            }
1875            if indent <= current_indent {
1876                line_num += 1;
1877                continue;
1878            }
1879
1880            if let Some(idx) = array_index {
1881                if trimmed.starts_with("- ") && indent > current_indent {
1882                    let mut count = 0usize;
1883                    for (offset, sl) in lines[search_start..].iter().enumerate() {
1884                        let scan = search_start + offset;
1885                        let st = sl.trim();
1886                        if st.is_empty() || st.starts_with('#') {
1887                            continue;
1888                        }
1889                        let si = (sl.len() - st.len()) as i32;
1890                        if si == indent && st.starts_with("- ") {
1891                            if count == idx {
1892                                last_matched_line = Some(scan);
1893                                search_start = scan + 1;
1894                                current_indent = indent;
1895                                found = true;
1896                                break;
1897                            }
1898                            count += 1;
1899                        }
1900                        if si < indent && count > 0 {
1901                            break;
1902                        }
1903                    }
1904                    break;
1905                }
1906            } else {
1907                let key_pattern = format!("{segment}:");
1908                if trimmed.starts_with(&key_pattern) || trimmed == *segment {
1909                    last_matched_line = Some(line_num);
1910                    search_start = line_num + 1;
1911                    current_indent = indent;
1912                    found = true;
1913                    break;
1914                }
1915            }
1916
1917            line_num += 1;
1918        }
1919
1920        if !found && last_matched_line.is_none() {
1921            break;
1922        }
1923    }
1924
1925    last_matched_line.map(|line_num| {
1926        let line = lines[line_num];
1927        Span {
1928            start_line: line_num as u32,
1929            start_col: 0,
1930            end_line: line_num as u32,
1931            end_col: line.len() as u32,
1932        }
1933    })
1934}
1935
1936/// Lint all YAML documents in a file.
1937///
1938/// Handles multi-document YAML (separated by `---`). Collection action
1939/// fragments (`action: global/reset/repeat`) are skipped. Warnings include
1940/// resolved source spans (delegates to [`lint_yaml_str`]).
1941pub fn lint_yaml_file(path: &Path) -> crate::error::Result<FileLintResult> {
1942    let content = std::fs::read_to_string(path)?;
1943    let warnings = lint_yaml_str(&content);
1944    Ok(FileLintResult {
1945        path: path.to_path_buf(),
1946        warnings,
1947    })
1948}
1949
1950/// Lint all `.yml`/`.yaml` files in a directory recursively.
1951///
1952/// Skips hidden directories (starting with `.`) and tracks visited
1953/// canonical paths to avoid infinite loops from symlink cycles.
1954pub fn lint_yaml_directory(dir: &Path) -> crate::error::Result<Vec<FileLintResult>> {
1955    let mut results = Vec::new();
1956    let mut visited = HashSet::new();
1957
1958    fn walk(
1959        dir: &Path,
1960        results: &mut Vec<FileLintResult>,
1961        visited: &mut HashSet<std::path::PathBuf>,
1962    ) -> crate::error::Result<()> {
1963        // Resolve symlinks and canonicalize for cycle detection
1964        let canonical = match dir.canonicalize() {
1965            Ok(p) => p,
1966            Err(_) => return Ok(()),
1967        };
1968        if !visited.insert(canonical) {
1969            // Already visited this directory — symlink cycle
1970            return Ok(());
1971        }
1972
1973        let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
1974        entries.sort_by_key(|e| e.path());
1975
1976        for entry in entries {
1977            let path = entry.path();
1978
1979            // Skip hidden directories (e.g. .git)
1980            if path.is_dir() {
1981                if path
1982                    .file_name()
1983                    .and_then(|n| n.to_str())
1984                    .is_some_and(|n| n.starts_with('.'))
1985                {
1986                    continue;
1987                }
1988                walk(&path, results, visited)?;
1989            } else if matches!(
1990                path.extension().and_then(|e| e.to_str()),
1991                Some("yml" | "yaml")
1992            ) {
1993                match crate::lint::lint_yaml_file(&path) {
1994                    Ok(file_result) => results.push(file_result),
1995                    Err(e) => {
1996                        results.push(FileLintResult {
1997                            path: path.clone(),
1998                            warnings: vec![err(
1999                                LintRule::FileReadError,
2000                                format!("error reading file: {e}"),
2001                                "/",
2002                            )],
2003                        });
2004                    }
2005                }
2006            }
2007        }
2008        Ok(())
2009    }
2010
2011    walk(dir, &mut results, &mut visited)?;
2012    Ok(results)
2013}
2014
2015// =============================================================================
2016// Lint configuration & suppression
2017// =============================================================================
2018
/// Configuration for lint rule suppression and severity overrides.
///
/// Can be loaded from a `.rsigma-lint.yml` config file, merged with CLI
/// `--disable` flags, and combined with inline `# rsigma-disable` comments.
///
/// Both collections are keyed by rule name, i.e. the string produced by
/// `LintRule`'s `to_string()`, which is what lookups compare against.
#[derive(Debug, Clone, Default, Serialize)]
pub struct LintConfig {
    /// Rule names to suppress entirely (e.g. `"missing_description"`).
    pub disabled_rules: HashSet<String>,
    /// Override the default severity of a rule (e.g. `title_too_long -> Info`).
    pub severity_overrides: HashMap<String, Severity>,
}
2030
/// Raw YAML shape for `.rsigma-lint.yml`.
///
/// This is the on-disk representation only: severities are kept as plain
/// strings here and are validated and converted to [`Severity`] by
/// `LintConfig::load`. Both fields are optional in the file and default to
/// empty collections.
#[derive(Debug, Deserialize)]
struct RawLintConfig {
    /// Names of rules to disable.
    #[serde(default)]
    disabled_rules: Vec<String>,
    /// Rule name to severity name (`error`, `warning`, `info` or `hint`).
    #[serde(default)]
    severity_overrides: HashMap<String, String>,
}
2039
2040impl LintConfig {
2041    /// Load a `LintConfig` from a `.rsigma-lint.yml` file.
2042    pub fn load(path: &Path) -> crate::error::Result<Self> {
2043        let content = std::fs::read_to_string(path)?;
2044        let raw: RawLintConfig = serde_yaml::from_str(&content)?;
2045
2046        let disabled_rules: HashSet<String> = raw.disabled_rules.into_iter().collect();
2047        let mut severity_overrides = HashMap::new();
2048        for (rule, sev_str) in &raw.severity_overrides {
2049            let sev = match sev_str.as_str() {
2050                "error" => Severity::Error,
2051                "warning" => Severity::Warning,
2052                "info" => Severity::Info,
2053                "hint" => Severity::Hint,
2054                other => {
2055                    return Err(crate::error::SigmaParserError::InvalidRule(format!(
2056                        "invalid severity '{other}' for rule '{rule}' in lint config"
2057                    )));
2058                }
2059            };
2060            severity_overrides.insert(rule.clone(), sev);
2061        }
2062
2063        Ok(LintConfig {
2064            disabled_rules,
2065            severity_overrides,
2066        })
2067    }
2068
2069    /// Walk up from `start_path` to find the nearest `.rsigma-lint.yml`.
2070    ///
2071    /// Checks `start_path` itself (if a directory) or its parent, then
2072    /// ancestors until the filesystem root.
2073    pub fn find_in_ancestors(start_path: &Path) -> Option<std::path::PathBuf> {
2074        let dir = if start_path.is_file() {
2075            start_path.parent()?
2076        } else {
2077            start_path
2078        };
2079
2080        let mut current = dir;
2081        loop {
2082            let candidate = current.join(".rsigma-lint.yml");
2083            if candidate.is_file() {
2084                return Some(candidate);
2085            }
2086            // Also try .yaml extension
2087            let candidate_yaml = current.join(".rsigma-lint.yaml");
2088            if candidate_yaml.is_file() {
2089                return Some(candidate_yaml);
2090            }
2091            current = current.parent()?;
2092        }
2093    }
2094
2095    /// Merge another config into this one (e.g. CLI `--disable` into file config).
2096    pub fn merge(&mut self, other: &LintConfig) {
2097        self.disabled_rules
2098            .extend(other.disabled_rules.iter().cloned());
2099        for (rule, sev) in &other.severity_overrides {
2100            self.severity_overrides.insert(rule.clone(), *sev);
2101        }
2102    }
2103
2104    /// Check if a rule is disabled.
2105    pub fn is_disabled(&self, rule: &LintRule) -> bool {
2106        self.disabled_rules.contains(&rule.to_string())
2107    }
2108}
2109
2110// =============================================================================
2111// Inline suppression comments
2112// =============================================================================
2113
/// Parsed inline suppression directives from YAML source text.
///
/// Produced by [`parse_inline_suppressions`] and consulted through
/// `InlineSuppressions::is_suppressed`.
#[derive(Debug, Clone, Default)]
pub struct InlineSuppressions {
    /// If `true`, all rules are suppressed for the entire file.
    pub disable_all: bool,
    /// Rules suppressed for the entire file (from `# rsigma-disable rule1, rule2`).
    pub file_disabled: HashSet<String>,
    /// Rules suppressed for specific lines: `line_number -> set of rule names`.
    /// Line numbers are 0-indexed, matching `Span::start_line`.
    /// `None` means all rules are suppressed for that line; `Some(set)`
    /// suppresses only the named rules.
    pub line_disabled: HashMap<u32, Option<HashSet<String>>>,
}
2125
2126/// Parse `# rsigma-disable` comments from raw YAML text.
2127///
2128/// Supported forms:
2129/// - `# rsigma-disable` — suppress **all** rules for the file
2130/// - `# rsigma-disable rule1, rule2` — suppress specific rules for the file
2131/// - `# rsigma-disable-next-line` — suppress all rules for the next line
2132/// - `# rsigma-disable-next-line rule1, rule2` — suppress specific rules for the next line
2133pub fn parse_inline_suppressions(text: &str) -> InlineSuppressions {
2134    let mut result = InlineSuppressions::default();
2135
2136    for (i, line) in text.lines().enumerate() {
2137        let trimmed = line.trim();
2138
2139        // Look for comment-only lines or trailing comments
2140        let comment = if let Some(pos) = find_yaml_comment(trimmed) {
2141            trimmed[pos + 1..].trim()
2142        } else {
2143            continue;
2144        };
2145
2146        if let Some(rest) = comment.strip_prefix("rsigma-disable-next-line") {
2147            let rest = rest.trim();
2148            let next_line = (i + 1) as u32;
2149            if rest.is_empty() {
2150                // Suppress all rules for next line
2151                result.line_disabled.insert(next_line, None);
2152            } else {
2153                // Suppress specific rules for next line
2154                let rules: HashSet<String> = rest
2155                    .split(',')
2156                    .map(|s| s.trim().to_string())
2157                    .filter(|s| !s.is_empty())
2158                    .collect();
2159                if !rules.is_empty() {
2160                    result
2161                        .line_disabled
2162                        .entry(next_line)
2163                        .and_modify(|existing| {
2164                            if let Some(existing_set) = existing {
2165                                existing_set.extend(rules.iter().cloned());
2166                            }
2167                            // If None (all suppressed), leave as None
2168                        })
2169                        .or_insert(Some(rules));
2170                }
2171            }
2172        } else if let Some(rest) = comment.strip_prefix("rsigma-disable") {
2173            let rest = rest.trim();
2174            if rest.is_empty() {
2175                // Suppress all rules for the entire file
2176                result.disable_all = true;
2177            } else {
2178                // Suppress specific rules for the file
2179                for rule in rest.split(',') {
2180                    let rule = rule.trim();
2181                    if !rule.is_empty() {
2182                        result.file_disabled.insert(rule.to_string());
2183                    }
2184                }
2185            }
2186        }
2187    }
2188
2189    result
2190}
2191
/// Find the start of a YAML comment (`#`) that is not inside a quoted string.
///
/// Returns the byte offset of `#` within the trimmed line, or `None`.
///
/// This is a single-line heuristic rather than a full YAML scanner, but it
/// follows the YAML rules that matter for locating comments:
/// - `#` only starts a comment at the beginning of the line or after
///   whitespace, so `http://host/#fragment` is not a comment;
/// - a quote only opens a quoted scalar at the start of a token (line start,
///   after whitespace, or after `[`, `{`, `,`, `:`), so an apostrophe inside
///   a plain scalar such as `don't` does not hide a later comment;
/// - inside double quotes a backslash escapes the next character, and inside
///   single quotes `''` is an escaped quote.
fn find_yaml_comment(line: &str) -> Option<usize> {
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    /// Whether a quote character following `prev` can open a quoted scalar.
    fn opens_scalar(prev: Option<char>) -> bool {
        prev.map_or(true, |p| {
            p.is_whitespace() || matches!(p, '[' | '{' | ',' | ':')
        })
    }

    let mut quote = Quote::None;
    let mut prev: Option<char> = None;
    let mut chars = line.char_indices().peekable();

    while let Some((i, c)) = chars.next() {
        match quote {
            Quote::Single => {
                if c == '\'' {
                    if matches!(chars.peek(), Some(&(_, '\''))) {
                        // `''` is an escaped quote: consume its second half.
                        chars.next();
                    } else {
                        quote = Quote::None;
                    }
                }
            }
            Quote::Double => match c {
                // Skip the escaped character so `\"` does not close the string.
                '\\' => {
                    chars.next();
                }
                '"' => quote = Quote::None,
                _ => {}
            },
            Quote::None => match c {
                '#' if prev.map_or(true, char::is_whitespace) => return Some(i),
                '\'' if opens_scalar(prev) => quote = Quote::Single,
                '"' if opens_scalar(prev) => quote = Quote::Double,
                _ => {}
            },
        }
        prev = Some(c);
    }
    None
}
2208
2209impl InlineSuppressions {
2210    /// Check if a warning should be suppressed.
2211    pub fn is_suppressed(&self, warning: &LintWarning) -> bool {
2212        // File-level disable-all
2213        if self.disable_all {
2214            return true;
2215        }
2216
2217        // File-level specific rules
2218        let rule_name = warning.rule.to_string();
2219        if self.file_disabled.contains(&rule_name) {
2220            return true;
2221        }
2222
2223        // Line-level suppression (requires a resolved span)
2224        if let Some(span) = &warning.span
2225            && let Some(line_rules) = self.line_disabled.get(&span.start_line)
2226        {
2227            return match line_rules {
2228                None => true, // All rules suppressed for this line
2229                Some(rules) => rules.contains(&rule_name),
2230            };
2231        }
2232
2233        false
2234    }
2235}
2236
2237// =============================================================================
2238// Suppression filtering
2239// =============================================================================
2240
2241/// Apply suppression from config and inline comments to lint warnings.
2242///
2243/// 1. Removes warnings whose rule is in `config.disabled_rules`.
2244/// 2. Removes warnings suppressed by inline comments.
2245/// 3. Applies `severity_overrides` to remaining warnings.
2246pub fn apply_suppressions(
2247    warnings: Vec<LintWarning>,
2248    config: &LintConfig,
2249    inline: &InlineSuppressions,
2250) -> Vec<LintWarning> {
2251    warnings
2252        .into_iter()
2253        .filter(|w| !config.is_disabled(&w.rule))
2254        .filter(|w| !inline.is_suppressed(w))
2255        .map(|mut w| {
2256            let rule_name = w.rule.to_string();
2257            if let Some(sev) = config.severity_overrides.get(&rule_name) {
2258                w.severity = *sev;
2259            }
2260            w
2261        })
2262        .collect()
2263}
2264
2265/// Lint a raw YAML string with config-based suppression.
2266///
2267/// Combines [`lint_yaml_str`] + [`parse_inline_suppressions`] +
2268/// [`apply_suppressions`] in one call.
2269pub fn lint_yaml_str_with_config(text: &str, config: &LintConfig) -> Vec<LintWarning> {
2270    let warnings = lint_yaml_str(text);
2271    let inline = parse_inline_suppressions(text);
2272    apply_suppressions(warnings, config, &inline)
2273}
2274
2275/// Lint a file with config-based suppression.
2276pub fn lint_yaml_file_with_config(
2277    path: &Path,
2278    config: &LintConfig,
2279) -> crate::error::Result<FileLintResult> {
2280    let content = std::fs::read_to_string(path)?;
2281    let warnings = lint_yaml_str_with_config(&content, config);
2282    Ok(FileLintResult {
2283        path: path.to_path_buf(),
2284        warnings,
2285    })
2286}
2287
2288/// Lint a directory with config-based suppression.
2289pub fn lint_yaml_directory_with_config(
2290    dir: &Path,
2291    config: &LintConfig,
2292) -> crate::error::Result<Vec<FileLintResult>> {
2293    let mut results = Vec::new();
2294    let mut visited = HashSet::new();
2295
2296    fn walk(
2297        dir: &Path,
2298        config: &LintConfig,
2299        results: &mut Vec<FileLintResult>,
2300        visited: &mut HashSet<std::path::PathBuf>,
2301    ) -> crate::error::Result<()> {
2302        let canonical = match dir.canonicalize() {
2303            Ok(p) => p,
2304            Err(_) => return Ok(()),
2305        };
2306        if !visited.insert(canonical) {
2307            return Ok(());
2308        }
2309
2310        let mut entries: Vec<_> = std::fs::read_dir(dir)?.filter_map(|e| e.ok()).collect();
2311        entries.sort_by_key(|e| e.path());
2312
2313        for entry in entries {
2314            let path = entry.path();
2315            if path.is_dir() {
2316                if path
2317                    .file_name()
2318                    .and_then(|n| n.to_str())
2319                    .is_some_and(|n| n.starts_with('.'))
2320                {
2321                    continue;
2322                }
2323                walk(&path, config, results, visited)?;
2324            } else if matches!(
2325                path.extension().and_then(|e| e.to_str()),
2326                Some("yml" | "yaml")
2327            ) {
2328                match lint_yaml_file_with_config(&path, config) {
2329                    Ok(file_result) => results.push(file_result),
2330                    Err(e) => {
2331                        results.push(FileLintResult {
2332                            path: path.clone(),
2333                            warnings: vec![err(
2334                                LintRule::FileReadError,
2335                                format!("error reading file: {e}"),
2336                                "/",
2337                            )],
2338                        });
2339                    }
2340                }
2341            }
2342        }
2343        Ok(())
2344    }
2345
2346    walk(dir, config, &mut results, &mut visited)?;
2347    Ok(results)
2348}
2349
2350// =============================================================================
2351// Tests
2352// =============================================================================
2353
2354#[cfg(test)]
2355mod tests {
2356    use super::*;
2357
2358    fn yaml_value(yaml: &str) -> Value {
2359        serde_yaml::from_str(yaml).unwrap()
2360    }
2361
2362    fn lint(yaml: &str) -> Vec<LintWarning> {
2363        lint_yaml_value(&yaml_value(yaml))
2364    }
2365
2366    fn has_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
2367        warnings.iter().any(|w| w.rule == rule)
2368    }
2369
2370    fn has_no_rule(warnings: &[LintWarning], rule: LintRule) -> bool {
2371        !has_rule(warnings, rule)
2372    }
2373
2374    // ── Valid rule produces no errors ────────────────────────────────────
2375
2376    #[test]
2377    fn valid_detection_rule_no_errors() {
2378        let w = lint(
2379            r#"
2380title: Test Rule
2381id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2382status: test
2383logsource:
2384    category: process_creation
2385    product: windows
2386detection:
2387    selection:
2388        CommandLine|contains: 'whoami'
2389    condition: selection
2390level: medium
2391tags:
2392    - attack.execution
2393    - attack.t1059
2394"#,
2395        );
2396        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
2397        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
2398    }
2399
2400    // ── Shared checks ───────────────────────────────────────────────────
2401
2402    #[test]
2403    fn missing_title() {
2404        let w = lint(
2405            r#"
2406logsource:
2407    category: test
2408detection:
2409    selection:
2410        field: value
2411    condition: selection
2412"#,
2413        );
2414        assert!(has_rule(&w, LintRule::MissingTitle));
2415    }
2416
2417    #[test]
2418    fn title_too_long() {
2419        let long_title = "a".repeat(257);
2420        let yaml = format!(
2421            r#"
2422title: '{long_title}'
2423logsource:
2424    category: test
2425detection:
2426    selection:
2427        field: value
2428    condition: selection
2429"#
2430        );
2431        let w = lint(&yaml);
2432        assert!(has_rule(&w, LintRule::TitleTooLong));
2433    }
2434
2435    #[test]
2436    fn invalid_id() {
2437        let w = lint(
2438            r#"
2439title: Test
2440id: not-a-uuid
2441logsource:
2442    category: test
2443detection:
2444    selection:
2445        field: value
2446    condition: selection
2447"#,
2448        );
2449        assert!(has_rule(&w, LintRule::InvalidId));
2450    }
2451
2452    #[test]
2453    fn valid_id_no_warning() {
2454        let w = lint(
2455            r#"
2456title: Test
2457id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2458logsource:
2459    category: test
2460detection:
2461    selection:
2462        field: value
2463    condition: selection
2464"#,
2465        );
2466        assert!(has_no_rule(&w, LintRule::InvalidId));
2467    }
2468
2469    #[test]
2470    fn invalid_status() {
2471        let w = lint(
2472            r#"
2473title: Test
2474status: invalid
2475logsource:
2476    category: test
2477detection:
2478    selection:
2479        field: value
2480    condition: selection
2481"#,
2482        );
2483        assert!(has_rule(&w, LintRule::InvalidStatus));
2484    }
2485
2486    #[test]
2487    fn invalid_level() {
2488        let w = lint(
2489            r#"
2490title: Test
2491level: important
2492logsource:
2493    category: test
2494detection:
2495    selection:
2496        field: value
2497    condition: selection
2498"#,
2499        );
2500        assert!(has_rule(&w, LintRule::InvalidLevel));
2501    }
2502
2503    #[test]
2504    fn invalid_date_format() {
2505        let w = lint(
2506            r#"
2507title: Test
2508date: 'Jan 2025'
2509logsource:
2510    category: test
2511detection:
2512    selection:
2513        field: value
2514    condition: selection
2515"#,
2516        );
2517        assert!(has_rule(&w, LintRule::InvalidDate));
2518    }
2519
2520    #[test]
2521    fn modified_before_date() {
2522        let w = lint(
2523            r#"
2524title: Test
2525date: '2025-06-15'
2526modified: '2025-06-10'
2527logsource:
2528    category: test
2529detection:
2530    selection:
2531        field: value
2532    condition: selection
2533"#,
2534        );
2535        assert!(has_rule(&w, LintRule::ModifiedBeforeDate));
2536    }
2537
2538    #[test]
2539    fn non_lowercase_key() {
2540        let w = lint(
2541            r#"
2542title: Test
2543Status: test
2544logsource:
2545    category: test
2546detection:
2547    selection:
2548        field: value
2549    condition: selection
2550"#,
2551        );
2552        assert!(has_rule(&w, LintRule::NonLowercaseKey));
2553    }
2554
2555    // ── Detection rule checks ───────────────────────────────────────────
2556
2557    #[test]
2558    fn missing_logsource() {
2559        let w = lint(
2560            r#"
2561title: Test
2562detection:
2563    selection:
2564        field: value
2565    condition: selection
2566"#,
2567        );
2568        assert!(has_rule(&w, LintRule::MissingLogsource));
2569    }
2570
2571    #[test]
2572    fn missing_detection() {
2573        let w = lint(
2574            r#"
2575title: Test
2576logsource:
2577    category: test
2578"#,
2579        );
2580        assert!(has_rule(&w, LintRule::MissingDetection));
2581    }
2582
2583    #[test]
2584    fn missing_condition() {
2585        let w = lint(
2586            r#"
2587title: Test
2588logsource:
2589    category: test
2590detection:
2591    selection:
2592        field: value
2593"#,
2594        );
2595        assert!(has_rule(&w, LintRule::MissingCondition));
2596    }
2597
2598    #[test]
2599    fn empty_detection() {
2600        let w = lint(
2601            r#"
2602title: Test
2603logsource:
2604    category: test
2605detection:
2606    condition: selection
2607"#,
2608        );
2609        assert!(has_rule(&w, LintRule::EmptyDetection));
2610    }
2611
2612    #[test]
2613    fn invalid_related_type() {
2614        let w = lint(
2615            r#"
2616title: Test
2617logsource:
2618    category: test
2619detection:
2620    selection:
2621        field: value
2622    condition: selection
2623related:
2624    - id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2625      type: invalid_type
2626"#,
2627        );
2628        assert!(has_rule(&w, LintRule::InvalidRelatedType));
2629    }
2630
2631    #[test]
2632    fn related_missing_required_fields() {
2633        let w = lint(
2634            r#"
2635title: Test
2636logsource:
2637    category: test
2638detection:
2639    selection:
2640        field: value
2641    condition: selection
2642related:
2643    - id: 929a690e-bef0-4204-a928-ef5e620d6fcc
2644"#,
2645        );
2646        assert!(has_rule(&w, LintRule::RelatedMissingRequired));
2647    }
2648
2649    #[test]
2650    fn deprecated_without_related() {
2651        let w = lint(
2652            r#"
2653title: Test
2654status: deprecated
2655logsource:
2656    category: test
2657detection:
2658    selection:
2659        field: value
2660    condition: selection
2661"#,
2662        );
2663        assert!(has_rule(&w, LintRule::DeprecatedWithoutRelated));
2664    }
2665
2666    #[test]
2667    fn invalid_tag_pattern() {
2668        let w = lint(
2669            r#"
2670title: Test
2671logsource:
2672    category: test
2673detection:
2674    selection:
2675        field: value
2676    condition: selection
2677tags:
2678    - 'Invalid Tag'
2679"#,
2680        );
2681        assert!(has_rule(&w, LintRule::InvalidTag));
2682    }
2683
2684    #[test]
2685    fn unknown_tag_namespace() {
2686        let w = lint(
2687            r#"
2688title: Test
2689logsource:
2690    category: test
2691detection:
2692    selection:
2693        field: value
2694    condition: selection
2695tags:
2696    - custom.something
2697"#,
2698        );
2699        assert!(has_rule(&w, LintRule::UnknownTagNamespace));
2700    }
2701
2702    #[test]
2703    fn duplicate_tags() {
2704        let w = lint(
2705            r#"
2706title: Test
2707logsource:
2708    category: test
2709detection:
2710    selection:
2711        field: value
2712    condition: selection
2713tags:
2714    - attack.execution
2715    - attack.execution
2716"#,
2717        );
2718        assert!(has_rule(&w, LintRule::DuplicateTags));
2719    }
2720
2721    #[test]
2722    fn logsource_not_lowercase() {
2723        let w = lint(
2724            r#"
2725title: Test
2726logsource:
2727    category: Process_Creation
2728    product: Windows
2729detection:
2730    selection:
2731        field: value
2732    condition: selection
2733"#,
2734        );
2735        assert!(has_rule(&w, LintRule::LogsourceValueNotLowercase));
2736    }
2737
2738    #[test]
2739    fn single_value_all_modifier() {
2740        let w = lint(
2741            r#"
2742title: Test
2743logsource:
2744    category: test
2745detection:
2746    selection:
2747        CommandLine|contains|all: 'single'
2748    condition: selection
2749"#,
2750        );
2751        assert!(has_rule(&w, LintRule::SingleValueAllModifier));
2752    }
2753
2754    #[test]
2755    fn null_in_value_list() {
2756        let w = lint(
2757            r#"
2758title: Test
2759logsource:
2760    category: test
2761detection:
2762    selection:
2763        FieldA:
2764            - 'value1'
2765            - null
2766    condition: selection
2767"#,
2768        );
2769        assert!(has_rule(&w, LintRule::NullInValueList));
2770    }
2771
2772    // ── Correlation rule checks ─────────────────────────────────────────
2773
2774    #[test]
2775    fn valid_correlation_no_errors() {
2776        let w = lint(
2777            r#"
2778title: Brute Force
2779correlation:
2780    type: event_count
2781    rules:
2782        - 929a690e-bef0-4204-a928-ef5e620d6fcc
2783    group-by:
2784        - User
2785    timespan: 1h
2786    condition:
2787        gte: 100
2788level: high
2789"#,
2790        );
2791        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
2792        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
2793    }
2794
2795    #[test]
2796    fn invalid_correlation_type() {
2797        let w = lint(
2798            r#"
2799title: Test
2800correlation:
2801    type: invalid_type
2802    rules:
2803        - some-rule
2804    timespan: 1h
2805    group-by:
2806        - User
2807"#,
2808        );
2809        assert!(has_rule(&w, LintRule::InvalidCorrelationType));
2810    }
2811
2812    #[test]
2813    fn missing_correlation_timespan() {
2814        let w = lint(
2815            r#"
2816title: Test
2817correlation:
2818    type: event_count
2819    rules:
2820        - some-rule
2821    group-by:
2822        - User
2823    condition:
2824        gte: 10
2825"#,
2826        );
2827        assert!(has_rule(&w, LintRule::MissingCorrelationTimespan));
2828    }
2829
2830    #[test]
2831    fn invalid_timespan_format() {
2832        let w = lint(
2833            r#"
2834title: Test
2835correlation:
2836    type: event_count
2837    rules:
2838        - some-rule
2839    group-by:
2840        - User
2841    timespan: 1hour
2842    condition:
2843        gte: 10
2844"#,
2845        );
2846        assert!(has_rule(&w, LintRule::InvalidTimespanFormat));
2847    }
2848
2849    #[test]
2850    fn missing_group_by() {
2851        let w = lint(
2852            r#"
2853title: Test
2854correlation:
2855    type: event_count
2856    rules:
2857        - some-rule
2858    timespan: 1h
2859    condition:
2860        gte: 10
2861"#,
2862        );
2863        assert!(has_rule(&w, LintRule::MissingGroupBy));
2864    }
2865
2866    #[test]
2867    fn missing_condition_field_for_value_count() {
2868        let w = lint(
2869            r#"
2870title: Test
2871correlation:
2872    type: value_count
2873    rules:
2874        - some-rule
2875    group-by:
2876        - User
2877    timespan: 1h
2878    condition:
2879        gte: 10
2880"#,
2881        );
2882        assert!(has_rule(&w, LintRule::MissingConditionField));
2883    }
2884
2885    #[test]
2886    fn invalid_condition_operator() {
2887        let w = lint(
2888            r#"
2889title: Test
2890correlation:
2891    type: event_count
2892    rules:
2893        - some-rule
2894    group-by:
2895        - User
2896    timespan: 1h
2897    condition:
2898        bigger: 10
2899"#,
2900        );
2901        assert!(has_rule(&w, LintRule::InvalidConditionOperator));
2902    }
2903
2904    #[test]
2905    fn generate_not_boolean() {
2906        let w = lint(
2907            r#"
2908title: Test
2909correlation:
2910    type: event_count
2911    rules:
2912        - some-rule
2913    group-by:
2914        - User
2915    timespan: 1h
2916    condition:
2917        gte: 10
2918    generate: 'yes'
2919"#,
2920        );
2921        assert!(has_rule(&w, LintRule::GenerateNotBoolean));
2922    }
2923
2924    // ── Filter rule checks ──────────────────────────────────────────────
2925
2926    #[test]
2927    fn valid_filter_no_errors() {
2928        let w = lint(
2929            r#"
2930title: Filter Admin
2931logsource:
2932    category: process_creation
2933    product: windows
2934filter:
2935    rules:
2936        - 929a690e-bef0-4204-a928-ef5e620d6fcc
2937    selection:
2938        User|startswith: 'adm_'
2939    condition: selection
2940"#,
2941        );
2942        let errors: Vec<_> = w.iter().filter(|w| w.severity == Severity::Error).collect();
2943        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
2944    }
2945
2946    #[test]
2947    fn missing_filter_rules() {
2948        let w = lint(
2949            r#"
2950title: Test
2951logsource:
2952    category: test
2953filter:
2954    selection:
2955        User: admin
2956    condition: selection
2957"#,
2958        );
2959        assert!(has_rule(&w, LintRule::MissingFilterRules));
2960    }
2961
2962    #[test]
2963    fn missing_filter_selection() {
2964        let w = lint(
2965            r#"
2966title: Test
2967logsource:
2968    category: test
2969filter:
2970    rules:
2971        - some-rule
2972    condition: selection
2973"#,
2974        );
2975        assert!(has_rule(&w, LintRule::MissingFilterSelection));
2976    }
2977
2978    #[test]
2979    fn missing_filter_condition() {
2980        let w = lint(
2981            r#"
2982title: Test
2983logsource:
2984    category: test
2985filter:
2986    rules:
2987        - some-rule
2988    selection:
2989        User: admin
2990"#,
2991        );
2992        assert!(has_rule(&w, LintRule::MissingFilterCondition));
2993    }
2994
2995    #[test]
2996    fn filter_has_level_warning() {
2997        let w = lint(
2998            r#"
2999title: Test
3000logsource:
3001    category: test
3002level: high
3003filter:
3004    rules:
3005        - some-rule
3006    selection:
3007        User: admin
3008    condition: selection
3009"#,
3010        );
3011        assert!(has_rule(&w, LintRule::FilterHasLevel));
3012    }
3013
3014    #[test]
3015    fn filter_has_status_warning() {
3016        let w = lint(
3017            r#"
3018title: Test
3019logsource:
3020    category: test
3021status: test
3022filter:
3023    rules:
3024        - some-rule
3025    selection:
3026        User: admin
3027    condition: selection
3028"#,
3029        );
3030        assert!(has_rule(&w, LintRule::FilterHasStatus));
3031    }
3032
3033    #[test]
3034    fn missing_filter_logsource() {
3035        let w = lint(
3036            r#"
3037title: Test
3038filter:
3039    rules:
3040        - some-rule
3041    selection:
3042        User: admin
3043    condition: selection
3044"#,
3045        );
3046        assert!(has_rule(&w, LintRule::MissingFilterLogsource));
3047    }
3048
3049    // ── Action fragments are skipped ────────────────────────────────────
3050
3051    #[test]
3052    fn action_global_skipped() {
3053        let w = lint(
3054            r#"
3055action: global
3056title: Global Template
3057logsource:
3058    product: windows
3059"#,
3060        );
3061        assert!(w.is_empty());
3062    }
3063
3064    #[test]
3065    fn action_reset_skipped() {
3066        let w = lint(
3067            r#"
3068action: reset
3069"#,
3070        );
3071        assert!(w.is_empty());
3072    }
3073
3074    // ── New checks ──────────────────────────────────────────────────────
3075
3076    #[test]
3077    fn empty_title() {
3078        let w = lint(
3079            r#"
3080title: ''
3081logsource:
3082    category: test
3083detection:
3084    selection:
3085        field: value
3086    condition: selection
3087level: medium
3088"#,
3089        );
3090        assert!(has_rule(&w, LintRule::EmptyTitle));
3091    }
3092
3093    #[test]
3094    fn missing_level() {
3095        let w = lint(
3096            r#"
3097title: Test
3098logsource:
3099    category: test
3100detection:
3101    selection:
3102        field: value
3103    condition: selection
3104"#,
3105        );
3106        assert!(has_rule(&w, LintRule::MissingLevel));
3107    }
3108
3109    #[test]
3110    fn valid_level_no_missing_warning() {
3111        let w = lint(
3112            r#"
3113title: Test
3114logsource:
3115    category: test
3116detection:
3117    selection:
3118        field: value
3119    condition: selection
3120level: medium
3121"#,
3122        );
3123        assert!(has_no_rule(&w, LintRule::MissingLevel));
3124    }
3125
3126    #[test]
3127    fn invalid_date_feb_30() {
3128        assert!(!is_valid_date("2025-02-30"));
3129    }
3130
3131    #[test]
3132    fn invalid_date_apr_31() {
3133        assert!(!is_valid_date("2025-04-31"));
3134    }
3135
3136    #[test]
3137    fn valid_date_feb_28() {
3138        assert!(is_valid_date("2025-02-28"));
3139    }
3140
3141    #[test]
3142    fn valid_date_leap_year_feb_29() {
3143        assert!(is_valid_date("2024-02-29"));
3144    }
3145
3146    #[test]
3147    fn invalid_date_non_leap_feb_29() {
3148        assert!(!is_valid_date("2025-02-29"));
3149    }
3150
3151    #[test]
3152    fn condition_references_unknown() {
3153        let w = lint(
3154            r#"
3155title: Test
3156logsource:
3157    category: test
3158detection:
3159    selection:
3160        field: value
3161    condition: sel_main
3162level: medium
3163"#,
3164        );
3165        assert!(has_rule(&w, LintRule::ConditionReferencesUnknown));
3166    }
3167
3168    #[test]
3169    fn condition_references_valid() {
3170        let w = lint(
3171            r#"
3172title: Test
3173logsource:
3174    category: test
3175detection:
3176    selection:
3177        field: value
3178    condition: selection
3179level: medium
3180"#,
3181        );
3182        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
3183    }
3184
3185    #[test]
3186    fn condition_references_complex_valid() {
3187        let w = lint(
3188            r#"
3189title: Test
3190logsource:
3191    category: test
3192detection:
3193    sel_main:
3194        field: value
3195    filter_fp:
3196        User: admin
3197    condition: sel_main and not filter_fp
3198level: medium
3199"#,
3200        );
3201        assert!(has_no_rule(&w, LintRule::ConditionReferencesUnknown));
3202    }
3203
3204    #[test]
3205    fn empty_value_list() {
3206        let w = lint(
3207            r#"
3208title: Test
3209logsource:
3210    category: test
3211detection:
3212    selection:
3213        field: []
3214    condition: selection
3215level: medium
3216"#,
3217        );
3218        assert!(has_rule(&w, LintRule::EmptyValueList));
3219    }
3220
3221    #[test]
3222    fn not_a_mapping() {
3223        let v: serde_yaml::Value = serde_yaml::from_str("- item1\n- item2").unwrap();
3224        let w = lint_yaml_value(&v);
3225        assert!(has_rule(&w, LintRule::NotAMapping));
3226    }
3227
3228    #[test]
3229    fn lint_yaml_str_produces_spans() {
3230        let text = r#"title: Test
3231status: invalid_status
3232logsource:
3233    category: test
3234detection:
3235    selection:
3236        field: value
3237    condition: selection
3238level: medium
3239"#;
3240        let warnings = lint_yaml_str(text);
3241        // InvalidStatus points to /status which exists in the text
3242        let invalid_status = warnings.iter().find(|w| w.rule == LintRule::InvalidStatus);
3243        assert!(invalid_status.is_some(), "expected InvalidStatus warning");
3244        let span = invalid_status.unwrap().span;
3245        assert!(span.is_some(), "expected span to be resolved");
3246        // "status:" is on line 1 (0-indexed)
3247        assert_eq!(span.unwrap().start_line, 1);
3248    }
3249
3250    #[test]
3251    fn yaml_parse_error_uses_correct_rule() {
3252        let text = "title: [unclosed";
3253        let warnings = lint_yaml_str(text);
3254        assert!(has_rule(&warnings, LintRule::YamlParseError));
3255        assert!(has_no_rule(&warnings, LintRule::MissingTitle));
3256    }
3257
3258    // ── Unknown top-level keys ───────────────────────────────────────────
3259
3260    #[test]
3261    fn unknown_key_typo_detected() {
3262        let w = lint(
3263            r#"
3264title: Test
3265desciption: Typo field
3266logsource:
3267    category: test
3268detection:
3269    selection:
3270        field: value
3271    condition: selection
3272level: medium
3273"#,
3274        );
3275        assert!(has_rule(&w, LintRule::UnknownKey));
3276        let unk = w.iter().find(|w| w.rule == LintRule::UnknownKey).unwrap();
3277        assert!(unk.message.contains("desciption"));
3278        assert!(unk.message.contains("description"));
3279        assert_eq!(unk.severity, Severity::Info);
3280    }
3281
3282    #[test]
3283    fn known_keys_no_unknown_warning() {
3284        let w = lint(
3285            r#"
3286title: Test Rule
3287id: 929a690e-bef0-4204-a928-ef5e620d6fcc
3288status: test
3289description: A valid description
3290author: tester
3291date: '2025-01-01'
3292modified: '2025-06-01'
3293license: MIT
3294logsource:
3295    category: process_creation
3296    product: windows
3297detection:
3298    selection:
3299        CommandLine|contains: 'whoami'
3300    condition: selection
3301level: medium
3302tags:
3303    - attack.execution
3304references:
3305    - https://example.com
3306fields:
3307    - CommandLine
3308falsepositives:
3309    - Legitimate admin
3310"#,
3311        );
3312        assert!(has_no_rule(&w, LintRule::UnknownKey));
3313    }
3314
3315    #[test]
3316    fn custom_fields_allowed_by_spec() {
3317        // The Sigma spec v2.1.0 explicitly allows arbitrary custom top-level
3318        // fields, so keys like "simulation" and "regression_tests_path" that
3319        // are not close to any known key should NOT produce warnings.
3320        let w = lint(
3321            r#"
3322title: Test Rule
3323logsource:
3324    category: test
3325detection:
3326    selection:
3327        field: value
3328    condition: selection
3329level: medium
3330simulation:
3331    action: scan
3332regression_tests_path: tests/
3333custom_metadata: hello
3334"#,
3335        );
3336        assert!(has_no_rule(&w, LintRule::UnknownKey));
3337    }
3338
3339    #[test]
3340    fn unknown_key_typo_correlation() {
3341        // "lvel" is edit-distance 1 from "level"
3342        let w = lint(
3343            r#"
3344title: Correlation Test
3345name: test_correlation
3346correlation:
3347    type: event_count
3348    rules:
3349        - rule1
3350    group-by:
3351        - src_ip
3352    timespan: 5m
3353    condition:
3354        gte: 10
3355lvel: high
3356"#,
3357        );
3358        assert!(has_rule(&w, LintRule::UnknownKey));
3359        let unk = w.iter().find(|w| w.rule == LintRule::UnknownKey).unwrap();
3360        assert!(unk.message.contains("lvel"));
3361        assert!(unk.message.contains("level"));
3362    }
3363
3364    #[test]
3365    fn unknown_key_custom_field_filter() {
3366        // "badkey" is not close to any known key — no warning.
3367        let w = lint(
3368            r#"
3369title: Filter Test
3370logsource:
3371    category: test
3372filter:
3373    rules:
3374        - rule1
3375    selection:
3376        User: admin
3377    condition: selection
3378badkey: foo
3379"#,
3380        );
3381        assert!(has_no_rule(&w, LintRule::UnknownKey));
3382    }
3383
3384    // ── Wildcard-only value ──────────────────────────────────────────────
3385
3386    #[test]
3387    fn wildcard_only_value_string() {
3388        let w = lint(
3389            r#"
3390title: Test
3391logsource:
3392    category: test
3393detection:
3394    selection:
3395        TargetFilename: '*'
3396    condition: selection
3397level: medium
3398"#,
3399        );
3400        assert!(has_rule(&w, LintRule::WildcardOnlyValue));
3401    }
3402
3403    #[test]
3404    fn wildcard_only_value_list() {
3405        let w = lint(
3406            r#"
3407title: Test
3408logsource:
3409    category: test
3410detection:
3411    selection:
3412        TargetFilename:
3413            - '*'
3414    condition: selection
3415level: medium
3416"#,
3417        );
3418        assert!(has_rule(&w, LintRule::WildcardOnlyValue));
3419    }
3420
3421    #[test]
3422    fn wildcard_with_other_values_no_warning() {
3423        let w = lint(
3424            r#"
3425title: Test
3426logsource:
3427    category: test
3428detection:
3429    selection:
3430        TargetFilename:
3431            - '*temp*'
3432            - '*cache*'
3433    condition: selection
3434level: medium
3435"#,
3436        );
3437        assert!(has_no_rule(&w, LintRule::WildcardOnlyValue));
3438    }
3439
3440    #[test]
3441    fn wildcard_regex_no_warning() {
3442        let w = lint(
3443            r#"
3444title: Test
3445logsource:
3446    category: test
3447detection:
3448    selection:
3449        TargetFilename|re: '*'
3450    condition: selection
3451level: medium
3452"#,
3453        );
3454        assert!(has_no_rule(&w, LintRule::WildcardOnlyValue));
3455    }
3456
3457    // ── resolve_path_to_span tests ───────────────────────────────────────
3458
3459    #[test]
3460    fn resolve_path_to_span_root() {
3461        let text = "title: Test\nstatus: test\n";
3462        let span = resolve_path_to_span(text, "/");
3463        assert!(span.is_some());
3464        assert_eq!(span.unwrap().start_line, 0);
3465    }
3466
3467    #[test]
3468    fn resolve_path_to_span_top_level_key() {
3469        let text = "title: Test\nstatus: test\nlevel: high\n";
3470        let span = resolve_path_to_span(text, "/status");
3471        assert!(span.is_some());
3472        assert_eq!(span.unwrap().start_line, 1);
3473    }
3474
3475    #[test]
3476    fn resolve_path_to_span_nested_key() {
3477        let text = "title: Test\nlogsource:\n    category: test\n    product: windows\n";
3478        let span = resolve_path_to_span(text, "/logsource/product");
3479        assert!(span.is_some());
3480        assert_eq!(span.unwrap().start_line, 3);
3481    }
3482
3483    #[test]
3484    fn resolve_path_to_span_missing_key() {
3485        let text = "title: Test\nstatus: test\n";
3486        let span = resolve_path_to_span(text, "/nonexistent");
3487        assert!(span.is_none());
3488    }
3489
3490    // ── Multi-document YAML ──────────────────────────────────────────────
3491
3492    #[test]
3493    fn multi_doc_yaml_lints_all_documents() {
3494        let text = r#"title: Rule 1
3495logsource:
3496    category: test
3497detection:
3498    selection:
3499        field: value
3500    condition: selection
3501level: medium
3502---
3503title: Rule 2
3504status: bad_status
3505logsource:
3506    category: test
3507detection:
3508    selection:
3509        field: value
3510    condition: selection
3511level: medium
3512"#;
3513        let warnings = lint_yaml_str(text);
3514        // Second doc has InvalidStatus
3515        assert!(has_rule(&warnings, LintRule::InvalidStatus));
3516    }
3517
3518    // ── is_valid_timespan edge cases ─────────────────────────────────────
3519
3520    #[test]
3521    fn timespan_zero_seconds() {
3522        assert!(is_valid_timespan("0s"));
3523    }
3524
3525    #[test]
3526    fn timespan_no_digits() {
3527        assert!(!is_valid_timespan("s"));
3528    }
3529
3530    #[test]
3531    fn timespan_no_unit() {
3532        assert!(!is_valid_timespan("123"));
3533    }
3534
3535    #[test]
3536    fn timespan_invalid_unit() {
3537        assert!(!is_valid_timespan("5x"));
3538    }
3539
3540    #[test]
3541    fn timespan_valid_variants() {
3542        assert!(is_valid_timespan("30s"));
3543        assert!(is_valid_timespan("5m"));
3544        assert!(is_valid_timespan("1h"));
3545        assert!(is_valid_timespan("7d"));
3546        assert!(is_valid_timespan("1w"));
3547        assert!(is_valid_timespan("1M"));
3548        assert!(is_valid_timespan("1y"));
3549    }
3550
3551    // ── FileLintResult methods ───────────────────────────────────────────
3552
3553    #[test]
3554    fn file_lint_result_has_errors() {
3555        let result = FileLintResult {
3556            path: std::path::PathBuf::from("test.yml"),
3557            warnings: vec![
3558                warning(LintRule::TitleTooLong, "too long", "/title"),
3559                err(
3560                    LintRule::MissingCondition,
3561                    "missing",
3562                    "/detection/condition",
3563                ),
3564            ],
3565        };
3566        assert!(result.has_errors());
3567        assert_eq!(result.error_count(), 1);
3568        assert_eq!(result.warning_count(), 1);
3569    }
3570
3571    #[test]
3572    fn file_lint_result_no_errors() {
3573        let result = FileLintResult {
3574            path: std::path::PathBuf::from("test.yml"),
3575            warnings: vec![warning(LintRule::TitleTooLong, "too long", "/title")],
3576        };
3577        assert!(!result.has_errors());
3578        assert_eq!(result.error_count(), 0);
3579        assert_eq!(result.warning_count(), 1);
3580    }
3581
3582    #[test]
3583    fn file_lint_result_empty() {
3584        let result = FileLintResult {
3585            path: std::path::PathBuf::from("test.yml"),
3586            warnings: vec![],
3587        };
3588        assert!(!result.has_errors());
3589        assert_eq!(result.error_count(), 0);
3590        assert_eq!(result.warning_count(), 0);
3591    }
3592
3593    // ── LintWarning Display impl ─────────────────────────────────────────
3594
3595    #[test]
3596    fn lint_warning_display() {
3597        let w = err(
3598            LintRule::MissingTitle,
3599            "missing required field 'title'",
3600            "/title",
3601        );
3602        let display = format!("{w}");
3603        assert!(display.contains("error"));
3604        assert!(display.contains("missing_title"));
3605        assert!(display.contains("/title"));
3606    }
3607
3608    // ── New checks: missing description / author / all+re ────────────────
3609
3610    #[test]
3611    fn missing_description_info() {
3612        let w = lint(
3613            r#"
3614title: Test
3615logsource:
3616    category: test
3617detection:
3618    selection:
3619        field: value
3620    condition: selection
3621level: medium
3622"#,
3623        );
3624        assert!(has_rule(&w, LintRule::MissingDescription));
3625        let md = w
3626            .iter()
3627            .find(|w| w.rule == LintRule::MissingDescription)
3628            .unwrap();
3629        assert_eq!(md.severity, Severity::Info);
3630    }
3631
3632    #[test]
3633    fn has_description_no_info() {
3634        let w = lint(
3635            r#"
3636title: Test
3637description: A fine description
3638logsource:
3639    category: test
3640detection:
3641    selection:
3642        field: value
3643    condition: selection
3644level: medium
3645"#,
3646        );
3647        assert!(has_no_rule(&w, LintRule::MissingDescription));
3648    }
3649
3650    #[test]
3651    fn missing_author_info() {
3652        let w = lint(
3653            r#"
3654title: Test
3655logsource:
3656    category: test
3657detection:
3658    selection:
3659        field: value
3660    condition: selection
3661level: medium
3662"#,
3663        );
3664        assert!(has_rule(&w, LintRule::MissingAuthor));
3665        let ma = w
3666            .iter()
3667            .find(|w| w.rule == LintRule::MissingAuthor)
3668            .unwrap();
3669        assert_eq!(ma.severity, Severity::Info);
3670    }
3671
3672    #[test]
3673    fn has_author_no_info() {
3674        let w = lint(
3675            r#"
3676title: Test
3677author: tester
3678logsource:
3679    category: test
3680detection:
3681    selection:
3682        field: value
3683    condition: selection
3684level: medium
3685"#,
3686        );
3687        assert!(has_no_rule(&w, LintRule::MissingAuthor));
3688    }
3689
3690    #[test]
3691    fn all_with_re_warning() {
3692        let w = lint(
3693            r#"
3694title: Test
3695logsource:
3696    category: test
3697detection:
3698    selection:
3699        CommandLine|all|re:
3700            - '(?i)whoami'
3701            - '(?i)net user'
3702    condition: selection
3703level: medium
3704"#,
3705        );
3706        assert!(has_rule(&w, LintRule::AllWithRe));
3707    }
3708
3709    #[test]
3710    fn all_without_re_no_all_with_re() {
3711        let w = lint(
3712            r#"
3713title: Test
3714logsource:
3715    category: test
3716detection:
3717    selection:
3718        CommandLine|contains|all:
3719            - 'whoami'
3720            - 'net user'
3721    condition: selection
3722level: medium
3723"#,
3724        );
3725        assert!(has_no_rule(&w, LintRule::AllWithRe));
3726    }
3727
3728    #[test]
3729    fn re_without_all_no_all_with_re() {
3730        let w = lint(
3731            r#"
3732title: Test
3733logsource:
3734    category: test
3735detection:
3736    selection:
3737        CommandLine|re: '(?i)whoami|net user'
3738    condition: selection
3739level: medium
3740"#,
3741        );
3742        assert!(has_no_rule(&w, LintRule::AllWithRe));
3743    }
3744
3745    // ── Modifier compatibility checks ────────────────────────────────────
3746
3747    #[test]
3748    fn incompatible_contains_startswith() {
3749        let w = lint(
3750            r#"
3751title: Test
3752logsource:
3753    category: test
3754detection:
3755    selection:
3756        Field|contains|startswith: 'test'
3757    condition: selection
3758level: medium
3759"#,
3760        );
3761        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3762    }
3763
3764    #[test]
3765    fn incompatible_endswith_startswith() {
3766        let w = lint(
3767            r#"
3768title: Test
3769logsource:
3770    category: test
3771detection:
3772    selection:
3773        Field|endswith|startswith: 'test'
3774    condition: selection
3775level: medium
3776"#,
3777        );
3778        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3779    }
3780
3781    #[test]
3782    fn incompatible_contains_endswith() {
3783        let w = lint(
3784            r#"
3785title: Test
3786logsource:
3787    category: test
3788detection:
3789    selection:
3790        Field|contains|endswith: 'test'
3791    condition: selection
3792level: medium
3793"#,
3794        );
3795        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3796    }
3797
3798    #[test]
3799    fn incompatible_re_with_contains() {
3800        let w = lint(
3801            r#"
3802title: Test
3803logsource:
3804    category: test
3805detection:
3806    selection:
3807        Field|re|contains: '.*test.*'
3808    condition: selection
3809level: medium
3810"#,
3811        );
3812        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3813    }
3814
3815    #[test]
3816    fn incompatible_cidr_with_startswith() {
3817        let w = lint(
3818            r#"
3819title: Test
3820logsource:
3821    category: test
3822detection:
3823    selection:
3824        Field|cidr|startswith: '192.168.0.0/16'
3825    condition: selection
3826level: medium
3827"#,
3828        );
3829        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3830    }
3831
3832    #[test]
3833    fn incompatible_exists_with_contains() {
3834        let w = lint(
3835            r#"
3836title: Test
3837logsource:
3838    category: test
3839detection:
3840    selection:
3841        Field|exists|contains: true
3842    condition: selection
3843level: medium
3844"#,
3845        );
3846        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3847    }
3848
3849    #[test]
3850    fn incompatible_gt_with_contains() {
3851        let w = lint(
3852            r#"
3853title: Test
3854logsource:
3855    category: test
3856detection:
3857    selection:
3858        Field|gt|contains: 100
3859    condition: selection
3860level: medium
3861"#,
3862        );
3863        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3864    }
3865
3866    #[test]
3867    fn incompatible_regex_flags_without_re() {
3868        let w = lint(
3869            r#"
3870title: Test
3871logsource:
3872    category: test
3873detection:
3874    selection:
3875        Field|i|m: 'test'
3876    condition: selection
3877level: medium
3878"#,
3879        );
3880        assert!(has_rule(&w, LintRule::IncompatibleModifiers));
3881    }
3882
3883    #[test]
3884    fn compatible_re_with_regex_flags() {
3885        let w = lint(
3886            r#"
3887title: Test
3888logsource:
3889    category: test
3890detection:
3891    selection:
3892        Field|re|i|m|s: '(?i)test'
3893    condition: selection
3894level: medium
3895"#,
3896        );
3897        assert!(has_no_rule(&w, LintRule::IncompatibleModifiers));
3898    }
3899
3900    #[test]
3901    fn compatible_contains_all() {
3902        let w = lint(
3903            r#"
3904title: Test
3905logsource:
3906    category: test
3907detection:
3908    selection:
3909        Field|contains|all:
3910            - 'val1'
3911            - 'val2'
3912    condition: selection
3913level: medium
3914"#,
3915        );
3916        assert!(has_no_rule(&w, LintRule::IncompatibleModifiers));
3917    }
3918
3919    #[test]
3920    fn compatible_base64offset_contains() {
3921        let w = lint(
3922            r#"
3923title: Test
3924logsource:
3925    category: test
3926detection:
3927    selection:
3928        Field|base64offset|contains: 'test'
3929    condition: selection
3930level: medium
3931"#,
3932        );
3933        assert!(has_no_rule(&w, LintRule::IncompatibleModifiers));
3934    }
3935
3936    #[test]
3937    fn compatible_wide_base64() {
3938        let w = lint(
3939            r#"
3940title: Test
3941logsource:
3942    category: test
3943detection:
3944    selection:
3945        Field|wide|base64: 'test'
3946    condition: selection
3947level: medium
3948"#,
3949        );
3950        assert!(has_no_rule(&w, LintRule::IncompatibleModifiers));
3951    }
3952
3953    // ── Info/Hint severity levels ────────────────────────────────────────
3954
3955    #[test]
3956    fn severity_display() {
3957        assert_eq!(format!("{}", Severity::Error), "error");
3958        assert_eq!(format!("{}", Severity::Warning), "warning");
3959        assert_eq!(format!("{}", Severity::Info), "info");
3960        assert_eq!(format!("{}", Severity::Hint), "hint");
3961    }
3962
3963    #[test]
3964    fn file_lint_result_info_count() {
3965        let result = FileLintResult {
3966            path: std::path::PathBuf::from("test.yml"),
3967            warnings: vec![
3968                info(LintRule::MissingDescription, "missing desc", "/description"),
3969                info(LintRule::MissingAuthor, "missing author", "/author"),
3970                warning(LintRule::TitleTooLong, "too long", "/title"),
3971            ],
3972        };
3973        assert_eq!(result.info_count(), 2);
3974        assert_eq!(result.warning_count(), 1);
3975        assert_eq!(result.error_count(), 0);
3976        assert!(!result.has_errors());
3977    }
3978
3979    // ── Inline suppression parsing ───────────────────────────────────────
3980
3981    #[test]
3982    fn parse_inline_disable_all() {
3983        let text = "# rsigma-disable\ntitle: Test\n";
3984        let sup = parse_inline_suppressions(text);
3985        assert!(sup.disable_all);
3986    }
3987
3988    #[test]
3989    fn parse_inline_disable_specific_rules() {
3990        let text = "# rsigma-disable missing_description, missing_author\ntitle: Test\n";
3991        let sup = parse_inline_suppressions(text);
3992        assert!(!sup.disable_all);
3993        assert!(sup.file_disabled.contains("missing_description"));
3994        assert!(sup.file_disabled.contains("missing_author"));
3995    }
3996
3997    #[test]
3998    fn parse_inline_disable_next_line_all() {
3999        let text = "# rsigma-disable-next-line\ntitle: Test\n";
4000        let sup = parse_inline_suppressions(text);
4001        assert!(!sup.disable_all);
4002        // Line 0 has the comment, line 1 is "title: Test"
4003        assert!(sup.line_disabled.contains_key(&1));
4004        assert!(sup.line_disabled[&1].is_none()); // None means all rules
4005    }
4006
4007    #[test]
4008    fn parse_inline_disable_next_line_specific() {
4009        let text = "title: Test\n# rsigma-disable-next-line missing_level\nlevel: medium\n";
4010        let sup = parse_inline_suppressions(text);
4011        // Comment on line 1, suppresses line 2
4012        assert!(sup.line_disabled.contains_key(&2));
4013        let rules = sup.line_disabled[&2].as_ref().unwrap();
4014        assert!(rules.contains("missing_level"));
4015    }
4016
4017    #[test]
4018    fn parse_inline_no_comments() {
4019        let text = "title: Test\nstatus: test\n";
4020        let sup = parse_inline_suppressions(text);
4021        assert!(!sup.disable_all);
4022        assert!(sup.file_disabled.is_empty());
4023        assert!(sup.line_disabled.is_empty());
4024    }
4025
4026    #[test]
4027    fn parse_inline_comment_in_quoted_string() {
4028        // The '#' is inside a quoted string — should NOT be treated as a comment
4029        let text = "description: 'no # rsigma-disable here'\ntitle: Test\n";
4030        let sup = parse_inline_suppressions(text);
4031        assert!(!sup.disable_all);
4032        assert!(sup.file_disabled.is_empty());
4033    }
4034
4035    // ── Suppression filtering ────────────────────────────────────────────
4036
4037    #[test]
4038    fn apply_suppressions_disables_rule() {
4039        let warnings = vec![
4040            info(LintRule::MissingDescription, "desc", "/description"),
4041            info(LintRule::MissingAuthor, "author", "/author"),
4042            warning(LintRule::TitleTooLong, "title", "/title"),
4043        ];
4044        let mut config = LintConfig::default();
4045        config
4046            .disabled_rules
4047            .insert("missing_description".to_string());
4048        let inline = InlineSuppressions::default();
4049
4050        let result = apply_suppressions(warnings, &config, &inline);
4051        assert_eq!(result.len(), 2);
4052        assert!(
4053            result
4054                .iter()
4055                .all(|w| w.rule != LintRule::MissingDescription)
4056        );
4057    }
4058
4059    #[test]
4060    fn apply_suppressions_severity_override() {
4061        let warnings = vec![warning(LintRule::TitleTooLong, "title too long", "/title")];
4062        let mut config = LintConfig::default();
4063        config
4064            .severity_overrides
4065            .insert("title_too_long".to_string(), Severity::Info);
4066        let inline = InlineSuppressions::default();
4067
4068        let result = apply_suppressions(warnings, &config, &inline);
4069        assert_eq!(result.len(), 1);
4070        assert_eq!(result[0].severity, Severity::Info);
4071    }
4072
4073    #[test]
4074    fn apply_suppressions_inline_file_disable() {
4075        let warnings = vec![
4076            info(LintRule::MissingDescription, "desc", "/description"),
4077            info(LintRule::MissingAuthor, "author", "/author"),
4078        ];
4079        let config = LintConfig::default();
4080        let mut inline = InlineSuppressions::default();
4081        inline.file_disabled.insert("missing_author".to_string());
4082
4083        let result = apply_suppressions(warnings, &config, &inline);
4084        assert_eq!(result.len(), 1);
4085        assert_eq!(result[0].rule, LintRule::MissingDescription);
4086    }
4087
4088    #[test]
4089    fn apply_suppressions_inline_disable_all() {
4090        let warnings = vec![
4091            err(LintRule::MissingTitle, "title", "/title"),
4092            warning(LintRule::TitleTooLong, "long", "/title"),
4093        ];
4094        let config = LintConfig::default();
4095        let inline = InlineSuppressions {
4096            disable_all: true,
4097            ..Default::default()
4098        };
4099
4100        let result = apply_suppressions(warnings, &config, &inline);
4101        assert!(result.is_empty());
4102    }
4103
4104    #[test]
4105    fn apply_suppressions_inline_next_line() {
4106        let mut w1 = warning(LintRule::TitleTooLong, "long", "/title");
4107        w1.span = Some(Span {
4108            start_line: 5,
4109            start_col: 0,
4110            end_line: 5,
4111            end_col: 10,
4112        });
4113        let mut w2 = err(LintRule::InvalidStatus, "bad", "/status");
4114        w2.span = Some(Span {
4115            start_line: 6,
4116            start_col: 0,
4117            end_line: 6,
4118            end_col: 10,
4119        });
4120
4121        let config = LintConfig::default();
4122        let mut inline = InlineSuppressions::default();
4123        // Suppress all rules on line 5
4124        inline.line_disabled.insert(5, None);
4125
4126        let result = apply_suppressions(vec![w1, w2], &config, &inline);
4127        assert_eq!(result.len(), 1);
4128        assert_eq!(result[0].rule, LintRule::InvalidStatus);
4129    }
4130
4131    // ── lint_yaml_str_with_config integration ────────────────────────────
4132
4133    #[test]
4134    fn lint_with_config_disables_rules() {
4135        let text = r#"title: Test
4136logsource:
4137    category: test
4138detection:
4139    selection:
4140        field: value
4141    condition: selection
4142level: medium
4143"#;
4144        let mut config = LintConfig::default();
4145        config
4146            .disabled_rules
4147            .insert("missing_description".to_string());
4148        config.disabled_rules.insert("missing_author".to_string());
4149
4150        let warnings = lint_yaml_str_with_config(text, &config);
4151        assert!(
4152            !warnings
4153                .iter()
4154                .any(|w| w.rule == LintRule::MissingDescription)
4155        );
4156        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
4157    }
4158
4159    #[test]
4160    fn lint_with_inline_disable_next_line() {
4161        let text = r#"title: Test
4162# rsigma-disable-next-line missing_level
4163logsource:
4164    category: test
4165detection:
4166    selection:
4167        field: value
4168    condition: selection
4169"#;
4170        // Note: missing_level is on the logsource line... actually we need to think about
4171        // where the warning span resolves to. The warning for missing_level has path /level,
4172        // and won't have a span matching line 2. Let's use a config-based suppression
4173        // instead for this test.
4174        let config = LintConfig::default();
4175        let warnings = lint_yaml_str_with_config(text, &config);
4176        // This test verifies that inline parsing doesn't break normal linting
4177        assert!(warnings.iter().any(|w| w.rule == LintRule::MissingLevel));
4178    }
4179
4180    #[test]
4181    fn lint_with_inline_file_disable() {
4182        let text = r#"# rsigma-disable missing_description, missing_author
4183title: Test
4184logsource:
4185    category: test
4186detection:
4187    selection:
4188        field: value
4189    condition: selection
4190level: medium
4191"#;
4192        let config = LintConfig::default();
4193        let warnings = lint_yaml_str_with_config(text, &config);
4194        assert!(
4195            !warnings
4196                .iter()
4197                .any(|w| w.rule == LintRule::MissingDescription)
4198        );
4199        assert!(!warnings.iter().any(|w| w.rule == LintRule::MissingAuthor));
4200    }
4201
4202    #[test]
4203    fn lint_with_inline_disable_all() {
4204        let text = r#"# rsigma-disable
4205title: Test
4206status: invalid_status
4207logsource:
4208    category: test
4209detection:
4210    selection:
4211        field: value
4212    condition: selection
4213"#;
4214        let config = LintConfig::default();
4215        let warnings = lint_yaml_str_with_config(text, &config);
4216        assert!(warnings.is_empty());
4217    }
4218
4219    // ── LintConfig ───────────────────────────────────────────────────────
4220
4221    #[test]
4222    fn lint_config_merge() {
4223        let mut base = LintConfig::default();
4224        base.disabled_rules.insert("rule_a".to_string());
4225        base.severity_overrides
4226            .insert("rule_b".to_string(), Severity::Info);
4227
4228        let other = LintConfig {
4229            disabled_rules: ["rule_c".to_string()].into_iter().collect(),
4230            severity_overrides: [("rule_d".to_string(), Severity::Hint)]
4231                .into_iter()
4232                .collect(),
4233        };
4234
4235        base.merge(&other);
4236        assert!(base.disabled_rules.contains("rule_a"));
4237        assert!(base.disabled_rules.contains("rule_c"));
4238        assert_eq!(base.severity_overrides.get("rule_b"), Some(&Severity::Info));
4239        assert_eq!(base.severity_overrides.get("rule_d"), Some(&Severity::Hint));
4240    }
4241
4242    #[test]
4243    fn lint_config_is_disabled() {
4244        let mut config = LintConfig::default();
4245        config.disabled_rules.insert("missing_title".to_string());
4246        assert!(config.is_disabled(&LintRule::MissingTitle));
4247        assert!(!config.is_disabled(&LintRule::EmptyTitle));
4248    }
4249
4250    #[test]
4251    fn find_yaml_comment_basic() {
4252        assert_eq!(find_yaml_comment("# comment"), Some(0));
4253        assert_eq!(find_yaml_comment("key: value # comment"), Some(11));
4254        assert_eq!(find_yaml_comment("key: 'value # not comment'"), None);
4255        assert_eq!(find_yaml_comment("key: \"value # not comment\""), None);
4256        assert_eq!(find_yaml_comment("key: value"), None);
4257    }
4258
4259    // ── Fix generation tests ─────────────────────────────────────────────
4260
4261    fn find_fix(warnings: &[LintWarning], rule: LintRule) -> Option<&Fix> {
4262        warnings
4263            .iter()
4264            .find(|w| w.rule == rule)
4265            .and_then(|w| w.fix.as_ref())
4266    }
4267
4268    fn fix_summary(fix: &Fix) -> String {
4269        use std::fmt::Write;
4270        let mut s = String::new();
4271        writeln!(s, "title: {}", fix.title).unwrap();
4272        writeln!(s, "disposition: {:?}", fix.disposition).unwrap();
4273        for (i, p) in fix.patches.iter().enumerate() {
4274            match p {
4275                FixPatch::ReplaceValue { path, new_value } => {
4276                    writeln!(s, "patch[{i}]: ReplaceValue {path} -> {new_value}").unwrap();
4277                }
4278                FixPatch::ReplaceKey { path, new_key } => {
4279                    writeln!(s, "patch[{i}]: ReplaceKey {path} -> {new_key}").unwrap();
4280                }
4281                FixPatch::Remove { path } => {
4282                    writeln!(s, "patch[{i}]: Remove {path}").unwrap();
4283                }
4284            }
4285        }
4286        s
4287    }
4288
4289    #[test]
4290    fn fix_invalid_status() {
4291        let w = lint(
4292            r#"
4293title: Test
4294status: expreimental
4295logsource:
4296    category: test
4297detection:
4298    sel:
4299        field: value
4300    condition: sel
4301"#,
4302        );
4303        let fix = find_fix(&w, LintRule::InvalidStatus).expect("should have fix");
4304        insta::assert_snapshot!(fix_summary(fix), @r"
4305        title: replace 'expreimental' with 'experimental'
4306        disposition: Safe
4307        patch[0]: ReplaceValue /status -> experimental
4308        ");
4309    }
4310
4311    #[test]
4312    fn fix_invalid_level() {
4313        let w = lint(
4314            r#"
4315title: Test
4316level: hgih
4317logsource:
4318    category: test
4319detection:
4320    sel:
4321        field: value
4322    condition: sel
4323"#,
4324        );
4325        let fix = find_fix(&w, LintRule::InvalidLevel).expect("should have fix");
4326        insta::assert_snapshot!(fix_summary(fix), @r"
4327        title: replace 'hgih' with 'high'
4328        disposition: Safe
4329        patch[0]: ReplaceValue /level -> high
4330        ");
4331    }
4332
4333    #[test]
4334    fn fix_non_lowercase_key() {
4335        let w = lint(
4336            r#"
4337title: Test
4338Status: test
4339logsource:
4340    category: test
4341detection:
4342    sel:
4343        field: value
4344    condition: sel
4345"#,
4346        );
4347        let fix = find_fix(&w, LintRule::NonLowercaseKey).expect("should have fix");
4348        insta::assert_snapshot!(fix_summary(fix), @r"
4349        title: rename 'Status' to 'status'
4350        disposition: Safe
4351        patch[0]: ReplaceKey /Status -> status
4352        ");
4353    }
4354
4355    #[test]
4356    fn fix_logsource_value_not_lowercase() {
4357        let w = lint(
4358            r#"
4359title: Test
4360logsource:
4361    category: Test
4362detection:
4363    sel:
4364        field: value
4365    condition: sel
4366"#,
4367        );
4368        let fix = find_fix(&w, LintRule::LogsourceValueNotLowercase).expect("should have fix");
4369        insta::assert_snapshot!(fix_summary(fix), @r"
4370        title: lowercase 'Test' to 'test'
4371        disposition: Safe
4372        patch[0]: ReplaceValue /logsource/category -> test
4373        ");
4374    }
4375
4376    #[test]
4377    fn fix_unknown_key_typo() {
4378        let w = lint(
4379            r#"
4380title: Test
4381desciption: Typo field
4382logsource:
4383    category: test
4384detection:
4385    sel:
4386        field: value
4387    condition: sel
4388level: medium
4389"#,
4390        );
4391        let fix = find_fix(&w, LintRule::UnknownKey).expect("should have fix");
4392        insta::assert_snapshot!(fix_summary(fix), @r"
4393        title: rename 'desciption' to 'description'
4394        disposition: Safe
4395        patch[0]: ReplaceKey /desciption -> description
4396        ");
4397    }
4398
4399    #[test]
4400    fn fix_duplicate_tags() {
4401        let w = lint(
4402            r#"
4403title: Test
4404status: test
4405tags:
4406    - attack.execution
4407    - attack.execution
4408logsource:
4409    category: test
4410detection:
4411    sel:
4412        field: value
4413    condition: sel
4414"#,
4415        );
4416        let fix = find_fix(&w, LintRule::DuplicateTags).expect("should have fix");
4417        insta::assert_snapshot!(fix_summary(fix), @r"
4418        title: remove duplicate tag 'attack.execution'
4419        disposition: Safe
4420        patch[0]: Remove /tags/1
4421        ");
4422    }
4423
4424    #[test]
4425    fn fix_duplicate_references() {
4426        let w = lint(
4427            r#"
4428title: Test
4429references:
4430    - https://example.com
4431    - https://example.com
4432logsource:
4433    category: test
4434detection:
4435    sel:
4436        field: value
4437    condition: sel
4438"#,
4439        );
4440        let fix = find_fix(&w, LintRule::DuplicateReferences).expect("should have fix");
4441        insta::assert_snapshot!(fix_summary(fix), @r"
4442        title: remove duplicate reference
4443        disposition: Safe
4444        patch[0]: Remove /references/1
4445        ");
4446    }
4447
4448    #[test]
4449    fn fix_duplicate_fields() {
4450        let w = lint(
4451            r#"
4452title: Test
4453fields:
4454    - CommandLine
4455    - CommandLine
4456logsource:
4457    category: test
4458detection:
4459    sel:
4460        field: value
4461    condition: sel
4462"#,
4463        );
4464        let fix = find_fix(&w, LintRule::DuplicateFields).expect("should have fix");
4465        insta::assert_snapshot!(fix_summary(fix), @r"
4466        title: remove duplicate field
4467        disposition: Safe
4468        patch[0]: Remove /fields/1
4469        ");
4470    }
4471
4472    #[test]
4473    fn fix_all_with_re() {
4474        let w = lint(
4475            r#"
4476title: Test
4477logsource:
4478    category: test
4479detection:
4480    sel:
4481        Cmd|all|re:
4482            - foo.*
4483            - bar.*
4484    condition: sel
4485"#,
4486        );
4487        let fix = find_fix(&w, LintRule::AllWithRe).expect("should have fix");
4488        insta::assert_snapshot!(fix_summary(fix), @r"
4489        title: remove |all from 'Cmd|all|re'
4490        disposition: Safe
4491        patch[0]: ReplaceKey /detection/sel/Cmd|all|re -> Cmd|re
4492        ");
4493    }
4494
4495    #[test]
4496    fn fix_single_value_all_modifier() {
4497        let w = lint(
4498            r#"
4499title: Test
4500logsource:
4501    category: test
4502detection:
4503    sel:
4504        Cmd|all|contains:
4505            - only_one
4506    condition: sel
4507"#,
4508        );
4509        let fix = find_fix(&w, LintRule::SingleValueAllModifier).expect("should have fix");
4510        insta::assert_snapshot!(fix_summary(fix), @r"
4511        title: remove |all from 'Cmd|all|contains'
4512        disposition: Safe
4513        patch[0]: ReplaceKey /detection/sel/Cmd|all|contains -> Cmd|contains
4514        ");
4515    }
4516
4517    #[test]
4518    fn fix_wildcard_only_value() {
4519        let w = lint(
4520            r#"
4521title: Test
4522logsource:
4523    category: test
4524detection:
4525    sel:
4526        CommandLine: '*'
4527    condition: sel
4528"#,
4529        );
4530        let fix = find_fix(&w, LintRule::WildcardOnlyValue).expect("should have fix");
4531        insta::assert_snapshot!(fix_summary(fix), @r"
4532        title: replace with 'CommandLine|exists: true'
4533        disposition: Safe
4534        patch[0]: ReplaceKey /detection/sel/CommandLine -> CommandLine|exists
4535        patch[1]: ReplaceValue /detection/sel/CommandLine|exists -> true
4536        ");
4537    }
4538
4539    #[test]
4540    fn fix_filter_has_level() {
4541        let w = lint(
4542            r#"
4543title: Test
4544logsource:
4545    category: test
4546level: high
4547filter:
4548    rules:
4549        - rule1
4550    selection:
4551        User: admin
4552    condition: selection
4553"#,
4554        );
4555        let fix = find_fix(&w, LintRule::FilterHasLevel).expect("should have fix");
4556        insta::assert_snapshot!(fix_summary(fix), @r"
4557        title: remove 'level' from filter rule
4558        disposition: Safe
4559        patch[0]: Remove /level
4560        ");
4561    }
4562
4563    #[test]
4564    fn fix_filter_has_status() {
4565        let w = lint(
4566            r#"
4567title: Test
4568logsource:
4569    category: test
4570status: test
4571filter:
4572    rules:
4573        - rule1
4574    selection:
4575        User: admin
4576    condition: selection
4577"#,
4578        );
4579        let fix = find_fix(&w, LintRule::FilterHasStatus).expect("should have fix");
4580        insta::assert_snapshot!(fix_summary(fix), @r"
4581        title: remove 'status' from filter rule
4582        disposition: Safe
4583        patch[0]: Remove /status
4584        ");
4585    }
4586
4587    #[test]
4588    fn no_fix_for_unfixable_rule() {
4589        let w = lint(
4590            r#"
4591title: Test
4592logsource:
4593    category: test
4594"#,
4595        );
4596        assert!(has_rule(&w, LintRule::MissingDetection));
4597        assert!(find_fix(&w, LintRule::MissingDetection).is_none());
4598    }
4599
4600    #[test]
4601    fn no_fix_for_far_invalid_status() {
4602        let w = lint(
4603            r#"
4604title: Test
4605status: totallyinvalidxyz
4606logsource:
4607    category: test
4608detection:
4609    sel:
4610        field: value
4611    condition: sel
4612"#,
4613        );
4614        assert!(has_rule(&w, LintRule::InvalidStatus));
4615        assert!(
4616            find_fix(&w, LintRule::InvalidStatus).is_none(),
4617            "no fix when edit distance is too large"
4618        );
4619    }
4620}