Skip to main content

normalize_rules_config/
lib.rs

1//! Shared rule configuration types for all normalize rule engines.
2//!
3//! Both syntax rules and fact rules use `RulesConfig` as their configuration type,
4//! loaded from `[rules]` in `.normalize/config.toml`.
5
6use std::collections::{HashMap, HashSet};
7use std::path::Path;
8
9/// Severity level for rule findings.
10///
11/// Shared across all rule engines (syntax, fact, native). `DiagnosticLevel` in
12/// `normalize-facts-rules-api` is the ABI-stable counterpart.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
14#[serde(rename_all = "lowercase")]
15pub enum Severity {
16    Error,
17    #[default]
18    Warning,
19    Info,
20    Hint,
21}
22
23impl std::fmt::Display for Severity {
24    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25        match self {
26            Severity::Error => write!(f, "error"),
27            Severity::Warning => write!(f, "warning"),
28            Severity::Info => write!(f, "info"),
29            Severity::Hint => write!(f, "hint"),
30        }
31    }
32}
33
34impl std::str::FromStr for Severity {
35    type Err = String;
36
37    fn from_str(s: &str) -> Result<Self, Self::Err> {
38        match s.to_lowercase().as_str() {
39            "error" => Ok(Severity::Error),
40            "warning" | "warn" => Ok(Severity::Warning),
41            "info" | "note" => Ok(Severity::Info),
42            "hint" => Ok(Severity::Hint),
43            _ => Err(format!("unknown severity: {}", s)),
44        }
45    }
46}
47
48/// An external tool that emits SARIF 2.1.0 output (used with `--engine sarif`).
49///
50/// Configured via `[[rules.sarif-tools]]` in `.normalize/config.toml`.
51#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default, schemars::JsonSchema)]
52#[serde(default)]
53pub struct SarifTool {
54    /// Display name for this tool (used as `source` in DiagnosticsReport).
55    pub name: String,
56    /// Command to run. `{root}` is replaced with the project root path.
57    /// Example: `["npx", "eslint", "--format", "json", "{root}"]`
58    pub command: Vec<String>,
59    /// Glob patterns (relative to project root) for files this tool watches.
60    ///
61    /// When set, `normalize rules run` caches this tool's SARIF output keyed by the
62    /// maximum mtime of all matching files. On warm runs where no watched file has
63    /// changed, the tool is skipped and results are served from cache.
64    ///
65    /// If empty (the default), the tool always re-runs (no caching).
66    ///
67    /// Example: `["**/*.py"]` for a Python linter, `["**/*.ts"]` for a TypeScript checker.
68    #[serde(default)]
69    pub watch: Vec<String>,
70}
71
72/// Common per-rule configuration fields shared across all rule engines.
73///
74/// Used under `[rules.rule."rule-id"]` in `.normalize/config.toml`. These fields
75/// apply to every rule regardless of engine. Rule-specific configuration
76/// (e.g. thresholds, filenames) is defined as typed structs owned by each
77/// rule and deserialized from the same TOML table via `#[serde(flatten)]`.
78#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default, schemars::JsonSchema)]
79#[serde(default)]
80pub struct RuleOverride {
81    /// Override the rule's severity (error, warning, info, hint).
82    pub severity: Option<String>,
83    /// Enable or disable the rule.
84    pub enabled: Option<bool>,
85    /// Additional file patterns to allow (skip) for this rule.
86    #[serde(default)]
87    pub allow: Vec<String>,
88    /// Additional tags to add to this rule (appends to built-in tags).
89    #[serde(default)]
90    pub tags: Vec<String>,
91    /// Raw TOML table for rule-specific fields. Each rule deserializes its
92    /// own typed config from this via [`RuleOverride::rule_config`].
93    #[serde(flatten)]
94    #[schemars(skip)]
95    pub extra: std::collections::HashMap<String, toml::Value>,
96}
97
98pub fn deserialize_one_or_many<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
99where
100    D: serde::Deserializer<'de>,
101{
102    use serde::Deserialize as _;
103
104    #[derive(serde::Deserialize)]
105    #[serde(untagged)]
106    enum OneOrMany {
107        One(String),
108        Many(Vec<String>),
109    }
110
111    match OneOrMany::deserialize(deserializer)? {
112        OneOrMany::One(s) => Ok(vec![s]),
113        OneOrMany::Many(v) => Ok(v),
114    }
115}
116
117impl normalize_core::Merge for RuleOverride {
118    /// Merge two `RuleOverride` values, with `other` taking priority.
119    ///
120    /// - `Option` fields: `other`'s value wins if `Some`; falls back to `self`.
121    /// - Vec fields (`allow`, `tags`): if `other`'s field is non-empty it replaces
122    ///   `self`'s field entirely; an empty `other` field inherits from `self`.
123    /// - `extra` HashMap: merged key-by-key, `other`'s keys override `self`'s.
124    fn merge(self, other: Self) -> Self {
125        let mut extra = self.extra;
126        extra.extend(other.extra);
127        Self {
128            severity: other.severity.or(self.severity),
129            enabled: other.enabled.or(self.enabled),
130            allow: if other.allow.is_empty() {
131                self.allow
132            } else {
133                other.allow
134            },
135            tags: if other.tags.is_empty() {
136                self.tags
137            } else {
138                other.tags
139            },
140            extra,
141        }
142    }
143}
144
145impl RuleOverride {
146    /// Deserialize rule-specific config from the `extra` fields.
147    ///
148    /// Each rule defines a typed config struct and calls this to extract it.
149    /// Unknown fields in `extra` that don't match `T`'s fields are ignored.
150    ///
151    /// ```ignore
152    /// #[derive(Deserialize, Default)]
153    /// struct LargeFileConfig { threshold: Option<u64> }
154    ///
155    /// let cfg: LargeFileConfig = override_.rule_config();
156    /// let threshold = cfg.threshold.unwrap_or(500);
157    /// ```
158    pub fn rule_config<T: serde::de::DeserializeOwned + Default>(&self) -> T {
159        let table = toml::Value::Table(
160            self.extra
161                .iter()
162                .map(|(k, v)| (k.clone(), v.clone()))
163                .collect(),
164        );
165        table.try_into().unwrap_or_default()
166    }
167}
168
169/// Rules configuration covering all engines (syntax, fact, native, sarif).
170///
171/// Deserialized from `[rules]` in `.normalize/config.toml`. Per-rule overrides
172/// live under `[rules.rule."<id>"]`. Engine-wide settings live as bare keys
173/// directly under `[rules]` (e.g. `global-allow`, `sarif-tools`).
174///
175/// **Legacy layout** (`[rules."<id>"]` directly under `[rules]`) is still parsed
176/// for one release with a stderr deprecation warning. It is unsound in principle
177/// because a rule named `global-allow` would collide with the engine-wide key —
178/// the new nested layout removes the namespace collision.
179#[derive(Debug, Clone, serde::Serialize, Default, schemars::JsonSchema)]
180pub struct RulesConfig {
181    /// Allow patterns applied to every rule (e.g. `["**/tests/fixtures/**"]`).
182    /// Entries here skip violations in matching files across all rules.
183    #[serde(
184        rename = "global-allow",
185        default,
186        skip_serializing_if = "Vec::is_empty"
187    )]
188    pub global_allow: Vec<String>,
189    /// External tools that emit SARIF 2.1.0 output (the `sarif` engine).
190    #[serde(rename = "sarif-tools", default, skip_serializing_if = "Vec::is_empty")]
191    pub sarif_tools: Vec<SarifTool>,
192    /// Per-rule configuration overrides, keyed by rule ID.
193    ///
194    /// Serialized under the `rule` sub-table (`[rules.rule."<id>"]`). On
195    /// deserialization, both the new nested layout and the legacy flat layout
196    /// (`[rules."<id>"]`) are accepted; the legacy form emits a stderr
197    /// deprecation warning.
198    #[serde(default, rename = "rule", skip_serializing_if = "HashMap::is_empty")]
199    pub rules: HashMap<String, RuleOverride>,
200}
201
/// Bare keys under `[rules]` that have engine-wide meaning.
///
/// Any other top-level key found under `[rules]` is read as a legacy
/// `[rules."<id>"]` per-rule override, which triggers a deprecation warning.
const RULES_RESERVED_KEYS: &[&str] = &[
    "global-allow",
    "sarif-tools",
    "rule",
];
206
207impl<'de> serde::Deserialize<'de> for RulesConfig {
208    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
209    where
210        D: serde::Deserializer<'de>,
211    {
212        // Capture every entry as a generic toml::Value first so we can route bare
213        // engine keys, the new `rule` sub-table, and legacy per-rule entries
214        // (`[rules."<id>"]`) separately.
215        let raw: HashMap<String, toml::Value> = HashMap::deserialize(deserializer)?;
216
217        let mut global_allow: Vec<String> = Vec::new();
218        let mut sarif_tools: Vec<SarifTool> = Vec::new();
219        let mut rules: HashMap<String, RuleOverride> = HashMap::new();
220        let mut legacy_rule_ids: Vec<String> = Vec::new();
221
222        for (key, value) in raw {
223            match key.as_str() {
224                "global-allow" => {
225                    global_allow = value.try_into().map_err(serde::de::Error::custom)?;
226                }
227                "sarif-tools" => {
228                    sarif_tools = value.try_into().map_err(serde::de::Error::custom)?;
229                }
230                "rule" => {
231                    let nested: HashMap<String, RuleOverride> =
232                        value.try_into().map_err(serde::de::Error::custom)?;
233                    // Nested-layout entries take precedence over any legacy entries
234                    // with the same id (extend overwrites).
235                    rules.extend(nested);
236                }
237                _ => {
238                    // Legacy: bare key is a rule id ([rules."<id>"]).
239                    let override_: RuleOverride =
240                        value.try_into().map_err(serde::de::Error::custom)?;
241                    legacy_rule_ids.push(key.clone());
242                    // Don't overwrite a nested entry with the same id if one
243                    // already landed; nested wins.
244                    rules.entry(key).or_insert(override_);
245                }
246            }
247        }
248
249        if !legacy_rule_ids.is_empty() {
250            legacy_rule_ids.sort();
251            eprintln!(
252                "warning: deprecated [rules.\"<id>\"] layout in .normalize/config.toml — \
253                 migrate to [rules.rule.\"<id>\"] (affected rule ids: {}). \
254                 The legacy layout will be removed in a future release.",
255                legacy_rule_ids.join(", "),
256            );
257        }
258
259        // Sanity check: forbid any future engine key colliding with the
260        // reserved bare-key namespace from being interpreted as a rule.
261        // (Currently RULES_RESERVED_KEYS is only used for documentation /
262        // future-proofing — every reserved key is already handled above.)
263        let _ = RULES_RESERVED_KEYS;
264
265        Ok(RulesConfig {
266            global_allow,
267            sarif_tools,
268            rules,
269        })
270    }
271}
272
273impl normalize_core::Merge for RulesConfig {
274    /// Merge two `RulesConfig` values, with `other` taking priority.
275    ///
276    /// - Vec fields (`global_allow`, `sarif_tools`): if `other`'s field is non-empty
277    ///   it replaces `self`'s field; an empty `other` field inherits from `self`.
278    ///   **This means you cannot reset a Vec to empty via merge** — an empty `other`
279    ///   vec is treated as "no override" rather than "clear the list".
280    /// - `rules` HashMap: merged using `extend`, so `other`'s keys override `self`'s
281    ///   keys. Keys present only in `self` are preserved.
282    fn merge(self, other: Self) -> Self {
283        let global_allow = if other.global_allow.is_empty() {
284            self.global_allow
285        } else {
286            other.global_allow
287        };
288        let sarif_tools = if other.sarif_tools.is_empty() {
289            self.sarif_tools
290        } else {
291            other.sarif_tools
292        };
293        let mut merged_rules = self.rules;
294        merged_rules.extend(other.rules);
295        Self {
296            global_allow,
297            sarif_tools,
298            rules: merged_rules,
299        }
300    }
301}
302
303/// Configuration for directory walking behavior.
304///
305/// Controls which ignore files are respected and which directories are always
306/// excluded. Deserialized from `[walk]` in `.normalize/config.toml`.
307///
308/// ```toml
309/// [walk]
310/// ignore_files = [".gitignore"]   # default
311/// exclude = []                    # default (empty); `normalize init` seeds [".git/"]
312/// ```
313#[derive(
314    Debug,
315    Clone,
316    serde::Deserialize,
317    serde::Serialize,
318    Default,
319    schemars::JsonSchema,
320    normalize_core::Merge,
321)]
322#[serde(default)]
323pub struct WalkConfig {
324    /// List of gitignore-format files to respect. Default: `[".gitignore"]`.
325    /// Set to `[]` to disable gitignore-based exclusion entirely.
326    pub ignore_files: Option<Vec<String>>,
327    /// Additional gitignore-style patterns to always skip. Default: empty
328    /// (per Rust `Default` convention — opinions live in
329    /// [`crate::NormalizeConfig::bootstrap`] in the main crate, which seeds
330    /// `[".git/"]` for new projects).
331    ///
332    /// Patterns use the same syntax as `.gitignore`:
333    /// - A pattern with no slash (e.g. `node_modules`, `.git`) matches any
334    ///   directory or file with that basename, at any depth.
335    /// - A pattern with a slash (e.g. `crates/foo/build/`, `**/target/`) is
336    ///   anchored relative to the project root.
337    /// - Trailing `/` restricts the match to directories.
338    /// - `**` matches any number of path segments.
339    pub exclude: Option<Vec<String>>,
340}
341
342impl WalkConfig {
343    /// Returns the ignore files to respect, defaulting to `[".gitignore"]`.
344    pub fn ignore_files(&self) -> Vec<&str> {
345        match &self.ignore_files {
346            Some(v) => v.iter().map(|s| s.as_str()).collect(),
347            None => vec![".gitignore"],
348        }
349    }
350
351    /// Returns the directory patterns to exclude. Empty when no patterns are
352    /// configured — `Default` is empty per Rust convention. Opinions like
353    /// "always exclude `.git/`" belong in a typed bootstrap constructor on the
354    /// owning config (see `NormalizeConfig::bootstrap`), not here.
355    pub fn exclude(&self) -> Vec<&str> {
356        match &self.exclude {
357            Some(v) => v.iter().map(|s| s.as_str()).collect(),
358            None => Vec::new(),
359        }
360    }
361
362    /// Compile the configured `exclude` patterns into a gitignore matcher anchored at `root`.
363    ///
364    /// Returns an empty matcher if no patterns are configured. Invalid patterns
365    /// are silently dropped (consistent with how `.gitignore` itself behaves).
366    pub fn compiled_excludes(&self, root: &Path) -> ignore::gitignore::Gitignore {
367        let mut builder = ignore::gitignore::GitignoreBuilder::new(root);
368        for pat in self.exclude() {
369            // GitignoreBuilder::add_line silently no-ops on bad patterns.
370            let _ = builder.add_line(None, pat);
371        }
372        builder.build().unwrap_or_else(|_| {
373            // Fallback: empty matcher (matches nothing).
374            ignore::gitignore::Gitignore::empty()
375        })
376    }
377
378    /// Check whether a path (relative to `root`) matches any exclude pattern.
379    ///
380    /// `is_dir` distinguishes directories from files (relevant for trailing-`/` patterns).
381    /// For repeat queries, prefer building [`compiled_excludes`] once and querying
382    /// it directly; this method is a convenience for one-shot checks.
383    pub fn is_excluded_path(&self, root: &Path, rel_path: &Path, is_dir: bool) -> bool {
384        let gi = self.compiled_excludes(root);
385        gi.matched_path_or_any_parents(rel_path, is_dir).is_ignore()
386    }
387}
388
389/// Pre-walk path filter for `--only` / `--exclude` glob patterns.
390///
391/// Compiled once in the service layer and threaded to each rule engine so files
392/// can be skipped *before* parsing or walking. The post-walk filter in the
393/// service layer remains as a safety net.
394#[derive(Debug, Clone, Default)]
395pub struct PathFilter {
396    pub only: Vec<glob::Pattern>,
397    pub exclude: Vec<glob::Pattern>,
398}
399
400impl PathFilter {
401    /// Build a `PathFilter` from raw glob strings (as provided by CLI flags).
402    /// Invalid patterns are silently dropped (matches the post-walk filter behavior).
403    pub fn new(only: &[String], exclude: &[String]) -> Self {
404        Self {
405            only: only
406                .iter()
407                .filter_map(|s| glob::Pattern::new(s).ok())
408                .collect(),
409            exclude: exclude
410                .iter()
411                .filter_map(|s| glob::Pattern::new(s).ok())
412                .collect(),
413        }
414    }
415
416    /// Returns `true` if this filter has no patterns (i.e. passes everything).
417    pub fn is_empty(&self) -> bool {
418        self.only.is_empty() && self.exclude.is_empty()
419    }
420
421    /// Check whether a relative path passes the filter.
422    ///
423    /// - If `only` is non-empty, the path must match at least one `only` pattern.
424    /// - If `exclude` is non-empty, the path must not match any `exclude` pattern.
425    pub fn matches(&self, rel_path: &str) -> bool {
426        if !self.exclude.is_empty() && self.exclude.iter().any(|p| p.matches(rel_path)) {
427            return false;
428        }
429        if !self.only.is_empty() && !self.only.iter().any(|p| p.matches(rel_path)) {
430            return false;
431        }
432        true
433    }
434
435    /// Convenience: check a `Path` by converting to a string first.
436    pub fn matches_path(&self, rel_path: &Path) -> bool {
437        self.matches(&rel_path.to_string_lossy())
438    }
439}
440
/// Surgical-invalidation diff between two rule configurations.
///
/// Produced by [`ConfigDiff::compute`] to classify what changed between an old
/// `(RulesConfig, WalkConfig)` snapshot and a new one. Consumers (the daemon's
/// config-reload handler) use the classification to pick the cheapest correct
/// invalidation strategy:
///
/// - **Tier 1 (filter-only):** severities, allow-lists, or `enabled = false`
///   changed. The cached findings are still correct — applying the new config's
///   filter at serve time is enough. No re-evaluation.
/// - **Tier 2 (per-rule re-run):** specific rules' behavior changed (newly
///   enabled, rule-specific config field, or backing `.scm` file edited).
///   Only those rules need to be re-evaluated; everything else stays cached.
/// - **Tier 3 (full reprime):** `[walk] exclude` changed (file set differs) or
///   the diff doesn't fit the above. Conservative fallback.
///
/// `.scm` rule-definition file diffs are tracked outside this struct because
/// this crate has no filesystem dependency; the daemon hashes
/// `.normalize/rules/**` itself and unions its result into `rules_to_rerun`
/// before consulting [`ConfigDiff::is_filter_only`] / [`ConfigDiff::requires_full_reprime`].
#[derive(Clone, Debug, Default)]
pub struct ConfigDiff {
    /// Rules whose evaluation behavior changed (newly-enabled, rule-specific
    /// config field, or `.scm` definition edited). These need to be re-run.
    ///
    /// [`ConfigDiff::compute`] also inserts the sentinel id
    /// `__sarif_tools_changed__` here when the `sarif-tools` list differs.
    pub rules_to_rerun: HashSet<String>,
    /// Rules that became disabled. Their cached findings should be dropped at
    /// serve time (no re-run needed).
    pub rules_disabled: HashSet<String>,
    /// True if any allow-list (per-rule `allow` or top-level `global-allow`)
    /// changed without a corresponding behavior change. Filter at serve time.
    ///
    /// [`ConfigDiff::compute`] also sets this when a rule's `tags` change,
    /// since tags are likewise treated as filter-only.
    pub allow_lists_changed: bool,
    /// True if any rule's severity changed without a corresponding behavior
    /// change. Override severity at serve time.
    pub severities_changed: bool,
    /// True if `[walk] exclude` changed. Forces a full reprime (Tier 3) because
    /// the file set may differ.
    pub walk_exclude_changed: bool,
}
479
480impl ConfigDiff {
481    /// Compute a diff describing what changed between `old` and `new`.
482    ///
483    /// The diff classifies each per-rule change into the cheapest tier that's
484    /// still correct. Adding/removing a rule entry that flips `enabled` from
485    /// the implicit default is treated the same as toggling it explicitly.
486    pub fn compute(
487        old_rules: &RulesConfig,
488        new_rules: &RulesConfig,
489        old_walk: &WalkConfig,
490        new_walk: &WalkConfig,
491    ) -> Self {
492        let mut diff = ConfigDiff::default();
493
494        // Walk-exclude changed → Tier 3.
495        if old_walk.exclude() != new_walk.exclude() {
496            diff.walk_exclude_changed = true;
497        }
498
499        // Global allow change → filter-only.
500        if old_rules.global_allow != new_rules.global_allow {
501            diff.allow_lists_changed = true;
502        }
503
504        // Walk every rule id present in either snapshot.
505        let ids: HashSet<&str> = old_rules
506            .rules
507            .keys()
508            .chain(new_rules.rules.keys())
509            .map(String::as_str)
510            .collect();
511
512        for id in ids {
513            let old = old_rules.rules.get(id);
514            let new = new_rules.rules.get(id);
515
516            // Enabled-state transitions. `None` ≡ default-enabled, so missing
517            // entry vs `enabled = Some(true)` is *not* a state change.
518            let was_enabled = old.is_none_or(|o| o.enabled.unwrap_or(true));
519            let is_enabled = new.is_none_or(|n| n.enabled.unwrap_or(true));
520            match (was_enabled, is_enabled) {
521                (true, false) => {
522                    diff.rules_disabled.insert(id.to_string());
523                }
524                (false, true) => {
525                    // Newly enabled — must re-evaluate.
526                    diff.rules_to_rerun.insert(id.to_string());
527                }
528                _ => {}
529            }
530
531            // Severity change is filter-only; only flag it when the rule is
532            // (and stays) enabled — otherwise the disabled/re-enabled paths
533            // already handle it.
534            if was_enabled && is_enabled {
535                let old_sev = old.and_then(|o| o.severity.as_deref());
536                let new_sev = new.and_then(|n| n.severity.as_deref());
537                if old_sev != new_sev {
538                    diff.severities_changed = true;
539                }
540
541                // Per-rule allow-list change is filter-only.
542                let old_allow = old.map(|o| o.allow.as_slice()).unwrap_or(&[]);
543                let new_allow = new.map(|n| n.allow.as_slice()).unwrap_or(&[]);
544                if old_allow != new_allow {
545                    diff.allow_lists_changed = true;
546                }
547
548                // Rule-specific config (the `extra` toml table) or `tags`
549                // changed → behavior changed → re-run the rule.
550                let old_extra = old.map(|o| &o.extra);
551                let new_extra = new.map(|n| &n.extra);
552                if old_extra != new_extra {
553                    diff.rules_to_rerun.insert(id.to_string());
554                }
555                let old_tags = old.map(|o| o.tags.as_slice()).unwrap_or(&[]);
556                let new_tags = new.map(|n| n.tags.as_slice()).unwrap_or(&[]);
557                if old_tags != new_tags {
558                    // Tags affect filter selection (`--tag`) but do not change
559                    // findings under a typical `rules run`. Treat as filter-only.
560                    diff.allow_lists_changed = true;
561                }
562            }
563        }
564
565        // sarif-tools change → conservatively force a re-run of every sarif
566        // tool by listing them in `rules_to_rerun` keyed by tool name. The
567        // daemon today doesn't run sarif tools through the per-rule re-eval
568        // path, so this surfaces as "non-filter-only" → full reprime, which
569        // is the conservative correct behavior.
570        if old_rules.sarif_tools.len() != new_rules.sarif_tools.len() {
571            diff.rules_to_rerun
572                .insert("__sarif_tools_changed__".to_string());
573        } else {
574            for (a, b) in old_rules
575                .sarif_tools
576                .iter()
577                .zip(new_rules.sarif_tools.iter())
578            {
579                if a.name != b.name || a.command != b.command || a.watch != b.watch {
580                    diff.rules_to_rerun
581                        .insert("__sarif_tools_changed__".to_string());
582                    break;
583                }
584            }
585        }
586
587        diff
588    }
589
590    /// True if this diff can be honored by re-filtering cached findings at
591    /// serve time, with no re-evaluation.
592    ///
593    /// Specifically: no rule needs re-running and `[walk] exclude` is
594    /// unchanged. Allow-list, severity, and `enabled = false` changes are all
595    /// filter-only because the cached findings are a superset of the new
596    /// answer — dropping disabled rules / allow-matched paths and overriding
597    /// severities at serve time produces the correct result.
598    pub fn is_filter_only(&self) -> bool {
599        self.rules_to_rerun.is_empty() && !self.walk_exclude_changed
600    }
601
602    /// True if this diff requires a full reprime (Tier 3).
603    ///
604    /// Today only `walk_exclude_changed` triggers this; future fields that
605    /// can't be expressed as either filter-only or per-rule re-run should
606    /// extend this check.
607    pub fn requires_full_reprime(&self) -> bool {
608        self.walk_exclude_changed
609    }
610
611    /// True if this diff has no observable effect.
612    pub fn is_empty(&self) -> bool {
613        self.rules_to_rerun.is_empty()
614            && self.rules_disabled.is_empty()
615            && !self.allow_lists_changed
616            && !self.severities_changed
617            && !self.walk_exclude_changed
618    }
619}
620
621#[cfg(test)]
622mod tests {
623    use super::*;
624
625    #[test]
626    fn allow_field_not_swallowed_by_extra() {
627        let toml_str = r#"
628global-allow = ["**/fixtures/**"]
629
630[rule."no-grammar-loader-new"]
631allow = ["**/tests/**", "src/lib.rs"]
632threshold = 42
633"#;
634        let config: RulesConfig = toml::from_str(toml_str).unwrap();
635        let rule = config.rules.get("no-grammar-loader-new").unwrap();
636        assert_eq!(rule.allow, vec!["**/tests/**", "src/lib.rs"]);
637        assert!(!rule.extra.contains_key("allow"));
638        assert!(rule.extra.contains_key("threshold"));
639    }
640
641    #[test]
642    fn full_config_round_trip() {
643        // Simulate the actual config.toml structure (new layout).
644        let toml_str = r#"
645global-allow = ["**/tests/fixtures/**", "**/fixtures/**", ".claude/**"]
646
647[rule."rust/dbg-macro"]
648severity = "error"
649allow = ["**/tests/fixtures/**"]
650
651[rule."no-grammar-loader-new"]
652allow = ["**/tests/**", "crates/*/tests/**", "**/normalize-scope/**"]
653"#;
654        let config: RulesConfig = toml::from_str(toml_str).unwrap();
655        let dbg = config.rules.get("rust/dbg-macro").unwrap();
656        assert_eq!(dbg.severity.as_deref(), Some("error"));
657        assert_eq!(dbg.allow, vec!["**/tests/fixtures/**"]);
658
659        let ngl = config.rules.get("no-grammar-loader-new").unwrap();
660        assert_eq!(ngl.allow.len(), 3);
661        assert_eq!(ngl.allow[2], "**/normalize-scope/**");
662    }
663
664    #[test]
665    fn legacy_layout_still_parses() {
666        // Old layout: per-rule entries directly under [rules]. Should still load
667        // (with a stderr deprecation warning) for one release.
668        let toml_str = r#"
669global-allow = ["**/fixtures/**"]
670
671["rust/dbg-macro"]
672severity = "error"
673"#;
674        let config: RulesConfig = toml::from_str(toml_str).unwrap();
675        assert_eq!(config.global_allow, vec!["**/fixtures/**"]);
676        let dbg = config.rules.get("rust/dbg-macro").unwrap();
677        assert_eq!(dbg.severity.as_deref(), Some("error"));
678    }
679
680    #[test]
681    fn nested_layout_does_not_collide_with_engine_keys() {
682        // A rule literally named "global-allow" must coexist with the
683        // engine-wide global-allow value — only possible because per-rule
684        // configs live under the `rule` sub-table.
685        let toml_str = r#"
686global-allow = ["**/fixtures/**"]
687
688[rule."global-allow"]
689severity = "error"
690allow = ["legacy/**"]
691"#;
692        let config: RulesConfig = toml::from_str(toml_str).unwrap();
693        // Engine-wide value preserved
694        assert_eq!(config.global_allow, vec!["**/fixtures/**"]);
695        // Per-rule override for the (admittedly weird) rule named "global-allow"
696        let r = config.rules.get("global-allow").unwrap();
697        assert_eq!(r.severity.as_deref(), Some("error"));
698        assert_eq!(r.allow, vec!["legacy/**"]);
699    }
700
701    #[test]
702    fn nested_layout_wins_over_legacy_on_id_collision() {
703        // If both layouts define the same rule id, the new layout wins.
704        let toml_str = r#"
705[rule."rust/dbg-macro"]
706severity = "warning"
707
708["rust/dbg-macro"]
709severity = "error"
710"#;
711        let config: RulesConfig = toml::from_str(toml_str).unwrap();
712        let dbg = config.rules.get("rust/dbg-macro").unwrap();
713        assert_eq!(dbg.severity.as_deref(), Some("warning"));
714    }
715
716    #[test]
717    fn path_filter_empty_passes_everything() {
718        let f = PathFilter::default();
719        assert!(f.is_empty());
720        assert!(f.matches("anything/at/all.rs"));
721    }
722
723    #[test]
724    fn path_filter_only() {
725        let f = PathFilter::new(&["src/**/*.rs".into()], &[]);
726        assert!(f.matches("src/lib.rs"));
727        assert!(f.matches("src/deep/mod.rs"));
728        assert!(!f.matches("tests/integration.rs"));
729    }
730
731    #[test]
732    fn path_filter_exclude() {
733        let f = PathFilter::new(&[], &["**/tests/**".into()]);
734        assert!(f.matches("src/lib.rs"));
735        assert!(!f.matches("crates/foo/tests/bar.rs"));
736    }
737
738    #[test]
739    fn path_filter_only_and_exclude() {
740        let f = PathFilter::new(&["crates/**/*.rs".into()], &["**/tests/**".into()]);
741        assert!(f.matches("crates/foo/src/lib.rs"));
742        assert!(!f.matches("crates/foo/tests/it.rs")); // excluded
743        assert!(!f.matches("src/main.rs")); // not in only
744    }
745
746    #[test]
747    fn walk_config_defaults() {
748        // `Default` is empty per Rust convention — opinions like ".git/"
749        // live in NormalizeConfig::bootstrap, not WalkConfig::default.
750        let config = WalkConfig::default();
751        assert_eq!(config.ignore_files(), vec![".gitignore"]);
752        assert!(config.exclude().is_empty());
753        let root = Path::new("/tmp/root");
754        assert!(!config.is_excluded_path(root, Path::new(".git"), true));
755        assert!(!config.is_excluded_path(root, Path::new("src"), true));
756    }
757
758    #[test]
759    fn walk_config_custom() {
760        let config = WalkConfig {
761            ignore_files: Some(vec![".gitignore".into(), ".npmignore".into()]),
762            exclude: Some(vec![".git".into(), "node_modules".into()]),
763        };
764        assert_eq!(config.ignore_files(), vec![".gitignore", ".npmignore"]);
765        assert_eq!(config.exclude(), vec![".git", "node_modules"]);
766        let root = Path::new("/tmp/root");
767        assert!(config.is_excluded_path(root, Path::new("node_modules"), true));
768        assert!(!config.is_excluded_path(root, Path::new("src"), true));
769    }
770
771    #[test]
772    fn walk_config_empty_disables() {
773        let config = WalkConfig {
774            ignore_files: Some(vec![]),
775            exclude: Some(vec![]),
776        };
777        assert!(config.ignore_files().is_empty());
778        assert!(config.exclude().is_empty());
779        let root = Path::new("/tmp/root");
780        assert!(!config.is_excluded_path(root, Path::new(".git"), true));
781    }
782
783    #[test]
784    fn walk_config_excludes_basename_at_any_depth() {
785        // gitignore semantics: pattern with no slash matches at any depth.
786        let config = WalkConfig {
787            ignore_files: None,
788            exclude: Some(vec!["node_modules".into(), "worktrees".into()]),
789        };
790        let root = Path::new("/tmp/root");
791        // Top-level
792        assert!(config.is_excluded_path(root, Path::new("node_modules"), true));
793        // Nested
794        assert!(config.is_excluded_path(root, Path::new("crates/foo/node_modules"), true));
795        // .claude/worktrees nested
796        assert!(config.is_excluded_path(root, Path::new(".claude/worktrees"), true));
797    }
798
799    #[test]
800    fn walk_config_excludes_anchored_glob() {
801        let config = WalkConfig {
802            ignore_files: None,
803            exclude: Some(vec!["**/target/".into(), "path/to/specific.rs".into()]),
804        };
805        let root = Path::new("/tmp/root");
806        assert!(config.is_excluded_path(root, Path::new("crates/foo/target"), true));
807        assert!(config.is_excluded_path(root, Path::new("target"), true));
808        assert!(config.is_excluded_path(root, Path::new("path/to/specific.rs"), false));
809        assert!(!config.is_excluded_path(root, Path::new("path/to/other.rs"), false));
810    }
811
812    #[test]
813    fn walk_config_deserialize() {
814        let toml_str = r#"
815ignore_files = [".gitignore", ".dockerignore"]
816exclude = [".git", "node_modules", ".cache"]
817"#;
818        let config: WalkConfig = toml::from_str(toml_str).unwrap();
819        assert_eq!(config.ignore_files(), vec![".gitignore", ".dockerignore"]);
820        assert_eq!(config.exclude(), vec![".git", "node_modules", ".cache"]);
821    }
822
823    #[test]
824    fn walk_config_merge_option_semantics() {
825        use normalize_core::Merge;
826
827        // When both are default (None), result is default (empty).
828        let a = WalkConfig::default();
829        let b = WalkConfig::default();
830        let merged = a.merge(b);
831        assert_eq!(merged.ignore_files(), vec![".gitignore"]);
832        assert!(merged.exclude().is_empty());
833
834        // When self has custom and other is default (None), self wins
835        let a = WalkConfig {
836            ignore_files: Some(vec![".npmignore".into()]),
837            exclude: Some(vec!["dist".into()]),
838        };
839        let b = WalkConfig::default();
840        let merged = a.merge(b);
841        assert_eq!(merged.ignore_files(), vec![".npmignore"]);
842        assert_eq!(merged.exclude(), vec!["dist"]);
843
844        // When other has custom, other wins
845        let a = WalkConfig::default();
846        let b = WalkConfig {
847            ignore_files: Some(vec![".npmignore".into()]),
848            exclude: None,
849        };
850        let merged = a.merge(b);
851        assert_eq!(merged.ignore_files(), vec![".npmignore"]);
852        assert!(merged.exclude().is_empty()); // self's None → empty (default)
853    }
854
855    // -- ConfigDiff -----------------------------------------------------------
856
857    fn parse_rules(s: &str) -> RulesConfig {
858        toml::from_str(s).unwrap()
859    }
860
861    #[test]
862    fn config_diff_no_change_is_empty() {
863        let cfg = parse_rules(
864            r#"
865[rule."rust/dbg-macro"]
866severity = "error"
867"#,
868        );
869        let walk = WalkConfig::default();
870        let diff = ConfigDiff::compute(&cfg, &cfg, &walk, &walk);
871        assert!(diff.is_empty());
872        assert!(diff.is_filter_only());
873        assert!(!diff.requires_full_reprime());
874    }
875
876    #[test]
877    fn config_diff_severity_only_is_filter_only() {
878        let old = parse_rules(
879            r#"
880[rule."rust/dbg-macro"]
881severity = "error"
882"#,
883        );
884        let new = parse_rules(
885            r#"
886[rule."rust/dbg-macro"]
887severity = "info"
888"#,
889        );
890        let walk = WalkConfig::default();
891        let diff = ConfigDiff::compute(&old, &new, &walk, &walk);
892        assert!(diff.severities_changed);
893        assert!(diff.is_filter_only());
894        assert!(diff.rules_to_rerun.is_empty());
895    }
896
897    #[test]
898    fn config_diff_allow_change_is_filter_only() {
899        let old = parse_rules(
900            r#"
901global-allow = ["**/fixtures/**"]
902"#,
903        );
904        let new = parse_rules(
905            r#"
906global-allow = ["**/fixtures/**", "**/tests/**"]
907"#,
908        );
909        let walk = WalkConfig::default();
910        let diff = ConfigDiff::compute(&old, &new, &walk, &walk);
911        assert!(diff.allow_lists_changed);
912        assert!(diff.is_filter_only());
913    }
914
915    #[test]
916    fn config_diff_disable_is_filter_only() {
917        let old = parse_rules(
918            r#"
919[rule."rust/dbg-macro"]
920severity = "error"
921"#,
922        );
923        let new = parse_rules(
924            r#"
925[rule."rust/dbg-macro"]
926severity = "error"
927enabled = false
928"#,
929        );
930        let walk = WalkConfig::default();
931        let diff = ConfigDiff::compute(&old, &new, &walk, &walk);
932        assert!(diff.rules_disabled.contains("rust/dbg-macro"));
933        assert!(diff.rules_to_rerun.is_empty());
934        assert!(diff.is_filter_only());
935    }
936
937    #[test]
938    fn config_diff_enable_requires_rerun() {
939        let old = parse_rules(
940            r#"
941[rule."rust/dbg-macro"]
942enabled = false
943"#,
944        );
945        let new = parse_rules(
946            r#"
947[rule."rust/dbg-macro"]
948enabled = true
949"#,
950        );
951        let walk = WalkConfig::default();
952        let diff = ConfigDiff::compute(&old, &new, &walk, &walk);
953        assert!(diff.rules_to_rerun.contains("rust/dbg-macro"));
954        assert!(!diff.is_filter_only());
955        assert!(!diff.requires_full_reprime());
956    }
957
958    #[test]
959    fn config_diff_threshold_change_requires_rerun() {
960        let old = parse_rules(
961            r#"
962[rule."long-function"]
963threshold = 100
964"#,
965        );
966        let new = parse_rules(
967            r#"
968[rule."long-function"]
969threshold = 50
970"#,
971        );
972        let walk = WalkConfig::default();
973        let diff = ConfigDiff::compute(&old, &new, &walk, &walk);
974        assert!(diff.rules_to_rerun.contains("long-function"));
975    }
976
977    #[test]
978    fn config_diff_walk_exclude_change_requires_full_reprime() {
979        let cfg = RulesConfig::default();
980        let old_walk = WalkConfig::default();
981        let new_walk = WalkConfig {
982            ignore_files: None,
983            exclude: Some(vec![".git".into(), "node_modules".into()]),
984        };
985        let diff = ConfigDiff::compute(&cfg, &cfg, &old_walk, &new_walk);
986        assert!(diff.walk_exclude_changed);
987        assert!(diff.requires_full_reprime());
988        assert!(!diff.is_filter_only());
989    }
990}