Skip to main content

rumdl_lib/config/
types.rs

1use crate::types::LineLength;
2use globset::{Glob, GlobBuilder, GlobMatcher, GlobSet, GlobSetBuilder};
3use indexmap::IndexMap;
4use serde::{Deserialize, Serialize};
5use std::collections::{BTreeMap, HashSet};
6use std::fs;
7use std::io;
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, OnceLock};
10
11use super::flavor::{MarkdownFlavor, normalize_key};
12
/// Represents a rule-specific configuration
// One `RuleConfig` is stored per rule section (see `Config::rules`).
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Severity override for this rule (Error, Warning, or Info)
    // Omitted from serialized output when unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub severity: Option<crate::rule::Severity>,

    /// Configuration values for the rule
    // `flatten` collects every key of the rule's table that is not `severity`
    // into this map (e.g. `Config::apply_per_rule_enabled` reads `enabled` from it).
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
25
/// Generate a JSON schema for arbitrary configuration values
///
/// Used via `#[schemars(schema_with = ...)]` on `RuleConfig::values`. The
/// generator argument is unused; the returned schema is always an object
/// that permits any additional properties.
fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
    schemars::json_schema!({
        "type": "object",
        "additionalProperties": true
    })
}
33
/// Represents the complete configuration loaded from rumdl.toml
#[derive(Debug, Clone, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Path to a base config file to inherit settings from.
    /// Supports relative paths, absolute paths, and `~/` for home directory.
    /// Example: `extends = "../base.rumdl.toml"`
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub extends: Option<String>,

    /// Global configuration options
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: BTreeMap<String, Vec<String>>,

    /// Per-file flavor overrides: maps file patterns to Markdown flavors
    /// Example: { "docs/**/*.md": MkDocs, "**/*.mdx": MDX }
    /// Uses IndexMap to preserve config file order for "first match wins" semantics
    #[serde(default, rename = "per-file-flavor")]
    #[schemars(with = "BTreeMap<String, MarkdownFlavor>")]
    pub per_file_flavor: IndexMap<String, MarkdownFlavor>,

    /// Code block tools configuration for per-language linting and formatting
    /// using external tools like ruff, prettier, shellcheck, etc.
    #[serde(default, rename = "code-block-tools")]
    pub code_block_tools: crate::code_block_tools::CodeBlockToolsConfig,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx-closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See <https://github.com/rvben/rumdl> for full rule documentation.
    // `flatten` means any top-level table not claimed by a named field above
    // is deserialized into this map, keyed by its section name.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,

    /// Project root directory, used for resolving relative paths in per-file-ignores
    #[serde(skip)]
    pub project_root: Option<std::path::PathBuf>,

    /// Lazily-built glob matcher for `per_file_ignores`.
    ///
    /// Initialized on the first call to [`Config::get_ignored_rules_for_file`]
    /// and never rebuilt, so changes made to `per_file_ignores` after that
    /// first lookup are not reflected. Wrapped in `Arc` so clones of this
    /// `Config` share the same `OnceLock`.
    #[serde(skip)]
    #[schemars(skip)]
    pub(super) per_file_ignores_cache: Arc<OnceLock<PerFileIgnoreCache>>,

    /// Lazily-built glob matchers for `per_file_flavor`, kept in config order.
    ///
    /// Initialized on the first call to [`Config::get_flavor_for_file`] and
    /// never rebuilt afterwards. Wrapped in `Arc` so clones of this `Config`
    /// share the same `OnceLock`.
    #[serde(skip)]
    #[schemars(skip)]
    pub(super) per_file_flavor_cache: Arc<OnceLock<PerFileFlavorCache>>,

    /// Lazily-computed canonical form of `project_root`.
    ///
    /// `normalize_match_path` needs the canonical project root to strip
    /// prefixes from absolute file paths. Without this cache, every per-file
    /// lookup would re-canonicalize the project root (one syscall per file).
    ///
    /// ## Invariants
    ///
    /// - **Single-shot**: computed once on first use of [`Config::canonical_project_root`].
    /// - **Never invalidated**: callers must not mutate `project_root` after
    ///   the first call. `Config` is treated as immutable post-construction
    ///   (the same assumption as `per_file_ignores_cache` and `per_file_flavor_cache`).
    /// - **Construction-time existence**: the cache stores `None` if
    ///   `project_root` is unset, missing on disk, or otherwise can't be
    ///   canonicalized. In practice `project_root` is set after walking up to
    ///   `.git`, so the directory always exists at the time the cache is first
    ///   read; if a caller sets `project_root` to a not-yet-existing path,
    ///   the cache will permanently store `None`.
    /// - **`Arc` wrapping**: `Config` derives `Clone`, and clones share the
    ///   same `OnceLock` so a value computed by one clone is observable to all.
    ///
    /// `cwd` deliberately is NOT cached symmetrically: callers read it fresh
    /// from `std::env::current_dir()` per call because tests (and embedding
    /// hosts like LSP servers) may legitimately mutate the process cwd
    /// between lookups.
    #[serde(skip)]
    #[schemars(skip)]
    pub(super) canonical_project_root_cache: Arc<OnceLock<Option<PathBuf>>>,
}
121
122impl PartialEq for Config {
123    fn eq(&self, other: &Self) -> bool {
124        self.global == other.global
125            && self.per_file_ignores == other.per_file_ignores
126            && self.per_file_flavor == other.per_file_flavor
127            && self.code_block_tools == other.code_block_tools
128            && self.rules == other.rules
129            && self.project_root == other.project_root
130    }
131}
132
/// Compiled form of `Config::per_file_ignores`.
///
/// `globset` and `rules` are filled in lockstep by `PerFileIgnoreCache::new`:
/// the rule list for the glob at index `i` of the set is `rules[i]`.
#[derive(Debug)]
pub(super) struct PerFileIgnoreCache {
    // All valid patterns, compiled into one set.
    globset: GlobSet,
    // Rule names per pattern, indexed by the pattern's position in `globset`.
    rules: Vec<Vec<String>>,
}
138
/// Compiled form of `Config::per_file_flavor`.
#[derive(Debug)]
pub(super) struct PerFileFlavorCache {
    // One compiled matcher per valid pattern, in the iteration order of the
    // source map; lookups return the flavor of the first matcher that hits.
    matchers: Vec<(GlobMatcher, MarkdownFlavor)>,
}
143
144impl Config {
145    /// Check if the Markdown flavor is set to MkDocs
146    pub fn is_mkdocs_flavor(&self) -> bool {
147        self.global.flavor == MarkdownFlavor::MkDocs
148    }
149
150    // Future methods for when GFM and CommonMark are implemented:
151    // pub fn is_gfm_flavor(&self) -> bool
152    // pub fn is_commonmark_flavor(&self) -> bool
153
    /// Get the configured Markdown flavor
    ///
    /// This is the global setting (`global.flavor`) returned by value; it
    /// does not consult the per-file flavor overrides.
    pub fn markdown_flavor(&self) -> MarkdownFlavor {
        self.global.flavor
    }
158
    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
    ///
    /// Returns exactly what [`Config::is_mkdocs_flavor`] returns.
    pub fn is_mkdocs_project(&self) -> bool {
        self.is_mkdocs_flavor()
    }
163
164    /// Apply per-rule `enabled` config to the global enable/disable lists.
165    ///
166    /// For `[MD060] enabled = true`: adds the rule to `extend_enable` and
167    /// removes it from `disable` and `extend_disable`, ensuring the rule is active.
168    ///
169    /// For `[MD041] enabled = false`: adds the rule to `disable` and
170    /// removes it from `extend_enable`, ensuring the rule is inactive.
171    ///
172    /// Per-rule `enabled` takes precedence over global lists when there
173    /// is a conflict, since it represents a more specific intent.
174    pub fn apply_per_rule_enabled(&mut self) {
175        let mut to_enable: Vec<String> = Vec::new();
176        let mut to_disable: Vec<String> = Vec::new();
177
178        for (name, cfg) in &self.rules {
179            match cfg.values.get("enabled") {
180                Some(toml::Value::Boolean(true)) => {
181                    to_enable.push(name.clone());
182                }
183                Some(toml::Value::Boolean(false)) => {
184                    to_disable.push(name.clone());
185                }
186                _ => {}
187            }
188        }
189
190        for name in to_enable {
191            if !self.global.extend_enable.contains(&name) {
192                self.global.extend_enable.push(name.clone());
193            }
194            self.global.disable.retain(|n| n != &name);
195            self.global.extend_disable.retain(|n| n != &name);
196        }
197
198        for name in to_disable {
199            if !self.global.disable.contains(&name) {
200                self.global.disable.push(name.clone());
201            }
202            self.global.extend_enable.retain(|n| n != &name);
203        }
204    }
205
206    /// Get the severity override for a specific rule, if configured
207    pub fn get_rule_severity(&self, rule_name: &str) -> Option<crate::rule::Severity> {
208        self.rules.get(rule_name).and_then(|r| r.severity)
209    }
210
211    /// Return the canonical form of `project_root`, computed once and cached.
212    ///
213    /// Returns `None` if `project_root` is unset, doesn't exist on disk, or
214    /// otherwise cannot be canonicalized. Subsequent calls reuse the cached
215    /// value, eliminating the per-file `canonicalize()` syscall that
216    /// `normalize_match_path` would otherwise perform.
217    pub(super) fn canonical_project_root(&self) -> Option<&Path> {
218        self.canonical_project_root_cache
219            .get_or_init(|| self.project_root.as_deref().and_then(|p| p.canonicalize().ok()))
220            .as_deref()
221    }
222
223    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
224    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
225    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
226        let mut ignored_rules = HashSet::new();
227
228        if self.per_file_ignores.is_empty() {
229            return ignored_rules;
230        }
231
232        let cwd = std::env::current_dir().ok();
233        let path_for_matching = normalize_match_path(file_path, self.canonical_project_root(), cwd.as_deref());
234
235        let cache = self
236            .per_file_ignores_cache
237            .get_or_init(|| PerFileIgnoreCache::new(&self.per_file_ignores));
238
239        // Match the file path against all patterns
240        for match_idx in cache.globset.matches(path_for_matching.as_ref()) {
241            if let Some(rules) = cache.rules.get(match_idx) {
242                for rule in rules {
243                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
244                    ignored_rules.insert(rule.clone());
245                }
246            }
247        }
248
249        ignored_rules
250    }
251
252    /// Get the MarkdownFlavor for a specific file based on per-file-flavor configuration.
253    /// Returns the first matching pattern's flavor, or falls back to global flavor,
254    /// or auto-detects from extension, or defaults to Standard.
255    pub fn get_flavor_for_file(&self, file_path: &Path) -> MarkdownFlavor {
256        // If no per-file patterns, use fallback logic
257        if self.per_file_flavor.is_empty() {
258            return self.resolve_flavor_fallback(file_path);
259        }
260
261        let cwd = std::env::current_dir().ok();
262        let path_for_matching = normalize_match_path(file_path, self.canonical_project_root(), cwd.as_deref());
263
264        let cache = self
265            .per_file_flavor_cache
266            .get_or_init(|| PerFileFlavorCache::new(&self.per_file_flavor));
267
268        // Iterate in config order and return first match (IndexMap preserves order)
269        for (matcher, flavor) in &cache.matchers {
270            if matcher.is_match(path_for_matching.as_ref()) {
271                return *flavor;
272            }
273        }
274
275        // No pattern matched, use fallback
276        self.resolve_flavor_fallback(file_path)
277    }
278
279    /// Fallback flavor resolution: global flavor → auto-detect → Standard
280    fn resolve_flavor_fallback(&self, file_path: &Path) -> MarkdownFlavor {
281        // If global flavor is explicitly set to non-Standard, use it
282        if self.global.flavor != MarkdownFlavor::Standard {
283            return self.global.flavor;
284        }
285        // Auto-detect from extension
286        MarkdownFlavor::from_path(file_path)
287    }
288
289    /// Canonicalize every rule-name list inside this `Config`.
290    ///
291    /// This is the single enforcement point for the runtime invariant:
292    /// **after a `Config` is fully built, every rule-name list contains
293    /// canonical rule IDs (`"MD033"`) — never aliases (`"no-inline-html"`).**
294    ///
295    /// The invariant lets every consumer (`rules::filter_rules`, the LSP,
296    /// WASM, fix coordinator, per-file-ignore lookups) match against
297    /// `Rule::name()` with simple string equality. Mutation boundaries
298    /// (`From<SourcedConfig> for Config`, LSP `apply_lsp_settings_*`, WASM
299    /// `to_config_with_warnings`) call this before handing the `Config` to
300    /// the linting pipeline.
301    ///
302    /// Covers `global.{enable,disable,extend_enable,extend_disable,fixable,unfixable}`
303    /// and the values of `per_file_ignores`. Idempotent.
304    pub fn canonicalize_rule_lists(&mut self) {
305        use super::registry::canonicalize_rule_list_in_place;
306        self.global.canonicalize_rule_lists();
307        for rules in self.per_file_ignores.values_mut() {
308            canonicalize_rule_list_in_place(rules);
309        }
310    }
311
312    /// Merge inline configuration overrides into a copy of this config
313    ///
314    /// This enables automatic inline config support - the engine can merge
315    /// inline overrides and recreate rules without any per-rule changes.
316    ///
317    /// Returns a new Config with the inline overrides merged in.
318    /// If there are no inline overrides, returns a clone of self.
319    pub fn merge_with_inline_config(&self, inline_config: &crate::inline_config::InlineConfig) -> Self {
320        let overrides = inline_config.get_all_rule_configs();
321        if overrides.is_empty() {
322            return self.clone();
323        }
324
325        let mut merged = self.clone();
326
327        for (rule_name, json_override) in overrides {
328            // Get or create the rule config entry
329            let rule_config = merged.rules.entry(rule_name.clone()).or_default();
330
331            // Merge JSON values into the rule's config
332            if let Some(obj) = json_override.as_object() {
333                for (key, value) in obj {
334                    // Normalize key to kebab-case for consistency
335                    let normalized_key = key.replace('_', "-");
336
337                    // Convert JSON value to TOML value
338                    if let Some(toml_value) = json_to_toml(value) {
339                        rule_config.values.insert(normalized_key, toml_value);
340                    }
341                }
342            }
343        }
344
345        merged
346    }
347}
348
349/// Normalize a file path for matching against a glob pattern from configuration.
350///
351/// Glob patterns in `per-file-ignores` and `per-file-flavor` are written relative
352/// to the project root (e.g. `docs/**/*.md`), and the underlying matcher uses
353/// `literal_separator(true)` so an absolute path like `/home/user/proj/docs/x.md`
354/// will not match `docs/**/*.md`. This helper produces the form the glob expects:
355///
356/// 1. **Relative path** → return as-is.
357/// 2. **Absolute path under `project_root`** → return path relative to `project_root`.
358/// 3. **Absolute path under `cwd`** → return path relative to `cwd`. This is the
359///    safety net for invocations where `project_root` could not be discovered
360///    (no `.git` upward, LSP/CLI calls outside a project) but the file still
361///    lives somewhere under the working directory.
362/// 4. **Anywhere else** → return the raw path. A relative glob simply won't
363///    match it, which is the desired outcome for files outside any known root.
364///
365/// All canonicalization failures degrade gracefully to step 4 so editor buffers
366/// and pre-creation paths still flow through without panicking.
367///
368/// `canonical_project_root` is expected to already be canonical (via
369/// `Config::canonical_project_root`). `cwd` is canonicalized internally on each
370/// call since it is read fresh from the environment per invocation.
371pub(super) fn normalize_match_path<'a>(
372    file_path: &'a Path,
373    canonical_project_root: Option<&Path>,
374    cwd: Option<&Path>,
375) -> std::borrow::Cow<'a, Path> {
376    use std::borrow::Cow;
377
378    if file_path.is_relative() {
379        return Cow::Borrowed(file_path);
380    }
381
382    let Ok(canonical_file) = file_path.canonicalize() else {
383        log::debug!(
384            "normalize_match_path: canonicalize failed for {}; returning raw path. \
385             Per-file glob patterns may not match (file may not yet exist on disk).",
386            file_path.display()
387        );
388        return Cow::Borrowed(file_path);
389    };
390
391    if let Some(root) = canonical_project_root
392        && let Ok(rel) = canonical_file.strip_prefix(root)
393    {
394        return Cow::Owned(rel.to_path_buf());
395    }
396
397    if let Some(working_dir) = cwd
398        && let Ok(canonical_cwd) = working_dir.canonicalize()
399        && let Ok(rel) = canonical_file.strip_prefix(&canonical_cwd)
400    {
401        return Cow::Owned(rel.to_path_buf());
402    }
403
404    // Surface the silent fallback once per process at warn level so users with
405    // per-file glob configs notice when their patterns can't match a file.
406    // Subsequent occurrences stay at debug to avoid log spam.
407    static SILENT_FALLBACK_WARNED: OnceLock<()> = OnceLock::new();
408    log::log!(
409        first_call_warn_else_debug(&SILENT_FALLBACK_WARNED),
410        "{}",
411        format_silent_fallback_message(file_path, canonical_project_root, cwd),
412    );
413    Cow::Borrowed(file_path)
414}
415
416/// Returns [`log::Level::Warn`] the first time it is called with a given
417/// `latch`, and [`log::Level::Debug`] on every subsequent call. The latch
418/// is consumed by the first caller via `OnceLock::set`; later callers
419/// observe the latch as already set and downgrade.
420///
421/// Used to flag a fallback condition once per process without flooding
422/// logs when the same condition recurs (e.g. once per linted file).
423pub(super) fn first_call_warn_else_debug(latch: &OnceLock<()>) -> log::Level {
424    if latch.set(()).is_ok() {
425        log::Level::Warn
426    } else {
427        log::Level::Debug
428    }
429}
430
431/// Format the diagnostic emitted when [`normalize_match_path`] cannot
432/// relativise `file_path` against either the project root or the current
433/// working directory. Extracted so the exact wording can be asserted in
434/// tests without capturing log output.
435pub(super) fn format_silent_fallback_message(
436    file_path: &Path,
437    canonical_project_root: Option<&Path>,
438    cwd: Option<&Path>,
439) -> String {
440    format!(
441        "Per-file glob patterns will not match {}: file is outside project_root ({}) and cwd ({})",
442        file_path.display(),
443        DisplayPathOrUnset(canonical_project_root),
444        DisplayPathOrUnset(cwd),
445    )
446}
447
/// `Display` wrapper around `Option<&Path>`.
///
/// A present path is rendered through [`Path::display`]; an absent one is
/// rendered as the literal `<unset>`. The angle brackets follow Rust's
/// diagnostic convention (e.g. `<unknown>`) and avoid doubled parentheses
/// when the surrounding format string already wraps the value in `(…)`.
struct DisplayPathOrUnset<'a>(Option<&'a Path>);

impl std::fmt::Display for DisplayPathOrUnset<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Some(path) = self.0 else {
            return f.write_str("<unset>");
        };
        // Forward to the path's own `Display` with the caller's formatter.
        std::fmt::Display::fmt(&path.display(), f)
    }
}
463
464/// Convert a serde_json::Value to a toml::Value
465pub(super) fn json_to_toml(json: &serde_json::Value) -> Option<toml::Value> {
466    match json {
467        serde_json::Value::Null => None,
468        serde_json::Value::Bool(b) => Some(toml::Value::Boolean(*b)),
469        serde_json::Value::Number(n) => n
470            .as_i64()
471            .map(toml::Value::Integer)
472            .or_else(|| n.as_f64().map(toml::Value::Float)),
473        serde_json::Value::String(s) => Some(toml::Value::String(s.clone())),
474        serde_json::Value::Array(arr) => {
475            let toml_arr: Vec<toml::Value> = arr.iter().filter_map(json_to_toml).collect();
476            Some(toml::Value::Array(toml_arr))
477        }
478        serde_json::Value::Object(obj) => {
479            let mut table = toml::map::Map::new();
480            for (k, v) in obj {
481                if let Some(tv) = json_to_toml(v) {
482                    table.insert(k.clone(), tv);
483                }
484            }
485            Some(toml::Value::Table(table))
486        }
487    }
488}
489
490impl PerFileIgnoreCache {
491    fn new(per_file_ignores: &BTreeMap<String, Vec<String>>) -> Self {
492        let mut builder = GlobSetBuilder::new();
493        let mut rules = Vec::new();
494
495        for (pattern, rules_list) in per_file_ignores {
496            if let Ok(glob) = Glob::new(pattern) {
497                builder.add(glob);
498                // Canonicalize defensively: callers should have run
499                // Config::canonicalize_rule_lists already, but per-file-ignores
500                // has reached this cache directly from a few code paths
501                // historically, so we re-canonicalize here to keep the cache
502                // sound regardless of caller discipline.
503                rules.push(
504                    rules_list
505                        .iter()
506                        .map(|rule| super::registry::resolve_rule_name(rule))
507                        .collect(),
508                );
509            } else {
510                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
511            }
512        }
513
514        let globset = builder.build().unwrap_or_else(|e| {
515            log::error!("Failed to build globset for per-file-ignores: {e}");
516            GlobSetBuilder::new().build().unwrap()
517        });
518
519        Self { globset, rules }
520    }
521}
522
523impl PerFileFlavorCache {
524    fn new(per_file_flavor: &IndexMap<String, MarkdownFlavor>) -> Self {
525        let mut matchers = Vec::new();
526
527        for (pattern, flavor) in per_file_flavor {
528            if let Ok(glob) = GlobBuilder::new(pattern).literal_separator(true).build() {
529                matchers.push((glob.compile_matcher(), *flavor));
530            } else {
531                log::warn!("Invalid glob pattern in per-file-flavor: {pattern}");
532            }
533        }
534
535        Self { matchers }
536    }
537}
538
/// Global configuration options
// Field names are (de)serialized in kebab-case via `rename_all`; the `alias`
// attributes below additionally accept the snake_case spelling on input.
// Container-level `default` fills any missing field from `Default::default()`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default, rename_all = "kebab-case")]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    #[serde(default = "default_respect_gitignore", alias = "respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    #[serde(default, alias = "line_length")]
    pub line_length: LineLength,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    #[serde(skip_serializing_if = "Option::is_none", alias = "output_format")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// \[DEPRECATED\] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    // Still accepted on input so existing config files keep parsing.
    #[serde(default, alias = "force_exclude")]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,

    /// Directory to store cache files (default: .rumdl_cache)
    /// Can also be set via --cache-dir CLI flag or RUMDL_CACHE_DIR environment variable
    #[serde(default, alias = "cache_dir", skip_serializing_if = "Option::is_none")]
    pub cache_dir: Option<String>,

    /// Whether caching is enabled (default: true)
    /// Can also be disabled via --no-cache CLI flag
    #[serde(default = "default_true")]
    pub cache: bool,

    /// Additional rules to enable on top of the base set (additive)
    #[serde(default, alias = "extend_enable")]
    pub extend_enable: Vec<String>,

    /// Additional rules to disable on top of the base set (additive)
    #[serde(default, alias = "extend_disable")]
    pub extend_disable: Vec<String>,

    /// Whether the enable list was explicitly set (even if empty).
    /// Used to distinguish "no enable list configured" from "enable list is empty"
    /// (e.g., markdownlint `default: false` with no rules enabled).
    // Never read from or written to config files (`serde(skip)`); it is set
    // by code that builds the config.
    #[serde(skip)]
    pub enable_is_explicit: bool,
}
618
/// Serde default for `GlobalConfig::respect_gitignore`: `true`.
fn default_respect_gitignore() -> bool {
    true
}
622
/// Serde default helper that yields `true` (used by `GlobalConfig::cache`).
fn default_true() -> bool {
    true
}
626
627// Add the Default impl
628impl Default for GlobalConfig {
629    #[allow(deprecated)]
630    fn default() -> Self {
631        Self {
632            enable: Vec::new(),
633            disable: Vec::new(),
634            exclude: Vec::new(),
635            include: Vec::new(),
636            respect_gitignore: true,
637            line_length: LineLength::default(),
638            output_format: None,
639            fixable: Vec::new(),
640            unfixable: Vec::new(),
641            flavor: MarkdownFlavor::default(),
642            force_exclude: false,
643            cache_dir: None,
644            cache: true,
645            extend_enable: Vec::new(),
646            extend_disable: Vec::new(),
647            enable_is_explicit: false,
648        }
649    }
650}
651
652impl GlobalConfig {
653    /// Canonicalize every rule-name list in this `GlobalConfig`.
654    ///
655    /// Rewrites `enable`, `disable`, `extend_enable`, `extend_disable`, `fixable`,
656    /// and `unfixable` so that all entries are canonical rule IDs (`"MD033"`)
657    /// rather than aliases (`"no-inline-html"`). Duplicates are removed,
658    /// preserving first-occurrence order; the special `"all"` keyword is
659    /// preserved.
660    ///
661    /// This must be called by every code path that mutates a runtime
662    /// `Config`'s rule lists from external input (markdownlint configs,
663    /// `.rumdl.toml`, LSP `initializationOptions`, WASM bindings, etc.) so
664    /// that downstream consumers (`rules::filter_rules`, the LSP, WASM) can
665    /// match against `Rule::name()` with simple string equality.
666    pub fn canonicalize_rule_lists(&mut self) {
667        use super::registry::canonicalize_rule_list_in_place;
668        canonicalize_rule_list_in_place(&mut self.enable);
669        canonicalize_rule_list_in_place(&mut self.disable);
670        canonicalize_rule_list_in_place(&mut self.extend_enable);
671        canonicalize_rule_list_in_place(&mut self.extend_disable);
672        canonicalize_rule_list_in_place(&mut self.fixable);
673        canonicalize_rule_list_in_place(&mut self.unfixable);
674    }
675}
676
/// Names of rumdl-native config files, searched in precedence order when
/// walking up a directory tree.
///
/// Entries are paths relative to each directory visited; note that
/// `.config/rumdl.toml` includes a subdirectory component.
///
/// This is the single source of truth for config discovery. Both the CLI
/// (`SourcedConfig::discover_config_upward`, `discover_config_for_dir`) and
/// the LSP (`RumdlLanguageServer::resolve_config_for_file`) must use this
/// list; any deviation causes silent config-not-found bugs where the CLI
/// recognises a config but the LSP does not (or vice versa).
///
/// See `src/lsp/tests.rs::test_lsp_cli_resolver_parity_on_fixtures` for
/// the side-by-side resolver parity test that pins this invariant across
/// several directory layouts.
pub const RUMDL_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
690
/// File names recognised as markdownlint / markdownlint-cli2 configuration.
///
/// Covers the dot-prefixed `markdownlint-cli2` and `markdownlint` variants as
/// well as the non-dotted `markdownlint.*` names, in JSON, JSONC and YAML.
// NOTE(review): presumably searched in list order like `RUMDL_CONFIG_FILES`;
// confirm against the discovery code before relying on precedence.
pub const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint-cli2.jsonc",
    ".markdownlint-cli2.yaml",
    ".markdownlint-cli2.yml",
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
704
/// Create a default configuration file at the specified path
///
/// Shorthand for `create_preset_config("default", path)`; any error comes
/// from that call and is returned unchanged.
pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
    create_preset_config("default", path)
}
709
710/// Create a configuration file with a specific style preset
711pub fn create_preset_config(preset: &str, path: &str) -> Result<(), ConfigError> {
712    if Path::new(path).exists() {
713        return Err(ConfigError::FileExists { path: path.to_string() });
714    }
715
716    let config_content = match preset {
717        "default" => generate_default_preset(),
718        "google" => generate_google_preset(),
719        "relaxed" => generate_relaxed_preset(),
720        _ => {
721            return Err(ConfigError::UnknownPreset {
722                name: preset.to_string(),
723            });
724        }
725    };
726
727    match fs::write(path, config_content) {
728        Ok(_) => Ok(()),
729        Err(err) => Err(ConfigError::IoError {
730            source: err,
731            path: path.to_string(),
732        }),
733    }
734}
735
/// Generate the default preset configuration content.
///
/// This is the TOML text that `create_preset_config` writes for the
/// `"default"` preset (and therefore what `create_default_config` writes).
fn generate_default_preset() -> String {
    String::from(
        r#"# rumdl configuration file

# Inherit settings from another config file (relative to this file's directory)
# extends = "../base.rumdl.toml"

# Global configuration options
[global]
# List of rules to disable (uncomment and modify as needed)
# disable = ["MD013", "MD033"]

# List of rules to enable exclusively (replaces defaults; only these rules will run)
# enable = ["MD001", "MD003", "MD004"]

# Additional rules to enable on top of defaults (additive, does not replace)
# Use this to activate opt-in rules like MD060, MD063, MD072, MD073, MD074
# extend-enable = ["MD060", "MD063"]

# Additional rules to disable on top of the disable list (additive)
# extend-disable = ["MD041"]

# List of file/directory patterns to include for linting (if provided, only these will be linted)
# include = [
#    "docs/*.md",
#    "src/**/*.md",
#    "README.md"
# ]

# List of file/directory patterns to exclude from linting
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",

    # Specific files or patterns
    "CHANGELOG.md",
    "LICENSE.md",
]

# Respect .gitignore files when scanning directories (default: true)
respect-gitignore = true

# Markdown flavor/dialect (uncomment to enable)
# Options: standard (default), gfm, commonmark, mkdocs, mdx, pandoc, quarto, obsidian, kramdown, azure_devops
# flavor = "mkdocs"

# Rule-specific configurations (uncomment and modify as needed)

# [MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [MD007]
# indent = 4  # Unordered list indentation

# [MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#,
    )
}
811
/// Build the text of the `google` preset (Google developer documentation style).
///
/// Based on https://google.github.io/styleguide/docguide/style.html. The template sets the
/// shared `[global]` excludes and configures MD003, MD004, MD007, MD009, MD013, MD026,
/// MD046, MD049 and MD050.
fn generate_google_preset() -> String {
    // Kept as a single literal so the generated file reads exactly like the template.
    const TEMPLATE: &str = r#"# rumdl configuration - Google developer documentation style
# Based on https://google.github.io/styleguide/docguide/style.html

[global]
exclude = [
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
    "CHANGELOG.md",
    "LICENSE.md",
]
respect-gitignore = true

# ATX-style headings required
[MD003]
style = "atx"

# Unordered list style: dash
[MD004]
style = "dash"

# 4-space indent for nested lists
[MD007]
indent = 4

# Strict mode: no trailing spaces allowed (Google uses backslash for line breaks)
[MD009]
strict = true

# 80-character line length
[MD013]
line-length = 80
code-blocks = false
tables = false

# No trailing punctuation in headings
[MD026]
punctuation = ".,;:!。,;:!"

# Fenced code blocks only (no indented code blocks)
[MD046]
style = "fenced"

# Emphasis with underscores
[MD049]
style = "underscore"

# Strong with asterisks
[MD050]
style = "asterisk"
"#;

    TEMPLATE.to_owned()
}
871
/// Build the text of the `relaxed` preset, aimed at projects adopting rumdl incrementally.
///
/// The template disables MD013, MD033 and MD041 and sets the style of MD003, MD004,
/// MD049 and MD050 to `"consistent"`, alongside the shared `[global]` excludes.
fn generate_relaxed_preset() -> String {
    // Kept as a single literal so the generated file reads exactly like the template.
    const TEMPLATE: &str = r#"# rumdl configuration - Relaxed preset
# Lenient settings for existing projects adopting rumdl incrementally.
# Minimizes initial warnings while still catching important issues.

[global]
exclude = [
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
    "CHANGELOG.md",
    "LICENSE.md",
]
respect-gitignore = true

# Disable rules that produce the most noise on existing projects
disable = [
    "MD013",  # Line length - most existing files exceed 80 chars
    "MD033",  # Inline HTML - commonly used in real-world markdown
    "MD041",  # First line heading - not all files need it
]

# Consistent heading style (any style, just be consistent)
[MD003]
style = "consistent"

# Consistent list style
[MD004]
style = "consistent"

# Consistent emphasis style
[MD049]
style = "consistent"

# Consistent strong style
[MD050]
style = "consistent"
"#;

    TEMPLATE.to_owned()
}
917
/// Errors that can occur when loading or creating configuration files.
///
/// The `#[error(...)]` attribute on each variant supplies its `Display` message
/// (generated by `thiserror`).
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An I/O operation on a configuration file failed.
    ///
    /// `source` holds the underlying [`io::Error`] and `path` the file involved.
    /// NOTE: the message says "read", but `create_preset_config` also returns this
    /// variant when *writing* the file fails.
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON).
    ///
    /// The payload is the human-readable parse failure description.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// A configuration file already exists at `path`, so it was not overwritten.
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },

    /// Circular `extends` reference detected.
    ///
    /// Per the message, `path` is the file that is already part of `chain`.
    #[error("Circular extends reference: {path} already in chain {chain:?}")]
    CircularExtends { path: String, chain: Vec<String> },

    /// The `extends` chain exceeds the maximum depth `max_depth`; `path` is where the
    /// limit was hit.
    #[error("Extends chain exceeds maximum depth of {max_depth} at {path}")]
    ExtendsDepthExceeded { path: String, max_depth: usize },

    /// The file named by an `extends` entry was not found.
    ///
    /// `path` is the missing target and `from` the config that referenced it.
    #[error("extends target not found: {path} (referenced from {from})")]
    ExtendsNotFound { path: String, from: String },

    /// The requested preset name is not one of `default`, `google` or `relaxed`.
    #[error("Unknown preset: {name}. Valid presets: default, google, relaxed")]
    UnknownPreset { name: String },
}
949
950/// Get a rule-specific configuration value
951/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
952/// for better markdownlint compatibility
953pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
954    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
955
956    let rule_config = config.rules.get(&norm_rule_name)?;
957
958    // Try multiple key variants to support both underscore and kebab-case formats
959    let key_variants = [
960        key.to_string(),       // Original key as provided
961        normalize_key(key),    // Normalized key (lowercase, kebab-case)
962        key.replace('-', "_"), // Convert kebab-case to snake_case
963        key.replace('_', "-"), // Convert snake_case to kebab-case
964    ];
965
966    // Try each variant until we find a match
967    for variant in &key_variants {
968        if let Some(value) = rule_config.values.get(variant)
969            && let Ok(result) = T::deserialize(value.clone())
970        {
971            return Some(result);
972        }
973    }
974
975    None
976}
977
978/// Generate preset configuration for pyproject.toml format.
979/// Converts the .rumdl.toml preset to pyproject.toml section format.
980pub fn generate_pyproject_preset_config(preset: &str) -> Result<String, ConfigError> {
981    match preset {
982        "default" => Ok(generate_pyproject_config()),
983        other => {
984            let rumdl_config = match other {
985                "google" => generate_google_preset(),
986                "relaxed" => generate_relaxed_preset(),
987                _ => {
988                    return Err(ConfigError::UnknownPreset {
989                        name: other.to_string(),
990                    });
991                }
992            };
993            Ok(convert_rumdl_to_pyproject(&rumdl_config))
994        }
995    }
996}
997
/// Convert a `.rumdl.toml` config string to `pyproject.toml` format.
///
/// Every line that is a TOML table header is moved under the `tool.rumdl` namespace:
///
/// * `[global]` → `[tool.rumdl]`
/// * `[MDXXX]` → `[tool.rumdl.MDXXX]`
/// * `[[name]]` → `[[tool.rumdl.name]]` (array-of-tables headers keep both brackets)
///
/// Whitespace around the table name (`[ global ]`) is ignored. Rewritten headers are
/// emitted without their original leading/trailing whitespace. All other lines, including
/// commented-out headers such as `# [MD003]`, closing `]` lines and an empty `[]`, are
/// copied through unchanged.
///
/// Every output line is terminated with `\n`, so non-empty input that lacks a trailing
/// newline gains one. Empty input yields an empty string.
fn convert_rumdl_to_pyproject(rumdl_config: &str) -> String {
    let mut output = String::with_capacity(rumdl_config.len() + 128);
    for line in rumdl_config.lines() {
        match split_table_header(line.trim()) {
            Some((is_array_of_tables, name)) => {
                let (open, close) = if is_array_of_tables { ("[[", "]]") } else { ("[", "]") };
                output.push_str(open);
                output.push_str("tool.rumdl");
                // `global` maps onto the namespace root itself; everything else nests below it.
                if name != "global" {
                    output.push('.');
                    output.push_str(name);
                }
                output.push_str(close);
            }
            None => output.push_str(line),
        }
        output.push('\n');
    }
    output
}

/// Recognize a trimmed line as a TOML table header.
///
/// Returns `(is_array_of_tables, table_name)` for `[name]` / `[[name]]`, or `None` when the
/// line is not a header (no enclosing brackets, empty name, or stray brackets in the name).
fn split_table_header(trimmed: &str) -> Option<(bool, &str)> {
    let double = trimmed
        .strip_prefix("[[")
        .and_then(|rest| rest.strip_suffix("]]"));
    let (is_array_of_tables, inner) = match double {
        Some(inner) => (true, inner),
        None => (false, trimmed.strip_prefix('[')?.strip_suffix(']')?),
    };

    let name = inner.trim();
    if name.is_empty() || name.contains(|c: char| c == '[' || c == ']') {
        return None;
    }
    Some((is_array_of_tables, name))
}
1018
/// Generate the default rumdl configuration section for `pyproject.toml`.
///
/// The returned text begins with a blank line followed by the `[tool.rumdl]` table, which
/// sets `line-length`, `disable`, `exclude` and `respect-gitignore`. It ends with
/// commented-out `[tool.rumdl.MDxxx]` examples.
pub fn generate_pyproject_config() -> String {
    // Kept as a single literal so the generated section reads exactly like the template.
    const TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
# extend-enable = ["MD060"]  # Add opt-in rules (additive, keeps defaults)
# extend-disable = []  # Additional rules to disable (additive)
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    TEMPLATE.to_owned()
}