Skip to main content

rumdl_lib/config/
types.rs

1use crate::types::LineLength;
2use globset::{Glob, GlobBuilder, GlobMatcher, GlobSet, GlobSetBuilder};
3use indexmap::IndexMap;
4use serde::{Deserialize, Serialize};
5use std::collections::BTreeMap;
6use std::collections::{HashMap, HashSet};
7use std::fs;
8use std::io;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, OnceLock};
11
12use super::flavor::{MarkdownFlavor, normalize_key};
13
14/// Represents a rule-specific configuration
15#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
16pub struct RuleConfig {
17    /// Severity override for this rule (Error, Warning, or Info)
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub severity: Option<crate::rule::Severity>,
20
21    /// Configuration values for the rule
22    #[serde(flatten)]
23    #[schemars(schema_with = "arbitrary_value_schema")]
24    pub values: BTreeMap<String, toml::Value>,
25}
26
27/// Generate a JSON schema for arbitrary configuration values
28fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
29    schemars::json_schema!({
30        "type": "object",
31        "additionalProperties": true
32    })
33}
34
35/// Represents the complete configuration loaded from rumdl.toml
36#[derive(Debug, Clone, Serialize, Deserialize, Default, schemars::JsonSchema)]
37#[schemars(
38    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
39)]
40pub struct Config {
41    /// Path to a base config file to inherit settings from.
42    /// Supports relative paths, absolute paths, and `~/` for home directory.
43    /// Example: `extends = "../base.rumdl.toml"`
44    #[serde(default, skip_serializing_if = "Option::is_none")]
45    pub extends: Option<String>,
46
47    /// Global configuration options
48    #[serde(default)]
49    pub global: GlobalConfig,
50
51    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
52    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
53    #[serde(default, rename = "per-file-ignores")]
54    pub per_file_ignores: HashMap<String, Vec<String>>,
55
56    /// Per-file flavor overrides: maps file patterns to Markdown flavors
57    /// Example: { "docs/**/*.md": MkDocs, "**/*.mdx": MDX }
58    /// Uses IndexMap to preserve config file order for "first match wins" semantics
59    #[serde(default, rename = "per-file-flavor")]
60    #[schemars(with = "HashMap<String, MarkdownFlavor>")]
61    pub per_file_flavor: IndexMap<String, MarkdownFlavor>,
62
63    /// Code block tools configuration for per-language linting and formatting
64    /// using external tools like ruff, prettier, shellcheck, etc.
65    #[serde(default, rename = "code-block-tools")]
66    pub code_block_tools: crate::code_block_tools::CodeBlockToolsConfig,
67
68    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
69    /// Each rule section can contain options specific to that rule.
70    ///
71    /// Common examples:
72    /// - MD013: line_length, code_blocks, tables, headings
73    /// - MD007: indent
74    /// - MD003: style ("atx", "atx-closed", "setext")
75    /// - MD044: names (array of proper names to check)
76    ///
77    /// See <https://github.com/rvben/rumdl> for full rule documentation.
78    #[serde(flatten)]
79    pub rules: BTreeMap<String, RuleConfig>,
80
81    /// Project root directory, used for resolving relative paths in per-file-ignores
82    #[serde(skip)]
83    pub project_root: Option<std::path::PathBuf>,
84
85    #[serde(skip)]
86    #[schemars(skip)]
87    pub(super) per_file_ignores_cache: Arc<OnceLock<PerFileIgnoreCache>>,
88
89    #[serde(skip)]
90    #[schemars(skip)]
91    pub(super) per_file_flavor_cache: Arc<OnceLock<PerFileFlavorCache>>,
92
93    /// Lazily-computed canonical form of `project_root`.
94    ///
95    /// `normalize_match_path` needs the canonical project root to strip
96    /// prefixes from absolute file paths. Without this cache, every per-file
97    /// lookup would re-canonicalize the project root (one syscall per file).
98    ///
99    /// ## Invariants
100    ///
101    /// - **Single-shot**: computed once on first use of [`Config::canonical_project_root`].
102    /// - **Never invalidated**: callers must not mutate `project_root` after
103    ///   the first call. `Config` is treated as immutable post-construction
104    ///   (the same assumption as `per_file_ignores_cache` and `per_file_flavor_cache`).
105    /// - **Construction-time existence**: the cache stores `None` if
106    ///   `project_root` is unset, missing on disk, or otherwise can't be
107    ///   canonicalized. In practice `project_root` is set after walking up to
108    ///   `.git`, so the directory always exists at the time the cache is first
109    ///   read; if a caller sets `project_root` to a not-yet-existing path,
110    ///   the cache will permanently store `None`.
111    /// - **`Arc` wrapping**: `Config` derives `Clone`, and clones share the
112    ///   same `OnceLock` so a value computed by one clone is observable to all.
113    ///
114    /// `cwd` deliberately is NOT cached symmetrically: callers read it fresh
115    /// from `std::env::current_dir()` per call because tests (and embedding
116    /// hosts like LSP servers) may legitimately mutate the process cwd
117    /// between lookups.
118    #[serde(skip)]
119    #[schemars(skip)]
120    pub(super) canonical_project_root_cache: Arc<OnceLock<Option<PathBuf>>>,
121}
122
123impl PartialEq for Config {
124    fn eq(&self, other: &Self) -> bool {
125        self.global == other.global
126            && self.per_file_ignores == other.per_file_ignores
127            && self.per_file_flavor == other.per_file_flavor
128            && self.code_block_tools == other.code_block_tools
129            && self.rules == other.rules
130            && self.project_root == other.project_root
131    }
132}
133
134#[derive(Debug)]
135pub(super) struct PerFileIgnoreCache {
136    globset: GlobSet,
137    rules: Vec<Vec<String>>,
138}
139
140#[derive(Debug)]
141pub(super) struct PerFileFlavorCache {
142    matchers: Vec<(GlobMatcher, MarkdownFlavor)>,
143}
144
145impl Config {
146    /// Check if the Markdown flavor is set to MkDocs
147    pub fn is_mkdocs_flavor(&self) -> bool {
148        self.global.flavor == MarkdownFlavor::MkDocs
149    }
150
151    // Future methods for when GFM and CommonMark are implemented:
152    // pub fn is_gfm_flavor(&self) -> bool
153    // pub fn is_commonmark_flavor(&self) -> bool
154
155    /// Get the configured Markdown flavor
156    pub fn markdown_flavor(&self) -> MarkdownFlavor {
157        self.global.flavor
158    }
159
160    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
161    pub fn is_mkdocs_project(&self) -> bool {
162        self.is_mkdocs_flavor()
163    }
164
165    /// Apply per-rule `enabled` config to the global enable/disable lists.
166    ///
167    /// For `[MD060] enabled = true`: adds the rule to `extend_enable` and
168    /// removes it from `disable` and `extend_disable`, ensuring the rule is active.
169    ///
170    /// For `[MD041] enabled = false`: adds the rule to `disable` and
171    /// removes it from `extend_enable`, ensuring the rule is inactive.
172    ///
173    /// Per-rule `enabled` takes precedence over global lists when there
174    /// is a conflict, since it represents a more specific intent.
175    pub fn apply_per_rule_enabled(&mut self) {
176        let mut to_enable: Vec<String> = Vec::new();
177        let mut to_disable: Vec<String> = Vec::new();
178
179        for (name, cfg) in &self.rules {
180            match cfg.values.get("enabled") {
181                Some(toml::Value::Boolean(true)) => {
182                    to_enable.push(name.clone());
183                }
184                Some(toml::Value::Boolean(false)) => {
185                    to_disable.push(name.clone());
186                }
187                _ => {}
188            }
189        }
190
191        for name in to_enable {
192            if !self.global.extend_enable.contains(&name) {
193                self.global.extend_enable.push(name.clone());
194            }
195            self.global.disable.retain(|n| n != &name);
196            self.global.extend_disable.retain(|n| n != &name);
197        }
198
199        for name in to_disable {
200            if !self.global.disable.contains(&name) {
201                self.global.disable.push(name.clone());
202            }
203            self.global.extend_enable.retain(|n| n != &name);
204        }
205    }
206
207    /// Get the severity override for a specific rule, if configured
208    pub fn get_rule_severity(&self, rule_name: &str) -> Option<crate::rule::Severity> {
209        self.rules.get(rule_name).and_then(|r| r.severity)
210    }
211
212    /// Return the canonical form of `project_root`, computed once and cached.
213    ///
214    /// Returns `None` if `project_root` is unset, doesn't exist on disk, or
215    /// otherwise cannot be canonicalized. Subsequent calls reuse the cached
216    /// value, eliminating the per-file `canonicalize()` syscall that
217    /// `normalize_match_path` would otherwise perform.
218    pub(super) fn canonical_project_root(&self) -> Option<&Path> {
219        self.canonical_project_root_cache
220            .get_or_init(|| self.project_root.as_deref().and_then(|p| p.canonicalize().ok()))
221            .as_deref()
222    }
223
224    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
225    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
226    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
227        let mut ignored_rules = HashSet::new();
228
229        if self.per_file_ignores.is_empty() {
230            return ignored_rules;
231        }
232
233        let cwd = std::env::current_dir().ok();
234        let path_for_matching = normalize_match_path(file_path, self.canonical_project_root(), cwd.as_deref());
235
236        let cache = self
237            .per_file_ignores_cache
238            .get_or_init(|| PerFileIgnoreCache::new(&self.per_file_ignores));
239
240        // Match the file path against all patterns
241        for match_idx in cache.globset.matches(path_for_matching.as_ref()) {
242            if let Some(rules) = cache.rules.get(match_idx) {
243                for rule in rules {
244                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
245                    ignored_rules.insert(rule.clone());
246                }
247            }
248        }
249
250        ignored_rules
251    }
252
253    /// Get the MarkdownFlavor for a specific file based on per-file-flavor configuration.
254    /// Returns the first matching pattern's flavor, or falls back to global flavor,
255    /// or auto-detects from extension, or defaults to Standard.
256    pub fn get_flavor_for_file(&self, file_path: &Path) -> MarkdownFlavor {
257        // If no per-file patterns, use fallback logic
258        if self.per_file_flavor.is_empty() {
259            return self.resolve_flavor_fallback(file_path);
260        }
261
262        let cwd = std::env::current_dir().ok();
263        let path_for_matching = normalize_match_path(file_path, self.canonical_project_root(), cwd.as_deref());
264
265        let cache = self
266            .per_file_flavor_cache
267            .get_or_init(|| PerFileFlavorCache::new(&self.per_file_flavor));
268
269        // Iterate in config order and return first match (IndexMap preserves order)
270        for (matcher, flavor) in &cache.matchers {
271            if matcher.is_match(path_for_matching.as_ref()) {
272                return *flavor;
273            }
274        }
275
276        // No pattern matched, use fallback
277        self.resolve_flavor_fallback(file_path)
278    }
279
280    /// Fallback flavor resolution: global flavor → auto-detect → Standard
281    fn resolve_flavor_fallback(&self, file_path: &Path) -> MarkdownFlavor {
282        // If global flavor is explicitly set to non-Standard, use it
283        if self.global.flavor != MarkdownFlavor::Standard {
284            return self.global.flavor;
285        }
286        // Auto-detect from extension
287        MarkdownFlavor::from_path(file_path)
288    }
289
290    /// Canonicalize every rule-name list inside this `Config`.
291    ///
292    /// This is the single enforcement point for the runtime invariant:
293    /// **after a `Config` is fully built, every rule-name list contains
294    /// canonical rule IDs (`"MD033"`) — never aliases (`"no-inline-html"`).**
295    ///
296    /// The invariant lets every consumer (`rules::filter_rules`, the LSP,
297    /// WASM, fix coordinator, per-file-ignore lookups) match against
298    /// `Rule::name()` with simple string equality. Mutation boundaries
299    /// (`From<SourcedConfig> for Config`, LSP `apply_lsp_settings_*`, WASM
300    /// `to_config_with_warnings`) call this before handing the `Config` to
301    /// the linting pipeline.
302    ///
303    /// Covers `global.{enable,disable,extend_enable,extend_disable,fixable,unfixable}`
304    /// and the values of `per_file_ignores`. Idempotent.
305    pub fn canonicalize_rule_lists(&mut self) {
306        use super::registry::canonicalize_rule_list_in_place;
307        self.global.canonicalize_rule_lists();
308        for rules in self.per_file_ignores.values_mut() {
309            canonicalize_rule_list_in_place(rules);
310        }
311    }
312
313    /// Merge inline configuration overrides into a copy of this config
314    ///
315    /// This enables automatic inline config support - the engine can merge
316    /// inline overrides and recreate rules without any per-rule changes.
317    ///
318    /// Returns a new Config with the inline overrides merged in.
319    /// If there are no inline overrides, returns a clone of self.
320    pub fn merge_with_inline_config(&self, inline_config: &crate::inline_config::InlineConfig) -> Self {
321        let overrides = inline_config.get_all_rule_configs();
322        if overrides.is_empty() {
323            return self.clone();
324        }
325
326        let mut merged = self.clone();
327
328        for (rule_name, json_override) in overrides {
329            // Get or create the rule config entry
330            let rule_config = merged.rules.entry(rule_name.clone()).or_default();
331
332            // Merge JSON values into the rule's config
333            if let Some(obj) = json_override.as_object() {
334                for (key, value) in obj {
335                    // Normalize key to kebab-case for consistency
336                    let normalized_key = key.replace('_', "-");
337
338                    // Convert JSON value to TOML value
339                    if let Some(toml_value) = json_to_toml(value) {
340                        rule_config.values.insert(normalized_key, toml_value);
341                    }
342                }
343            }
344        }
345
346        merged
347    }
348}
349
350/// Normalize a file path for matching against a glob pattern from configuration.
351///
352/// Glob patterns in `per-file-ignores` and `per-file-flavor` are written relative
353/// to the project root (e.g. `docs/**/*.md`), and the underlying matcher uses
354/// `literal_separator(true)` so an absolute path like `/home/user/proj/docs/x.md`
355/// will not match `docs/**/*.md`. This helper produces the form the glob expects:
356///
357/// 1. **Relative path** → return as-is.
358/// 2. **Absolute path under `project_root`** → return path relative to `project_root`.
359/// 3. **Absolute path under `cwd`** → return path relative to `cwd`. This is the
360///    safety net for invocations where `project_root` could not be discovered
361///    (no `.git` upward, LSP/CLI calls outside a project) but the file still
362///    lives somewhere under the working directory.
363/// 4. **Anywhere else** → return the raw path. A relative glob simply won't
364///    match it, which is the desired outcome for files outside any known root.
365///
366/// All canonicalization failures degrade gracefully to step 4 so editor buffers
367/// and pre-creation paths still flow through without panicking.
368///
369/// `canonical_project_root` is expected to already be canonical (via
370/// `Config::canonical_project_root`). `cwd` is canonicalized internally on each
371/// call since it is read fresh from the environment per invocation.
372pub(super) fn normalize_match_path<'a>(
373    file_path: &'a Path,
374    canonical_project_root: Option<&Path>,
375    cwd: Option<&Path>,
376) -> std::borrow::Cow<'a, Path> {
377    use std::borrow::Cow;
378
379    if file_path.is_relative() {
380        return Cow::Borrowed(file_path);
381    }
382
383    let Ok(canonical_file) = file_path.canonicalize() else {
384        log::debug!(
385            "normalize_match_path: canonicalize failed for {}; returning raw path. \
386             Per-file glob patterns may not match (file may not yet exist on disk).",
387            file_path.display()
388        );
389        return Cow::Borrowed(file_path);
390    };
391
392    if let Some(root) = canonical_project_root
393        && let Ok(rel) = canonical_file.strip_prefix(root)
394    {
395        return Cow::Owned(rel.to_path_buf());
396    }
397
398    if let Some(working_dir) = cwd
399        && let Ok(canonical_cwd) = working_dir.canonicalize()
400        && let Ok(rel) = canonical_file.strip_prefix(&canonical_cwd)
401    {
402        return Cow::Owned(rel.to_path_buf());
403    }
404
405    // Surface the silent fallback once per process at warn level so users with
406    // per-file glob configs notice when their patterns can't match a file.
407    // Subsequent occurrences stay at debug to avoid log spam.
408    static SILENT_FALLBACK_WARNED: OnceLock<()> = OnceLock::new();
409    log::log!(
410        first_call_warn_else_debug(&SILENT_FALLBACK_WARNED),
411        "{}",
412        format_silent_fallback_message(file_path, canonical_project_root, cwd),
413    );
414    Cow::Borrowed(file_path)
415}
416
417/// Returns [`log::Level::Warn`] the first time it is called with a given
418/// `latch`, and [`log::Level::Debug`] on every subsequent call. The latch
419/// is consumed by the first caller via `OnceLock::set`; later callers
420/// observe the latch as already set and downgrade.
421///
422/// Used to flag a fallback condition once per process without flooding
423/// logs when the same condition recurs (e.g. once per linted file).
424pub(super) fn first_call_warn_else_debug(latch: &OnceLock<()>) -> log::Level {
425    if latch.set(()).is_ok() {
426        log::Level::Warn
427    } else {
428        log::Level::Debug
429    }
430}
431
432/// Format the diagnostic emitted when [`normalize_match_path`] cannot
433/// relativise `file_path` against either the project root or the current
434/// working directory. Extracted so the exact wording can be asserted in
435/// tests without capturing log output.
436pub(super) fn format_silent_fallback_message(
437    file_path: &Path,
438    canonical_project_root: Option<&Path>,
439    cwd: Option<&Path>,
440) -> String {
441    format!(
442        "Per-file glob patterns will not match {}: file is outside project_root ({}) and cwd ({})",
443        file_path.display(),
444        DisplayPathOrUnset(canonical_project_root),
445        DisplayPathOrUnset(cwd),
446    )
447}
448
/// `Display` wrapper around an optional path.
///
/// A present path is rendered through [`Path::display`]; an absent one is
/// rendered as the literal `<unset>`. Angle brackets are used so the
/// placeholder does not produce doubled parentheses when the caller's format
/// string already wraps the value in `(…)`.
struct DisplayPathOrUnset<'a>(Option<&'a Path>);

impl std::fmt::Display for DisplayPathOrUnset<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Some(path) = self.0 else {
            return f.write_str("<unset>");
        };
        // Forward to the path's own `Display` impl with the caller's formatter.
        std::fmt::Display::fmt(&path.display(), f)
    }
}
464
465/// Convert a serde_json::Value to a toml::Value
466pub(super) fn json_to_toml(json: &serde_json::Value) -> Option<toml::Value> {
467    match json {
468        serde_json::Value::Null => None,
469        serde_json::Value::Bool(b) => Some(toml::Value::Boolean(*b)),
470        serde_json::Value::Number(n) => n
471            .as_i64()
472            .map(toml::Value::Integer)
473            .or_else(|| n.as_f64().map(toml::Value::Float)),
474        serde_json::Value::String(s) => Some(toml::Value::String(s.clone())),
475        serde_json::Value::Array(arr) => {
476            let toml_arr: Vec<toml::Value> = arr.iter().filter_map(json_to_toml).collect();
477            Some(toml::Value::Array(toml_arr))
478        }
479        serde_json::Value::Object(obj) => {
480            let mut table = toml::map::Map::new();
481            for (k, v) in obj {
482                if let Some(tv) = json_to_toml(v) {
483                    table.insert(k.clone(), tv);
484                }
485            }
486            Some(toml::Value::Table(table))
487        }
488    }
489}
490
491impl PerFileIgnoreCache {
492    fn new(per_file_ignores: &HashMap<String, Vec<String>>) -> Self {
493        let mut builder = GlobSetBuilder::new();
494        let mut rules = Vec::new();
495
496        for (pattern, rules_list) in per_file_ignores {
497            if let Ok(glob) = Glob::new(pattern) {
498                builder.add(glob);
499                // Canonicalize defensively: callers should have run
500                // Config::canonicalize_rule_lists already, but per-file-ignores
501                // has reached this cache directly from a few code paths
502                // historically, so we re-canonicalize here to keep the cache
503                // sound regardless of caller discipline.
504                rules.push(
505                    rules_list
506                        .iter()
507                        .map(|rule| super::registry::resolve_rule_name(rule))
508                        .collect(),
509                );
510            } else {
511                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
512            }
513        }
514
515        let globset = builder.build().unwrap_or_else(|e| {
516            log::error!("Failed to build globset for per-file-ignores: {e}");
517            GlobSetBuilder::new().build().unwrap()
518        });
519
520        Self { globset, rules }
521    }
522}
523
524impl PerFileFlavorCache {
525    fn new(per_file_flavor: &IndexMap<String, MarkdownFlavor>) -> Self {
526        let mut matchers = Vec::new();
527
528        for (pattern, flavor) in per_file_flavor {
529            if let Ok(glob) = GlobBuilder::new(pattern).literal_separator(true).build() {
530                matchers.push((glob.compile_matcher(), *flavor));
531            } else {
532                log::warn!("Invalid glob pattern in per-file-flavor: {pattern}");
533            }
534        }
535
536        Self { matchers }
537    }
538}
539
540/// Global configuration options
541#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
542#[serde(default, rename_all = "kebab-case")]
543pub struct GlobalConfig {
544    /// Enabled rules
545    #[serde(default)]
546    pub enable: Vec<String>,
547
548    /// Disabled rules
549    #[serde(default)]
550    pub disable: Vec<String>,
551
552    /// Files to exclude
553    #[serde(default)]
554    pub exclude: Vec<String>,
555
556    /// Files to include
557    #[serde(default)]
558    pub include: Vec<String>,
559
560    /// Respect .gitignore files when scanning directories
561    #[serde(default = "default_respect_gitignore", alias = "respect_gitignore")]
562    pub respect_gitignore: bool,
563
564    /// Global line length setting (used by MD013 and other rules if not overridden)
565    #[serde(default, alias = "line_length")]
566    pub line_length: LineLength,
567
568    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
569    #[serde(skip_serializing_if = "Option::is_none", alias = "output_format")]
570    pub output_format: Option<String>,
571
572    /// Rules that are allowed to be fixed when --fix is used
573    /// If specified, only these rules will be fixed
574    #[serde(default)]
575    pub fixable: Vec<String>,
576
577    /// Rules that should never be fixed, even when --fix is used
578    /// Takes precedence over fixable
579    #[serde(default)]
580    pub unfixable: Vec<String>,
581
582    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
583    /// When set, adjusts parsing and validation rules for that specific Markdown variant
584    #[serde(default)]
585    pub flavor: MarkdownFlavor,
586
587    /// \[DEPRECATED\] Whether to enforce exclude patterns for explicitly passed paths.
588    /// This option is deprecated as of v0.0.156 and has no effect.
589    /// Exclude patterns are now always respected, even for explicitly provided files.
590    /// This prevents duplication between rumdl config and tool configs like pre-commit.
591    #[serde(default, alias = "force_exclude")]
592    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
593    pub force_exclude: bool,
594
595    /// Directory to store cache files (default: .rumdl_cache)
596    /// Can also be set via --cache-dir CLI flag or RUMDL_CACHE_DIR environment variable
597    #[serde(default, alias = "cache_dir", skip_serializing_if = "Option::is_none")]
598    pub cache_dir: Option<String>,
599
600    /// Whether caching is enabled (default: true)
601    /// Can also be disabled via --no-cache CLI flag
602    #[serde(default = "default_true")]
603    pub cache: bool,
604
605    /// Additional rules to enable on top of the base set (additive)
606    #[serde(default, alias = "extend_enable")]
607    pub extend_enable: Vec<String>,
608
609    /// Additional rules to disable on top of the base set (additive)
610    #[serde(default, alias = "extend_disable")]
611    pub extend_disable: Vec<String>,
612
613    /// Whether the enable list was explicitly set (even if empty).
614    /// Used to distinguish "no enable list configured" from "enable list is empty"
615    /// (e.g., markdownlint `default: false` with no rules enabled).
616    #[serde(skip)]
617    pub enable_is_explicit: bool,
618}
619
/// Serde default for `GlobalConfig::respect_gitignore`: honour `.gitignore`
/// unless the config says otherwise.
fn default_respect_gitignore() -> bool {
    true
}
623
/// Serde default helper for boolean fields that are on unless disabled
/// (used by `GlobalConfig::cache`).
fn default_true() -> bool {
    true
}
627
628// Add the Default impl
629impl Default for GlobalConfig {
630    #[allow(deprecated)]
631    fn default() -> Self {
632        Self {
633            enable: Vec::new(),
634            disable: Vec::new(),
635            exclude: Vec::new(),
636            include: Vec::new(),
637            respect_gitignore: true,
638            line_length: LineLength::default(),
639            output_format: None,
640            fixable: Vec::new(),
641            unfixable: Vec::new(),
642            flavor: MarkdownFlavor::default(),
643            force_exclude: false,
644            cache_dir: None,
645            cache: true,
646            extend_enable: Vec::new(),
647            extend_disable: Vec::new(),
648            enable_is_explicit: false,
649        }
650    }
651}
652
653impl GlobalConfig {
654    /// Canonicalize every rule-name list in this `GlobalConfig`.
655    ///
656    /// Rewrites `enable`, `disable`, `extend_enable`, `extend_disable`, `fixable`,
657    /// and `unfixable` so that all entries are canonical rule IDs (`"MD033"`)
658    /// rather than aliases (`"no-inline-html"`). Duplicates are removed,
659    /// preserving first-occurrence order; the special `"all"` keyword is
660    /// preserved.
661    ///
662    /// This must be called by every code path that mutates a runtime
663    /// `Config`'s rule lists from external input (markdownlint configs,
664    /// `.rumdl.toml`, LSP `initializationOptions`, WASM bindings, etc.) so
665    /// that downstream consumers (`rules::filter_rules`, the LSP, WASM) can
666    /// match against `Rule::name()` with simple string equality.
667    pub fn canonicalize_rule_lists(&mut self) {
668        use super::registry::canonicalize_rule_list_in_place;
669        canonicalize_rule_list_in_place(&mut self.enable);
670        canonicalize_rule_list_in_place(&mut self.disable);
671        canonicalize_rule_list_in_place(&mut self.extend_enable);
672        canonicalize_rule_list_in_place(&mut self.extend_disable);
673        canonicalize_rule_list_in_place(&mut self.fixable);
674        canonicalize_rule_list_in_place(&mut self.unfixable);
675    }
676}
677
/// File names of rumdl-native configs, listed in precedence order for the
/// upward directory walk.
///
/// This list is the single source of truth for config discovery. Both the
/// CLI (`SourcedConfig::discover_config_upward`, `discover_config_for_dir`)
/// and the LSP (`RumdlLanguageServer::resolve_config_for_file`) must use it;
/// if they diverge, one of them will silently fail to find a config that the
/// other recognises.
///
/// The invariant is pinned by the resolver parity test
/// `src/lsp/tests.rs::test_lsp_cli_resolver_parity_on_fixtures`, which runs
/// both resolvers side by side over several directory layouts.
pub const RUMDL_CONFIG_FILES: &[&str] = &[
    ".rumdl.toml",
    "rumdl.toml",
    ".config/rumdl.toml",
    "pyproject.toml",
];
691
/// File names of markdownlint / markdownlint-cli2 configuration files.
///
/// The `markdownlint-cli2` names come first, followed by the dotted
/// `.markdownlint.*` names and then the undotted `markdownlint.*` names.
// NOTE(review): presumably consulted in this order during config discovery —
// confirm against the discovery code, which is not in this part of the file.
pub const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    // markdownlint-cli2
    ".markdownlint-cli2.jsonc",
    ".markdownlint-cli2.yaml",
    ".markdownlint-cli2.yml",
    // dotted markdownlint
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    // undotted markdownlint
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
705
706/// Create a default configuration file at the specified path
707pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
708    create_preset_config("default", path)
709}
710
711/// Create a configuration file with a specific style preset
712pub fn create_preset_config(preset: &str, path: &str) -> Result<(), ConfigError> {
713    if Path::new(path).exists() {
714        return Err(ConfigError::FileExists { path: path.to_string() });
715    }
716
717    let config_content = match preset {
718        "default" => generate_default_preset(),
719        "google" => generate_google_preset(),
720        "relaxed" => generate_relaxed_preset(),
721        _ => {
722            return Err(ConfigError::UnknownPreset {
723                name: preset.to_string(),
724            });
725        }
726    };
727
728    match fs::write(path, config_content) {
729        Ok(_) => Ok(()),
730        Err(err) => Err(ConfigError::IoError {
731            source: err,
732            path: path.to_string(),
733        }),
734    }
735}
736
/// Build the text of the default preset.
///
/// This is the content that `create_default_config` writes to disk: a
/// commented template with an active `[global]` section (default `exclude`
/// list and `respect-gitignore = true`) and commented-out examples for
/// common rule sections.
fn generate_default_preset() -> String {
    let template = r#"# rumdl configuration file

# Inherit settings from another config file (relative to this file's directory)
# extends = "../base.rumdl.toml"

# Global configuration options
[global]
# List of rules to disable (uncomment and modify as needed)
# disable = ["MD013", "MD033"]

# List of rules to enable exclusively (replaces defaults; only these rules will run)
# enable = ["MD001", "MD003", "MD004"]

# Additional rules to enable on top of defaults (additive, does not replace)
# Use this to activate opt-in rules like MD060, MD063, MD072, MD073, MD074
# extend-enable = ["MD060", "MD063"]

# Additional rules to disable on top of the disable list (additive)
# extend-disable = ["MD041"]

# List of file/directory patterns to include for linting (if provided, only these will be linted)
# include = [
#    "docs/*.md",
#    "src/**/*.md",
#    "README.md"
# ]

# List of file/directory patterns to exclude from linting
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",

    # Specific files or patterns
    "CHANGELOG.md",
    "LICENSE.md",
]

# Respect .gitignore files when scanning directories (default: true)
respect-gitignore = true

# Markdown flavor/dialect (uncomment to enable)
# Options: standard (default), gfm, commonmark, mkdocs, mdx, quarto
# flavor = "mkdocs"

# Rule-specific configurations (uncomment and modify as needed)

# [MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [MD007]
# indent = 4  # Unordered list indentation

# [MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    template.to_owned()
}
812
813/// Generate Google developer documentation style preset.
814/// Based on https://google.github.io/styleguide/docguide/style.html
815fn generate_google_preset() -> String {
816    r#"# rumdl configuration - Google developer documentation style
817# Based on https://google.github.io/styleguide/docguide/style.html
818
819[global]
820exclude = [
821    ".git",
822    ".github",
823    "node_modules",
824    "vendor",
825    "dist",
826    "build",
827    "CHANGELOG.md",
828    "LICENSE.md",
829]
830respect-gitignore = true
831
832# ATX-style headings required
833[MD003]
834style = "atx"
835
836# Unordered list style: dash
837[MD004]
838style = "dash"
839
840# 4-space indent for nested lists
841[MD007]
842indent = 4
843
844# Strict mode: no trailing spaces allowed (Google uses backslash for line breaks)
845[MD009]
846strict = true
847
848# 80-character line length
849[MD013]
850line-length = 80
851code-blocks = false
852tables = false
853
854# No trailing punctuation in headings
855[MD026]
856punctuation = ".,;:!。,;:!"
857
858# Fenced code blocks only (no indented code blocks)
859[MD046]
860style = "fenced"
861
862# Emphasis with underscores
863[MD049]
864style = "underscore"
865
866# Strong with asterisks
867[MD050]
868style = "asterisk"
869"#
870    .to_string()
871}
872
/// Build the text of the `relaxed` preset.
///
/// Intended for existing projects that adopt rumdl step by step: the noisiest
/// rules (MD013, MD033, MD041) are disabled and the style rules only require
/// consistency, which keeps the initial number of warnings low.
fn generate_relaxed_preset() -> String {
    let template = r#"# rumdl configuration - Relaxed preset
# Lenient settings for existing projects adopting rumdl incrementally.
# Minimizes initial warnings while still catching important issues.

[global]
exclude = [
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
    "CHANGELOG.md",
    "LICENSE.md",
]
respect-gitignore = true

# Disable rules that produce the most noise on existing projects
disable = [
    "MD013",  # Line length - most existing files exceed 80 chars
    "MD033",  # Inline HTML - commonly used in real-world markdown
    "MD041",  # First line heading - not all files need it
]

# Consistent heading style (any style, just be consistent)
[MD003]
style = "consistent"

# Consistent list style
[MD004]
style = "consistent"

# Consistent emphasis style
[MD049]
style = "consistent"

# Consistent strong style
[MD050]
style = "consistent"
"#;

    template.to_owned()
}
918
/// Errors that can occur when loading or creating configuration files.
///
/// The user-facing message of each variant is defined by its `#[error(...)]`
/// attribute.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An I/O operation on the configuration file at `path` failed.
    ///
    /// NOTE(review): `create_preset_config` also returns this variant when
    /// *writing* a new file fails, so the "Failed to read" wording of the
    /// message can be misleading in that case.
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON).
    /// The payload is the human-readable parse failure description.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// A configuration file already exists at `path`, so a new one was not created.
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },

    /// Circular `extends` reference detected: `path` is already part of `chain`.
    #[error("Circular extends reference: {path} already in chain {chain:?}")]
    CircularExtends { path: String, chain: Vec<String> },

    /// The `extends` chain grew beyond `max_depth` while processing `path`.
    #[error("Extends chain exceeds maximum depth of {max_depth} at {path}")]
    ExtendsDepthExceeded { path: String, max_depth: usize },

    /// The file named by an `extends` entry (`path`) does not exist;
    /// `from` identifies where the reference came from.
    #[error("extends target not found: {path} (referenced from {from})")]
    ExtendsNotFound { path: String, from: String },

    /// The requested preset `name` is not one of the known presets
    /// (`default`, `google`, `relaxed`).
    #[error("Unknown preset: {name}. Valid presets: default, google, relaxed")]
    UnknownPreset { name: String },
}
950
951/// Get a rule-specific configuration value
952/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
953/// for better markdownlint compatibility
954pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
955    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
956
957    let rule_config = config.rules.get(&norm_rule_name)?;
958
959    // Try multiple key variants to support both underscore and kebab-case formats
960    let key_variants = [
961        key.to_string(),       // Original key as provided
962        normalize_key(key),    // Normalized key (lowercase, kebab-case)
963        key.replace('-', "_"), // Convert kebab-case to snake_case
964        key.replace('_', "-"), // Convert snake_case to kebab-case
965    ];
966
967    // Try each variant until we find a match
968    for variant in &key_variants {
969        if let Some(value) = rule_config.values.get(variant)
970            && let Ok(result) = T::deserialize(value.clone())
971        {
972            return Some(result);
973        }
974    }
975
976    None
977}
978
979/// Generate preset configuration for pyproject.toml format.
980/// Converts the .rumdl.toml preset to pyproject.toml section format.
981pub fn generate_pyproject_preset_config(preset: &str) -> Result<String, ConfigError> {
982    match preset {
983        "default" => Ok(generate_pyproject_config()),
984        other => {
985            let rumdl_config = match other {
986                "google" => generate_google_preset(),
987                "relaxed" => generate_relaxed_preset(),
988                _ => {
989                    return Err(ConfigError::UnknownPreset {
990                        name: other.to_string(),
991                    });
992                }
993            };
994            Ok(convert_rumdl_to_pyproject(&rumdl_config))
995        }
996    }
997}
998
/// Convert a .rumdl.toml config string to pyproject.toml format.
///
/// Rewrites `[global]` → `[tool.rumdl]` and `[MDXXX]` → `[tool.rumdl.MDXXX]`.
///
/// The conversion is purely line based and does not parse TOML. A line is
/// treated as a section header when, after trimming surrounding whitespace,
/// it starts with `[` and ends with `]`. Consequences:
///
/// - Commented-out headers such as `# [MD003]` are copied through unchanged,
///   as is every other line.
/// - A rewritten header loses its surrounding whitespace.
/// - Every output line is terminated with `\n`, so a final newline is added
///   if the input lacks one. An empty input yields an empty string.
fn convert_rumdl_to_pyproject(rumdl_config: &str) -> String {
    // Room for the longer `[tool.rumdl.…]` headers without reallocating.
    let mut output = String::with_capacity(rumdl_config.len() + 128);

    for line in rumdl_config.lines() {
        // `Some(name)` only for lines of the form `[name]`.
        let section = line
            .trim()
            .strip_prefix('[')
            .and_then(|rest| rest.strip_suffix(']'));

        match section {
            Some("global") => output.push_str("[tool.rumdl]"),
            Some(name) => {
                // Appended piecewise to avoid a temporary `String` per header.
                output.push_str("[tool.rumdl.");
                output.push_str(name);
                output.push(']');
            }
            None => output.push_str(line),
        }
        output.push('\n');
    }

    output
}
1019
/// Build the default rumdl section for a `pyproject.toml` file.
///
/// The returned text starts with an empty line followed by the `[tool.rumdl]`
/// table; rule-specific tables are included only as commented-out examples.
pub fn generate_pyproject_config() -> String {
    String::from(
        r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
# extend-enable = ["MD060"]  # Add opt-in rules (additive, keeps defaults)
# extend-disable = []  # Additional rules to disable (additive)
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#,
    )
}