Skip to main content

rumdl_lib/config/
types.rs

1use crate::types::LineLength;
2use globset::{Glob, GlobBuilder, GlobMatcher, GlobSet, GlobSetBuilder};
3use indexmap::IndexMap;
4use serde::{Deserialize, Serialize};
5use std::collections::BTreeMap;
6use std::collections::{HashMap, HashSet};
7use std::fs;
8use std::io;
9use std::path::Path;
10use std::sync::{Arc, OnceLock};
11
12use super::flavor::{MarkdownFlavor, normalize_key};
13
/// Represents a rule-specific configuration
///
/// One instance holds the settings found under a single rule key (see
/// `Config::rules`). The optional `severity` key is pulled out into its own
/// field; every other key/value pair is kept untyped in `values`.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Severity override for this rule (Error, Warning, or Info)
    // Omitted from serialized output when no override is set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub severity: Option<crate::rule::Severity>,

    /// Configuration values for the rule
    // `flatten` makes these keys siblings of `severity` in the serialized
    // form rather than nesting them under a `values` key.
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
26
27/// Generate a JSON schema for arbitrary configuration values
28fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
29    schemars::json_schema!({
30        "type": "object",
31        "additionalProperties": true
32    })
33}
34
/// Represents the complete configuration loaded from rumdl.toml
///
/// Besides the deserialized settings, the struct carries two lazily built glob
/// caches (skipped by serde and schemars) that back the per-file lookups.
#[derive(Debug, Clone, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Per-file flavor overrides: maps file patterns to Markdown flavors
    /// Example: { "docs/**/*.md": MkDocs, "**/*.mdx": MDX }
    /// Uses IndexMap to preserve config file order for "first match wins" semantics
    #[serde(default, rename = "per-file-flavor")]
    #[schemars(with = "HashMap<String, MarkdownFlavor>")]
    pub per_file_flavor: IndexMap<String, MarkdownFlavor>,

    /// Code block tools configuration for per-language linting and formatting
    /// using external tools like ruff, prettier, shellcheck, etc.
    #[serde(default, rename = "code-block-tools")]
    pub code_block_tools: crate::code_block_tools::CodeBlockToolsConfig,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See <https://github.com/rvben/rumdl> for full rule documentation.
    // `flatten` means every top-level key not claimed by another field is
    // collected here as a rule section.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,

    /// Project root directory, used for resolving relative paths in
    /// per-file-ignores and per-file-flavor patterns
    #[serde(skip)]
    pub project_root: Option<std::path::PathBuf>,

    // Compiled glob set for `per_file_ignores`, built on first lookup.
    // Held behind an `Arc`, so clones of this `Config` share the same cell.
    #[serde(skip)]
    #[schemars(skip)]
    pub(super) per_file_ignores_cache: Arc<OnceLock<PerFileIgnoreCache>>,

    // Compiled glob matchers for `per_file_flavor`, built on first lookup.
    // Held behind an `Arc`, so clones of this `Config` share the same cell.
    #[serde(skip)]
    #[schemars(skip)]
    pub(super) per_file_flavor_cache: Arc<OnceLock<PerFileFlavorCache>>,
}
87
88impl PartialEq for Config {
89    fn eq(&self, other: &Self) -> bool {
90        self.global == other.global
91            && self.per_file_ignores == other.per_file_ignores
92            && self.per_file_flavor == other.per_file_flavor
93            && self.code_block_tools == other.code_block_tools
94            && self.rules == other.rules
95            && self.project_root == other.project_root
96    }
97}
98
/// Pre-compiled form of the `per-file-ignores` table.
///
/// `globset` and `rules` are parallel: the rule list at index `i` belongs to
/// the `i`-th glob added to `globset`, which is the index `GlobSet::matches`
/// reports for a hit.
#[derive(Debug)]
pub(super) struct PerFileIgnoreCache {
    // All successfully parsed patterns, matched in a single pass.
    globset: GlobSet,
    // Rule names (already passed through `normalize_key`) per pattern.
    rules: Vec<Vec<String>>,
}
104
/// Pre-compiled form of the `per-file-flavor` table.
///
/// Matchers are stored in the iteration order of the source map so that the
/// lookup can return the first pattern that matches.
#[derive(Debug)]
pub(super) struct PerFileFlavorCache {
    // (compiled pattern, flavor to apply) pairs, in configuration order.
    matchers: Vec<(GlobMatcher, MarkdownFlavor)>,
}
109
110impl Config {
111    /// Check if the Markdown flavor is set to MkDocs
112    pub fn is_mkdocs_flavor(&self) -> bool {
113        self.global.flavor == MarkdownFlavor::MkDocs
114    }
115
116    // Future methods for when GFM and CommonMark are implemented:
117    // pub fn is_gfm_flavor(&self) -> bool
118    // pub fn is_commonmark_flavor(&self) -> bool
119
120    /// Get the configured Markdown flavor
121    pub fn markdown_flavor(&self) -> MarkdownFlavor {
122        self.global.flavor
123    }
124
125    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
126    pub fn is_mkdocs_project(&self) -> bool {
127        self.is_mkdocs_flavor()
128    }
129
130    /// Get the severity override for a specific rule, if configured
131    pub fn get_rule_severity(&self, rule_name: &str) -> Option<crate::rule::Severity> {
132        self.rules.get(rule_name).and_then(|r| r.severity)
133    }
134
135    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
136    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
137    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
138        let mut ignored_rules = HashSet::new();
139
140        if self.per_file_ignores.is_empty() {
141            return ignored_rules;
142        }
143
144        // Normalize the file path to be relative to project_root for pattern matching
145        // This ensures patterns like ".github/file.md" work with absolute paths
146        let path_for_matching: std::borrow::Cow<'_, Path> = if let Some(ref root) = self.project_root {
147            if let Ok(canonical_path) = file_path.canonicalize() {
148                if let Ok(canonical_root) = root.canonicalize() {
149                    if let Ok(relative) = canonical_path.strip_prefix(&canonical_root) {
150                        std::borrow::Cow::Owned(relative.to_path_buf())
151                    } else {
152                        std::borrow::Cow::Borrowed(file_path)
153                    }
154                } else {
155                    std::borrow::Cow::Borrowed(file_path)
156                }
157            } else {
158                std::borrow::Cow::Borrowed(file_path)
159            }
160        } else {
161            std::borrow::Cow::Borrowed(file_path)
162        };
163
164        let cache = self
165            .per_file_ignores_cache
166            .get_or_init(|| PerFileIgnoreCache::new(&self.per_file_ignores));
167
168        // Match the file path against all patterns
169        for match_idx in cache.globset.matches(path_for_matching.as_ref()) {
170            if let Some(rules) = cache.rules.get(match_idx) {
171                for rule in rules.iter() {
172                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
173                    ignored_rules.insert(rule.clone());
174                }
175            }
176        }
177
178        ignored_rules
179    }
180
181    /// Get the MarkdownFlavor for a specific file based on per-file-flavor configuration.
182    /// Returns the first matching pattern's flavor, or falls back to global flavor,
183    /// or auto-detects from extension, or defaults to Standard.
184    pub fn get_flavor_for_file(&self, file_path: &Path) -> MarkdownFlavor {
185        // If no per-file patterns, use fallback logic
186        if self.per_file_flavor.is_empty() {
187            return self.resolve_flavor_fallback(file_path);
188        }
189
190        // Normalize path for matching (same logic as get_ignored_rules_for_file)
191        let path_for_matching: std::borrow::Cow<'_, Path> = if let Some(ref root) = self.project_root {
192            if let Ok(canonical_path) = file_path.canonicalize() {
193                if let Ok(canonical_root) = root.canonicalize() {
194                    if let Ok(relative) = canonical_path.strip_prefix(&canonical_root) {
195                        std::borrow::Cow::Owned(relative.to_path_buf())
196                    } else {
197                        std::borrow::Cow::Borrowed(file_path)
198                    }
199                } else {
200                    std::borrow::Cow::Borrowed(file_path)
201                }
202            } else {
203                std::borrow::Cow::Borrowed(file_path)
204            }
205        } else {
206            std::borrow::Cow::Borrowed(file_path)
207        };
208
209        let cache = self
210            .per_file_flavor_cache
211            .get_or_init(|| PerFileFlavorCache::new(&self.per_file_flavor));
212
213        // Iterate in config order and return first match (IndexMap preserves order)
214        for (matcher, flavor) in &cache.matchers {
215            if matcher.is_match(path_for_matching.as_ref()) {
216                return *flavor;
217            }
218        }
219
220        // No pattern matched, use fallback
221        self.resolve_flavor_fallback(file_path)
222    }
223
224    /// Fallback flavor resolution: global flavor → auto-detect → Standard
225    fn resolve_flavor_fallback(&self, file_path: &Path) -> MarkdownFlavor {
226        // If global flavor is explicitly set to non-Standard, use it
227        if self.global.flavor != MarkdownFlavor::Standard {
228            return self.global.flavor;
229        }
230        // Auto-detect from extension
231        MarkdownFlavor::from_path(file_path)
232    }
233
234    /// Merge inline configuration overrides into a copy of this config
235    ///
236    /// This enables automatic inline config support - the engine can merge
237    /// inline overrides and recreate rules without any per-rule changes.
238    ///
239    /// Returns a new Config with the inline overrides merged in.
240    /// If there are no inline overrides, returns a clone of self.
241    pub fn merge_with_inline_config(&self, inline_config: &crate::inline_config::InlineConfig) -> Self {
242        let overrides = inline_config.get_all_rule_configs();
243        if overrides.is_empty() {
244            return self.clone();
245        }
246
247        let mut merged = self.clone();
248
249        for (rule_name, json_override) in overrides {
250            // Get or create the rule config entry
251            let rule_config = merged.rules.entry(rule_name.clone()).or_default();
252
253            // Merge JSON values into the rule's config
254            if let Some(obj) = json_override.as_object() {
255                for (key, value) in obj {
256                    // Normalize key to kebab-case for consistency
257                    let normalized_key = key.replace('_', "-");
258
259                    // Convert JSON value to TOML value
260                    if let Some(toml_value) = json_to_toml(value) {
261                        rule_config.values.insert(normalized_key, toml_value);
262                    }
263                }
264            }
265        }
266
267        merged
268    }
269}
270
271/// Convert a serde_json::Value to a toml::Value
272pub(super) fn json_to_toml(json: &serde_json::Value) -> Option<toml::Value> {
273    match json {
274        serde_json::Value::Null => None,
275        serde_json::Value::Bool(b) => Some(toml::Value::Boolean(*b)),
276        serde_json::Value::Number(n) => n
277            .as_i64()
278            .map(toml::Value::Integer)
279            .or_else(|| n.as_f64().map(toml::Value::Float)),
280        serde_json::Value::String(s) => Some(toml::Value::String(s.clone())),
281        serde_json::Value::Array(arr) => {
282            let toml_arr: Vec<toml::Value> = arr.iter().filter_map(json_to_toml).collect();
283            Some(toml::Value::Array(toml_arr))
284        }
285        serde_json::Value::Object(obj) => {
286            let mut table = toml::map::Map::new();
287            for (k, v) in obj {
288                if let Some(tv) = json_to_toml(v) {
289                    table.insert(k.clone(), tv);
290                }
291            }
292            Some(toml::Value::Table(table))
293        }
294    }
295}
296
297impl PerFileIgnoreCache {
298    fn new(per_file_ignores: &HashMap<String, Vec<String>>) -> Self {
299        let mut builder = GlobSetBuilder::new();
300        let mut rules = Vec::new();
301
302        for (pattern, rules_list) in per_file_ignores {
303            if let Ok(glob) = Glob::new(pattern) {
304                builder.add(glob);
305                rules.push(rules_list.iter().map(|rule| normalize_key(rule)).collect());
306            } else {
307                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
308            }
309        }
310
311        let globset = builder.build().unwrap_or_else(|e| {
312            log::error!("Failed to build globset for per-file-ignores: {e}");
313            GlobSetBuilder::new().build().unwrap()
314        });
315
316        Self { globset, rules }
317    }
318}
319
320impl PerFileFlavorCache {
321    fn new(per_file_flavor: &IndexMap<String, MarkdownFlavor>) -> Self {
322        let mut matchers = Vec::new();
323
324        for (pattern, flavor) in per_file_flavor {
325            if let Ok(glob) = GlobBuilder::new(pattern).literal_separator(true).build() {
326                matchers.push((glob.compile_matcher(), *flavor));
327            } else {
328                log::warn!("Invalid glob pattern in per-file-flavor: {pattern}");
329            }
330        }
331
332        Self { matchers }
333    }
334}
335
/// Global configuration options
///
/// Field names are written in kebab-case in configuration files
/// (`rename_all`); multi-word fields additionally accept their snake_case
/// spelling through an `alias`. Any field that is omitted takes the value
/// from this type's `Default` implementation or from the field-level default
/// function named in its attribute.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default, rename_all = "kebab-case")]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    // Defaults to true via `default_respect_gitignore`.
    #[serde(default = "default_respect_gitignore", alias = "respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    #[serde(default, alias = "line_length")]
    pub line_length: LineLength,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    // Left out of serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none", alias = "output_format")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// \[DEPRECATED\] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    // Still declared so that configuration files containing the key keep parsing.
    #[serde(default, alias = "force_exclude")]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,

    /// Directory to store cache files (default: .rumdl_cache)
    /// Can also be set via --cache-dir CLI flag or RUMDL_CACHE_DIR environment variable
    // Left out of serialized output when unset.
    #[serde(default, alias = "cache_dir", skip_serializing_if = "Option::is_none")]
    pub cache_dir: Option<String>,

    /// Whether caching is enabled (default: true)
    /// Can also be disabled via --no-cache CLI flag
    #[serde(default = "default_true")]
    pub cache: bool,

    /// Additional rules to enable on top of the base set (additive)
    #[serde(default, alias = "extend_enable")]
    pub extend_enable: Vec<String>,

    /// Additional rules to disable on top of the base set (additive)
    #[serde(default, alias = "extend_disable")]
    pub extend_disable: Vec<String>,

    /// Whether the enable list was explicitly set (even if empty).
    /// Used to distinguish "no enable list configured" from "enable list is empty"
    /// (e.g., markdownlint `default: false` with no rules enabled).
    // Never read from or written to configuration files (`serde(skip)`).
    #[serde(skip)]
    pub enable_is_explicit: bool,
}
415
/// Serde default for `GlobalConfig::respect_gitignore`: `.gitignore` files
/// are honoured unless the configuration says otherwise.
fn default_respect_gitignore() -> bool {
    true
}
419
/// Serde default helper for boolean options that are on unless disabled
/// (used by `GlobalConfig::cache`).
fn default_true() -> bool {
    true
}
423
424// Add the Default impl
425impl Default for GlobalConfig {
426    #[allow(deprecated)]
427    fn default() -> Self {
428        Self {
429            enable: Vec::new(),
430            disable: Vec::new(),
431            exclude: Vec::new(),
432            include: Vec::new(),
433            respect_gitignore: true,
434            line_length: LineLength::default(),
435            output_format: None,
436            fixable: Vec::new(),
437            unfixable: Vec::new(),
438            flavor: MarkdownFlavor::default(),
439            force_exclude: false,
440            cache_dir: None,
441            cache: true,
442            extend_enable: Vec::new(),
443            extend_disable: Vec::new(),
444            enable_is_explicit: false,
445        }
446    }
447}
448
/// File names recognised as markdownlint configuration files.
///
/// Covers the dot-prefixed and plain spellings with JSON, JSONC and YAML
/// (`.yaml` / `.yml`) extensions.
// NOTE(review): presumably probed in this order during config discovery —
// confirm against the caller before reordering.
pub(super) const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
459
460/// Create a default configuration file at the specified path
461pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
462    // Check if file already exists
463    if Path::new(path).exists() {
464        return Err(ConfigError::FileExists { path: path.to_string() });
465    }
466
467    // Default configuration content
468    let default_config = r#"# rumdl configuration file
469
470# Global configuration options
471[global]
472# List of rules to disable (uncomment and modify as needed)
473# disable = ["MD013", "MD033"]
474
475# List of rules to enable exclusively (if provided, only these rules will run)
476# enable = ["MD001", "MD003", "MD004"]
477
478# List of file/directory patterns to include for linting (if provided, only these will be linted)
479# include = [
480#    "docs/*.md",
481#    "src/**/*.md",
482#    "README.md"
483# ]
484
485# List of file/directory patterns to exclude from linting
486exclude = [
487    # Common directories to exclude
488    ".git",
489    ".github",
490    "node_modules",
491    "vendor",
492    "dist",
493    "build",
494
495    # Specific files or patterns
496    "CHANGELOG.md",
497    "LICENSE.md",
498]
499
500# Respect .gitignore files when scanning directories (default: true)
501respect-gitignore = true
502
503# Markdown flavor/dialect (uncomment to enable)
504# Options: standard (default), gfm, commonmark, mkdocs, mdx, quarto
505# flavor = "mkdocs"
506
507# Rule-specific configurations (uncomment and modify as needed)
508
509# [MD003]
510# style = "atx"  # Heading style (atx, atx_closed, setext)
511
512# [MD004]
513# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
514
515# [MD007]
516# indent = 4  # Unordered list indentation
517
518# [MD013]
519# line-length = 100  # Line length
520# code-blocks = false  # Exclude code blocks from line length check
521# tables = false  # Exclude tables from line length check
522# headings = true  # Include headings in line length check
523
524# [MD044]
525# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
526# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
527"#;
528
529    // Write the default configuration to the file
530    match fs::write(path, default_config) {
531        Ok(_) => Ok(()),
532        Err(err) => Err(ConfigError::IoError {
533            source: err,
534            path: path.to_string(),
535        }),
536    }
537}
538
/// Errors that can occur when loading configuration
///
/// Also returned by `create_default_config` when writing a new file.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// Failed to read the configuration file
    // Note: `create_default_config` reuses this variant for write failures,
    // although the message text says "read".
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON)
    // Carries the parser's message as plain text.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
554
555/// Get a rule-specific configuration value
556/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
557/// for better markdownlint compatibility
558pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
559    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
560
561    let rule_config = config.rules.get(&norm_rule_name)?;
562
563    // Try multiple key variants to support both underscore and kebab-case formats
564    let key_variants = [
565        key.to_string(),       // Original key as provided
566        normalize_key(key),    // Normalized key (lowercase, kebab-case)
567        key.replace('-', "_"), // Convert kebab-case to snake_case
568        key.replace('_', "-"), // Convert snake_case to kebab-case
569    ];
570
571    // Try each variant until we find a match
572    for variant in &key_variants {
573        if let Some(value) = rule_config.values.get(variant)
574            && let Ok(result) = T::deserialize(value.clone())
575        {
576            return Some(result);
577        }
578    }
579
580    None
581}
582
/// Return the default `[tool.rumdl]` section for a pyproject.toml file.
///
/// The text starts with a blank line, so it can be appended directly to
/// existing pyproject.toml content. Rule-specific sections are included as
/// commented-out examples.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(PYPROJECT_TEMPLATE)
}