rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use crate::types::LineLength;
8use log;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::collections::{BTreeSet, HashMap, HashSet};
12use std::fmt;
13use std::fs;
14use std::io;
15use std::marker::PhantomData;
16use std::path::Path;
17use std::str::FromStr;
18use toml_edit::DocumentMut;
19
20// ============================================================================
21// Typestate markers for configuration pipeline
22// ============================================================================
23
/// Marker type for configuration that has been loaded but not yet validated.
/// This is the initial state after `load_with_discovery()`.
///
/// This is a field-less unit struct: it stores no data and only serves as a
/// type-level tag for the "loaded" stage of the configuration pipeline.
#[derive(Debug, Clone, Copy, Default)]
pub struct ConfigLoaded;
28
/// Marker type for configuration that has been validated.
/// Only validated configs can be converted to `Config`.
///
/// This is a field-less unit struct: it stores no data and only serves as a
/// type-level tag for the "validated" stage of the configuration pipeline.
#[derive(Debug, Clone, Copy, Default)]
pub struct ConfigValidated;
33
34/// Markdown flavor/dialect enumeration
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
36#[serde(rename_all = "lowercase")]
37pub enum MarkdownFlavor {
38    /// Standard Markdown without flavor-specific adjustments
39    #[serde(rename = "standard", alias = "none", alias = "")]
40    #[default]
41    Standard,
42    /// MkDocs flavor with auto-reference support
43    #[serde(rename = "mkdocs")]
44    MkDocs,
45    /// MDX flavor with JSX and ESM support (.mdx files)
46    #[serde(rename = "mdx")]
47    MDX,
48    /// Quarto/RMarkdown flavor for scientific publishing (.qmd, .Rmd files)
49    #[serde(rename = "quarto")]
50    Quarto,
51    // Future flavors can be added here when they have actual implementation differences
52    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
53    // Planned: CommonMark - for strict CommonMark compliance
54}
55
56impl fmt::Display for MarkdownFlavor {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        match self {
59            MarkdownFlavor::Standard => write!(f, "standard"),
60            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
61            MarkdownFlavor::MDX => write!(f, "mdx"),
62            MarkdownFlavor::Quarto => write!(f, "quarto"),
63        }
64    }
65}
66
67impl FromStr for MarkdownFlavor {
68    type Err = String;
69
70    fn from_str(s: &str) -> Result<Self, Self::Err> {
71        match s.to_lowercase().as_str() {
72            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
73            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
74            "mdx" => Ok(MarkdownFlavor::MDX),
75            "quarto" | "qmd" | "rmd" | "rmarkdown" => Ok(MarkdownFlavor::Quarto),
76            // GFM and CommonMark are aliases for Standard since the base parser
77            // (pulldown-cmark) already supports GFM extensions (tables, task lists,
78            // strikethrough, autolinks, etc.) which are a superset of CommonMark
79            "gfm" | "github" | "commonmark" => Ok(MarkdownFlavor::Standard),
80            _ => Err(format!("Unknown markdown flavor: {s}")),
81        }
82    }
83}
84
85impl MarkdownFlavor {
86    /// Detect flavor from file extension
87    pub fn from_extension(ext: &str) -> Self {
88        match ext.to_lowercase().as_str() {
89            "mdx" => Self::MDX,
90            "qmd" => Self::Quarto,
91            "rmd" => Self::Quarto,
92            _ => Self::Standard,
93        }
94    }
95
96    /// Detect flavor from file path
97    pub fn from_path(path: &std::path::Path) -> Self {
98        path.extension()
99            .and_then(|e| e.to_str())
100            .map(Self::from_extension)
101            .unwrap_or(Self::Standard)
102    }
103
104    /// Check if this flavor supports ESM imports/exports (MDX-specific)
105    pub fn supports_esm_blocks(self) -> bool {
106        matches!(self, Self::MDX)
107    }
108
109    /// Check if this flavor supports JSX components (MDX-specific)
110    pub fn supports_jsx(self) -> bool {
111        matches!(self, Self::MDX)
112    }
113
114    /// Check if this flavor supports auto-references (MkDocs-specific)
115    pub fn supports_auto_references(self) -> bool {
116        matches!(self, Self::MkDocs)
117    }
118
119    /// Get a human-readable name for this flavor
120    pub fn name(self) -> &'static str {
121        match self {
122            Self::Standard => "Standard",
123            Self::MkDocs => "MkDocs",
124            Self::MDX => "MDX",
125            Self::Quarto => "Quarto",
126        }
127    }
128}
129
/// Canonicalizes a configuration key (rule name or option name).
///
/// * Rule identifiers, meaning exactly five bytes made of `md` in any letter
///   case followed by three ASCII digits, are upper-cased (`md013` becomes `MD013`).
/// * Every other key is ASCII-lower-cased with `_` replaced by `-`
///   (`Line_Length` becomes `line-length`).
pub fn normalize_key(key: &str) -> String {
    let looks_like_rule_id = matches!(
        key.as_bytes(),
        [m, d, digits @ ..]
            if digits.len() == 3
                && m.eq_ignore_ascii_case(&b'm')
                && d.eq_ignore_ascii_case(&b'd')
                && digits.iter().all(u8::is_ascii_digit)
    );

    if looks_like_rule_id {
        return key.to_ascii_uppercase();
    }
    key.to_ascii_lowercase().replace('_', "-")
}
139
/// Represents a rule-specific configuration
// The options of a single rule section, stored as an untyped key -> TOML
// value map so that each rule can interpret its own options.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule
    // `flatten` makes the option keys appear directly inside the rule section
    // instead of under a nested `values` table. The JSON schema for this field
    // is supplied by `arbitrary_value_schema`.
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
148
/// Generate a JSON schema for arbitrary configuration values
///
/// The returned schema describes an object that accepts any additional
/// properties. The generator argument is not used.
fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
    schemars::json_schema!({
        "type": "object",
        "additionalProperties": true
    })
}
156
/// Represents the complete configuration loaded from rumdl.toml
// Three parts: the global options, the per-file rule ignores, and one
// `RuleConfig` per rule section.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    // Falls back to `GlobalConfig::default()` when the section is absent.
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    // Keys are glob patterns; see `Config::get_ignored_rules_for_file` for how
    // they are matched.
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See https://github.com/rvben/rumdl for full rule documentation.
    // `flatten` places the rule sections at the top level of the document,
    // next to `global` and `per-file-ignores`, rather than under a `rules` table.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,
}
185
186impl Config {
187    /// Check if the Markdown flavor is set to MkDocs
188    pub fn is_mkdocs_flavor(&self) -> bool {
189        self.global.flavor == MarkdownFlavor::MkDocs
190    }
191
192    // Future methods for when GFM and CommonMark are implemented:
193    // pub fn is_gfm_flavor(&self) -> bool
194    // pub fn is_commonmark_flavor(&self) -> bool
195
196    /// Get the configured Markdown flavor
197    pub fn markdown_flavor(&self) -> MarkdownFlavor {
198        self.global.flavor
199    }
200
201    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
202    pub fn is_mkdocs_project(&self) -> bool {
203        self.is_mkdocs_flavor()
204    }
205
206    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
207    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
208    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
209        use globset::{Glob, GlobSetBuilder};
210
211        let mut ignored_rules = HashSet::new();
212
213        if self.per_file_ignores.is_empty() {
214            return ignored_rules;
215        }
216
217        // Build a globset for efficient matching
218        let mut builder = GlobSetBuilder::new();
219        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
220
221        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
222            if let Ok(glob) = Glob::new(pattern) {
223                builder.add(glob);
224                pattern_to_rules.push((idx, rules));
225            } else {
226                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
227            }
228        }
229
230        let globset = match builder.build() {
231            Ok(gs) => gs,
232            Err(e) => {
233                log::error!("Failed to build globset for per-file-ignores: {e}");
234                return ignored_rules;
235            }
236        };
237
238        // Match the file path against all patterns
239        for match_idx in globset.matches(file_path) {
240            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
241                for rule in rules.iter() {
242                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
243                    ignored_rules.insert(normalize_key(rule));
244                }
245            }
246        }
247
248        ignored_rules
249    }
250}
251
/// Global configuration options
// Container attributes: `default` fills fields missing from the input using
// `GlobalConfig::default()`, and `rename_all = "kebab-case"` makes the
// serialized field names kebab-case (e.g. `respect-gitignore`). Fields that
// carry an `alias` additionally accept their snake_case spelling on input.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default, rename_all = "kebab-case")]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    // Defaults to `true` via `default_respect_gitignore`.
    #[serde(default = "default_respect_gitignore", alias = "respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    #[serde(default, alias = "line_length")]
    pub line_length: LineLength,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    // Left out of serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none", alias = "output_format")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    // Defaults to `MarkdownFlavor::Standard` (the enum's `#[default]` variant).
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    // Still declared so the key can be read from existing configuration files.
    #[serde(default, alias = "force_exclude")]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,

    /// Directory to store cache files (default: .rumdl_cache)
    /// Can also be set via --cache-dir CLI flag or RUMDL_CACHE_DIR environment variable
    // Left out of serialized output when unset.
    #[serde(default, alias = "cache_dir", skip_serializing_if = "Option::is_none")]
    pub cache_dir: Option<String>,

    /// Whether caching is enabled (default: true)
    /// Can also be disabled via --no-cache CLI flag
    // Defaults to `true` via `default_true`.
    #[serde(default = "default_true")]
    pub cache: bool,
}
317
/// Serde default helper for boolean options that are on unless disabled.
fn default_true() -> bool {
    true
}

/// Serde default helper for `respect-gitignore`: `.gitignore` files are
/// honoured unless the option is explicitly switched off.
fn default_respect_gitignore() -> bool {
    true
}
325
326// Add the Default impl
327impl Default for GlobalConfig {
328    #[allow(deprecated)]
329    fn default() -> Self {
330        Self {
331            enable: Vec::new(),
332            disable: Vec::new(),
333            exclude: Vec::new(),
334            include: Vec::new(),
335            respect_gitignore: true,
336            line_length: LineLength::default(),
337            output_format: None,
338            fixable: Vec::new(),
339            unfixable: Vec::new(),
340            flavor: MarkdownFlavor::default(),
341            force_exclude: false,
342            cache_dir: None,
343            cache: true,
344        }
345    }
346}
347
/// File names recognised as markdownlint configuration files: the JSON, JSONC
/// and YAML variants, each with and without a leading dot.
// NOTE(review): the code that consumes this list is outside this excerpt;
// presumably the entries are tried in order during config discovery — confirm.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
358
359/// Create a default configuration file at the specified path
360pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
361    // Check if file already exists
362    if Path::new(path).exists() {
363        return Err(ConfigError::FileExists { path: path.to_string() });
364    }
365
366    // Default configuration content
367    let default_config = r#"# rumdl configuration file
368
369# Global configuration options
370[global]
371# List of rules to disable (uncomment and modify as needed)
372# disable = ["MD013", "MD033"]
373
374# List of rules to enable exclusively (if provided, only these rules will run)
375# enable = ["MD001", "MD003", "MD004"]
376
377# List of file/directory patterns to include for linting (if provided, only these will be linted)
378# include = [
379#    "docs/*.md",
380#    "src/**/*.md",
381#    "README.md"
382# ]
383
384# List of file/directory patterns to exclude from linting
385exclude = [
386    # Common directories to exclude
387    ".git",
388    ".github",
389    "node_modules",
390    "vendor",
391    "dist",
392    "build",
393
394    # Specific files or patterns
395    "CHANGELOG.md",
396    "LICENSE.md",
397]
398
399# Respect .gitignore files when scanning directories (default: true)
400respect-gitignore = true
401
402# Markdown flavor/dialect (uncomment to enable)
403# Options: standard (default), gfm, commonmark, mkdocs, mdx, quarto
404# flavor = "mkdocs"
405
406# Rule-specific configurations (uncomment and modify as needed)
407
408# [MD003]
409# style = "atx"  # Heading style (atx, atx_closed, setext)
410
411# [MD004]
412# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
413
414# [MD007]
415# indent = 4  # Unordered list indentation
416
417# [MD013]
418# line-length = 100  # Line length
419# code-blocks = false  # Exclude code blocks from line length check
420# tables = false  # Exclude tables from line length check
421# headings = true  # Include headings in line length check
422
423# [MD044]
424# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
425# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
426"#;
427
428    // Write the default configuration to the file
429    match fs::write(path, default_config) {
430        Ok(_) => Ok(()),
431        Err(err) => Err(ConfigError::IoError {
432            source: err,
433            path: path.to_string(),
434        }),
435    }
436}
437
/// Errors that can occur when loading configuration
///
/// The `#[error(...)]` attribute on each variant defines its display message.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// Failed to read the configuration file
    ///
    /// `path` is the file involved and `source` the underlying I/O error.
    /// `create_default_config` also returns this variant when writing fails.
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON)
    ///
    /// Carries the parser's message as plain text.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists
    ///
    /// Returned by `create_default_config` when the target path is already present.
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
453
454/// Get a rule-specific configuration value
455/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
456/// for better markdownlint compatibility
457pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
458    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
459
460    let rule_config = config.rules.get(&norm_rule_name)?;
461
462    // Try multiple key variants to support both underscore and kebab-case formats
463    let key_variants = [
464        key.to_string(),       // Original key as provided
465        normalize_key(key),    // Normalized key (lowercase, kebab-case)
466        key.replace('-', "_"), // Convert kebab-case to snake_case
467        key.replace('_', "-"), // Convert snake_case to kebab-case
468    ];
469
470    // Try each variant until we find a match
471    for variant in &key_variants {
472        if let Some(value) = rule_config.values.get(variant)
473            && let Ok(result) = T::deserialize(value.clone())
474        {
475            return Some(result);
476        }
477    }
478
479    None
480}
481
/// Build the default `[tool.rumdl]` section for a `pyproject.toml` file.
///
/// The returned text starts with a blank line, then the section header, the
/// global options, and commented-out examples of rule-specific sections.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(PYPROJECT_TEMPLATE)
}
524
525#[cfg(test)]
526mod tests {
527    use super::*;
528    use std::fs;
529    use tempfile::tempdir;
530
531    #[test]
532    fn test_flavor_loading() {
533        let temp_dir = tempdir().unwrap();
534        let config_path = temp_dir.path().join(".rumdl.toml");
535        let config_content = r#"
536[global]
537flavor = "mkdocs"
538disable = ["MD001"]
539"#;
540        fs::write(&config_path, config_content).unwrap();
541
542        // Load the config
543        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
544        let config: Config = sourced.into_validated_unchecked().into();
545
546        // Check that flavor was loaded
547        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
548        assert!(config.is_mkdocs_flavor());
549        assert!(config.is_mkdocs_project()); // Test backwards compatibility
550        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
551    }
552
553    #[test]
554    fn test_pyproject_toml_root_level_config() {
555        let temp_dir = tempdir().unwrap();
556        let config_path = temp_dir.path().join("pyproject.toml");
557
558        // Create a test pyproject.toml with root-level configuration
559        let content = r#"
560[tool.rumdl]
561line-length = 120
562disable = ["MD033"]
563enable = ["MD001", "MD004"]
564include = ["docs/*.md"]
565exclude = ["node_modules"]
566respect-gitignore = true
567        "#;
568
569        fs::write(&config_path, content).unwrap();
570
571        // Load the config with skip_auto_discovery to avoid environment config files
572        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
573        let config: Config = sourced.into_validated_unchecked().into(); // Convert to plain config for assertions
574
575        // Check global settings
576        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
577        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
578        // Should now contain only the configured pattern since auto-discovery is disabled
579        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
580        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
581        assert!(config.global.respect_gitignore);
582
583        // Check line-length was correctly added to MD013
584        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
585        assert_eq!(line_length, Some(120));
586    }
587
588    #[test]
589    fn test_pyproject_toml_snake_case_and_kebab_case() {
590        let temp_dir = tempdir().unwrap();
591        let config_path = temp_dir.path().join("pyproject.toml");
592
593        // Test with both kebab-case and snake_case variants
594        let content = r#"
595[tool.rumdl]
596line-length = 150
597respect_gitignore = true
598        "#;
599
600        fs::write(&config_path, content).unwrap();
601
602        // Load the config with skip_auto_discovery to avoid environment config files
603        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
604        let config: Config = sourced.into_validated_unchecked().into(); // Convert to plain config for assertions
605
606        // Check settings were correctly loaded
607        assert!(config.global.respect_gitignore);
608        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
609        assert_eq!(line_length, Some(150));
610    }
611
612    #[test]
613    fn test_md013_key_normalization_in_rumdl_toml() {
614        let temp_dir = tempdir().unwrap();
615        let config_path = temp_dir.path().join(".rumdl.toml");
616        let config_content = r#"
617[MD013]
618line_length = 111
619line-length = 222
620"#;
621        fs::write(&config_path, config_content).unwrap();
622        // Load the config with skip_auto_discovery to avoid environment config files
623        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
624        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
625        // Now we should only get the explicitly configured key
626        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
627        assert_eq!(keys, vec!["line-length"]);
628        let val = &rule_cfg.values["line-length"].value;
629        assert_eq!(val.as_integer(), Some(222));
630        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
631        let config: Config = sourced.clone().into_validated_unchecked().into();
632        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
633        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
634        assert_eq!(v1, Some(222));
635        assert_eq!(v2, Some(222));
636    }
637
638    #[test]
639    fn test_md013_section_case_insensitivity() {
640        let temp_dir = tempdir().unwrap();
641        let config_path = temp_dir.path().join(".rumdl.toml");
642        let config_content = r#"
643[md013]
644line-length = 101
645
646[Md013]
647line-length = 102
648
649[MD013]
650line-length = 103
651"#;
652        fs::write(&config_path, config_content).unwrap();
653        // Load the config with skip_auto_discovery to avoid environment config files
654        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
655        let config: Config = sourced.clone().into_validated_unchecked().into();
656        // Only the last section should win, and be present
657        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
658        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
659        assert_eq!(keys, vec!["line-length"]);
660        let val = &rule_cfg.values["line-length"].value;
661        assert_eq!(val.as_integer(), Some(103));
662        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
663        assert_eq!(v, Some(103));
664    }
665
666    #[test]
667    fn test_md013_key_snake_and_kebab_case() {
668        let temp_dir = tempdir().unwrap();
669        let config_path = temp_dir.path().join(".rumdl.toml");
670        let config_content = r#"
671[MD013]
672line_length = 201
673line-length = 202
674"#;
675        fs::write(&config_path, config_content).unwrap();
676        // Load the config with skip_auto_discovery to avoid environment config files
677        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
678        let config: Config = sourced.clone().into_validated_unchecked().into();
679        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
680        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
681        assert_eq!(keys, vec!["line-length"]);
682        let val = &rule_cfg.values["line-length"].value;
683        assert_eq!(val.as_integer(), Some(202));
684        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
685        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
686        assert_eq!(v1, Some(202));
687        assert_eq!(v2, Some(202));
688    }
689
690    #[test]
691    fn test_unknown_rule_section_is_ignored() {
692        let temp_dir = tempdir().unwrap();
693        let config_path = temp_dir.path().join(".rumdl.toml");
694        let config_content = r#"
695[MD999]
696foo = 1
697bar = 2
698[MD013]
699line-length = 303
700"#;
701        fs::write(&config_path, config_content).unwrap();
702        // Load the config with skip_auto_discovery to avoid environment config files
703        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
704        let config: Config = sourced.clone().into_validated_unchecked().into();
705        // MD999 should not be present
706        assert!(!sourced.rules.contains_key("MD999"));
707        // MD013 should be present and correct
708        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
709        assert_eq!(v, Some(303));
710    }
711
712    #[test]
713    fn test_invalid_toml_syntax() {
714        let temp_dir = tempdir().unwrap();
715        let config_path = temp_dir.path().join(".rumdl.toml");
716
717        // Invalid TOML with unclosed string
718        let config_content = r#"
719[MD013]
720line-length = "unclosed string
721"#;
722        fs::write(&config_path, config_content).unwrap();
723
724        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
725        assert!(result.is_err());
726        match result.unwrap_err() {
727            ConfigError::ParseError(msg) => {
728                // The actual error message from toml parser might vary
729                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
730            }
731            _ => panic!("Expected ParseError"),
732        }
733    }
734
735    #[test]
736    fn test_wrong_type_for_config_value() {
737        let temp_dir = tempdir().unwrap();
738        let config_path = temp_dir.path().join(".rumdl.toml");
739
740        // line-length should be a number, not a string
741        let config_content = r#"
742[MD013]
743line-length = "not a number"
744"#;
745        fs::write(&config_path, config_content).unwrap();
746
747        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
748        let config: Config = sourced.into_validated_unchecked().into();
749
750        // The value should be loaded as a string, not converted
751        let rule_config = config.rules.get("MD013").unwrap();
752        let value = rule_config.values.get("line-length").unwrap();
753        assert!(matches!(value, toml::Value::String(_)));
754    }
755
756    #[test]
757    fn test_empty_config_file() {
758        let temp_dir = tempdir().unwrap();
759        let config_path = temp_dir.path().join(".rumdl.toml");
760
761        // Empty file
762        fs::write(&config_path, "").unwrap();
763
764        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
765        let config: Config = sourced.into_validated_unchecked().into();
766
767        // Should have default values
768        assert_eq!(config.global.line_length.get(), 80);
769        assert!(config.global.respect_gitignore);
770        assert!(config.rules.is_empty());
771    }
772
773    #[test]
774    fn test_malformed_pyproject_toml() {
775        let temp_dir = tempdir().unwrap();
776        let config_path = temp_dir.path().join("pyproject.toml");
777
778        // Missing closing bracket
779        let content = r#"
780[tool.rumdl
781line-length = 120
782"#;
783        fs::write(&config_path, content).unwrap();
784
785        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
786        assert!(result.is_err());
787    }
788
789    #[test]
790    fn test_conflicting_config_values() {
791        let temp_dir = tempdir().unwrap();
792        let config_path = temp_dir.path().join(".rumdl.toml");
793
794        // Both enable and disable the same rule - these need to be in a global section
795        let config_content = r#"
796[global]
797enable = ["MD013"]
798disable = ["MD013"]
799"#;
800        fs::write(&config_path, config_content).unwrap();
801
802        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
803        let config: Config = sourced.into_validated_unchecked().into();
804
805        // Conflict resolution: enable wins over disable
806        assert!(config.global.enable.contains(&"MD013".to_string()));
807        assert!(!config.global.disable.contains(&"MD013".to_string()));
808    }
809
810    #[test]
811    fn test_invalid_rule_names() {
812        let temp_dir = tempdir().unwrap();
813        let config_path = temp_dir.path().join(".rumdl.toml");
814
815        let config_content = r#"
816[global]
817enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
818disable = ["MD-001", "MD_002"]
819"#;
820        fs::write(&config_path, config_content).unwrap();
821
822        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
823        let config: Config = sourced.into_validated_unchecked().into();
824
825        // All values should be preserved as-is
826        assert_eq!(config.global.enable.len(), 4);
827        assert_eq!(config.global.disable.len(), 2);
828    }
829
830    #[test]
831    fn test_deeply_nested_config() {
832        let temp_dir = tempdir().unwrap();
833        let config_path = temp_dir.path().join(".rumdl.toml");
834
835        // This should be ignored as we don't support nested tables within rule configs
836        let config_content = r#"
837[MD013]
838line-length = 100
839[MD013.nested]
840value = 42
841"#;
842        fs::write(&config_path, config_content).unwrap();
843
844        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
845        let config: Config = sourced.into_validated_unchecked().into();
846
847        let rule_config = config.rules.get("MD013").unwrap();
848        assert_eq!(
849            rule_config.values.get("line-length").unwrap(),
850            &toml::Value::Integer(100)
851        );
852        // Nested table should not be present
853        assert!(!rule_config.values.contains_key("nested"));
854    }
855
856    #[test]
857    fn test_unicode_in_config() {
858        let temp_dir = tempdir().unwrap();
859        let config_path = temp_dir.path().join(".rumdl.toml");
860
861        let config_content = r#"
862[global]
863include = ["文档/*.md", "ドキュメント/*.md"]
864exclude = ["测试/*", "🚀/*"]
865
866[MD013]
867line-length = 80
868message = "行太长了 🚨"
869"#;
870        fs::write(&config_path, config_content).unwrap();
871
872        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
873        let config: Config = sourced.into_validated_unchecked().into();
874
875        assert_eq!(config.global.include.len(), 2);
876        assert_eq!(config.global.exclude.len(), 2);
877        assert!(config.global.include[0].contains("文档"));
878        assert!(config.global.exclude[1].contains("🚀"));
879
880        let rule_config = config.rules.get("MD013").unwrap();
881        let message = rule_config.values.get("message").unwrap();
882        if let toml::Value::String(s) = message {
883            assert!(s.contains("行太长了"));
884            assert!(s.contains("🚨"));
885        }
886    }
887
888    #[test]
889    fn test_extremely_long_values() {
890        let temp_dir = tempdir().unwrap();
891        let config_path = temp_dir.path().join(".rumdl.toml");
892
893        let long_string = "a".repeat(10000);
894        let config_content = format!(
895            r#"
896[global]
897exclude = ["{long_string}"]
898
899[MD013]
900line-length = 999999999
901"#
902        );
903
904        fs::write(&config_path, config_content).unwrap();
905
906        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
907        let config: Config = sourced.into_validated_unchecked().into();
908
909        assert_eq!(config.global.exclude[0].len(), 10000);
910        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
911        assert_eq!(line_length, Some(999999999));
912    }
913
914    #[test]
915    fn test_config_with_comments() {
916        let temp_dir = tempdir().unwrap();
917        let config_path = temp_dir.path().join(".rumdl.toml");
918
919        let config_content = r#"
920[global]
921# This is a comment
922enable = ["MD001"] # Enable MD001
923# disable = ["MD002"] # This is commented out
924
925[MD013] # Line length rule
926line-length = 100 # Set to 100 characters
927# ignored = true # This setting is commented out
928"#;
929        fs::write(&config_path, config_content).unwrap();
930
931        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
932        let config: Config = sourced.into_validated_unchecked().into();
933
934        assert_eq!(config.global.enable, vec!["MD001"]);
935        assert!(config.global.disable.is_empty()); // Commented out
936
937        let rule_config = config.rules.get("MD013").unwrap();
938        assert_eq!(rule_config.values.len(), 1); // Only line-length
939        assert!(!rule_config.values.contains_key("ignored"));
940    }
941
942    #[test]
943    fn test_arrays_in_rule_config() {
944        let temp_dir = tempdir().unwrap();
945        let config_path = temp_dir.path().join(".rumdl.toml");
946
947        let config_content = r#"
948[MD003]
949levels = [1, 2, 3]
950tags = ["important", "critical"]
951mixed = [1, "two", true]
952"#;
953        fs::write(&config_path, config_content).unwrap();
954
955        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
956        let config: Config = sourced.into_validated_unchecked().into();
957
958        // Arrays should now be properly parsed
959        let rule_config = config.rules.get("MD003").expect("MD003 config should exist");
960
961        // Check that arrays are present and correctly parsed
962        assert!(rule_config.values.contains_key("levels"));
963        assert!(rule_config.values.contains_key("tags"));
964        assert!(rule_config.values.contains_key("mixed"));
965
966        // Verify array contents
967        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
968            assert_eq!(levels.len(), 3);
969            assert_eq!(levels[0], toml::Value::Integer(1));
970            assert_eq!(levels[1], toml::Value::Integer(2));
971            assert_eq!(levels[2], toml::Value::Integer(3));
972        } else {
973            panic!("levels should be an array");
974        }
975
976        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
977            assert_eq!(tags.len(), 2);
978            assert_eq!(tags[0], toml::Value::String("important".to_string()));
979            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
980        } else {
981            panic!("tags should be an array");
982        }
983
984        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
985            assert_eq!(mixed.len(), 3);
986            assert_eq!(mixed[0], toml::Value::Integer(1));
987            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
988            assert_eq!(mixed[2], toml::Value::Boolean(true));
989        } else {
990            panic!("mixed should be an array");
991        }
992    }
993
994    #[test]
995    fn test_normalize_key_edge_cases() {
996        // Rule names
997        assert_eq!(normalize_key("MD001"), "MD001");
998        assert_eq!(normalize_key("md001"), "MD001");
999        assert_eq!(normalize_key("Md001"), "MD001");
1000        assert_eq!(normalize_key("mD001"), "MD001");
1001
1002        // Non-rule names
1003        assert_eq!(normalize_key("line_length"), "line-length");
1004        assert_eq!(normalize_key("line-length"), "line-length");
1005        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
1006        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
1007
1008        // Edge cases
1009        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
1010        assert_eq!(normalize_key("MD00"), "md00"); // Too short
1011        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
1012        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
1013        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
1014        assert_eq!(normalize_key(""), "");
1015        assert_eq!(normalize_key("_"), "-");
1016        assert_eq!(normalize_key("___"), "---");
1017    }
1018
1019    #[test]
1020    fn test_missing_config_file() {
1021        let temp_dir = tempdir().unwrap();
1022        let config_path = temp_dir.path().join("nonexistent.toml");
1023
1024        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1025        assert!(result.is_err());
1026        match result.unwrap_err() {
1027            ConfigError::IoError { .. } => {}
1028            _ => panic!("Expected IoError for missing file"),
1029        }
1030    }
1031
1032    #[test]
1033    #[cfg(unix)]
1034    fn test_permission_denied_config() {
1035        use std::os::unix::fs::PermissionsExt;
1036
1037        let temp_dir = tempdir().unwrap();
1038        let config_path = temp_dir.path().join(".rumdl.toml");
1039
1040        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
1041
1042        // Remove read permissions
1043        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1044        perms.set_mode(0o000);
1045        fs::set_permissions(&config_path, perms).unwrap();
1046
1047        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1048
1049        // Restore permissions for cleanup
1050        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1051        perms.set_mode(0o644);
1052        fs::set_permissions(&config_path, perms).unwrap();
1053
1054        assert!(result.is_err());
1055        match result.unwrap_err() {
1056            ConfigError::IoError { .. } => {}
1057            _ => panic!("Expected IoError for permission denied"),
1058        }
1059    }
1060
1061    #[test]
1062    fn test_circular_reference_detection() {
1063        // This test is more conceptual since TOML doesn't support circular references
1064        // But we test that deeply nested structures don't cause stack overflow
1065        let temp_dir = tempdir().unwrap();
1066        let config_path = temp_dir.path().join(".rumdl.toml");
1067
1068        let mut config_content = String::from("[MD001]\n");
1069        for i in 0..100 {
1070            config_content.push_str(&format!("key{i} = {i}\n"));
1071        }
1072
1073        fs::write(&config_path, config_content).unwrap();
1074
1075        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1076        let config: Config = sourced.into_validated_unchecked().into();
1077
1078        let rule_config = config.rules.get("MD001").unwrap();
1079        assert_eq!(rule_config.values.len(), 100);
1080    }
1081
1082    #[test]
1083    fn test_special_toml_values() {
1084        let temp_dir = tempdir().unwrap();
1085        let config_path = temp_dir.path().join(".rumdl.toml");
1086
1087        let config_content = r#"
1088[MD001]
1089infinity = inf
1090neg_infinity = -inf
1091not_a_number = nan
1092datetime = 1979-05-27T07:32:00Z
1093local_date = 1979-05-27
1094local_time = 07:32:00
1095"#;
1096        fs::write(&config_path, config_content).unwrap();
1097
1098        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1099        let config: Config = sourced.into_validated_unchecked().into();
1100
1101        // Some values might not be parsed due to parser limitations
1102        if let Some(rule_config) = config.rules.get("MD001") {
1103            // Check special float values if present
1104            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1105                assert!(f.is_infinite() && f.is_sign_positive());
1106            }
1107            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1108                assert!(f.is_infinite() && f.is_sign_negative());
1109            }
1110            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1111                assert!(f.is_nan());
1112            }
1113
1114            // Check datetime values if present
1115            if let Some(val) = rule_config.values.get("datetime") {
1116                assert!(matches!(val, toml::Value::Datetime(_)));
1117            }
1118            // Note: local_date and local_time might not be parsed by the current implementation
1119        }
1120    }
1121
1122    #[test]
1123    fn test_default_config_passes_validation() {
1124        use crate::rules;
1125
1126        let temp_dir = tempdir().unwrap();
1127        let config_path = temp_dir.path().join(".rumdl.toml");
1128        let config_path_str = config_path.to_str().unwrap();
1129
1130        // Create the default config using the same function that `rumdl init` uses
1131        create_default_config(config_path_str).unwrap();
1132
1133        // Load it back as a SourcedConfig
1134        let sourced =
1135            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1136
1137        // Create the rule registry
1138        let all_rules = rules::all_rules(&Config::default());
1139        let registry = RuleRegistry::from_rules(&all_rules);
1140
1141        // Validate the config
1142        let warnings = validate_config_sourced(&sourced, &registry);
1143
1144        // The default config should have no warnings
1145        if !warnings.is_empty() {
1146            for warning in &warnings {
1147                eprintln!("Config validation warning: {}", warning.message);
1148                if let Some(rule) = &warning.rule {
1149                    eprintln!("  Rule: {rule}");
1150                }
1151                if let Some(key) = &warning.key {
1152                    eprintln!("  Key: {key}");
1153                }
1154            }
1155        }
1156        assert!(
1157            warnings.is_empty(),
1158            "Default config from rumdl init should pass validation without warnings"
1159        );
1160    }
1161
1162    #[test]
1163    fn test_per_file_ignores_config_parsing() {
1164        let temp_dir = tempdir().unwrap();
1165        let config_path = temp_dir.path().join(".rumdl.toml");
1166        let config_content = r#"
1167[per-file-ignores]
1168"README.md" = ["MD033"]
1169"docs/**/*.md" = ["MD013", "MD033"]
1170"test/*.md" = ["MD041"]
1171"#;
1172        fs::write(&config_path, config_content).unwrap();
1173
1174        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1175        let config: Config = sourced.into_validated_unchecked().into();
1176
1177        // Verify per-file-ignores was loaded
1178        assert_eq!(config.per_file_ignores.len(), 3);
1179        assert_eq!(
1180            config.per_file_ignores.get("README.md"),
1181            Some(&vec!["MD033".to_string()])
1182        );
1183        assert_eq!(
1184            config.per_file_ignores.get("docs/**/*.md"),
1185            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1186        );
1187        assert_eq!(
1188            config.per_file_ignores.get("test/*.md"),
1189            Some(&vec!["MD041".to_string()])
1190        );
1191    }
1192
1193    #[test]
1194    fn test_per_file_ignores_glob_matching() {
1195        use std::path::PathBuf;
1196
1197        let temp_dir = tempdir().unwrap();
1198        let config_path = temp_dir.path().join(".rumdl.toml");
1199        let config_content = r#"
1200[per-file-ignores]
1201"README.md" = ["MD033"]
1202"docs/**/*.md" = ["MD013"]
1203"**/test_*.md" = ["MD041"]
1204"#;
1205        fs::write(&config_path, config_content).unwrap();
1206
1207        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1208        let config: Config = sourced.into_validated_unchecked().into();
1209
1210        // Test exact match
1211        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1212        assert!(ignored.contains("MD033"));
1213        assert_eq!(ignored.len(), 1);
1214
1215        // Test glob pattern matching
1216        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1217        assert!(ignored.contains("MD013"));
1218        assert_eq!(ignored.len(), 1);
1219
1220        // Test recursive glob pattern
1221        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1222        assert!(ignored.contains("MD041"));
1223        assert_eq!(ignored.len(), 1);
1224
1225        // Test non-matching path
1226        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1227        assert!(ignored.is_empty());
1228    }
1229
1230    #[test]
1231    fn test_per_file_ignores_pyproject_toml() {
1232        let temp_dir = tempdir().unwrap();
1233        let config_path = temp_dir.path().join("pyproject.toml");
1234        let config_content = r#"
1235[tool.rumdl]
1236[tool.rumdl.per-file-ignores]
1237"README.md" = ["MD033", "MD013"]
1238"generated/*.md" = ["MD041"]
1239"#;
1240        fs::write(&config_path, config_content).unwrap();
1241
1242        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1243        let config: Config = sourced.into_validated_unchecked().into();
1244
1245        // Verify per-file-ignores was loaded from pyproject.toml
1246        assert_eq!(config.per_file_ignores.len(), 2);
1247        assert_eq!(
1248            config.per_file_ignores.get("README.md"),
1249            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1250        );
1251        assert_eq!(
1252            config.per_file_ignores.get("generated/*.md"),
1253            Some(&vec!["MD041".to_string()])
1254        );
1255    }
1256
1257    #[test]
1258    fn test_per_file_ignores_multiple_patterns_match() {
1259        use std::path::PathBuf;
1260
1261        let temp_dir = tempdir().unwrap();
1262        let config_path = temp_dir.path().join(".rumdl.toml");
1263        let config_content = r#"
1264[per-file-ignores]
1265"docs/**/*.md" = ["MD013"]
1266"**/api/*.md" = ["MD033"]
1267"docs/api/overview.md" = ["MD041"]
1268"#;
1269        fs::write(&config_path, config_content).unwrap();
1270
1271        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1272        let config: Config = sourced.into_validated_unchecked().into();
1273
1274        // File matches multiple patterns - should get union of all rules
1275        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1276        assert_eq!(ignored.len(), 3);
1277        assert!(ignored.contains("MD013"));
1278        assert!(ignored.contains("MD033"));
1279        assert!(ignored.contains("MD041"));
1280    }
1281
1282    #[test]
1283    fn test_per_file_ignores_rule_name_normalization() {
1284        use std::path::PathBuf;
1285
1286        let temp_dir = tempdir().unwrap();
1287        let config_path = temp_dir.path().join(".rumdl.toml");
1288        let config_content = r#"
1289[per-file-ignores]
1290"README.md" = ["md033", "MD013", "Md041"]
1291"#;
1292        fs::write(&config_path, config_content).unwrap();
1293
1294        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1295        let config: Config = sourced.into_validated_unchecked().into();
1296
1297        // All rule names should be normalized to uppercase
1298        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1299        assert_eq!(ignored.len(), 3);
1300        assert!(ignored.contains("MD033"));
1301        assert!(ignored.contains("MD013"));
1302        assert!(ignored.contains("MD041"));
1303    }
1304
1305    #[test]
1306    fn test_per_file_ignores_invalid_glob_pattern() {
1307        use std::path::PathBuf;
1308
1309        let temp_dir = tempdir().unwrap();
1310        let config_path = temp_dir.path().join(".rumdl.toml");
1311        let config_content = r#"
1312[per-file-ignores]
1313"[invalid" = ["MD033"]
1314"valid/*.md" = ["MD013"]
1315"#;
1316        fs::write(&config_path, config_content).unwrap();
1317
1318        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1319        let config: Config = sourced.into_validated_unchecked().into();
1320
1321        // Invalid pattern should be skipped, valid pattern should work
1322        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1323        assert!(ignored.contains("MD013"));
1324
1325        // Invalid pattern should not cause issues
1326        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1327        assert!(ignored2.is_empty());
1328    }
1329
1330    #[test]
1331    fn test_per_file_ignores_empty_section() {
1332        use std::path::PathBuf;
1333
1334        let temp_dir = tempdir().unwrap();
1335        let config_path = temp_dir.path().join(".rumdl.toml");
1336        let config_content = r#"
1337[global]
1338disable = ["MD001"]
1339
1340[per-file-ignores]
1341"#;
1342        fs::write(&config_path, config_content).unwrap();
1343
1344        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1345        let config: Config = sourced.into_validated_unchecked().into();
1346
1347        // Empty per-file-ignores should work fine
1348        assert_eq!(config.per_file_ignores.len(), 0);
1349        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1350        assert!(ignored.is_empty());
1351    }
1352
1353    #[test]
1354    fn test_per_file_ignores_with_underscores_in_pyproject() {
1355        let temp_dir = tempdir().unwrap();
1356        let config_path = temp_dir.path().join("pyproject.toml");
1357        let config_content = r#"
1358[tool.rumdl]
1359[tool.rumdl.per_file_ignores]
1360"README.md" = ["MD033"]
1361"#;
1362        fs::write(&config_path, config_content).unwrap();
1363
1364        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1365        let config: Config = sourced.into_validated_unchecked().into();
1366
1367        // Should support both per-file-ignores and per_file_ignores
1368        assert_eq!(config.per_file_ignores.len(), 1);
1369        assert_eq!(
1370            config.per_file_ignores.get("README.md"),
1371            Some(&vec!["MD033".to_string()])
1372        );
1373    }
1374
1375    #[test]
1376    fn test_generate_json_schema() {
1377        use schemars::schema_for;
1378        use std::env;
1379
1380        let schema = schema_for!(Config);
1381        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1382
1383        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1384        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1385            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1386            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1387            println!("Schema written to: {}", schema_path.display());
1388        }
1389
1390        // Basic validation that schema was generated
1391        assert!(schema_json.contains("\"title\": \"Config\""));
1392        assert!(schema_json.contains("\"global\""));
1393        assert!(schema_json.contains("\"per-file-ignores\""));
1394    }
1395
1396    #[test]
1397    fn test_user_config_loaded_with_explicit_project_config() {
1398        // Regression test for issue #131: User config should always be loaded as base layer,
1399        // even when an explicit project config path is provided
1400        let temp_dir = tempdir().unwrap();
1401
1402        // Create a fake user config directory
1403        // Note: user_configuration_path_impl adds /rumdl to the config dir
1404        let user_config_dir = temp_dir.path().join("user_config");
1405        let rumdl_config_dir = user_config_dir.join("rumdl");
1406        fs::create_dir_all(&rumdl_config_dir).unwrap();
1407        let user_config_path = rumdl_config_dir.join("rumdl.toml");
1408
1409        // User config disables MD013 and MD041
1410        let user_config_content = r#"
1411[global]
1412disable = ["MD013", "MD041"]
1413line-length = 100
1414"#;
1415        fs::write(&user_config_path, user_config_content).unwrap();
1416
1417        // Create a project config that enables MD001
1418        let project_config_path = temp_dir.path().join("project").join("pyproject.toml");
1419        fs::create_dir_all(project_config_path.parent().unwrap()).unwrap();
1420        let project_config_content = r#"
1421[tool.rumdl]
1422enable = ["MD001"]
1423"#;
1424        fs::write(&project_config_path, project_config_content).unwrap();
1425
1426        // Load config with explicit project path, passing user_config_dir
1427        let sourced = SourcedConfig::load_with_discovery_impl(
1428            Some(project_config_path.to_str().unwrap()),
1429            None,
1430            false,
1431            Some(&user_config_dir),
1432        )
1433        .unwrap();
1434
1435        let config: Config = sourced.into_validated_unchecked().into();
1436
1437        // User config settings should be preserved
1438        assert!(
1439            config.global.disable.contains(&"MD013".to_string()),
1440            "User config disabled rules should be preserved"
1441        );
1442        assert!(
1443            config.global.disable.contains(&"MD041".to_string()),
1444            "User config disabled rules should be preserved"
1445        );
1446
1447        // Project config settings should also be applied (merged on top)
1448        assert!(
1449            config.global.enable.contains(&"MD001".to_string()),
1450            "Project config enabled rules should be applied"
1451        );
1452    }
1453
1454    #[test]
1455    fn test_typestate_validate_method() {
1456        use tempfile::tempdir;
1457
1458        let temp_dir = tempdir().expect("Failed to create temporary directory");
1459        let config_path = temp_dir.path().join("test.toml");
1460
1461        // Create config with an unknown rule option to trigger a validation warning
1462        let config_content = r#"
1463[global]
1464enable = ["MD001"]
1465
1466[MD013]
1467line_length = 80
1468unknown_option = true
1469"#;
1470        std::fs::write(&config_path, config_content).expect("Failed to write config");
1471
1472        // Load config - this returns SourcedConfig<ConfigLoaded>
1473        let loaded = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true)
1474            .expect("Should load config");
1475
1476        // Create a rule registry for validation
1477        let default_config = Config::default();
1478        let all_rules = crate::rules::all_rules(&default_config);
1479        let registry = RuleRegistry::from_rules(&all_rules);
1480
1481        // Validate - this transitions to SourcedConfig<ConfigValidated>
1482        let validated = loaded.validate(&registry).expect("Should validate config");
1483
1484        // Check that validation warnings were captured for the unknown option
1485        // Note: The validation checks rule options against the rule's schema
1486        let has_unknown_option_warning = validated
1487            .validation_warnings
1488            .iter()
1489            .any(|w| w.message.contains("unknown_option") || w.message.contains("Unknown option"));
1490
1491        // Print warnings for debugging if assertion fails
1492        if !has_unknown_option_warning {
1493            for w in &validated.validation_warnings {
1494                eprintln!("Warning: {}", w.message);
1495            }
1496        }
1497        assert!(
1498            has_unknown_option_warning,
1499            "Should have warning for unknown option. Got {} warnings: {:?}",
1500            validated.validation_warnings.len(),
1501            validated
1502                .validation_warnings
1503                .iter()
1504                .map(|w| &w.message)
1505                .collect::<Vec<_>>()
1506        );
1507
1508        // Now we can convert to Config (this would be a compile error with ConfigLoaded)
1509        let config: Config = validated.into();
1510
1511        // Verify the config values are correct
1512        assert!(config.global.enable.contains(&"MD001".to_string()));
1513    }
1514
1515    #[test]
1516    fn test_typestate_validate_into_convenience_method() {
1517        use tempfile::tempdir;
1518
1519        let temp_dir = tempdir().expect("Failed to create temporary directory");
1520        let config_path = temp_dir.path().join("test.toml");
1521
1522        let config_content = r#"
1523[global]
1524enable = ["MD022"]
1525
1526[MD022]
1527lines_above = 2
1528"#;
1529        std::fs::write(&config_path, config_content).expect("Failed to write config");
1530
1531        let loaded = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true)
1532            .expect("Should load config");
1533
1534        let default_config = Config::default();
1535        let all_rules = crate::rules::all_rules(&default_config);
1536        let registry = RuleRegistry::from_rules(&all_rules);
1537
1538        // Use the convenience method that validates and converts in one step
1539        let (config, warnings) = loaded.validate_into(&registry).expect("Should validate and convert");
1540
1541        // Should have no warnings for valid config
1542        assert!(warnings.is_empty(), "Should have no warnings for valid config");
1543
1544        // Config should be usable
1545        assert!(config.global.enable.contains(&"MD022".to_string()));
1546    }
1547}
1548
/// Configuration source with clear precedence hierarchy.
///
/// Precedence order (higher values override lower values; a source of equal
/// precedence also replaces the current value):
/// - Default (0): Built-in defaults
/// - UserConfig (1): User-level ~/.config/rumdl/rumdl.toml
/// - PyprojectToml (2): Project-level pyproject.toml
/// - ProjectConfig (3): Project-level .rumdl.toml (most specific)
/// - Cli (4): Command-line flags (highest priority)
///
/// The numeric ranks are assigned by the `source_precedence` helpers used when
/// merging `SourcedValue`s, not by the enum discriminants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default configuration
    Default,
    /// User-level configuration from ~/.config/rumdl/rumdl.toml
    UserConfig,
    /// Project-level configuration from pyproject.toml
    PyprojectToml,
    /// Project-level configuration from .rumdl.toml or rumdl.toml
    ProjectConfig,
    /// Command-line flags (highest precedence)
    Cli,
}
1570
/// One recorded assignment of a configuration value, stored in the
/// `overrides` history of a [`SourcedValue`].
#[derive(Debug, Clone)]
pub struct ConfigOverride<T> {
    /// The value that was assigned.
    pub value: T,
    /// The source that supplied the value.
    pub source: ConfigSource,
    /// Optional file associated with the assignment.
    /// NOTE(review): presumably the config file path the value came from — confirm against the loaders.
    pub file: Option<String>,
    /// Optional line associated with the assignment.
    /// NOTE(review): presumably the line number within `file` — confirm against the loaders.
    pub line: Option<usize>,
}
1578
/// A configuration value together with the source that currently supplies it.
#[derive(Debug, Clone)]
pub struct SourcedValue<T> {
    /// The value currently in effect.
    pub value: T,
    /// The source that supplied `value`.
    pub source: ConfigSource,
    /// History of assignments that took effect, as recorded by `new`,
    /// `merge_override` and `push_override`; `new` seeds it with the initial value.
    pub overrides: Vec<ConfigOverride<T>>,
}
1585
1586impl<T: Clone> SourcedValue<T> {
1587    pub fn new(value: T, source: ConfigSource) -> Self {
1588        Self {
1589            value: value.clone(),
1590            source,
1591            overrides: vec![ConfigOverride {
1592                value,
1593                source,
1594                file: None,
1595                line: None,
1596            }],
1597        }
1598    }
1599
1600    /// Merges a new override into this SourcedValue based on source precedence.
1601    /// If the new source has higher or equal precedence, the value and source are updated,
1602    /// and the new override is added to the history.
1603    pub fn merge_override(
1604        &mut self,
1605        new_value: T,
1606        new_source: ConfigSource,
1607        new_file: Option<String>,
1608        new_line: Option<usize>,
1609    ) {
1610        // Helper function to get precedence, defined locally or globally
1611        fn source_precedence(src: ConfigSource) -> u8 {
1612            match src {
1613                ConfigSource::Default => 0,
1614                ConfigSource::UserConfig => 1,
1615                ConfigSource::PyprojectToml => 2,
1616                ConfigSource::ProjectConfig => 3,
1617                ConfigSource::Cli => 4,
1618            }
1619        }
1620
1621        if source_precedence(new_source) >= source_precedence(self.source) {
1622            self.value = new_value.clone();
1623            self.source = new_source;
1624            self.overrides.push(ConfigOverride {
1625                value: new_value,
1626                source: new_source,
1627                file: new_file,
1628                line: new_line,
1629            });
1630        }
1631    }
1632
1633    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
1634        // This is essentially merge_override without the precedence check
1635        // We might consolidate these later, but keep separate for now during refactor
1636        self.value = value.clone();
1637        self.source = source;
1638        self.overrides.push(ConfigOverride {
1639            value,
1640            source,
1641            file,
1642            line,
1643        });
1644    }
1645}
1646
1647impl<T: Clone + Eq + std::hash::Hash> SourcedValue<Vec<T>> {
1648    /// Merges a new value using union semantics (for arrays like `disable`)
1649    /// Values from both sources are combined, with deduplication
1650    pub fn merge_union(
1651        &mut self,
1652        new_value: Vec<T>,
1653        new_source: ConfigSource,
1654        new_file: Option<String>,
1655        new_line: Option<usize>,
1656    ) {
1657        fn source_precedence(src: ConfigSource) -> u8 {
1658            match src {
1659                ConfigSource::Default => 0,
1660                ConfigSource::UserConfig => 1,
1661                ConfigSource::PyprojectToml => 2,
1662                ConfigSource::ProjectConfig => 3,
1663                ConfigSource::Cli => 4,
1664            }
1665        }
1666
1667        if source_precedence(new_source) >= source_precedence(self.source) {
1668            // Union: combine values from both sources with deduplication
1669            let mut combined = self.value.clone();
1670            for item in new_value.iter() {
1671                if !combined.contains(item) {
1672                    combined.push(item.clone());
1673                }
1674            }
1675
1676            self.value = combined;
1677            self.source = new_source;
1678            self.overrides.push(ConfigOverride {
1679                value: new_value,
1680                source: new_source,
1681                file: new_file,
1682                line: new_line,
1683            });
1684        }
1685    }
1686}
1687
/// Global (non rule-specific) settings, each wrapped in `SourcedValue` so the
/// source that set it is tracked alongside the value.
#[derive(Debug, Clone)]
pub struct SourcedGlobalConfig {
    /// Rules to enable. Merged with replace semantics (see `SourcedConfig::merge`).
    pub enable: SourcedValue<Vec<String>>,
    /// Rules to disable. Merged with union semantics; entries that also appear in
    /// `enable` are removed after merging.
    pub disable: SourcedValue<Vec<String>>,
    /// Exclude list. NOTE(review): presumably path/glob patterns — confirm against the parser.
    pub exclude: SourcedValue<Vec<String>>,
    /// Include list. NOTE(review): presumably path/glob patterns — confirm against the parser.
    pub include: SourcedValue<Vec<String>>,
    /// Whether to respect `.gitignore`; the built-in default is `true`.
    pub respect_gitignore: SourcedValue<bool>,
    /// Global line length; the built-in default is `LineLength::default()`.
    pub line_length: SourcedValue<LineLength>,
    /// Output format; `None` until some source sets it.
    pub output_format: Option<SourcedValue<String>>,
    /// `fixable` rule list; empty by default.
    pub fixable: SourcedValue<Vec<String>>,
    /// `unfixable` rule list; empty by default.
    pub unfixable: SourcedValue<Vec<String>>,
    /// Markdown flavor; the built-in default is `MarkdownFlavor::default()`.
    pub flavor: SourcedValue<MarkdownFlavor>,
    /// `force_exclude` switch; the built-in default is `false`.
    pub force_exclude: SourcedValue<bool>,
    /// Cache directory; `None` until some source sets it.
    pub cache_dir: Option<SourcedValue<String>>,
    /// `cache` switch; the built-in default is `true`.
    pub cache: SourcedValue<bool>,
}
1704
1705impl Default for SourcedGlobalConfig {
1706    fn default() -> Self {
1707        SourcedGlobalConfig {
1708            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1709            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1710            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1711            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1712            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1713            line_length: SourcedValue::new(LineLength::default(), ConfigSource::Default),
1714            output_format: None,
1715            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1716            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1717            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1718            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1719            cache_dir: None,
1720            cache: SourcedValue::new(true, ConfigSource::Default),
1721        }
1722    }
1723}
1724
/// Settings for a single rule, with provenance tracked per key.
#[derive(Debug, Default, Clone)]
pub struct SourcedRuleConfig {
    /// Maps each configuration key of the rule to its sourced TOML value.
    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
}
1729
/// Represents configuration loaded from a single source file, with provenance.
/// Used as an intermediate step before merging into the final SourcedConfig.
///
/// Note: the list of loaded files is not stored here; it is tracked globally in
/// `SourcedConfig`.
#[derive(Debug, Clone)]
pub struct SourcedConfigFragment {
    /// Global settings found in this source.
    pub global: SourcedGlobalConfig,
    /// Per-file ignore table found in this source.
    /// NOTE(review): presumably maps a file pattern to rule names — confirm against the parser.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Rule-specific settings, keyed by rule name.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Unknown keys encountered, as `(section, key, file_path)`.
    pub unknown_keys: Vec<(String, String, Option<String>)>,
}
1740
1741impl Default for SourcedConfigFragment {
1742    fn default() -> Self {
1743        Self {
1744            global: SourcedGlobalConfig::default(),
1745            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1746            rules: BTreeMap::new(),
1747            unknown_keys: Vec::new(),
1748        }
1749    }
1750}
1751
/// Configuration with provenance tracking for values.
///
/// The `State` type parameter encodes the validation state:
/// - `ConfigLoaded`: Config has been loaded but not validated
/// - `ConfigValidated`: Config has been validated and can be converted to `Config`
///
/// # Typestate Pattern
///
/// This uses the typestate pattern to ensure validation happens before conversion:
///
/// ```ignore
/// let loaded: SourcedConfig<ConfigLoaded> = SourcedConfig::load_with_discovery(...)?;
/// let validated: SourcedConfig<ConfigValidated> = loaded.validate(&registry)?;
/// let config: Config = validated.into();  // Only works on ConfigValidated!
/// ```
///
/// Attempting to convert a `ConfigLoaded` config directly to `Config` is a compile error.
#[derive(Debug, Clone)]
pub struct SourcedConfig<State = ConfigLoaded> {
    /// Global settings, each with its source.
    pub global: SourcedGlobalConfig,
    /// Per-file ignore table with its source.
    /// NOTE(review): presumably maps a file pattern to rule names — confirm against the parser.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Rule-specific settings keyed by rule name (`merge` stores names in ASCII uppercase).
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Paths of the configuration files that have been merged in, in load order.
    pub loaded_files: Vec<String>,
    /// Unknown keys collected from merged fragments, as `(section, key, file_path)`.
    pub unknown_keys: Vec<(String, String, Option<String>)>,
    /// Project root directory (parent of config file), used for resolving relative paths
    pub project_root: Option<std::path::PathBuf>,
    /// Validation warnings (populated after validate() is called)
    pub validation_warnings: Vec<ConfigValidationWarning>,
    /// Phantom data for the state type parameter
    _state: PhantomData<State>,
}
1783
1784impl Default for SourcedConfig<ConfigLoaded> {
1785    fn default() -> Self {
1786        Self {
1787            global: SourcedGlobalConfig::default(),
1788            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1789            rules: BTreeMap::new(),
1790            loaded_files: Vec::new(),
1791            unknown_keys: Vec::new(),
1792            project_root: None,
1793            validation_warnings: Vec::new(),
1794            _state: PhantomData,
1795        }
1796    }
1797}
1798
1799impl SourcedConfig<ConfigLoaded> {
1800    /// Merges another SourcedConfigFragment into this SourcedConfig.
1801    /// Uses source precedence to determine which values take effect.
1802    fn merge(&mut self, fragment: SourcedConfigFragment) {
1803        // Merge global config
1804        // Enable uses replace semantics (project can enforce rules)
1805        self.global.enable.merge_override(
1806            fragment.global.enable.value,
1807            fragment.global.enable.source,
1808            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1809            fragment.global.enable.overrides.first().and_then(|o| o.line),
1810        );
1811
1812        // Disable uses union semantics (user can add to project disables)
1813        self.global.disable.merge_union(
1814            fragment.global.disable.value,
1815            fragment.global.disable.source,
1816            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1817            fragment.global.disable.overrides.first().and_then(|o| o.line),
1818        );
1819
1820        // Conflict resolution: Enable overrides disable
1821        // Remove any rules from disable that appear in enable
1822        self.global
1823            .disable
1824            .value
1825            .retain(|rule| !self.global.enable.value.contains(rule));
1826        self.global.include.merge_override(
1827            fragment.global.include.value,
1828            fragment.global.include.source,
1829            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1830            fragment.global.include.overrides.first().and_then(|o| o.line),
1831        );
1832        self.global.exclude.merge_override(
1833            fragment.global.exclude.value,
1834            fragment.global.exclude.source,
1835            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1836            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1837        );
1838        self.global.respect_gitignore.merge_override(
1839            fragment.global.respect_gitignore.value,
1840            fragment.global.respect_gitignore.source,
1841            fragment
1842                .global
1843                .respect_gitignore
1844                .overrides
1845                .first()
1846                .and_then(|o| o.file.clone()),
1847            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1848        );
1849        self.global.line_length.merge_override(
1850            fragment.global.line_length.value,
1851            fragment.global.line_length.source,
1852            fragment
1853                .global
1854                .line_length
1855                .overrides
1856                .first()
1857                .and_then(|o| o.file.clone()),
1858            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1859        );
1860        self.global.fixable.merge_override(
1861            fragment.global.fixable.value,
1862            fragment.global.fixable.source,
1863            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1864            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1865        );
1866        self.global.unfixable.merge_override(
1867            fragment.global.unfixable.value,
1868            fragment.global.unfixable.source,
1869            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1870            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1871        );
1872
1873        // Merge flavor
1874        self.global.flavor.merge_override(
1875            fragment.global.flavor.value,
1876            fragment.global.flavor.source,
1877            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1878            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1879        );
1880
1881        // Merge force_exclude
1882        self.global.force_exclude.merge_override(
1883            fragment.global.force_exclude.value,
1884            fragment.global.force_exclude.source,
1885            fragment
1886                .global
1887                .force_exclude
1888                .overrides
1889                .first()
1890                .and_then(|o| o.file.clone()),
1891            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1892        );
1893
1894        // Merge output_format if present
1895        if let Some(output_format_fragment) = fragment.global.output_format {
1896            if let Some(ref mut output_format) = self.global.output_format {
1897                output_format.merge_override(
1898                    output_format_fragment.value,
1899                    output_format_fragment.source,
1900                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1901                    output_format_fragment.overrides.first().and_then(|o| o.line),
1902                );
1903            } else {
1904                self.global.output_format = Some(output_format_fragment);
1905            }
1906        }
1907
1908        // Merge cache_dir if present
1909        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
1910            if let Some(ref mut cache_dir) = self.global.cache_dir {
1911                cache_dir.merge_override(
1912                    cache_dir_fragment.value,
1913                    cache_dir_fragment.source,
1914                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
1915                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
1916                );
1917            } else {
1918                self.global.cache_dir = Some(cache_dir_fragment);
1919            }
1920        }
1921
1922        // Merge cache if not default (only override when explicitly set)
1923        if fragment.global.cache.source != ConfigSource::Default {
1924            self.global.cache.merge_override(
1925                fragment.global.cache.value,
1926                fragment.global.cache.source,
1927                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
1928                fragment.global.cache.overrides.first().and_then(|o| o.line),
1929            );
1930        }
1931
1932        // Merge per_file_ignores
1933        self.per_file_ignores.merge_override(
1934            fragment.per_file_ignores.value,
1935            fragment.per_file_ignores.source,
1936            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
1937            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
1938        );
1939
1940        // Merge rule configs
1941        for (rule_name, rule_fragment) in fragment.rules {
1942            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
1943            let rule_entry = self.rules.entry(norm_rule_name).or_default();
1944            for (key, sourced_value_fragment) in rule_fragment.values {
1945                let sv_entry = rule_entry
1946                    .values
1947                    .entry(key.clone())
1948                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
1949                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
1950                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
1951                sv_entry.merge_override(
1952                    sourced_value_fragment.value,  // Use the value from the fragment
1953                    sourced_value_fragment.source, // Use the source from the fragment
1954                    file_from_fragment,            // Pass the file path from the fragment override
1955                    line_from_fragment,            // Pass the line number from the fragment override
1956                );
1957            }
1958        }
1959
1960        // Merge unknown_keys from fragment
1961        for (section, key, file_path) in fragment.unknown_keys {
1962            // Deduplicate: only add if not already present
1963            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
1964                self.unknown_keys.push((section, key, file_path));
1965            }
1966        }
1967    }
1968
    /// Load and merge configurations from files and CLI overrides.
    ///
    /// Thin wrapper: forwards both arguments to `load_with_discovery` and passes
    /// `false` as its third argument.
    /// NOTE(review): that third argument is presumably the "skip auto-discovery"
    /// flag (cf. `load_with_discovery_impl`) — confirm against `load_with_discovery`.
    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
        Self::load_with_discovery(config_path, cli_overrides, false)
    }
1973
1974    /// Finds project root by walking up from start_dir looking for .git directory.
1975    /// Falls back to start_dir if no .git found.
1976    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
1977        let mut current = start_dir.to_path_buf();
1978        const MAX_DEPTH: usize = 100;
1979
1980        for _ in 0..MAX_DEPTH {
1981            if current.join(".git").exists() {
1982                log::debug!("[rumdl-config] Found .git at: {}", current.display());
1983                return current;
1984            }
1985
1986            match current.parent() {
1987                Some(parent) => current = parent.to_path_buf(),
1988                None => break,
1989            }
1990        }
1991
1992        // No .git found, use start_dir as project root
1993        log::debug!(
1994            "[rumdl-config] No .git found, using config location as project root: {}",
1995            start_dir.display()
1996        );
1997        start_dir.to_path_buf()
1998    }
1999
2000    /// Discover configuration file by traversing up the directory tree.
2001    /// Returns the first configuration file found.
2002    /// Discovers config file and returns both the config path and project root.
2003    /// Returns: (config_file_path, project_root_path)
2004    /// Project root is the directory containing .git, or config parent as fallback.
2005    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
2006        use std::env;
2007
2008        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
2009        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
2010
2011        let start_dir = match env::current_dir() {
2012            Ok(dir) => dir,
2013            Err(e) => {
2014                log::debug!("[rumdl-config] Failed to get current directory: {e}");
2015                return None;
2016            }
2017        };
2018
2019        let mut current_dir = start_dir.clone();
2020        let mut depth = 0;
2021        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
2022
2023        loop {
2024            if depth >= MAX_DEPTH {
2025                log::debug!("[rumdl-config] Maximum traversal depth reached");
2026                break;
2027            }
2028
2029            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
2030
2031            // Check for config files in order of precedence (only if not already found)
2032            if found_config.is_none() {
2033                for config_name in CONFIG_FILES {
2034                    let config_path = current_dir.join(config_name);
2035
2036                    if config_path.exists() {
2037                        // For pyproject.toml, verify it contains [tool.rumdl] section
2038                        if *config_name == "pyproject.toml" {
2039                            if let Ok(content) = std::fs::read_to_string(&config_path) {
2040                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
2041                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
2042                                    // Store config, but continue looking for .git
2043                                    found_config = Some((config_path.clone(), current_dir.clone()));
2044                                    break;
2045                                }
2046                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
2047                                continue;
2048                            }
2049                        } else {
2050                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
2051                            // Store config, but continue looking for .git
2052                            found_config = Some((config_path.clone(), current_dir.clone()));
2053                            break;
2054                        }
2055                    }
2056                }
2057            }
2058
2059            // Check for .git directory (stop boundary)
2060            if current_dir.join(".git").exists() {
2061                log::debug!("[rumdl-config] Stopping at .git directory");
2062                break;
2063            }
2064
2065            // Move to parent directory
2066            match current_dir.parent() {
2067                Some(parent) => {
2068                    current_dir = parent.to_owned();
2069                    depth += 1;
2070                }
2071                None => {
2072                    log::debug!("[rumdl-config] Reached filesystem root");
2073                    break;
2074                }
2075            }
2076        }
2077
2078        // If config found, determine project root by walking up from config location
2079        if let Some((config_path, config_dir)) = found_config {
2080            let project_root = Self::find_project_root_from(&config_dir);
2081            return Some((config_path, project_root));
2082        }
2083
2084        None
2085    }
2086
2087    /// Discover markdownlint configuration file by traversing up the directory tree.
2088    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
2089    /// Returns the path to the config file if found.
2090    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
2091        use std::env;
2092
2093        const MAX_DEPTH: usize = 100;
2094
2095        let start_dir = match env::current_dir() {
2096            Ok(dir) => dir,
2097            Err(e) => {
2098                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
2099                return None;
2100            }
2101        };
2102
2103        let mut current_dir = start_dir.clone();
2104        let mut depth = 0;
2105
2106        loop {
2107            if depth >= MAX_DEPTH {
2108                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
2109                break;
2110            }
2111
2112            log::debug!(
2113                "[rumdl-config] Searching for markdownlint config in: {}",
2114                current_dir.display()
2115            );
2116
2117            // Check for markdownlint config files in order of precedence
2118            for config_name in MARKDOWNLINT_CONFIG_FILES {
2119                let config_path = current_dir.join(config_name);
2120                if config_path.exists() {
2121                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
2122                    return Some(config_path);
2123                }
2124            }
2125
2126            // Check for .git directory (stop boundary)
2127            if current_dir.join(".git").exists() {
2128                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
2129                break;
2130            }
2131
2132            // Move to parent directory
2133            match current_dir.parent() {
2134                Some(parent) => {
2135                    current_dir = parent.to_owned();
2136                    depth += 1;
2137                }
2138                None => {
2139                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
2140                    break;
2141                }
2142            }
2143        }
2144
2145        None
2146    }
2147
2148    /// Internal implementation that accepts config directory for testing
2149    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
2150        let config_dir = config_dir.join("rumdl");
2151
2152        // Check for config files in precedence order (same as project discovery)
2153        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
2154
2155        log::debug!(
2156            "[rumdl-config] Checking for user configuration in: {}",
2157            config_dir.display()
2158        );
2159
2160        for filename in USER_CONFIG_FILES {
2161            let config_path = config_dir.join(filename);
2162
2163            if config_path.exists() {
2164                // For pyproject.toml, verify it contains [tool.rumdl] section
2165                if *filename == "pyproject.toml" {
2166                    if let Ok(content) = std::fs::read_to_string(&config_path) {
2167                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
2168                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
2169                            return Some(config_path);
2170                        }
2171                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
2172                        continue;
2173                    }
2174                } else {
2175                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
2176                    return Some(config_path);
2177                }
2178            }
2179        }
2180
2181        log::debug!(
2182            "[rumdl-config] No user configuration found in: {}",
2183            config_dir.display()
2184        );
2185        None
2186    }
2187
2188    /// Discover user-level configuration file from platform-specific config directory.
2189    /// Returns the first configuration file found in the user config directory.
2190    #[cfg(feature = "native")]
2191    fn user_configuration_path() -> Option<std::path::PathBuf> {
2192        use etcetera::{BaseStrategy, choose_base_strategy};
2193
2194        match choose_base_strategy() {
2195            Ok(strategy) => {
2196                let config_dir = strategy.config_dir();
2197                Self::user_configuration_path_impl(&config_dir)
2198            }
2199            Err(e) => {
2200                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
2201                None
2202            }
2203        }
2204    }
2205
    /// Stub for WASM builds - user config not supported
    ///
    /// Compiled only when the `native` feature is disabled; always returns `None`,
    /// so callers see "no user configuration file".
    #[cfg(not(feature = "native"))]
    fn user_configuration_path() -> Option<std::path::PathBuf> {
        None
    }
2211
2212    /// Internal implementation that accepts user config directory for testing
2213    #[doc(hidden)]
2214    pub fn load_with_discovery_impl(
2215        config_path: Option<&str>,
2216        cli_overrides: Option<&SourcedGlobalConfig>,
2217        skip_auto_discovery: bool,
2218        user_config_dir: Option<&Path>,
2219    ) -> Result<Self, ConfigError> {
2220        use std::env;
2221        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
2222        if config_path.is_none() {
2223            if skip_auto_discovery {
2224                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
2225            } else {
2226                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
2227            }
2228        } else {
2229            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
2230        }
2231        let mut sourced_config = SourcedConfig::default();
2232
2233        // 1. Always load user configuration first (unless auto-discovery is disabled)
2234        // User config serves as the base layer that project configs build upon
2235        if !skip_auto_discovery {
2236            let user_config_path = if let Some(dir) = user_config_dir {
2237                Self::user_configuration_path_impl(dir)
2238            } else {
2239                Self::user_configuration_path()
2240            };
2241
2242            if let Some(user_config_path) = user_config_path {
2243                let path_str = user_config_path.display().to_string();
2244                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
2245
2246                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
2247
2248                if filename == "pyproject.toml" {
2249                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
2250                        source: e,
2251                        path: path_str.clone(),
2252                    })?;
2253                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2254                        sourced_config.merge(fragment);
2255                        sourced_config.loaded_files.push(path_str);
2256                    }
2257                } else {
2258                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
2259                        source: e,
2260                        path: path_str.clone(),
2261                    })?;
2262                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::UserConfig)?;
2263                    sourced_config.merge(fragment);
2264                    sourced_config.loaded_files.push(path_str);
2265                }
2266            } else {
2267                log::debug!("[rumdl-config] No user configuration file found");
2268            }
2269        }
2270
2271        // 2. Load explicit config path if provided (overrides user config)
2272        if let Some(path) = config_path {
2273            let path_obj = Path::new(path);
2274            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
2275            log::debug!("[rumdl-config] Trying to load config file: {filename}");
2276            let path_str = path.to_string();
2277
2278            // Find project root by walking up from config location looking for .git
2279            if let Some(config_parent) = path_obj.parent() {
2280                let project_root = Self::find_project_root_from(config_parent);
2281                log::debug!(
2282                    "[rumdl-config] Project root (from explicit config): {}",
2283                    project_root.display()
2284                );
2285                sourced_config.project_root = Some(project_root);
2286            }
2287
2288            // Known markdownlint config files
2289            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
2290
2291            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
2292                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2293                    source: e,
2294                    path: path_str.clone(),
2295                })?;
2296                if filename == "pyproject.toml" {
2297                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2298                        sourced_config.merge(fragment);
2299                        sourced_config.loaded_files.push(path_str.clone());
2300                    }
2301                } else {
2302                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2303                    sourced_config.merge(fragment);
2304                    sourced_config.loaded_files.push(path_str.clone());
2305                }
2306            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
2307                || path_str.ends_with(".json")
2308                || path_str.ends_with(".jsonc")
2309                || path_str.ends_with(".yaml")
2310                || path_str.ends_with(".yml")
2311            {
2312                // Parse as markdownlint config (JSON/YAML)
2313                let fragment = load_from_markdownlint(&path_str)?;
2314                sourced_config.merge(fragment);
2315                sourced_config.loaded_files.push(path_str.clone());
2316                // markdownlint is fallback only
2317            } else {
2318                // Try TOML only
2319                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2320                    source: e,
2321                    path: path_str.clone(),
2322                })?;
2323                let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2324                sourced_config.merge(fragment);
2325                sourced_config.loaded_files.push(path_str.clone());
2326            }
2327        }
2328
2329        // 3. Perform auto-discovery for project config if not skipped AND no explicit config path
2330        if !skip_auto_discovery && config_path.is_none() {
2331            // Look for project configuration files (override user config)
2332            if let Some((config_file, project_root)) = Self::discover_config_upward() {
2333                let path_str = config_file.display().to_string();
2334                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
2335
2336                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
2337                log::debug!("[rumdl-config] Project root: {}", project_root.display());
2338
2339                // Store project root for cache directory resolution
2340                sourced_config.project_root = Some(project_root);
2341
2342                if filename == "pyproject.toml" {
2343                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2344                        source: e,
2345                        path: path_str.clone(),
2346                    })?;
2347                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2348                        sourced_config.merge(fragment);
2349                        sourced_config.loaded_files.push(path_str);
2350                    }
2351                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
2352                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2353                        source: e,
2354                        path: path_str.clone(),
2355                    })?;
2356                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2357                    sourced_config.merge(fragment);
2358                    sourced_config.loaded_files.push(path_str);
2359                }
2360            } else {
2361                log::debug!("[rumdl-config] No configuration file found via upward traversal");
2362
2363                // If no project config found, fallback to markdownlint config via upward traversal
2364                if let Some(config_path) = Self::discover_markdownlint_config_upward() {
2365                    let path_str = config_path.display().to_string();
2366                    match load_from_markdownlint(&path_str) {
2367                        Ok(fragment) => {
2368                            sourced_config.merge(fragment);
2369                            sourced_config.loaded_files.push(path_str);
2370                        }
2371                        Err(_e) => {
2372                            log::debug!("[rumdl-config] Failed to load markdownlint config");
2373                        }
2374                    }
2375                } else {
2376                    log::debug!("[rumdl-config] No markdownlint configuration file found");
2377                }
2378            }
2379        }
2380
2381        // 4. Apply CLI overrides (highest precedence)
2382        if let Some(cli) = cli_overrides {
2383            sourced_config
2384                .global
2385                .enable
2386                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
2387            sourced_config
2388                .global
2389                .disable
2390                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
2391            sourced_config
2392                .global
2393                .exclude
2394                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
2395            sourced_config
2396                .global
2397                .include
2398                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
2399            sourced_config.global.respect_gitignore.merge_override(
2400                cli.respect_gitignore.value,
2401                ConfigSource::Cli,
2402                None,
2403                None,
2404            );
2405            sourced_config
2406                .global
2407                .fixable
2408                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
2409            sourced_config
2410                .global
2411                .unfixable
2412                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
2413            // No rule-specific CLI overrides implemented yet
2414        }
2415
2416        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
2417
2418        Ok(sourced_config)
2419    }
2420
    /// Load and merge configurations from files and CLI overrides.
    /// If skip_auto_discovery is true, only explicit config paths are loaded.
    ///
    /// This is the public entry point; it forwards its three arguments unchanged to
    /// `load_with_discovery_impl` and passes `None` for that function's fourth argument.
    ///
    /// # Arguments
    ///
    /// * `config_path` - Path of an explicitly requested config file, if any. When it is
    ///   `Some`, project-config auto-discovery is not performed.
    /// * `cli_overrides` - Global options given on the command line; they are merged last
    ///   with `ConfigSource::Cli`.
    /// * `skip_auto_discovery` - When `true`, no upward search for project config files
    ///   is attempted.
    ///
    /// # Errors
    ///
    /// Propagates the `ConfigError` produced by `load_with_discovery_impl`, e.g. when a
    /// config file cannot be read or parsed.
    pub fn load_with_discovery(
        config_path: Option<&str>,
        cli_overrides: Option<&SourcedGlobalConfig>,
        skip_auto_discovery: bool,
    ) -> Result<Self, ConfigError> {
        // NOTE(review): the trailing `None` is the impl's fourth parameter, which is not
        // visible from this call site — confirm its meaning against the impl signature.
        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
    }
2430
2431    /// Validate the configuration against a rule registry.
2432    ///
2433    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
2434    /// enabling conversion to `Config`. Validation warnings are stored in the config
2435    /// and can be displayed to the user.
2436    ///
2437    /// # Example
2438    ///
2439    /// ```ignore
2440    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
2441    /// let validated = loaded.validate(&registry)?;
2442    /// let config: Config = validated.into();
2443    /// ```
2444    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
2445        let warnings = validate_config_sourced_internal(&self, registry);
2446
2447        Ok(SourcedConfig {
2448            global: self.global,
2449            per_file_ignores: self.per_file_ignores,
2450            rules: self.rules,
2451            loaded_files: self.loaded_files,
2452            unknown_keys: self.unknown_keys,
2453            project_root: self.project_root,
2454            validation_warnings: warnings,
2455            _state: PhantomData,
2456        })
2457    }
2458
2459    /// Validate and convert to Config in one step (convenience method).
2460    ///
2461    /// This combines `validate()` and `into()` for callers who want the
2462    /// validation warnings separately.
2463    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
2464        let validated = self.validate(registry)?;
2465        let warnings = validated.validation_warnings.clone();
2466        Ok((validated.into(), warnings))
2467    }
2468
2469    /// Skip validation and convert directly to ConfigValidated state.
2470    ///
2471    /// # Safety
2472    ///
2473    /// This method bypasses validation. Use only when:
2474    /// - You've already validated via `validate_config_sourced()`
2475    /// - You're in test code that doesn't need validation
2476    /// - You're migrating legacy code and will add proper validation later
2477    ///
2478    /// Prefer `validate()` for new code.
2479    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
2480        SourcedConfig {
2481            global: self.global,
2482            per_file_ignores: self.per_file_ignores,
2483            rules: self.rules,
2484            loaded_files: self.loaded_files,
2485            unknown_keys: self.unknown_keys,
2486            project_root: self.project_root,
2487            validation_warnings: Vec::new(),
2488            _state: PhantomData,
2489        }
2490    }
2491}
2492
2493/// Convert a validated configuration to the final Config type.
2494///
2495/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
2496/// ensuring that validation must occur before conversion.
2497impl From<SourcedConfig<ConfigValidated>> for Config {
2498    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
2499        let mut rules = BTreeMap::new();
2500        for (rule_name, sourced_rule_cfg) in sourced.rules {
2501            // Normalize rule name to uppercase for case-insensitive lookup
2502            let normalized_rule_name = rule_name.to_ascii_uppercase();
2503            let mut values = BTreeMap::new();
2504            for (key, sourced_val) in sourced_rule_cfg.values {
2505                values.insert(key, sourced_val.value);
2506            }
2507            rules.insert(normalized_rule_name, RuleConfig { values });
2508        }
2509        #[allow(deprecated)]
2510        let global = GlobalConfig {
2511            enable: sourced.global.enable.value,
2512            disable: sourced.global.disable.value,
2513            exclude: sourced.global.exclude.value,
2514            include: sourced.global.include.value,
2515            respect_gitignore: sourced.global.respect_gitignore.value,
2516            line_length: sourced.global.line_length.value,
2517            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2518            fixable: sourced.global.fixable.value,
2519            unfixable: sourced.global.unfixable.value,
2520            flavor: sourced.global.flavor.value,
2521            force_exclude: sourced.global.force_exclude.value,
2522            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
2523            cache: sourced.global.cache.value,
2524        };
2525        Config {
2526            global,
2527            per_file_ignores: sourced.per_file_ignores.value,
2528            rules,
2529        }
2530    }
2531}
2532
2533/// Registry of all known rules and their config schemas
2534pub struct RuleRegistry {
2535    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types
2536    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
2537    /// Map of rule name to config key aliases
2538    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
2539}
2540
2541impl RuleRegistry {
2542    /// Build a registry from a list of rules
2543    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2544        let mut rule_schemas = std::collections::BTreeMap::new();
2545        let mut rule_aliases = std::collections::BTreeMap::new();
2546
2547        for rule in rules {
2548            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2549                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2550                rule_schemas.insert(norm_name.clone(), table);
2551                norm_name
2552            } else {
2553                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2554                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2555                norm_name
2556            };
2557
2558            // Store aliases if the rule provides them
2559            if let Some(aliases) = rule.config_aliases() {
2560                rule_aliases.insert(norm_name, aliases);
2561            }
2562        }
2563
2564        RuleRegistry {
2565            rule_schemas,
2566            rule_aliases,
2567        }
2568    }
2569
2570    /// Get all known rule names
2571    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2572        self.rule_schemas.keys().cloned().collect()
2573    }
2574
2575    /// Get the valid configuration keys for a rule, including both original and normalized variants
2576    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2577        self.rule_schemas.get(rule).map(|schema| {
2578            let mut all_keys = std::collections::BTreeSet::new();
2579
2580            // Add original keys from schema
2581            for key in schema.keys() {
2582                all_keys.insert(key.clone());
2583            }
2584
2585            // Add normalized variants for markdownlint compatibility
2586            for key in schema.keys() {
2587                // Add kebab-case variant
2588                all_keys.insert(key.replace('_', "-"));
2589                // Add snake_case variant
2590                all_keys.insert(key.replace('-', "_"));
2591                // Add normalized variant
2592                all_keys.insert(normalize_key(key));
2593            }
2594
2595            // Add any aliases defined by the rule
2596            if let Some(aliases) = self.rule_aliases.get(rule) {
2597                for alias_key in aliases.keys() {
2598                    all_keys.insert(alias_key.clone());
2599                    // Also add normalized variants of the alias
2600                    all_keys.insert(alias_key.replace('_', "-"));
2601                    all_keys.insert(alias_key.replace('-', "_"));
2602                    all_keys.insert(normalize_key(alias_key));
2603                }
2604            }
2605
2606            all_keys
2607        })
2608    }
2609
2610    /// Get the expected value type for a rule's configuration key, trying variants
2611    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2612        if let Some(schema) = self.rule_schemas.get(rule) {
2613            // Check if this key is an alias
2614            if let Some(aliases) = self.rule_aliases.get(rule)
2615                && let Some(canonical_key) = aliases.get(key)
2616            {
2617                // Use the canonical key for schema lookup
2618                if let Some(value) = schema.get(canonical_key) {
2619                    return Some(value);
2620                }
2621            }
2622
2623            // Try the original key
2624            if let Some(value) = schema.get(key) {
2625                return Some(value);
2626            }
2627
2628            // Try key variants
2629            let key_variants = [
2630                key.replace('-', "_"), // Convert kebab-case to snake_case
2631                key.replace('_', "-"), // Convert snake_case to kebab-case
2632                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2633            ];
2634
2635            for variant in &key_variants {
2636                if let Some(value) = schema.get(variant) {
2637                    return Some(value);
2638                }
2639            }
2640        }
2641        None
2642    }
2643}
2644
/// Represents a config validation warning or error
///
/// Config validation emits these for unknown rules, unknown options, option values
/// whose TOML type differs from the expected one, and unknown global options.
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem.
    pub message: String,
    /// Name of the rule the warning refers to; `None` for warnings about global
    /// options and about unknown rule sections reported via the unknown-key list.
    pub rule: Option<String>,
    /// Option key the warning refers to; `None` when the warning is about a rule
    /// as a whole rather than a single option.
    pub key: Option<String>,
}
2652
2653/// Internal validation function that works with any SourcedConfig state.
2654/// This is used by both the public `validate_config_sourced` and the typestate `validate()` method.
2655fn validate_config_sourced_internal<S>(
2656    sourced: &SourcedConfig<S>,
2657    registry: &RuleRegistry,
2658) -> Vec<ConfigValidationWarning> {
2659    validate_config_sourced_impl(&sourced.rules, &sourced.unknown_keys, registry)
2660}
2661
2662/// Core validation implementation that doesn't depend on SourcedConfig type parameter.
2663fn validate_config_sourced_impl(
2664    rules: &BTreeMap<String, SourcedRuleConfig>,
2665    unknown_keys: &[(String, String, Option<String>)],
2666    registry: &RuleRegistry,
2667) -> Vec<ConfigValidationWarning> {
2668    let mut warnings = Vec::new();
2669    let known_rules = registry.rule_names();
2670    // 1. Unknown rules
2671    for rule in rules.keys() {
2672        if !known_rules.contains(rule) {
2673            warnings.push(ConfigValidationWarning {
2674                message: format!("Unknown rule in config: {rule}"),
2675                rule: Some(rule.clone()),
2676                key: None,
2677            });
2678        }
2679    }
2680    // 2. Unknown options and type mismatches
2681    for (rule, rule_cfg) in rules {
2682        if let Some(valid_keys) = registry.config_keys_for(rule) {
2683            for key in rule_cfg.values.keys() {
2684                if !valid_keys.contains(key) {
2685                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2686                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2687                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2688                    } else {
2689                        format!("Unknown option for rule {rule}: {key}")
2690                    };
2691                    warnings.push(ConfigValidationWarning {
2692                        message,
2693                        rule: Some(rule.clone()),
2694                        key: Some(key.clone()),
2695                    });
2696                } else {
2697                    // Type check: compare type of value to type of default
2698                    if let Some(expected) = registry.expected_value_for(rule, key) {
2699                        let actual = &rule_cfg.values[key].value;
2700                        if !toml_value_type_matches(expected, actual) {
2701                            warnings.push(ConfigValidationWarning {
2702                                message: format!(
2703                                    "Type mismatch for {}.{}: expected {}, got {}",
2704                                    rule,
2705                                    key,
2706                                    toml_type_name(expected),
2707                                    toml_type_name(actual)
2708                                ),
2709                                rule: Some(rule.clone()),
2710                                key: Some(key.clone()),
2711                            });
2712                        }
2713                    }
2714                }
2715            }
2716        }
2717    }
2718    // 3. Unknown global options (from unknown_keys)
2719    let known_global_keys = vec![
2720        "enable".to_string(),
2721        "disable".to_string(),
2722        "include".to_string(),
2723        "exclude".to_string(),
2724        "respect-gitignore".to_string(),
2725        "line-length".to_string(),
2726        "fixable".to_string(),
2727        "unfixable".to_string(),
2728        "flavor".to_string(),
2729        "force-exclude".to_string(),
2730        "output-format".to_string(),
2731        "cache-dir".to_string(),
2732        "cache".to_string(),
2733    ];
2734
2735    for (section, key, file_path) in unknown_keys {
2736        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2737            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2738                if let Some(path) = file_path {
2739                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2740                } else {
2741                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
2742                }
2743            } else if let Some(path) = file_path {
2744                format!("Unknown global option in {path}: {key}")
2745            } else {
2746                format!("Unknown global option: {key}")
2747            };
2748            warnings.push(ConfigValidationWarning {
2749                message,
2750                rule: None,
2751                key: Some(key.clone()),
2752            });
2753        } else if !key.is_empty() {
2754            // This is an unknown rule section (key is empty means it's a section header)
2755            // No suggestions for rule names - just warn
2756            continue;
2757        } else {
2758            // Unknown rule section
2759            let message = if let Some(path) = file_path {
2760                format!(
2761                    "Unknown rule in {path}: {}",
2762                    section.trim_matches(|c| c == '[' || c == ']')
2763                )
2764            } else {
2765                format!(
2766                    "Unknown rule in config: {}",
2767                    section.trim_matches(|c| c == '[' || c == ']')
2768                )
2769            };
2770            warnings.push(ConfigValidationWarning {
2771                message,
2772                rule: None,
2773                key: None,
2774            });
2775        }
2776    }
2777    warnings
2778}
2779
/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking.
///
/// This is the legacy API that works with `SourcedConfig<ConfigLoaded>`.
/// For new code, prefer using `sourced.validate(&registry)` which returns a
/// `SourcedConfig<ConfigValidated>` that can be converted to `Config`.
///
/// The config is only borrowed: it stays in the `ConfigLoaded` state and the
/// warnings are returned to the caller.
pub fn validate_config_sourced(
    sourced: &SourcedConfig<ConfigLoaded>,
    registry: &RuleRegistry,
) -> Vec<ConfigValidationWarning> {
    // Delegates to the state-agnostic helper.
    validate_config_sourced_internal(sourced, registry)
}
2791
2792/// Validate a config that has already been validated (no-op, returns stored warnings).
2793///
2794/// This exists for API consistency - validated configs already have their warnings stored.
2795pub fn validate_config_sourced_validated(
2796    sourced: &SourcedConfig<ConfigValidated>,
2797    _registry: &RuleRegistry,
2798) -> Vec<ConfigValidationWarning> {
2799    sourced.validation_warnings.clone()
2800}
2801
2802fn toml_type_name(val: &toml::Value) -> &'static str {
2803    match val {
2804        toml::Value::String(_) => "string",
2805        toml::Value::Integer(_) => "integer",
2806        toml::Value::Float(_) => "float",
2807        toml::Value::Boolean(_) => "boolean",
2808        toml::Value::Array(_) => "array",
2809        toml::Value::Table(_) => "table",
2810        toml::Value::Datetime(_) => "datetime",
2811    }
2812}
2813
/// Calculate the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-character insertions, deletions
/// and substitutions needed to turn `s1` into `s2`. Characters are Unicode scalar
/// values (`char`), so non-ASCII input is measured per character, not per byte.
///
/// Uses the two-row dynamic-programming formulation: O(len1 * len2) time and
/// O(len2) extra space.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();

    // All lengths must be character counts: byte lengths would overrun the
    // character vectors for any multi-byte input.
    let len2 = s2_chars.len();

    if s1_chars.is_empty() {
        return len2;
    }
    if s2_chars.is_empty() {
        return s1_chars.len();
    }

    // prev_row[j] = distance between the processed prefix of s1 and the first j chars of s2
    let mut prev_row: Vec<usize> = (0..=len2).collect();
    let mut curr_row = vec![0; len2 + 1];

    for (i, &c1) in s1_chars.iter().enumerate() {
        curr_row[0] = i + 1;
        for (j, &c2) in s2_chars.iter().enumerate() {
            let cost = usize::from(c1 != c2);
            curr_row[j + 1] = (prev_row[j + 1] + 1) // deletion
                .min(curr_row[j] + 1) // insertion
                .min(prev_row[j] + cost); // substitution
        }
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    prev_row[len2]
}
2845
2846/// Suggest a similar key from a list of valid keys using fuzzy matching
2847fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
2848    let unknown_lower = unknown.to_lowercase();
2849    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
2850
2851    let mut best_match: Option<(String, usize)> = None;
2852
2853    for valid in valid_keys {
2854        let valid_lower = valid.to_lowercase();
2855        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
2856
2857        if distance <= max_distance {
2858            if let Some((_, best_dist)) = &best_match {
2859                if distance < *best_dist {
2860                    best_match = Some((valid.clone(), distance));
2861                }
2862            } else {
2863                best_match = Some((valid.clone(), distance));
2864            }
2865        }
2866    }
2867
2868    best_match.map(|(key, _)| key)
2869}
2870
2871fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2872    use toml::Value::*;
2873    match (expected, actual) {
2874        (String(_), String(_)) => true,
2875        (Integer(_), Integer(_)) => true,
2876        (Float(_), Float(_)) => true,
2877        (Boolean(_), Boolean(_)) => true,
2878        (Array(_), Array(_)) => true,
2879        (Table(_), Table(_)) => true,
2880        (Datetime(_), Datetime(_)) => true,
2881        // Allow integer for float
2882        (Float(_), Integer(_)) => true,
2883        _ => false,
2884    }
2885}
2886
2887/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2888fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2889    let doc: toml::Value =
2890        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2891    let mut fragment = SourcedConfigFragment::default();
2892    let source = ConfigSource::PyprojectToml;
2893    let file = Some(path.to_string());
2894
2895    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2896    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2897        && let Some(rumdl_table) = rumdl_config.as_table()
2898    {
2899        // Helper function to extract global config from a table
2900        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2901            // Extract global options from the given table
2902            if let Some(enable) = table.get("enable")
2903                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2904            {
2905                // Normalize rule names in the list
2906                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2907                fragment
2908                    .global
2909                    .enable
2910                    .push_override(normalized_values, source, file.clone(), None);
2911            }
2912
2913            if let Some(disable) = table.get("disable")
2914                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2915            {
2916                // Re-enable normalization
2917                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2918                fragment
2919                    .global
2920                    .disable
2921                    .push_override(normalized_values, source, file.clone(), None);
2922            }
2923
2924            if let Some(include) = table.get("include")
2925                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2926            {
2927                fragment
2928                    .global
2929                    .include
2930                    .push_override(values, source, file.clone(), None);
2931            }
2932
2933            if let Some(exclude) = table.get("exclude")
2934                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
2935            {
2936                fragment
2937                    .global
2938                    .exclude
2939                    .push_override(values, source, file.clone(), None);
2940            }
2941
2942            if let Some(respect_gitignore) = table
2943                .get("respect-gitignore")
2944                .or_else(|| table.get("respect_gitignore"))
2945                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
2946            {
2947                fragment
2948                    .global
2949                    .respect_gitignore
2950                    .push_override(value, source, file.clone(), None);
2951            }
2952
2953            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
2954                && let Ok(value) = bool::deserialize(force_exclude.clone())
2955            {
2956                fragment
2957                    .global
2958                    .force_exclude
2959                    .push_override(value, source, file.clone(), None);
2960            }
2961
2962            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
2963                && let Ok(value) = String::deserialize(output_format.clone())
2964            {
2965                if fragment.global.output_format.is_none() {
2966                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
2967                } else {
2968                    fragment
2969                        .global
2970                        .output_format
2971                        .as_mut()
2972                        .unwrap()
2973                        .push_override(value, source, file.clone(), None);
2974                }
2975            }
2976
2977            if let Some(fixable) = table.get("fixable")
2978                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
2979            {
2980                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2981                fragment
2982                    .global
2983                    .fixable
2984                    .push_override(normalized_values, source, file.clone(), None);
2985            }
2986
2987            if let Some(unfixable) = table.get("unfixable")
2988                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
2989            {
2990                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2991                fragment
2992                    .global
2993                    .unfixable
2994                    .push_override(normalized_values, source, file.clone(), None);
2995            }
2996
2997            if let Some(flavor) = table.get("flavor")
2998                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
2999            {
3000                fragment.global.flavor.push_override(value, source, file.clone(), None);
3001            }
3002
3003            // Handle line-length special case - this should set the global line_length
3004            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
3005                && let Ok(value) = u64::deserialize(line_length.clone())
3006            {
3007                fragment
3008                    .global
3009                    .line_length
3010                    .push_override(LineLength::new(value as usize), source, file.clone(), None);
3011
3012                // Also add to MD013 rule config for backward compatibility
3013                let norm_md013_key = normalize_key("MD013");
3014                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
3015                let norm_line_length_key = normalize_key("line-length");
3016                let sv = rule_entry
3017                    .values
3018                    .entry(norm_line_length_key)
3019                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
3020                sv.push_override(line_length.clone(), source, file.clone(), None);
3021            }
3022
3023            if let Some(cache_dir) = table.get("cache-dir").or_else(|| table.get("cache_dir"))
3024                && let Ok(value) = String::deserialize(cache_dir.clone())
3025            {
3026                if fragment.global.cache_dir.is_none() {
3027                    fragment.global.cache_dir = Some(SourcedValue::new(value.clone(), source));
3028                } else {
3029                    fragment
3030                        .global
3031                        .cache_dir
3032                        .as_mut()
3033                        .unwrap()
3034                        .push_override(value, source, file.clone(), None);
3035                }
3036            }
3037
3038            if let Some(cache) = table.get("cache")
3039                && let Ok(value) = bool::deserialize(cache.clone())
3040            {
3041                fragment.global.cache.push_override(value, source, file.clone(), None);
3042            }
3043        };
3044
3045        // First, check for [tool.rumdl.global] section
3046        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
3047            extract_global_config(&mut fragment, global_table);
3048        }
3049
3050        // Also extract global options from [tool.rumdl] directly (for flat structure)
3051        extract_global_config(&mut fragment, rumdl_table);
3052
3053        // --- Extract per-file-ignores configurations ---
3054        // Check both hyphenated and underscored versions for compatibility
3055        let per_file_ignores_key = rumdl_table
3056            .get("per-file-ignores")
3057            .or_else(|| rumdl_table.get("per_file_ignores"));
3058
3059        if let Some(per_file_ignores_value) = per_file_ignores_key
3060            && let Some(per_file_table) = per_file_ignores_value.as_table()
3061        {
3062            let mut per_file_map = HashMap::new();
3063            for (pattern, rules_value) in per_file_table {
3064                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
3065                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
3066                    per_file_map.insert(pattern.clone(), normalized_rules);
3067                } else {
3068                    log::warn!(
3069                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
3070                    );
3071                }
3072            }
3073            fragment
3074                .per_file_ignores
3075                .push_override(per_file_map, source, file.clone(), None);
3076        }
3077
3078        // --- Extract rule-specific configurations ---
3079        for (key, value) in rumdl_table {
3080            let norm_rule_key = normalize_key(key);
3081
3082            // Skip keys already handled as global or special cases
3083            if [
3084                "enable",
3085                "disable",
3086                "include",
3087                "exclude",
3088                "respect_gitignore",
3089                "respect-gitignore", // Added kebab-case here too
3090                "force_exclude",
3091                "force-exclude",
3092                "line_length",
3093                "line-length",
3094                "output_format",
3095                "output-format",
3096                "fixable",
3097                "unfixable",
3098                "per-file-ignores",
3099                "per_file_ignores",
3100                "global",
3101                "flavor",
3102                "cache_dir",
3103                "cache-dir",
3104                "cache",
3105            ]
3106            .contains(&norm_rule_key.as_str())
3107            {
3108                continue;
3109            }
3110
3111            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
3112            // AND if the value is actually a TOML table before processing as rule config.
3113            // This prevents misinterpreting other top-level keys under [tool.rumdl]
3114            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
3115            if norm_rule_key_upper.len() == 5
3116                && norm_rule_key_upper.starts_with("MD")
3117                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
3118                && value.is_table()
3119            {
3120                if let Some(rule_config_table) = value.as_table() {
3121                    // Get the entry for this rule (e.g., "md013")
3122                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
3123                    for (rk, rv) in rule_config_table {
3124                        let norm_rk = normalize_key(rk); // Normalize the config key itself
3125
3126                        let toml_val = rv.clone();
3127
3128                        let sv = rule_entry
3129                            .values
3130                            .entry(norm_rk.clone())
3131                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3132                        sv.push_override(toml_val, source, file.clone(), None);
3133                    }
3134                }
3135            } else {
3136                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
3137                // Track unknown keys under [tool.rumdl] for validation
3138                fragment
3139                    .unknown_keys
3140                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
3141            }
3142        }
3143    }
3144
3145    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
3146    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
3147        for (key, value) in tool_table.iter() {
3148            if let Some(rule_name) = key.strip_prefix("rumdl.") {
3149                let norm_rule_name = normalize_key(rule_name);
3150                if norm_rule_name.len() == 5
3151                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
3152                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
3153                    && let Some(rule_table) = value.as_table()
3154                {
3155                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
3156                    for (rk, rv) in rule_table {
3157                        let norm_rk = normalize_key(rk);
3158                        let toml_val = rv.clone();
3159                        let sv = rule_entry
3160                            .values
3161                            .entry(norm_rk.clone())
3162                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
3163                        sv.push_override(toml_val, source, file.clone(), None);
3164                    }
3165                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
3166                    // Track unknown rule sections like [tool.rumdl.MD999]
3167                    fragment.unknown_keys.push((
3168                        format!("[tool.rumdl.{rule_name}]"),
3169                        String::new(),
3170                        Some(path.to_string()),
3171                    ));
3172                }
3173            }
3174        }
3175    }
3176
3177    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
3178    if let Some(doc_table) = doc.as_table() {
3179        for (key, value) in doc_table.iter() {
3180            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
3181                let norm_rule_name = normalize_key(rule_name);
3182                if norm_rule_name.len() == 5
3183                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
3184                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
3185                    && let Some(rule_table) = value.as_table()
3186                {
3187                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
3188                    for (rk, rv) in rule_table {
3189                        let norm_rk = normalize_key(rk);
3190                        let toml_val = rv.clone();
3191                        let sv = rule_entry
3192                            .values
3193                            .entry(norm_rk.clone())
3194                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
3195                        sv.push_override(toml_val, source, file.clone(), None);
3196                    }
3197                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
3198                    // Track unknown rule sections like [tool.rumdl.MD999]
3199                    fragment.unknown_keys.push((
3200                        format!("[tool.rumdl.{rule_name}]"),
3201                        String::new(),
3202                        Some(path.to_string()),
3203                    ));
3204                }
3205            }
3206        }
3207    }
3208
3209    // Only return Some(fragment) if any config was found
3210    let has_any = !fragment.global.enable.value.is_empty()
3211        || !fragment.global.disable.value.is_empty()
3212        || !fragment.global.include.value.is_empty()
3213        || !fragment.global.exclude.value.is_empty()
3214        || !fragment.global.fixable.value.is_empty()
3215        || !fragment.global.unfixable.value.is_empty()
3216        || fragment.global.output_format.is_some()
3217        || fragment.global.cache_dir.is_some()
3218        || !fragment.global.cache.value
3219        || !fragment.per_file_ignores.value.is_empty()
3220        || !fragment.rules.is_empty();
3221    if has_any { Ok(Some(fragment)) } else { Ok(None) }
3222}
3223
3224/// Parses rumdl.toml / .rumdl.toml content.
3225fn parse_rumdl_toml(content: &str, path: &str, source: ConfigSource) -> Result<SourcedConfigFragment, ConfigError> {
3226    let doc = content
3227        .parse::<DocumentMut>()
3228        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
3229    let mut fragment = SourcedConfigFragment::default();
3230    // source parameter provided by caller
3231    let file = Some(path.to_string());
3232
3233    // Define known rules before the loop
3234    let all_rules = rules::all_rules(&Config::default());
3235    let registry = RuleRegistry::from_rules(&all_rules);
3236    let known_rule_names: BTreeSet<String> = registry
3237        .rule_names()
3238        .into_iter()
3239        .map(|s| s.to_ascii_uppercase())
3240        .collect();
3241
3242    // Handle [global] section
3243    if let Some(global_item) = doc.get("global")
3244        && let Some(global_table) = global_item.as_table()
3245    {
3246        for (key, value_item) in global_table.iter() {
3247            let norm_key = normalize_key(key);
3248            match norm_key.as_str() {
3249                "enable" | "disable" | "include" | "exclude" => {
3250                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3251                        // Corrected: Iterate directly over the Formatted<Array>
3252                        let values: Vec<String> = formatted_array
3253                                .iter()
3254                                .filter_map(|item| item.as_str()) // Extract strings
3255                                .map(|s| s.to_string())
3256                                .collect();
3257
3258                        // Normalize rule names for enable/disable
3259                        let final_values = if norm_key == "enable" || norm_key == "disable" {
3260                            // Corrected: Pass &str to normalize_key
3261                            values.into_iter().map(|s| normalize_key(&s)).collect()
3262                        } else {
3263                            values
3264                        };
3265
3266                        match norm_key.as_str() {
3267                            "enable" => fragment
3268                                .global
3269                                .enable
3270                                .push_override(final_values, source, file.clone(), None),
3271                            "disable" => {
3272                                fragment
3273                                    .global
3274                                    .disable
3275                                    .push_override(final_values, source, file.clone(), None)
3276                            }
3277                            "include" => {
3278                                fragment
3279                                    .global
3280                                    .include
3281                                    .push_override(final_values, source, file.clone(), None)
3282                            }
3283                            "exclude" => {
3284                                fragment
3285                                    .global
3286                                    .exclude
3287                                    .push_override(final_values, source, file.clone(), None)
3288                            }
3289                            _ => unreachable!("Outer match guarantees only enable/disable/include/exclude"),
3290                        }
3291                    } else {
3292                        log::warn!(
3293                            "[WARN] Expected array for global key '{}' in {}, found {}",
3294                            key,
3295                            path,
3296                            value_item.type_name()
3297                        );
3298                    }
3299                }
3300                "respect_gitignore" | "respect-gitignore" => {
3301                    // Handle both cases
3302                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
3303                        let val = *formatted_bool.value();
3304                        fragment
3305                            .global
3306                            .respect_gitignore
3307                            .push_override(val, source, file.clone(), None);
3308                    } else {
3309                        log::warn!(
3310                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3311                            key,
3312                            path,
3313                            value_item.type_name()
3314                        );
3315                    }
3316                }
3317                "force_exclude" | "force-exclude" => {
3318                    // Handle both cases
3319                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
3320                        let val = *formatted_bool.value();
3321                        fragment
3322                            .global
3323                            .force_exclude
3324                            .push_override(val, source, file.clone(), None);
3325                    } else {
3326                        log::warn!(
3327                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3328                            key,
3329                            path,
3330                            value_item.type_name()
3331                        );
3332                    }
3333                }
3334                "line_length" | "line-length" => {
3335                    // Handle both cases
3336                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
3337                        let val = LineLength::new(*formatted_int.value() as usize);
3338                        fragment
3339                            .global
3340                            .line_length
3341                            .push_override(val, source, file.clone(), None);
3342                    } else {
3343                        log::warn!(
3344                            "[WARN] Expected integer for global key '{}' in {}, found {}",
3345                            key,
3346                            path,
3347                            value_item.type_name()
3348                        );
3349                    }
3350                }
3351                "output_format" | "output-format" => {
3352                    // Handle both cases
3353                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3354                        let val = formatted_string.value().clone();
3355                        if fragment.global.output_format.is_none() {
3356                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
3357                        } else {
3358                            fragment.global.output_format.as_mut().unwrap().push_override(
3359                                val,
3360                                source,
3361                                file.clone(),
3362                                None,
3363                            );
3364                        }
3365                    } else {
3366                        log::warn!(
3367                            "[WARN] Expected string for global key '{}' in {}, found {}",
3368                            key,
3369                            path,
3370                            value_item.type_name()
3371                        );
3372                    }
3373                }
3374                "cache_dir" | "cache-dir" => {
3375                    // Handle both cases
3376                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3377                        let val = formatted_string.value().clone();
3378                        if fragment.global.cache_dir.is_none() {
3379                            fragment.global.cache_dir = Some(SourcedValue::new(val.clone(), source));
3380                        } else {
3381                            fragment
3382                                .global
3383                                .cache_dir
3384                                .as_mut()
3385                                .unwrap()
3386                                .push_override(val, source, file.clone(), None);
3387                        }
3388                    } else {
3389                        log::warn!(
3390                            "[WARN] Expected string for global key '{}' in {}, found {}",
3391                            key,
3392                            path,
3393                            value_item.type_name()
3394                        );
3395                    }
3396                }
3397                "cache" => {
3398                    if let Some(toml_edit::Value::Boolean(b)) = value_item.as_value() {
3399                        let val = *b.value();
3400                        fragment.global.cache.push_override(val, source, file.clone(), None);
3401                    } else {
3402                        log::warn!(
3403                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3404                            key,
3405                            path,
3406                            value_item.type_name()
3407                        );
3408                    }
3409                }
3410                "fixable" => {
3411                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3412                        let values: Vec<String> = formatted_array
3413                            .iter()
3414                            .filter_map(|item| item.as_str())
3415                            .map(normalize_key)
3416                            .collect();
3417                        fragment
3418                            .global
3419                            .fixable
3420                            .push_override(values, source, file.clone(), None);
3421                    } else {
3422                        log::warn!(
3423                            "[WARN] Expected array for global key '{}' in {}, found {}",
3424                            key,
3425                            path,
3426                            value_item.type_name()
3427                        );
3428                    }
3429                }
3430                "unfixable" => {
3431                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3432                        let values: Vec<String> = formatted_array
3433                            .iter()
3434                            .filter_map(|item| item.as_str())
3435                            .map(normalize_key)
3436                            .collect();
3437                        fragment
3438                            .global
3439                            .unfixable
3440                            .push_override(values, source, file.clone(), None);
3441                    } else {
3442                        log::warn!(
3443                            "[WARN] Expected array for global key '{}' in {}, found {}",
3444                            key,
3445                            path,
3446                            value_item.type_name()
3447                        );
3448                    }
3449                }
3450                "flavor" => {
3451                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3452                        let val = formatted_string.value();
3453                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
3454                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
3455                        } else {
3456                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
3457                        }
3458                    } else {
3459                        log::warn!(
3460                            "[WARN] Expected string for global key '{}' in {}, found {}",
3461                            key,
3462                            path,
3463                            value_item.type_name()
3464                        );
3465                    }
3466                }
3467                _ => {
3468                    // Track unknown global keys for validation
3469                    fragment
3470                        .unknown_keys
3471                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
3472                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
3473                }
3474            }
3475        }
3476    }
3477
3478    // Handle [per-file-ignores] section
3479    if let Some(per_file_item) = doc.get("per-file-ignores")
3480        && let Some(per_file_table) = per_file_item.as_table()
3481    {
3482        let mut per_file_map = HashMap::new();
3483        for (pattern, value_item) in per_file_table.iter() {
3484            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3485                let rules: Vec<String> = formatted_array
3486                    .iter()
3487                    .filter_map(|item| item.as_str())
3488                    .map(normalize_key)
3489                    .collect();
3490                per_file_map.insert(pattern.to_string(), rules);
3491            } else {
3492                let type_name = value_item.type_name();
3493                log::warn!(
3494                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
3495                );
3496            }
3497        }
3498        fragment
3499            .per_file_ignores
3500            .push_override(per_file_map, source, file.clone(), None);
3501    }
3502
3503    // Rule-specific: all other top-level tables
3504    for (key, item) in doc.iter() {
3505        let norm_rule_name = key.to_ascii_uppercase();
3506
3507        // Skip known special sections
3508        if key == "global" || key == "per-file-ignores" {
3509            continue;
3510        }
3511
3512        // Track unknown rule sections (like [MD999])
3513        if !known_rule_names.contains(&norm_rule_name) {
3514            // Only track if it looks like a rule section (starts with MD or is uppercase)
3515            if norm_rule_name.starts_with("MD") || key.chars().all(|c| c.is_uppercase() || c.is_numeric()) {
3516                fragment
3517                    .unknown_keys
3518                    .push((format!("[{key}]"), String::new(), Some(path.to_string())));
3519            }
3520            continue;
3521        }
3522
3523        if let Some(tbl) = item.as_table() {
3524            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
3525            for (rk, rv_item) in tbl.iter() {
3526                let norm_rk = normalize_key(rk);
3527                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
3528                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
3529                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
3530                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
3531                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
3532                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
3533                    Some(toml_edit::Value::Array(formatted_array)) => {
3534                        // Convert toml_edit Array to toml::Value::Array
3535                        let mut values = Vec::new();
3536                        for item in formatted_array.iter() {
3537                            match item {
3538                                toml_edit::Value::String(formatted) => {
3539                                    values.push(toml::Value::String(formatted.value().clone()))
3540                                }
3541                                toml_edit::Value::Integer(formatted) => {
3542                                    values.push(toml::Value::Integer(*formatted.value()))
3543                                }
3544                                toml_edit::Value::Float(formatted) => {
3545                                    values.push(toml::Value::Float(*formatted.value()))
3546                                }
3547                                toml_edit::Value::Boolean(formatted) => {
3548                                    values.push(toml::Value::Boolean(*formatted.value()))
3549                                }
3550                                toml_edit::Value::Datetime(formatted) => {
3551                                    values.push(toml::Value::Datetime(*formatted.value()))
3552                                }
3553                                _ => {
3554                                    log::warn!(
3555                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
3556                                    );
3557                                }
3558                            }
3559                        }
3560                        Some(toml::Value::Array(values))
3561                    }
3562                    Some(toml_edit::Value::InlineTable(_)) => {
3563                        log::warn!(
3564                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
3565                        );
3566                        None
3567                    }
3568                    None => {
3569                        log::warn!(
3570                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
3571                        );
3572                        None
3573                    }
3574                };
3575                if let Some(toml_val) = maybe_toml_val {
3576                    let sv = rule_entry
3577                        .values
3578                        .entry(norm_rk.clone())
3579                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3580                    sv.push_override(toml_val, source, file.clone(), None);
3581                }
3582            }
3583        } else if item.is_value() {
3584            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
3585        }
3586    }
3587
3588    Ok(fragment)
3589}
3590
3591/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
3592fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
3593    // Use the unified loader from markdownlint_config.rs
3594    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
3595        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
3596    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
3597}
3598
3599#[cfg(test)]
3600#[path = "config_intelligent_merge_tests.rs"]
3601mod config_intelligent_merge_tests;