rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use crate::types::LineLength;
8use log;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::collections::{BTreeSet, HashMap, HashSet};
12use std::fmt;
13use std::fs;
14use std::io;
15use std::marker::PhantomData;
16use std::path::Path;
17use std::str::FromStr;
18use toml_edit::DocumentMut;
19
20// ============================================================================
21// Typestate markers for configuration pipeline
22// ============================================================================
23
/// Marker type for configuration that has been loaded but not yet validated.
/// This is the initial state after `load_with_discovery()`.
///
/// The struct has no fields, so it carries no data of its own; it exists only
/// to be named as a type.
// NOTE(review): presumably used as a typestate parameter (the file imports
// `PhantomData`), but the consumer is not visible in this part of the file.
#[derive(Debug, Clone, Copy, Default)]
pub struct ConfigLoaded;
28
/// Marker type for configuration that has been validated.
/// Only validated configs can be converted to `Config`.
///
/// Like the "loaded" marker it is a field-less unit struct that carries no
/// data of its own.
// NOTE(review): the conversion that enforces the "validated only" rule lives
// outside this part of the file — confirm there before relying on it.
#[derive(Debug, Clone, Copy, Default)]
pub struct ConfigValidated;
33
/// Markdown flavor/dialect enumeration
// Maintainer notes (kept as `//` so they do not leak into derived output):
// - Every variant carries an explicit `rename`, so the container-level
//   `rename_all = "lowercase"` is only a fallback for variants added later.
// - Serde accepts exactly the names/aliases listed on the variants below. The
//   wider alias set ("gfm", "qmd", ...) is only understood by the `FromStr`
//   impl in this file.
//   NOTE(review): confirm which of the two paths config loading goes through.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum MarkdownFlavor {
    /// Standard Markdown without flavor-specific adjustments
    #[serde(rename = "standard", alias = "none", alias = "")]
    #[default]
    Standard,
    /// MkDocs flavor with auto-reference support
    #[serde(rename = "mkdocs")]
    MkDocs,
    /// MDX flavor with JSX and ESM support (.mdx files)
    #[serde(rename = "mdx")]
    MDX,
    /// Quarto/RMarkdown flavor for scientific publishing (.qmd, .Rmd files)
    #[serde(rename = "quarto")]
    Quarto,
    // Future flavors can be added here once they need behaviour that differs
    // from `Standard`.
    // GFM and CommonMark deliberately have no variant of their own: the
    // `FromStr` impl in this file maps "gfm", "github" and "commonmark" onto
    // `Standard`.
}
55
56impl fmt::Display for MarkdownFlavor {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        match self {
59            MarkdownFlavor::Standard => write!(f, "standard"),
60            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
61            MarkdownFlavor::MDX => write!(f, "mdx"),
62            MarkdownFlavor::Quarto => write!(f, "quarto"),
63        }
64    }
65}
66
67impl FromStr for MarkdownFlavor {
68    type Err = String;
69
70    fn from_str(s: &str) -> Result<Self, Self::Err> {
71        match s.to_lowercase().as_str() {
72            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
73            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
74            "mdx" => Ok(MarkdownFlavor::MDX),
75            "quarto" | "qmd" | "rmd" | "rmarkdown" => Ok(MarkdownFlavor::Quarto),
76            // GFM and CommonMark are aliases for Standard since the base parser
77            // (pulldown-cmark) already supports GFM extensions (tables, task lists,
78            // strikethrough, autolinks, etc.) which are a superset of CommonMark
79            "gfm" | "github" | "commonmark" => Ok(MarkdownFlavor::Standard),
80            _ => Err(format!("Unknown markdown flavor: {s}")),
81        }
82    }
83}
84
85impl MarkdownFlavor {
86    /// Detect flavor from file extension
87    pub fn from_extension(ext: &str) -> Self {
88        match ext.to_lowercase().as_str() {
89            "mdx" => Self::MDX,
90            "qmd" => Self::Quarto,
91            "rmd" => Self::Quarto,
92            _ => Self::Standard,
93        }
94    }
95
96    /// Detect flavor from file path
97    pub fn from_path(path: &std::path::Path) -> Self {
98        path.extension()
99            .and_then(|e| e.to_str())
100            .map(Self::from_extension)
101            .unwrap_or(Self::Standard)
102    }
103
104    /// Check if this flavor supports ESM imports/exports (MDX-specific)
105    pub fn supports_esm_blocks(self) -> bool {
106        matches!(self, Self::MDX)
107    }
108
109    /// Check if this flavor supports JSX components (MDX-specific)
110    pub fn supports_jsx(self) -> bool {
111        matches!(self, Self::MDX)
112    }
113
114    /// Check if this flavor supports auto-references (MkDocs-specific)
115    pub fn supports_auto_references(self) -> bool {
116        matches!(self, Self::MkDocs)
117    }
118
119    /// Get a human-readable name for this flavor
120    pub fn name(self) -> &'static str {
121        match self {
122            Self::Standard => "Standard",
123            Self::MkDocs => "MkDocs",
124            Self::MDX => "MDX",
125            Self::Quarto => "Quarto",
126        }
127    }
128}
129
/// Normalizes a configuration key (rule name or option name).
///
/// * Rule identifiers — exactly five bytes, `md` in any ASCII case followed by
///   three ASCII digits — are upper-cased: `"md013"` becomes `"MD013"`.
/// * Every other key has `_` replaced by `-` and is ASCII-lower-cased:
///   `"Line_Length"` becomes `"line-length"`.
///
/// Non-ASCII characters are never altered.
pub fn normalize_key(key: &str) -> String {
    let bytes = key.as_bytes();
    // The length check comes first so the fixed-position slices below cannot
    // go out of bounds. Comparing bytes avoids allocating a lowercase copy
    // just to test the prefix.
    let is_rule_id = bytes.len() == 5
        && bytes[..2].eq_ignore_ascii_case(b"md")
        && bytes[2..].iter().all(u8::is_ascii_digit);

    if is_rule_id {
        key.to_ascii_uppercase()
    } else {
        key.replace('_', "-").to_ascii_lowercase()
    }
}
139
/// Represents a rule-specific configuration
// Maintainer notes (kept as `//` so they do not leak into derived output):
// - `values` is flattened, so each key/value pair of a rule's table is stored
//   directly in the map as a raw `toml::Value`.
// - `BTreeMap` keeps the keys in sorted order.
// - The JSON schema for `values` is supplied by `arbitrary_value_schema`
//   instead of being derived from the field type.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
148
/// Generate a JSON schema for arbitrary configuration values.
///
/// Returns a schema describing an object that allows any additional
/// properties. It is referenced by name from the `schema_with` attribute on
/// `RuleConfig::values`. The generator argument is not used.
fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
    schemars::json_schema!({
        "type": "object",
        "additionalProperties": true
    })
}
156
/// Represents the complete configuration loaded from rumdl.toml
// Maintainer notes (kept as `//` so they do not leak into derived output):
// - `rules` is flattened: any top-level table that is not `global` or
//   `per-file-ignores` is collected into it, keyed by the table name.
// - `project_root` is marked `#[serde(skip)]`, so it never appears in
//   serialized output and is left at its default when deserializing; whoever
//   builds the `Config` has to fill it in.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See https://github.com/rvben/rumdl for full rule documentation.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,

    /// Project root directory, used for resolving relative paths in per-file-ignores
    #[serde(skip)]
    pub project_root: Option<std::path::PathBuf>,
}
189
190impl Config {
191    /// Check if the Markdown flavor is set to MkDocs
192    pub fn is_mkdocs_flavor(&self) -> bool {
193        self.global.flavor == MarkdownFlavor::MkDocs
194    }
195
196    // Future methods for when GFM and CommonMark are implemented:
197    // pub fn is_gfm_flavor(&self) -> bool
198    // pub fn is_commonmark_flavor(&self) -> bool
199
200    /// Get the configured Markdown flavor
201    pub fn markdown_flavor(&self) -> MarkdownFlavor {
202        self.global.flavor
203    }
204
205    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
206    pub fn is_mkdocs_project(&self) -> bool {
207        self.is_mkdocs_flavor()
208    }
209
210    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
211    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
212    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
213        use globset::{Glob, GlobSetBuilder};
214
215        let mut ignored_rules = HashSet::new();
216
217        if self.per_file_ignores.is_empty() {
218            return ignored_rules;
219        }
220
221        // Normalize the file path to be relative to project_root for pattern matching
222        // This ensures patterns like ".github/file.md" work with absolute paths
223        let path_for_matching: std::borrow::Cow<'_, Path> = if let Some(ref root) = self.project_root {
224            if let Ok(canonical_path) = file_path.canonicalize() {
225                if let Ok(canonical_root) = root.canonicalize() {
226                    if let Ok(relative) = canonical_path.strip_prefix(&canonical_root) {
227                        std::borrow::Cow::Owned(relative.to_path_buf())
228                    } else {
229                        std::borrow::Cow::Borrowed(file_path)
230                    }
231                } else {
232                    std::borrow::Cow::Borrowed(file_path)
233                }
234            } else {
235                std::borrow::Cow::Borrowed(file_path)
236            }
237        } else {
238            std::borrow::Cow::Borrowed(file_path)
239        };
240
241        // Build a globset for efficient matching
242        let mut builder = GlobSetBuilder::new();
243        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
244
245        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
246            if let Ok(glob) = Glob::new(pattern) {
247                builder.add(glob);
248                pattern_to_rules.push((idx, rules));
249            } else {
250                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
251            }
252        }
253
254        let globset = match builder.build() {
255            Ok(gs) => gs,
256            Err(e) => {
257                log::error!("Failed to build globset for per-file-ignores: {e}");
258                return ignored_rules;
259            }
260        };
261
262        // Match the file path against all patterns
263        for match_idx in globset.matches(path_for_matching.as_ref()) {
264            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
265                for rule in rules.iter() {
266                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
267                    ignored_rules.insert(normalize_key(rule));
268                }
269            }
270        }
271
272        ignored_rules
273    }
274}
275
/// Global configuration options
// Maintainer notes (kept as `//` so they do not leak into derived output):
// - `rename_all = "kebab-case"` makes the kebab-case spelling the primary key
//   of every field; the `alias` attributes additionally accept the snake_case
//   spelling for the fields that declare one.
// - The container-level `default` fills any missing field from
//   `GlobalConfig::default()`; the two fields with `default = "..."` use their
//   named functions instead.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default, rename_all = "kebab-case")]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    #[serde(default = "default_respect_gitignore", alias = "respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    #[serde(default, alias = "line_length")]
    pub line_length: LineLength,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    // Omitted from serialized output while unset.
    #[serde(skip_serializing_if = "Option::is_none", alias = "output_format")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    // NOTE(review): the serde names declared on `MarkdownFlavor` are
    // "standard" (aliases "none", ""), "mkdocs", "mdx" and "quarto"; "gfm" and
    // "commonmark" are only handled by its `FromStr` impl — confirm which path
    // the config loader uses before advertising them here.
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    #[serde(default, alias = "force_exclude")]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,

    /// Directory to store cache files (default: .rumdl_cache)
    /// Can also be set via --cache-dir CLI flag or RUMDL_CACHE_DIR environment variable
    // Stored as `None` when not configured; the ".rumdl_cache" fallback named
    // above is not applied in this struct.
    #[serde(default, alias = "cache_dir", skip_serializing_if = "Option::is_none")]
    pub cache_dir: Option<String>,

    /// Whether caching is enabled (default: true)
    /// Can also be disabled via --no-cache CLI flag
    #[serde(default = "default_true")]
    pub cache: bool,
}
341
/// Serde default for `GlobalConfig::respect_gitignore`: `.gitignore` files are
/// respected unless the configuration says otherwise.
fn default_respect_gitignore() -> bool {
    true
}
345
/// Serde default helper that yields `true`; referenced by
/// `GlobalConfig::cache` so caching is on unless configured otherwise.
fn default_true() -> bool {
    true
}
349
350// Add the Default impl
351impl Default for GlobalConfig {
352    #[allow(deprecated)]
353    fn default() -> Self {
354        Self {
355            enable: Vec::new(),
356            disable: Vec::new(),
357            exclude: Vec::new(),
358            include: Vec::new(),
359            respect_gitignore: true,
360            line_length: LineLength::default(),
361            output_format: None,
362            fixable: Vec::new(),
363            unfixable: Vec::new(),
364            flavor: MarkdownFlavor::default(),
365            force_exclude: false,
366            cache_dir: None,
367            cache: true,
368        }
369    }
370}
371
/// File names recognised as markdownlint configuration files: the dot-prefixed
/// variants first, then the plain ones, each as JSON, JSONC, YAML and YML.
// NOTE(review): the code that consumes this list is outside this part of the
// file; presumably the names are probed in this order during config
// discovery — confirm before reordering.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
382
383/// Create a default configuration file at the specified path
384pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
385    // Check if file already exists
386    if Path::new(path).exists() {
387        return Err(ConfigError::FileExists { path: path.to_string() });
388    }
389
390    // Default configuration content
391    let default_config = r#"# rumdl configuration file
392
393# Global configuration options
394[global]
395# List of rules to disable (uncomment and modify as needed)
396# disable = ["MD013", "MD033"]
397
398# List of rules to enable exclusively (if provided, only these rules will run)
399# enable = ["MD001", "MD003", "MD004"]
400
401# List of file/directory patterns to include for linting (if provided, only these will be linted)
402# include = [
403#    "docs/*.md",
404#    "src/**/*.md",
405#    "README.md"
406# ]
407
408# List of file/directory patterns to exclude from linting
409exclude = [
410    # Common directories to exclude
411    ".git",
412    ".github",
413    "node_modules",
414    "vendor",
415    "dist",
416    "build",
417
418    # Specific files or patterns
419    "CHANGELOG.md",
420    "LICENSE.md",
421]
422
423# Respect .gitignore files when scanning directories (default: true)
424respect-gitignore = true
425
426# Markdown flavor/dialect (uncomment to enable)
427# Options: standard (default), gfm, commonmark, mkdocs, mdx, quarto
428# flavor = "mkdocs"
429
430# Rule-specific configurations (uncomment and modify as needed)
431
432# [MD003]
433# style = "atx"  # Heading style (atx, atx_closed, setext)
434
435# [MD004]
436# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
437
438# [MD007]
439# indent = 4  # Unordered list indentation
440
441# [MD013]
442# line-length = 100  # Line length
443# code-blocks = false  # Exclude code blocks from line length check
444# tables = false  # Exclude tables from line length check
445# headings = true  # Include headings in line length check
446
447# [MD044]
448# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
449# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
450"#;
451
452    // Write the default configuration to the file
453    match fs::write(path, default_config) {
454        Ok(_) => Ok(()),
455        Err(err) => Err(ConfigError::IoError {
456            source: err,
457            path: path.to_string(),
458        }),
459    }
460}
461
/// Errors that can occur when loading or creating configuration
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An I/O operation on the configuration file at `path` failed.
    ///
    /// The rendered message says "read", but `create_default_config` in this
    /// file also returns this variant when *writing* the file fails.
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON).
    /// The payload is the parser's message, shown verbatim.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists at `path`
    /// (returned by `create_default_config` instead of overwriting).
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
477
478/// Get a rule-specific configuration value
479/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
480/// for better markdownlint compatibility
481pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
482    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
483
484    let rule_config = config.rules.get(&norm_rule_name)?;
485
486    // Try multiple key variants to support both underscore and kebab-case formats
487    let key_variants = [
488        key.to_string(),       // Original key as provided
489        normalize_key(key),    // Normalized key (lowercase, kebab-case)
490        key.replace('-', "_"), // Convert kebab-case to snake_case
491        key.replace('_', "-"), // Convert snake_case to kebab-case
492    ];
493
494    // Try each variant until we find a match
495    for variant in &key_variants {
496        if let Some(value) = rule_config.values.get(variant)
497            && let Ok(result) = T::deserialize(value.clone())
498        {
499            return Some(result);
500        }
501    }
502
503    None
504}
505
/// Returns the default `[tool.rumdl]` section for a `pyproject.toml`.
///
/// The text begins with a blank line followed by the `[tool.rumdl]` table and
/// ends with commented-out per-rule examples.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(PYPROJECT_TEMPLATE)
}
548
549#[cfg(test)]
550mod tests {
551    use super::*;
552    use std::fs;
553    use tempfile::tempdir;
554
555    #[test]
556    fn test_flavor_loading() {
557        let temp_dir = tempdir().unwrap();
558        let config_path = temp_dir.path().join(".rumdl.toml");
559        let config_content = r#"
560[global]
561flavor = "mkdocs"
562disable = ["MD001"]
563"#;
564        fs::write(&config_path, config_content).unwrap();
565
566        // Load the config
567        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
568        let config: Config = sourced.into_validated_unchecked().into();
569
570        // Check that flavor was loaded
571        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
572        assert!(config.is_mkdocs_flavor());
573        assert!(config.is_mkdocs_project()); // Test backwards compatibility
574        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
575    }
576
577    #[test]
578    fn test_pyproject_toml_root_level_config() {
579        let temp_dir = tempdir().unwrap();
580        let config_path = temp_dir.path().join("pyproject.toml");
581
582        // Create a test pyproject.toml with root-level configuration
583        let content = r#"
584[tool.rumdl]
585line-length = 120
586disable = ["MD033"]
587enable = ["MD001", "MD004"]
588include = ["docs/*.md"]
589exclude = ["node_modules"]
590respect-gitignore = true
591        "#;
592
593        fs::write(&config_path, content).unwrap();
594
595        // Load the config with skip_auto_discovery to avoid environment config files
596        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
597        let config: Config = sourced.into_validated_unchecked().into(); // Convert to plain config for assertions
598
599        // Check global settings
600        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
601        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
602        // Should now contain only the configured pattern since auto-discovery is disabled
603        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
604        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
605        assert!(config.global.respect_gitignore);
606
607        // Check line-length was correctly added to MD013
608        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
609        assert_eq!(line_length, Some(120));
610    }
611
612    #[test]
613    fn test_pyproject_toml_snake_case_and_kebab_case() {
614        let temp_dir = tempdir().unwrap();
615        let config_path = temp_dir.path().join("pyproject.toml");
616
617        // Test with both kebab-case and snake_case variants
618        let content = r#"
619[tool.rumdl]
620line-length = 150
621respect_gitignore = true
622        "#;
623
624        fs::write(&config_path, content).unwrap();
625
626        // Load the config with skip_auto_discovery to avoid environment config files
627        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
628        let config: Config = sourced.into_validated_unchecked().into(); // Convert to plain config for assertions
629
630        // Check settings were correctly loaded
631        assert!(config.global.respect_gitignore);
632        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
633        assert_eq!(line_length, Some(150));
634    }
635
636    #[test]
637    fn test_md013_key_normalization_in_rumdl_toml() {
638        let temp_dir = tempdir().unwrap();
639        let config_path = temp_dir.path().join(".rumdl.toml");
640        let config_content = r#"
641[MD013]
642line_length = 111
643line-length = 222
644"#;
645        fs::write(&config_path, config_content).unwrap();
646        // Load the config with skip_auto_discovery to avoid environment config files
647        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
648        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
649        // Now we should only get the explicitly configured key
650        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
651        assert_eq!(keys, vec!["line-length"]);
652        let val = &rule_cfg.values["line-length"].value;
653        assert_eq!(val.as_integer(), Some(222));
654        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
655        let config: Config = sourced.clone().into_validated_unchecked().into();
656        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
657        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
658        assert_eq!(v1, Some(222));
659        assert_eq!(v2, Some(222));
660    }
661
662    #[test]
663    fn test_md013_section_case_insensitivity() {
664        let temp_dir = tempdir().unwrap();
665        let config_path = temp_dir.path().join(".rumdl.toml");
666        let config_content = r#"
667[md013]
668line-length = 101
669
670[Md013]
671line-length = 102
672
673[MD013]
674line-length = 103
675"#;
676        fs::write(&config_path, config_content).unwrap();
677        // Load the config with skip_auto_discovery to avoid environment config files
678        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
679        let config: Config = sourced.clone().into_validated_unchecked().into();
680        // Only the last section should win, and be present
681        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
682        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
683        assert_eq!(keys, vec!["line-length"]);
684        let val = &rule_cfg.values["line-length"].value;
685        assert_eq!(val.as_integer(), Some(103));
686        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
687        assert_eq!(v, Some(103));
688    }
689
690    #[test]
691    fn test_md013_key_snake_and_kebab_case() {
692        let temp_dir = tempdir().unwrap();
693        let config_path = temp_dir.path().join(".rumdl.toml");
694        let config_content = r#"
695[MD013]
696line_length = 201
697line-length = 202
698"#;
699        fs::write(&config_path, config_content).unwrap();
700        // Load the config with skip_auto_discovery to avoid environment config files
701        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
702        let config: Config = sourced.clone().into_validated_unchecked().into();
703        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
704        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
705        assert_eq!(keys, vec!["line-length"]);
706        let val = &rule_cfg.values["line-length"].value;
707        assert_eq!(val.as_integer(), Some(202));
708        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
709        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
710        assert_eq!(v1, Some(202));
711        assert_eq!(v2, Some(202));
712    }
713
714    #[test]
715    fn test_unknown_rule_section_is_ignored() {
716        let temp_dir = tempdir().unwrap();
717        let config_path = temp_dir.path().join(".rumdl.toml");
718        let config_content = r#"
719[MD999]
720foo = 1
721bar = 2
722[MD013]
723line-length = 303
724"#;
725        fs::write(&config_path, config_content).unwrap();
726        // Load the config with skip_auto_discovery to avoid environment config files
727        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
728        let config: Config = sourced.clone().into_validated_unchecked().into();
729        // MD999 should not be present
730        assert!(!sourced.rules.contains_key("MD999"));
731        // MD013 should be present and correct
732        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
733        assert_eq!(v, Some(303));
734    }
735
736    #[test]
737    fn test_invalid_toml_syntax() {
738        let temp_dir = tempdir().unwrap();
739        let config_path = temp_dir.path().join(".rumdl.toml");
740
741        // Invalid TOML with unclosed string
742        let config_content = r#"
743[MD013]
744line-length = "unclosed string
745"#;
746        fs::write(&config_path, config_content).unwrap();
747
748        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
749        assert!(result.is_err());
750        match result.unwrap_err() {
751            ConfigError::ParseError(msg) => {
752                // The actual error message from toml parser might vary
753                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
754            }
755            _ => panic!("Expected ParseError"),
756        }
757    }
758
759    #[test]
760    fn test_wrong_type_for_config_value() {
761        let temp_dir = tempdir().unwrap();
762        let config_path = temp_dir.path().join(".rumdl.toml");
763
764        // line-length should be a number, not a string
765        let config_content = r#"
766[MD013]
767line-length = "not a number"
768"#;
769        fs::write(&config_path, config_content).unwrap();
770
771        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
772        let config: Config = sourced.into_validated_unchecked().into();
773
774        // The value should be loaded as a string, not converted
775        let rule_config = config.rules.get("MD013").unwrap();
776        let value = rule_config.values.get("line-length").unwrap();
777        assert!(matches!(value, toml::Value::String(_)));
778    }
779
780    #[test]
781    fn test_empty_config_file() {
782        let temp_dir = tempdir().unwrap();
783        let config_path = temp_dir.path().join(".rumdl.toml");
784
785        // Empty file
786        fs::write(&config_path, "").unwrap();
787
788        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
789        let config: Config = sourced.into_validated_unchecked().into();
790
791        // Should have default values
792        assert_eq!(config.global.line_length.get(), 80);
793        assert!(config.global.respect_gitignore);
794        assert!(config.rules.is_empty());
795    }
796
797    #[test]
798    fn test_malformed_pyproject_toml() {
799        let temp_dir = tempdir().unwrap();
800        let config_path = temp_dir.path().join("pyproject.toml");
801
802        // Missing closing bracket
803        let content = r#"
804[tool.rumdl
805line-length = 120
806"#;
807        fs::write(&config_path, content).unwrap();
808
809        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
810        assert!(result.is_err());
811    }
812
813    #[test]
814    fn test_conflicting_config_values() {
815        let temp_dir = tempdir().unwrap();
816        let config_path = temp_dir.path().join(".rumdl.toml");
817
818        // Both enable and disable the same rule - these need to be in a global section
819        let config_content = r#"
820[global]
821enable = ["MD013"]
822disable = ["MD013"]
823"#;
824        fs::write(&config_path, config_content).unwrap();
825
826        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
827        let config: Config = sourced.into_validated_unchecked().into();
828
829        // Conflict resolution: enable wins over disable
830        assert!(config.global.enable.contains(&"MD013".to_string()));
831        assert!(!config.global.disable.contains(&"MD013".to_string()));
832    }
833
834    #[test]
835    fn test_invalid_rule_names() {
836        let temp_dir = tempdir().unwrap();
837        let config_path = temp_dir.path().join(".rumdl.toml");
838
839        let config_content = r#"
840[global]
841enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
842disable = ["MD-001", "MD_002"]
843"#;
844        fs::write(&config_path, config_content).unwrap();
845
846        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
847        let config: Config = sourced.into_validated_unchecked().into();
848
849        // All values should be preserved as-is
850        assert_eq!(config.global.enable.len(), 4);
851        assert_eq!(config.global.disable.len(), 2);
852    }
853
854    #[test]
855    fn test_deeply_nested_config() {
856        let temp_dir = tempdir().unwrap();
857        let config_path = temp_dir.path().join(".rumdl.toml");
858
859        // This should be ignored as we don't support nested tables within rule configs
860        let config_content = r#"
861[MD013]
862line-length = 100
863[MD013.nested]
864value = 42
865"#;
866        fs::write(&config_path, config_content).unwrap();
867
868        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
869        let config: Config = sourced.into_validated_unchecked().into();
870
871        let rule_config = config.rules.get("MD013").unwrap();
872        assert_eq!(
873            rule_config.values.get("line-length").unwrap(),
874            &toml::Value::Integer(100)
875        );
876        // Nested table should not be present
877        assert!(!rule_config.values.contains_key("nested"));
878    }
879
880    #[test]
881    fn test_unicode_in_config() {
882        let temp_dir = tempdir().unwrap();
883        let config_path = temp_dir.path().join(".rumdl.toml");
884
885        let config_content = r#"
886[global]
887include = ["文档/*.md", "ドキュメント/*.md"]
888exclude = ["测试/*", "🚀/*"]
889
890[MD013]
891line-length = 80
892message = "行太长了 🚨"
893"#;
894        fs::write(&config_path, config_content).unwrap();
895
896        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
897        let config: Config = sourced.into_validated_unchecked().into();
898
899        assert_eq!(config.global.include.len(), 2);
900        assert_eq!(config.global.exclude.len(), 2);
901        assert!(config.global.include[0].contains("文档"));
902        assert!(config.global.exclude[1].contains("🚀"));
903
904        let rule_config = config.rules.get("MD013").unwrap();
905        let message = rule_config.values.get("message").unwrap();
906        if let toml::Value::String(s) = message {
907            assert!(s.contains("行太长了"));
908            assert!(s.contains("🚨"));
909        }
910    }
911
912    #[test]
913    fn test_extremely_long_values() {
914        let temp_dir = tempdir().unwrap();
915        let config_path = temp_dir.path().join(".rumdl.toml");
916
917        let long_string = "a".repeat(10000);
918        let config_content = format!(
919            r#"
920[global]
921exclude = ["{long_string}"]
922
923[MD013]
924line-length = 999999999
925"#
926        );
927
928        fs::write(&config_path, config_content).unwrap();
929
930        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
931        let config: Config = sourced.into_validated_unchecked().into();
932
933        assert_eq!(config.global.exclude[0].len(), 10000);
934        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
935        assert_eq!(line_length, Some(999999999));
936    }
937
938    #[test]
939    fn test_config_with_comments() {
940        let temp_dir = tempdir().unwrap();
941        let config_path = temp_dir.path().join(".rumdl.toml");
942
943        let config_content = r#"
944[global]
945# This is a comment
946enable = ["MD001"] # Enable MD001
947# disable = ["MD002"] # This is commented out
948
949[MD013] # Line length rule
950line-length = 100 # Set to 100 characters
951# ignored = true # This setting is commented out
952"#;
953        fs::write(&config_path, config_content).unwrap();
954
955        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
956        let config: Config = sourced.into_validated_unchecked().into();
957
958        assert_eq!(config.global.enable, vec!["MD001"]);
959        assert!(config.global.disable.is_empty()); // Commented out
960
961        let rule_config = config.rules.get("MD013").unwrap();
962        assert_eq!(rule_config.values.len(), 1); // Only line-length
963        assert!(!rule_config.values.contains_key("ignored"));
964    }
965
966    #[test]
967    fn test_arrays_in_rule_config() {
968        let temp_dir = tempdir().unwrap();
969        let config_path = temp_dir.path().join(".rumdl.toml");
970
971        let config_content = r#"
972[MD003]
973levels = [1, 2, 3]
974tags = ["important", "critical"]
975mixed = [1, "two", true]
976"#;
977        fs::write(&config_path, config_content).unwrap();
978
979        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
980        let config: Config = sourced.into_validated_unchecked().into();
981
982        // Arrays should now be properly parsed
983        let rule_config = config.rules.get("MD003").expect("MD003 config should exist");
984
985        // Check that arrays are present and correctly parsed
986        assert!(rule_config.values.contains_key("levels"));
987        assert!(rule_config.values.contains_key("tags"));
988        assert!(rule_config.values.contains_key("mixed"));
989
990        // Verify array contents
991        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
992            assert_eq!(levels.len(), 3);
993            assert_eq!(levels[0], toml::Value::Integer(1));
994            assert_eq!(levels[1], toml::Value::Integer(2));
995            assert_eq!(levels[2], toml::Value::Integer(3));
996        } else {
997            panic!("levels should be an array");
998        }
999
1000        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
1001            assert_eq!(tags.len(), 2);
1002            assert_eq!(tags[0], toml::Value::String("important".to_string()));
1003            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
1004        } else {
1005            panic!("tags should be an array");
1006        }
1007
1008        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
1009            assert_eq!(mixed.len(), 3);
1010            assert_eq!(mixed[0], toml::Value::Integer(1));
1011            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
1012            assert_eq!(mixed[2], toml::Value::Boolean(true));
1013        } else {
1014            panic!("mixed should be an array");
1015        }
1016    }
1017
1018    #[test]
1019    fn test_normalize_key_edge_cases() {
1020        // Rule names
1021        assert_eq!(normalize_key("MD001"), "MD001");
1022        assert_eq!(normalize_key("md001"), "MD001");
1023        assert_eq!(normalize_key("Md001"), "MD001");
1024        assert_eq!(normalize_key("mD001"), "MD001");
1025
1026        // Non-rule names
1027        assert_eq!(normalize_key("line_length"), "line-length");
1028        assert_eq!(normalize_key("line-length"), "line-length");
1029        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
1030        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
1031
1032        // Edge cases
1033        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
1034        assert_eq!(normalize_key("MD00"), "md00"); // Too short
1035        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
1036        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
1037        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
1038        assert_eq!(normalize_key(""), "");
1039        assert_eq!(normalize_key("_"), "-");
1040        assert_eq!(normalize_key("___"), "---");
1041    }
1042
1043    #[test]
1044    fn test_missing_config_file() {
1045        let temp_dir = tempdir().unwrap();
1046        let config_path = temp_dir.path().join("nonexistent.toml");
1047
1048        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1049        assert!(result.is_err());
1050        match result.unwrap_err() {
1051            ConfigError::IoError { .. } => {}
1052            _ => panic!("Expected IoError for missing file"),
1053        }
1054    }
1055
1056    #[test]
1057    #[cfg(unix)]
1058    fn test_permission_denied_config() {
1059        use std::os::unix::fs::PermissionsExt;
1060
1061        let temp_dir = tempdir().unwrap();
1062        let config_path = temp_dir.path().join(".rumdl.toml");
1063
1064        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
1065
1066        // Remove read permissions
1067        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1068        perms.set_mode(0o000);
1069        fs::set_permissions(&config_path, perms).unwrap();
1070
1071        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1072
1073        // Restore permissions for cleanup
1074        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1075        perms.set_mode(0o644);
1076        fs::set_permissions(&config_path, perms).unwrap();
1077
1078        assert!(result.is_err());
1079        match result.unwrap_err() {
1080            ConfigError::IoError { .. } => {}
1081            _ => panic!("Expected IoError for permission denied"),
1082        }
1083    }
1084
1085    #[test]
1086    fn test_circular_reference_detection() {
1087        // This test is more conceptual since TOML doesn't support circular references
1088        // But we test that deeply nested structures don't cause stack overflow
1089        let temp_dir = tempdir().unwrap();
1090        let config_path = temp_dir.path().join(".rumdl.toml");
1091
1092        let mut config_content = String::from("[MD001]\n");
1093        for i in 0..100 {
1094            config_content.push_str(&format!("key{i} = {i}\n"));
1095        }
1096
1097        fs::write(&config_path, config_content).unwrap();
1098
1099        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1100        let config: Config = sourced.into_validated_unchecked().into();
1101
1102        let rule_config = config.rules.get("MD001").unwrap();
1103        assert_eq!(rule_config.values.len(), 100);
1104    }
1105
1106    #[test]
1107    fn test_special_toml_values() {
1108        let temp_dir = tempdir().unwrap();
1109        let config_path = temp_dir.path().join(".rumdl.toml");
1110
1111        let config_content = r#"
1112[MD001]
1113infinity = inf
1114neg_infinity = -inf
1115not_a_number = nan
1116datetime = 1979-05-27T07:32:00Z
1117local_date = 1979-05-27
1118local_time = 07:32:00
1119"#;
1120        fs::write(&config_path, config_content).unwrap();
1121
1122        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1123        let config: Config = sourced.into_validated_unchecked().into();
1124
1125        // Some values might not be parsed due to parser limitations
1126        if let Some(rule_config) = config.rules.get("MD001") {
1127            // Check special float values if present
1128            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1129                assert!(f.is_infinite() && f.is_sign_positive());
1130            }
1131            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1132                assert!(f.is_infinite() && f.is_sign_negative());
1133            }
1134            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1135                assert!(f.is_nan());
1136            }
1137
1138            // Check datetime values if present
1139            if let Some(val) = rule_config.values.get("datetime") {
1140                assert!(matches!(val, toml::Value::Datetime(_)));
1141            }
1142            // Note: local_date and local_time might not be parsed by the current implementation
1143        }
1144    }
1145
1146    #[test]
1147    fn test_default_config_passes_validation() {
1148        use crate::rules;
1149
1150        let temp_dir = tempdir().unwrap();
1151        let config_path = temp_dir.path().join(".rumdl.toml");
1152        let config_path_str = config_path.to_str().unwrap();
1153
1154        // Create the default config using the same function that `rumdl init` uses
1155        create_default_config(config_path_str).unwrap();
1156
1157        // Load it back as a SourcedConfig
1158        let sourced =
1159            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1160
1161        // Create the rule registry
1162        let all_rules = rules::all_rules(&Config::default());
1163        let registry = RuleRegistry::from_rules(&all_rules);
1164
1165        // Validate the config
1166        let warnings = validate_config_sourced(&sourced, &registry);
1167
1168        // The default config should have no warnings
1169        if !warnings.is_empty() {
1170            for warning in &warnings {
1171                eprintln!("Config validation warning: {}", warning.message);
1172                if let Some(rule) = &warning.rule {
1173                    eprintln!("  Rule: {rule}");
1174                }
1175                if let Some(key) = &warning.key {
1176                    eprintln!("  Key: {key}");
1177                }
1178            }
1179        }
1180        assert!(
1181            warnings.is_empty(),
1182            "Default config from rumdl init should pass validation without warnings"
1183        );
1184    }
1185
1186    #[test]
1187    fn test_per_file_ignores_config_parsing() {
1188        let temp_dir = tempdir().unwrap();
1189        let config_path = temp_dir.path().join(".rumdl.toml");
1190        let config_content = r#"
1191[per-file-ignores]
1192"README.md" = ["MD033"]
1193"docs/**/*.md" = ["MD013", "MD033"]
1194"test/*.md" = ["MD041"]
1195"#;
1196        fs::write(&config_path, config_content).unwrap();
1197
1198        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1199        let config: Config = sourced.into_validated_unchecked().into();
1200
1201        // Verify per-file-ignores was loaded
1202        assert_eq!(config.per_file_ignores.len(), 3);
1203        assert_eq!(
1204            config.per_file_ignores.get("README.md"),
1205            Some(&vec!["MD033".to_string()])
1206        );
1207        assert_eq!(
1208            config.per_file_ignores.get("docs/**/*.md"),
1209            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1210        );
1211        assert_eq!(
1212            config.per_file_ignores.get("test/*.md"),
1213            Some(&vec!["MD041".to_string()])
1214        );
1215    }
1216
1217    #[test]
1218    fn test_per_file_ignores_glob_matching() {
1219        use std::path::PathBuf;
1220
1221        let temp_dir = tempdir().unwrap();
1222        let config_path = temp_dir.path().join(".rumdl.toml");
1223        let config_content = r#"
1224[per-file-ignores]
1225"README.md" = ["MD033"]
1226"docs/**/*.md" = ["MD013"]
1227"**/test_*.md" = ["MD041"]
1228"#;
1229        fs::write(&config_path, config_content).unwrap();
1230
1231        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1232        let config: Config = sourced.into_validated_unchecked().into();
1233
1234        // Test exact match
1235        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1236        assert!(ignored.contains("MD033"));
1237        assert_eq!(ignored.len(), 1);
1238
1239        // Test glob pattern matching
1240        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1241        assert!(ignored.contains("MD013"));
1242        assert_eq!(ignored.len(), 1);
1243
1244        // Test recursive glob pattern
1245        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1246        assert!(ignored.contains("MD041"));
1247        assert_eq!(ignored.len(), 1);
1248
1249        // Test non-matching path
1250        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1251        assert!(ignored.is_empty());
1252    }
1253
1254    #[test]
1255    fn test_per_file_ignores_pyproject_toml() {
1256        let temp_dir = tempdir().unwrap();
1257        let config_path = temp_dir.path().join("pyproject.toml");
1258        let config_content = r#"
1259[tool.rumdl]
1260[tool.rumdl.per-file-ignores]
1261"README.md" = ["MD033", "MD013"]
1262"generated/*.md" = ["MD041"]
1263"#;
1264        fs::write(&config_path, config_content).unwrap();
1265
1266        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1267        let config: Config = sourced.into_validated_unchecked().into();
1268
1269        // Verify per-file-ignores was loaded from pyproject.toml
1270        assert_eq!(config.per_file_ignores.len(), 2);
1271        assert_eq!(
1272            config.per_file_ignores.get("README.md"),
1273            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1274        );
1275        assert_eq!(
1276            config.per_file_ignores.get("generated/*.md"),
1277            Some(&vec!["MD041".to_string()])
1278        );
1279    }
1280
1281    #[test]
1282    fn test_per_file_ignores_multiple_patterns_match() {
1283        use std::path::PathBuf;
1284
1285        let temp_dir = tempdir().unwrap();
1286        let config_path = temp_dir.path().join(".rumdl.toml");
1287        let config_content = r#"
1288[per-file-ignores]
1289"docs/**/*.md" = ["MD013"]
1290"**/api/*.md" = ["MD033"]
1291"docs/api/overview.md" = ["MD041"]
1292"#;
1293        fs::write(&config_path, config_content).unwrap();
1294
1295        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1296        let config: Config = sourced.into_validated_unchecked().into();
1297
1298        // File matches multiple patterns - should get union of all rules
1299        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1300        assert_eq!(ignored.len(), 3);
1301        assert!(ignored.contains("MD013"));
1302        assert!(ignored.contains("MD033"));
1303        assert!(ignored.contains("MD041"));
1304    }
1305
1306    #[test]
1307    fn test_per_file_ignores_rule_name_normalization() {
1308        use std::path::PathBuf;
1309
1310        let temp_dir = tempdir().unwrap();
1311        let config_path = temp_dir.path().join(".rumdl.toml");
1312        let config_content = r#"
1313[per-file-ignores]
1314"README.md" = ["md033", "MD013", "Md041"]
1315"#;
1316        fs::write(&config_path, config_content).unwrap();
1317
1318        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1319        let config: Config = sourced.into_validated_unchecked().into();
1320
1321        // All rule names should be normalized to uppercase
1322        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1323        assert_eq!(ignored.len(), 3);
1324        assert!(ignored.contains("MD033"));
1325        assert!(ignored.contains("MD013"));
1326        assert!(ignored.contains("MD041"));
1327    }
1328
1329    #[test]
1330    fn test_per_file_ignores_invalid_glob_pattern() {
1331        use std::path::PathBuf;
1332
1333        let temp_dir = tempdir().unwrap();
1334        let config_path = temp_dir.path().join(".rumdl.toml");
1335        let config_content = r#"
1336[per-file-ignores]
1337"[invalid" = ["MD033"]
1338"valid/*.md" = ["MD013"]
1339"#;
1340        fs::write(&config_path, config_content).unwrap();
1341
1342        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1343        let config: Config = sourced.into_validated_unchecked().into();
1344
1345        // Invalid pattern should be skipped, valid pattern should work
1346        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1347        assert!(ignored.contains("MD013"));
1348
1349        // Invalid pattern should not cause issues
1350        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1351        assert!(ignored2.is_empty());
1352    }
1353
1354    #[test]
1355    fn test_per_file_ignores_empty_section() {
1356        use std::path::PathBuf;
1357
1358        let temp_dir = tempdir().unwrap();
1359        let config_path = temp_dir.path().join(".rumdl.toml");
1360        let config_content = r#"
1361[global]
1362disable = ["MD001"]
1363
1364[per-file-ignores]
1365"#;
1366        fs::write(&config_path, config_content).unwrap();
1367
1368        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1369        let config: Config = sourced.into_validated_unchecked().into();
1370
1371        // Empty per-file-ignores should work fine
1372        assert_eq!(config.per_file_ignores.len(), 0);
1373        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1374        assert!(ignored.is_empty());
1375    }
1376
1377    #[test]
1378    fn test_per_file_ignores_with_underscores_in_pyproject() {
1379        let temp_dir = tempdir().unwrap();
1380        let config_path = temp_dir.path().join("pyproject.toml");
1381        let config_content = r#"
1382[tool.rumdl]
1383[tool.rumdl.per_file_ignores]
1384"README.md" = ["MD033"]
1385"#;
1386        fs::write(&config_path, config_content).unwrap();
1387
1388        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1389        let config: Config = sourced.into_validated_unchecked().into();
1390
1391        // Should support both per-file-ignores and per_file_ignores
1392        assert_eq!(config.per_file_ignores.len(), 1);
1393        assert_eq!(
1394            config.per_file_ignores.get("README.md"),
1395            Some(&vec!["MD033".to_string()])
1396        );
1397    }
1398
1399    #[test]
1400    fn test_per_file_ignores_absolute_path_matching() {
1401        // Regression test for issue #208: per-file-ignores should work with absolute paths
1402        // This is critical for GitHub Actions which uses absolute paths like $GITHUB_WORKSPACE
1403        use std::path::PathBuf;
1404
1405        let temp_dir = tempdir().unwrap();
1406        let config_path = temp_dir.path().join(".rumdl.toml");
1407
1408        // Create a subdirectory and file to match against
1409        let github_dir = temp_dir.path().join(".github");
1410        fs::create_dir_all(&github_dir).unwrap();
1411        let test_file = github_dir.join("pull_request_template.md");
1412        fs::write(&test_file, "Test content").unwrap();
1413
1414        let config_content = r#"
1415[per-file-ignores]
1416".github/pull_request_template.md" = ["MD041"]
1417"docs/**/*.md" = ["MD013"]
1418"#;
1419        fs::write(&config_path, config_content).unwrap();
1420
1421        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1422        let config: Config = sourced.into_validated_unchecked().into();
1423
1424        // Test with absolute path (like GitHub Actions would use)
1425        let absolute_path = test_file.canonicalize().unwrap();
1426        let ignored = config.get_ignored_rules_for_file(&absolute_path);
1427        assert!(
1428            ignored.contains("MD041"),
1429            "Should match absolute path {absolute_path:?} against relative pattern"
1430        );
1431        assert_eq!(ignored.len(), 1);
1432
1433        // Also verify relative path still works
1434        let relative_path = PathBuf::from(".github/pull_request_template.md");
1435        let ignored = config.get_ignored_rules_for_file(&relative_path);
1436        assert!(ignored.contains("MD041"), "Should match relative path");
1437    }
1438
1439    #[test]
1440    fn test_generate_json_schema() {
1441        use schemars::schema_for;
1442        use std::env;
1443
1444        let schema = schema_for!(Config);
1445        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1446
1447        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1448        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1449            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1450            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1451            println!("Schema written to: {}", schema_path.display());
1452        }
1453
1454        // Basic validation that schema was generated
1455        assert!(schema_json.contains("\"title\": \"Config\""));
1456        assert!(schema_json.contains("\"global\""));
1457        assert!(schema_json.contains("\"per-file-ignores\""));
1458    }
1459
1460    #[test]
1461    fn test_user_config_loaded_with_explicit_project_config() {
1462        // Regression test for issue #131: User config should always be loaded as base layer,
1463        // even when an explicit project config path is provided
1464        let temp_dir = tempdir().unwrap();
1465
1466        // Create a fake user config directory
1467        // Note: user_configuration_path_impl adds /rumdl to the config dir
1468        let user_config_dir = temp_dir.path().join("user_config");
1469        let rumdl_config_dir = user_config_dir.join("rumdl");
1470        fs::create_dir_all(&rumdl_config_dir).unwrap();
1471        let user_config_path = rumdl_config_dir.join("rumdl.toml");
1472
1473        // User config disables MD013 and MD041
1474        let user_config_content = r#"
1475[global]
1476disable = ["MD013", "MD041"]
1477line-length = 100
1478"#;
1479        fs::write(&user_config_path, user_config_content).unwrap();
1480
1481        // Create a project config that enables MD001
1482        let project_config_path = temp_dir.path().join("project").join("pyproject.toml");
1483        fs::create_dir_all(project_config_path.parent().unwrap()).unwrap();
1484        let project_config_content = r#"
1485[tool.rumdl]
1486enable = ["MD001"]
1487"#;
1488        fs::write(&project_config_path, project_config_content).unwrap();
1489
1490        // Load config with explicit project path, passing user_config_dir
1491        let sourced = SourcedConfig::load_with_discovery_impl(
1492            Some(project_config_path.to_str().unwrap()),
1493            None,
1494            false,
1495            Some(&user_config_dir),
1496        )
1497        .unwrap();
1498
1499        let config: Config = sourced.into_validated_unchecked().into();
1500
1501        // User config settings should be preserved
1502        assert!(
1503            config.global.disable.contains(&"MD013".to_string()),
1504            "User config disabled rules should be preserved"
1505        );
1506        assert!(
1507            config.global.disable.contains(&"MD041".to_string()),
1508            "User config disabled rules should be preserved"
1509        );
1510
1511        // Project config settings should also be applied (merged on top)
1512        assert!(
1513            config.global.enable.contains(&"MD001".to_string()),
1514            "Project config enabled rules should be applied"
1515        );
1516    }
1517
1518    #[test]
1519    fn test_typestate_validate_method() {
1520        use tempfile::tempdir;
1521
1522        let temp_dir = tempdir().expect("Failed to create temporary directory");
1523        let config_path = temp_dir.path().join("test.toml");
1524
1525        // Create config with an unknown rule option to trigger a validation warning
1526        let config_content = r#"
1527[global]
1528enable = ["MD001"]
1529
1530[MD013]
1531line_length = 80
1532unknown_option = true
1533"#;
1534        std::fs::write(&config_path, config_content).expect("Failed to write config");
1535
1536        // Load config - this returns SourcedConfig<ConfigLoaded>
1537        let loaded = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true)
1538            .expect("Should load config");
1539
1540        // Create a rule registry for validation
1541        let default_config = Config::default();
1542        let all_rules = crate::rules::all_rules(&default_config);
1543        let registry = RuleRegistry::from_rules(&all_rules);
1544
1545        // Validate - this transitions to SourcedConfig<ConfigValidated>
1546        let validated = loaded.validate(&registry).expect("Should validate config");
1547
1548        // Check that validation warnings were captured for the unknown option
1549        // Note: The validation checks rule options against the rule's schema
1550        let has_unknown_option_warning = validated
1551            .validation_warnings
1552            .iter()
1553            .any(|w| w.message.contains("unknown_option") || w.message.contains("Unknown option"));
1554
1555        // Print warnings for debugging if assertion fails
1556        if !has_unknown_option_warning {
1557            for w in &validated.validation_warnings {
1558                eprintln!("Warning: {}", w.message);
1559            }
1560        }
1561        assert!(
1562            has_unknown_option_warning,
1563            "Should have warning for unknown option. Got {} warnings: {:?}",
1564            validated.validation_warnings.len(),
1565            validated
1566                .validation_warnings
1567                .iter()
1568                .map(|w| &w.message)
1569                .collect::<Vec<_>>()
1570        );
1571
1572        // Now we can convert to Config (this would be a compile error with ConfigLoaded)
1573        let config: Config = validated.into();
1574
1575        // Verify the config values are correct
1576        assert!(config.global.enable.contains(&"MD001".to_string()));
1577    }
1578
1579    #[test]
1580    fn test_typestate_validate_into_convenience_method() {
1581        use tempfile::tempdir;
1582
1583        let temp_dir = tempdir().expect("Failed to create temporary directory");
1584        let config_path = temp_dir.path().join("test.toml");
1585
1586        let config_content = r#"
1587[global]
1588enable = ["MD022"]
1589
1590[MD022]
1591lines_above = 2
1592"#;
1593        std::fs::write(&config_path, config_content).expect("Failed to write config");
1594
1595        let loaded = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true)
1596            .expect("Should load config");
1597
1598        let default_config = Config::default();
1599        let all_rules = crate::rules::all_rules(&default_config);
1600        let registry = RuleRegistry::from_rules(&all_rules);
1601
1602        // Use the convenience method that validates and converts in one step
1603        let (config, warnings) = loaded.validate_into(&registry).expect("Should validate and convert");
1604
1605        // Should have no warnings for valid config
1606        assert!(warnings.is_empty(), "Should have no warnings for valid config");
1607
1608        // Config should be usable
1609        assert!(config.global.enable.contains(&"MD022".to_string()));
1610    }
1611}
1612
/// Configuration source with clear precedence hierarchy.
///
/// Precedence order (higher values override lower values):
/// - Default (0): Built-in defaults
/// - UserConfig (1): User-level ~/.config/rumdl/rumdl.toml
/// - PyprojectToml (2): Project-level pyproject.toml
/// - ProjectConfig (3): Project-level .rumdl.toml (most specific)
/// - Cli (4): Command-line flags (highest priority)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default configuration
    Default,
    /// User-level configuration from ~/.config/rumdl/rumdl.toml
    UserConfig,
    /// Project-level configuration from pyproject.toml
    PyprojectToml,
    /// Project-level configuration from .rumdl.toml or rumdl.toml
    ProjectConfig,
    /// Command-line flags (highest precedence)
    Cli,
}

impl ConfigSource {
    /// Numeric precedence of this source; a source with a higher number overrides
    /// values coming from a source with a lower number.
    ///
    /// This is the single place where the ordering is defined. The match is
    /// deliberately exhaustive so adding a variant forces a decision here.
    pub(crate) fn precedence(self) -> u8 {
        match self {
            ConfigSource::Default => 0,
            ConfigSource::UserConfig => 1,
            ConfigSource::PyprojectToml => 2,
            ConfigSource::ProjectConfig => 3,
            ConfigSource::Cli => 4,
        }
    }
}

/// One entry in the history of a [`SourcedValue`]: the value a source supplied,
/// plus the file and line it came from when those are known.
#[derive(Debug, Clone)]
pub struct ConfigOverride<T> {
    pub value: T,
    pub source: ConfigSource,
    pub file: Option<String>,
    pub line: Option<usize>,
}

/// A configuration value together with its provenance.
///
/// `value` and `source` describe the currently effective value; `overrides`
/// records every value that was accepted, oldest first.
#[derive(Debug, Clone)]
pub struct SourcedValue<T> {
    pub value: T,
    pub source: ConfigSource,
    pub overrides: Vec<ConfigOverride<T>>,
}

impl<T: Clone> SourcedValue<T> {
    /// Creates a value originating from `source`.
    ///
    /// The history starts with a single entry for that value, without file or
    /// line information.
    pub fn new(value: T, source: ConfigSource) -> Self {
        Self {
            value: value.clone(),
            source,
            overrides: vec![ConfigOverride {
                value,
                source,
                file: None,
                line: None,
            }],
        }
    }

    /// Merges a new override into this SourcedValue based on source precedence.
    ///
    /// If `new_source` has higher or equal precedence than the current source, the
    /// value and source are updated and the override is appended to the history.
    /// Otherwise the call is a no-op: nothing is changed and nothing is recorded.
    pub fn merge_override(
        &mut self,
        new_value: T,
        new_source: ConfigSource,
        new_file: Option<String>,
        new_line: Option<usize>,
    ) {
        if new_source.precedence() >= self.source.precedence() {
            self.push_override(new_value, new_source, new_file, new_line);
        }
    }

    /// Unconditionally replaces the current value and source and records the
    /// override in the history (i.e. `merge_override` without the precedence check).
    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
        self.value = value.clone();
        self.source = source;
        self.overrides.push(ConfigOverride {
            value,
            source,
            file,
            line,
        });
    }
}

impl<T: Clone + Eq + std::hash::Hash> SourcedValue<Vec<T>> {
    /// Merges a new value using union semantics (for arrays like `disable`).
    ///
    /// When `new_source` has higher or equal precedence than the current source,
    /// every item of `new_value` that is not already present is appended (existing
    /// order is kept), the source becomes `new_source`, and `new_value` is recorded
    /// in the history exactly as it was supplied. With lower precedence the call is
    /// a no-op.
    pub fn merge_union(
        &mut self,
        new_value: Vec<T>,
        new_source: ConfigSource,
        new_file: Option<String>,
        new_line: Option<usize>,
    ) {
        if new_source.precedence() < self.source.precedence() {
            return;
        }

        // Append in place; the lists involved are short, so a linear membership
        // test keeps insertion order without building an auxiliary set.
        for item in &new_value {
            if !self.value.contains(item) {
                self.value.push(item.clone());
            }
        }

        self.source = new_source;
        self.overrides.push(ConfigOverride {
            value: new_value,
            source: new_source,
            file: new_file,
            line: new_line,
        });
    }
}
1751
/// Global configuration options, each wrapped in [`SourcedValue`] so that the
/// origin of the effective value (and the history of accepted overrides) is kept.
///
/// `output_format` and `cache_dir` are `Option`s: they stay `None` until some
/// source provides a value (the `Default` impl leaves both unset).
#[derive(Debug, Clone)]
pub struct SourcedGlobalConfig {
    /// Enabled rules (e.g. `"MD001"`); merged with replace semantics.
    pub enable: SourcedValue<Vec<String>>,
    /// Disabled rules; merged with union semantics, then entries that also
    /// appear in `enable` are removed.
    pub disable: SourcedValue<Vec<String>>,
    pub exclude: SourcedValue<Vec<String>>,
    pub include: SourcedValue<Vec<String>>,
    pub respect_gitignore: SourcedValue<bool>,
    pub line_length: SourcedValue<LineLength>,
    /// Unset (`None`) by default.
    pub output_format: Option<SourcedValue<String>>,
    pub fixable: SourcedValue<Vec<String>>,
    pub unfixable: SourcedValue<Vec<String>>,
    pub flavor: SourcedValue<MarkdownFlavor>,
    pub force_exclude: SourcedValue<bool>,
    /// Unset (`None`) by default.
    pub cache_dir: Option<SourcedValue<String>>,
    /// Only merged from a fragment when that fragment set it explicitly
    /// (i.e. its source is not `ConfigSource::Default`).
    pub cache: SourcedValue<bool>,
}
1768
1769impl Default for SourcedGlobalConfig {
1770    fn default() -> Self {
1771        SourcedGlobalConfig {
1772            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1773            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1774            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1775            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1776            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1777            line_length: SourcedValue::new(LineLength::default(), ConfigSource::Default),
1778            output_format: None,
1779            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1780            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1781            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1782            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1783            cache_dir: None,
1784            cache: SourcedValue::new(true, ConfigSource::Default),
1785        }
1786    }
1787}
1788
/// Configuration of a single rule: a map from option key to its TOML value,
/// each with provenance tracking.
#[derive(Debug, Default, Clone)]
pub struct SourcedRuleConfig {
    /// Option values keyed by option key, kept in sorted order by the `BTreeMap`.
    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
}
1793
/// Represents configuration loaded from a single source file, with provenance.
/// Used as an intermediate step before merging into the final SourcedConfig.
///
/// A fragment does not record which files were loaded; `loaded_files` is
/// tracked globally in `SourcedConfig`.
#[derive(Debug, Clone)]
pub struct SourcedConfigFragment {
    /// Global options found in this source.
    pub global: SourcedGlobalConfig,
    // NOTE(review): presumably maps a file pattern to the rules ignored for it — confirm against the parser.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Per-rule configuration, keyed by rule name as written in the source
    /// (names are upper-cased when the fragment is merged).
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Unknown keys as `(section, key, file_path)` tuples.
    pub unknown_keys: Vec<(String, String, Option<String>)>,
}
1804
1805impl Default for SourcedConfigFragment {
1806    fn default() -> Self {
1807        Self {
1808            global: SourcedGlobalConfig::default(),
1809            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1810            rules: BTreeMap::new(),
1811            unknown_keys: Vec::new(),
1812        }
1813    }
1814}
1815
/// Configuration with provenance tracking for values.
///
/// The `State` type parameter encodes the validation state:
/// - `ConfigLoaded`: Config has been loaded but not validated
/// - `ConfigValidated`: Config has been validated and can be converted to `Config`
///
/// # Typestate Pattern
///
/// This uses the typestate pattern to ensure validation happens before conversion:
///
/// ```ignore
/// let loaded: SourcedConfig<ConfigLoaded> = SourcedConfig::load_with_discovery(...)?;
/// let validated: SourcedConfig<ConfigValidated> = loaded.validate(&registry)?;
/// let config: Config = validated.into();  // Only works on ConfigValidated!
/// ```
///
/// Attempting to convert a `ConfigLoaded` config directly to `Config` is a compile error.
#[derive(Debug, Clone)]
pub struct SourcedConfig<State = ConfigLoaded> {
    /// Merged global options.
    pub global: SourcedGlobalConfig,
    // NOTE(review): presumably maps a file pattern to the rules ignored for it — confirm against the parser.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Merged per-rule configuration; keys are rule names normalized to upper case by `merge`.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Paths of the configuration files that were loaded and merged, in load order.
    pub loaded_files: Vec<String>,
    /// Unknown keys as `(section, key, file_path)`, de-duplicated on `(section, key)` by `merge`.
    pub unknown_keys: Vec<(String, String, Option<String>)>,
    /// Project root directory (parent of config file), used for resolving relative paths
    pub project_root: Option<std::path::PathBuf>,
    /// Validation warnings (populated after validate() is called)
    pub validation_warnings: Vec<ConfigValidationWarning>,
    /// Phantom data for the state type parameter
    _state: PhantomData<State>,
}
1847
1848impl Default for SourcedConfig<ConfigLoaded> {
1849    fn default() -> Self {
1850        Self {
1851            global: SourcedGlobalConfig::default(),
1852            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1853            rules: BTreeMap::new(),
1854            loaded_files: Vec::new(),
1855            unknown_keys: Vec::new(),
1856            project_root: None,
1857            validation_warnings: Vec::new(),
1858            _state: PhantomData,
1859        }
1860    }
1861}
1862
1863impl SourcedConfig<ConfigLoaded> {
1864    /// Merges another SourcedConfigFragment into this SourcedConfig.
1865    /// Uses source precedence to determine which values take effect.
1866    fn merge(&mut self, fragment: SourcedConfigFragment) {
1867        // Merge global config
1868        // Enable uses replace semantics (project can enforce rules)
1869        self.global.enable.merge_override(
1870            fragment.global.enable.value,
1871            fragment.global.enable.source,
1872            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1873            fragment.global.enable.overrides.first().and_then(|o| o.line),
1874        );
1875
1876        // Disable uses union semantics (user can add to project disables)
1877        self.global.disable.merge_union(
1878            fragment.global.disable.value,
1879            fragment.global.disable.source,
1880            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1881            fragment.global.disable.overrides.first().and_then(|o| o.line),
1882        );
1883
1884        // Conflict resolution: Enable overrides disable
1885        // Remove any rules from disable that appear in enable
1886        self.global
1887            .disable
1888            .value
1889            .retain(|rule| !self.global.enable.value.contains(rule));
1890        self.global.include.merge_override(
1891            fragment.global.include.value,
1892            fragment.global.include.source,
1893            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1894            fragment.global.include.overrides.first().and_then(|o| o.line),
1895        );
1896        self.global.exclude.merge_override(
1897            fragment.global.exclude.value,
1898            fragment.global.exclude.source,
1899            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1900            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1901        );
1902        self.global.respect_gitignore.merge_override(
1903            fragment.global.respect_gitignore.value,
1904            fragment.global.respect_gitignore.source,
1905            fragment
1906                .global
1907                .respect_gitignore
1908                .overrides
1909                .first()
1910                .and_then(|o| o.file.clone()),
1911            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1912        );
1913        self.global.line_length.merge_override(
1914            fragment.global.line_length.value,
1915            fragment.global.line_length.source,
1916            fragment
1917                .global
1918                .line_length
1919                .overrides
1920                .first()
1921                .and_then(|o| o.file.clone()),
1922            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1923        );
1924        self.global.fixable.merge_override(
1925            fragment.global.fixable.value,
1926            fragment.global.fixable.source,
1927            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1928            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1929        );
1930        self.global.unfixable.merge_override(
1931            fragment.global.unfixable.value,
1932            fragment.global.unfixable.source,
1933            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1934            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1935        );
1936
1937        // Merge flavor
1938        self.global.flavor.merge_override(
1939            fragment.global.flavor.value,
1940            fragment.global.flavor.source,
1941            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1942            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1943        );
1944
1945        // Merge force_exclude
1946        self.global.force_exclude.merge_override(
1947            fragment.global.force_exclude.value,
1948            fragment.global.force_exclude.source,
1949            fragment
1950                .global
1951                .force_exclude
1952                .overrides
1953                .first()
1954                .and_then(|o| o.file.clone()),
1955            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1956        );
1957
1958        // Merge output_format if present
1959        if let Some(output_format_fragment) = fragment.global.output_format {
1960            if let Some(ref mut output_format) = self.global.output_format {
1961                output_format.merge_override(
1962                    output_format_fragment.value,
1963                    output_format_fragment.source,
1964                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1965                    output_format_fragment.overrides.first().and_then(|o| o.line),
1966                );
1967            } else {
1968                self.global.output_format = Some(output_format_fragment);
1969            }
1970        }
1971
1972        // Merge cache_dir if present
1973        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
1974            if let Some(ref mut cache_dir) = self.global.cache_dir {
1975                cache_dir.merge_override(
1976                    cache_dir_fragment.value,
1977                    cache_dir_fragment.source,
1978                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
1979                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
1980                );
1981            } else {
1982                self.global.cache_dir = Some(cache_dir_fragment);
1983            }
1984        }
1985
1986        // Merge cache if not default (only override when explicitly set)
1987        if fragment.global.cache.source != ConfigSource::Default {
1988            self.global.cache.merge_override(
1989                fragment.global.cache.value,
1990                fragment.global.cache.source,
1991                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
1992                fragment.global.cache.overrides.first().and_then(|o| o.line),
1993            );
1994        }
1995
1996        // Merge per_file_ignores
1997        self.per_file_ignores.merge_override(
1998            fragment.per_file_ignores.value,
1999            fragment.per_file_ignores.source,
2000            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
2001            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
2002        );
2003
2004        // Merge rule configs
2005        for (rule_name, rule_fragment) in fragment.rules {
2006            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
2007            let rule_entry = self.rules.entry(norm_rule_name).or_default();
2008            for (key, sourced_value_fragment) in rule_fragment.values {
2009                let sv_entry = rule_entry
2010                    .values
2011                    .entry(key.clone())
2012                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
2013                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
2014                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
2015                sv_entry.merge_override(
2016                    sourced_value_fragment.value,  // Use the value from the fragment
2017                    sourced_value_fragment.source, // Use the source from the fragment
2018                    file_from_fragment,            // Pass the file path from the fragment override
2019                    line_from_fragment,            // Pass the line number from the fragment override
2020                );
2021            }
2022        }
2023
2024        // Merge unknown_keys from fragment
2025        for (section, key, file_path) in fragment.unknown_keys {
2026            // Deduplicate: only add if not already present
2027            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
2028                self.unknown_keys.push((section, key, file_path));
2029            }
2030        }
2031    }
2032
2033    /// Load and merge configurations from files and CLI overrides.
2034    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
2035        Self::load_with_discovery(config_path, cli_overrides, false)
2036    }
2037
2038    /// Finds project root by walking up from start_dir looking for .git directory.
2039    /// Falls back to start_dir if no .git found.
2040    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
2041        let mut current = start_dir.to_path_buf();
2042        const MAX_DEPTH: usize = 100;
2043
2044        for _ in 0..MAX_DEPTH {
2045            if current.join(".git").exists() {
2046                log::debug!("[rumdl-config] Found .git at: {}", current.display());
2047                return current;
2048            }
2049
2050            match current.parent() {
2051                Some(parent) => current = parent.to_path_buf(),
2052                None => break,
2053            }
2054        }
2055
2056        // No .git found, use start_dir as project root
2057        log::debug!(
2058            "[rumdl-config] No .git found, using config location as project root: {}",
2059            start_dir.display()
2060        );
2061        start_dir.to_path_buf()
2062    }
2063
2064    /// Discover configuration file by traversing up the directory tree.
2065    /// Returns the first configuration file found.
2066    /// Discovers config file and returns both the config path and project root.
2067    /// Returns: (config_file_path, project_root_path)
2068    /// Project root is the directory containing .git, or config parent as fallback.
2069    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
2070        use std::env;
2071
2072        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
2073        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
2074
2075        let start_dir = match env::current_dir() {
2076            Ok(dir) => dir,
2077            Err(e) => {
2078                log::debug!("[rumdl-config] Failed to get current directory: {e}");
2079                return None;
2080            }
2081        };
2082
2083        let mut current_dir = start_dir.clone();
2084        let mut depth = 0;
2085        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
2086
2087        loop {
2088            if depth >= MAX_DEPTH {
2089                log::debug!("[rumdl-config] Maximum traversal depth reached");
2090                break;
2091            }
2092
2093            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
2094
2095            // Check for config files in order of precedence (only if not already found)
2096            if found_config.is_none() {
2097                for config_name in CONFIG_FILES {
2098                    let config_path = current_dir.join(config_name);
2099
2100                    if config_path.exists() {
2101                        // For pyproject.toml, verify it contains [tool.rumdl] section
2102                        if *config_name == "pyproject.toml" {
2103                            if let Ok(content) = std::fs::read_to_string(&config_path) {
2104                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
2105                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
2106                                    // Store config, but continue looking for .git
2107                                    found_config = Some((config_path.clone(), current_dir.clone()));
2108                                    break;
2109                                }
2110                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
2111                                continue;
2112                            }
2113                        } else {
2114                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
2115                            // Store config, but continue looking for .git
2116                            found_config = Some((config_path.clone(), current_dir.clone()));
2117                            break;
2118                        }
2119                    }
2120                }
2121            }
2122
2123            // Check for .git directory (stop boundary)
2124            if current_dir.join(".git").exists() {
2125                log::debug!("[rumdl-config] Stopping at .git directory");
2126                break;
2127            }
2128
2129            // Move to parent directory
2130            match current_dir.parent() {
2131                Some(parent) => {
2132                    current_dir = parent.to_owned();
2133                    depth += 1;
2134                }
2135                None => {
2136                    log::debug!("[rumdl-config] Reached filesystem root");
2137                    break;
2138                }
2139            }
2140        }
2141
2142        // If config found, determine project root by walking up from config location
2143        if let Some((config_path, config_dir)) = found_config {
2144            let project_root = Self::find_project_root_from(&config_dir);
2145            return Some((config_path, project_root));
2146        }
2147
2148        None
2149    }
2150
2151    /// Discover markdownlint configuration file by traversing up the directory tree.
2152    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
2153    /// Returns the path to the config file if found.
2154    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
2155        use std::env;
2156
2157        const MAX_DEPTH: usize = 100;
2158
2159        let start_dir = match env::current_dir() {
2160            Ok(dir) => dir,
2161            Err(e) => {
2162                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
2163                return None;
2164            }
2165        };
2166
2167        let mut current_dir = start_dir.clone();
2168        let mut depth = 0;
2169
2170        loop {
2171            if depth >= MAX_DEPTH {
2172                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
2173                break;
2174            }
2175
2176            log::debug!(
2177                "[rumdl-config] Searching for markdownlint config in: {}",
2178                current_dir.display()
2179            );
2180
2181            // Check for markdownlint config files in order of precedence
2182            for config_name in MARKDOWNLINT_CONFIG_FILES {
2183                let config_path = current_dir.join(config_name);
2184                if config_path.exists() {
2185                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
2186                    return Some(config_path);
2187                }
2188            }
2189
2190            // Check for .git directory (stop boundary)
2191            if current_dir.join(".git").exists() {
2192                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
2193                break;
2194            }
2195
2196            // Move to parent directory
2197            match current_dir.parent() {
2198                Some(parent) => {
2199                    current_dir = parent.to_owned();
2200                    depth += 1;
2201                }
2202                None => {
2203                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
2204                    break;
2205                }
2206            }
2207        }
2208
2209        None
2210    }
2211
2212    /// Internal implementation that accepts config directory for testing
2213    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
2214        let config_dir = config_dir.join("rumdl");
2215
2216        // Check for config files in precedence order (same as project discovery)
2217        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
2218
2219        log::debug!(
2220            "[rumdl-config] Checking for user configuration in: {}",
2221            config_dir.display()
2222        );
2223
2224        for filename in USER_CONFIG_FILES {
2225            let config_path = config_dir.join(filename);
2226
2227            if config_path.exists() {
2228                // For pyproject.toml, verify it contains [tool.rumdl] section
2229                if *filename == "pyproject.toml" {
2230                    if let Ok(content) = std::fs::read_to_string(&config_path) {
2231                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
2232                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
2233                            return Some(config_path);
2234                        }
2235                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
2236                        continue;
2237                    }
2238                } else {
2239                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
2240                    return Some(config_path);
2241                }
2242            }
2243        }
2244
2245        log::debug!(
2246            "[rumdl-config] No user configuration found in: {}",
2247            config_dir.display()
2248        );
2249        None
2250    }
2251
2252    /// Discover user-level configuration file from platform-specific config directory.
2253    /// Returns the first configuration file found in the user config directory.
2254    #[cfg(feature = "native")]
2255    fn user_configuration_path() -> Option<std::path::PathBuf> {
2256        use etcetera::{BaseStrategy, choose_base_strategy};
2257
2258        match choose_base_strategy() {
2259            Ok(strategy) => {
2260                let config_dir = strategy.config_dir();
2261                Self::user_configuration_path_impl(&config_dir)
2262            }
2263            Err(e) => {
2264                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
2265                None
2266            }
2267        }
2268    }
2269
    /// Stub for builds without the `native` feature (e.g. WASM): user-level
    /// configuration is not supported there, so this always returns `None`.
    #[cfg(not(feature = "native"))]
    fn user_configuration_path() -> Option<std::path::PathBuf> {
        None
    }
2275
2276    /// Internal implementation that accepts user config directory for testing
2277    #[doc(hidden)]
2278    pub fn load_with_discovery_impl(
2279        config_path: Option<&str>,
2280        cli_overrides: Option<&SourcedGlobalConfig>,
2281        skip_auto_discovery: bool,
2282        user_config_dir: Option<&Path>,
2283    ) -> Result<Self, ConfigError> {
2284        use std::env;
2285        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
2286        if config_path.is_none() {
2287            if skip_auto_discovery {
2288                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
2289            } else {
2290                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
2291            }
2292        } else {
2293            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
2294        }
2295        let mut sourced_config = SourcedConfig::default();
2296
2297        // 1. Always load user configuration first (unless auto-discovery is disabled)
2298        // User config serves as the base layer that project configs build upon
2299        if !skip_auto_discovery {
2300            let user_config_path = if let Some(dir) = user_config_dir {
2301                Self::user_configuration_path_impl(dir)
2302            } else {
2303                Self::user_configuration_path()
2304            };
2305
2306            if let Some(user_config_path) = user_config_path {
2307                let path_str = user_config_path.display().to_string();
2308                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
2309
2310                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
2311
2312                if filename == "pyproject.toml" {
2313                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
2314                        source: e,
2315                        path: path_str.clone(),
2316                    })?;
2317                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2318                        sourced_config.merge(fragment);
2319                        sourced_config.loaded_files.push(path_str);
2320                    }
2321                } else {
2322                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
2323                        source: e,
2324                        path: path_str.clone(),
2325                    })?;
2326                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::UserConfig)?;
2327                    sourced_config.merge(fragment);
2328                    sourced_config.loaded_files.push(path_str);
2329                }
2330            } else {
2331                log::debug!("[rumdl-config] No user configuration file found");
2332            }
2333        }
2334
2335        // 2. Load explicit config path if provided (overrides user config)
2336        if let Some(path) = config_path {
2337            let path_obj = Path::new(path);
2338            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
2339            log::debug!("[rumdl-config] Trying to load config file: {filename}");
2340            let path_str = path.to_string();
2341
2342            // Find project root by walking up from config location looking for .git
2343            if let Some(config_parent) = path_obj.parent() {
2344                let project_root = Self::find_project_root_from(config_parent);
2345                log::debug!(
2346                    "[rumdl-config] Project root (from explicit config): {}",
2347                    project_root.display()
2348                );
2349                sourced_config.project_root = Some(project_root);
2350            }
2351
2352            // Known markdownlint config files
2353            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
2354
2355            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
2356                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2357                    source: e,
2358                    path: path_str.clone(),
2359                })?;
2360                if filename == "pyproject.toml" {
2361                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2362                        sourced_config.merge(fragment);
2363                        sourced_config.loaded_files.push(path_str.clone());
2364                    }
2365                } else {
2366                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2367                    sourced_config.merge(fragment);
2368                    sourced_config.loaded_files.push(path_str.clone());
2369                }
2370            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
2371                || path_str.ends_with(".json")
2372                || path_str.ends_with(".jsonc")
2373                || path_str.ends_with(".yaml")
2374                || path_str.ends_with(".yml")
2375            {
2376                // Parse as markdownlint config (JSON/YAML)
2377                let fragment = load_from_markdownlint(&path_str)?;
2378                sourced_config.merge(fragment);
2379                sourced_config.loaded_files.push(path_str.clone());
2380                // markdownlint is fallback only
2381            } else {
2382                // Try TOML only
2383                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2384                    source: e,
2385                    path: path_str.clone(),
2386                })?;
2387                let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2388                sourced_config.merge(fragment);
2389                sourced_config.loaded_files.push(path_str.clone());
2390            }
2391        }
2392
2393        // 3. Perform auto-discovery for project config if not skipped AND no explicit config path
2394        if !skip_auto_discovery && config_path.is_none() {
2395            // Look for project configuration files (override user config)
2396            if let Some((config_file, project_root)) = Self::discover_config_upward() {
2397                let path_str = config_file.display().to_string();
2398                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
2399
2400                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
2401                log::debug!("[rumdl-config] Project root: {}", project_root.display());
2402
2403                // Store project root for cache directory resolution
2404                sourced_config.project_root = Some(project_root);
2405
2406                if filename == "pyproject.toml" {
2407                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2408                        source: e,
2409                        path: path_str.clone(),
2410                    })?;
2411                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2412                        sourced_config.merge(fragment);
2413                        sourced_config.loaded_files.push(path_str);
2414                    }
2415                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
2416                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2417                        source: e,
2418                        path: path_str.clone(),
2419                    })?;
2420                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2421                    sourced_config.merge(fragment);
2422                    sourced_config.loaded_files.push(path_str);
2423                }
2424            } else {
2425                log::debug!("[rumdl-config] No configuration file found via upward traversal");
2426
2427                // If no project config found, fallback to markdownlint config via upward traversal
2428                if let Some(config_path) = Self::discover_markdownlint_config_upward() {
2429                    let path_str = config_path.display().to_string();
2430                    match load_from_markdownlint(&path_str) {
2431                        Ok(fragment) => {
2432                            sourced_config.merge(fragment);
2433                            sourced_config.loaded_files.push(path_str);
2434                        }
2435                        Err(_e) => {
2436                            log::debug!("[rumdl-config] Failed to load markdownlint config");
2437                        }
2438                    }
2439                } else {
2440                    log::debug!("[rumdl-config] No markdownlint configuration file found");
2441                }
2442            }
2443        }
2444
2445        // 4. Apply CLI overrides (highest precedence)
2446        if let Some(cli) = cli_overrides {
2447            sourced_config
2448                .global
2449                .enable
2450                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
2451            sourced_config
2452                .global
2453                .disable
2454                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
2455            sourced_config
2456                .global
2457                .exclude
2458                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
2459            sourced_config
2460                .global
2461                .include
2462                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
2463            sourced_config.global.respect_gitignore.merge_override(
2464                cli.respect_gitignore.value,
2465                ConfigSource::Cli,
2466                None,
2467                None,
2468            );
2469            sourced_config
2470                .global
2471                .fixable
2472                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
2473            sourced_config
2474                .global
2475                .unfixable
2476                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
2477            // No rule-specific CLI overrides implemented yet
2478        }
2479
2480        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
2481
2482        Ok(sourced_config)
2483    }
2484
2485    /// Load and merge configurations from files and CLI overrides.
2486    /// If skip_auto_discovery is true, only explicit config paths are loaded.
2487    pub fn load_with_discovery(
2488        config_path: Option<&str>,
2489        cli_overrides: Option<&SourcedGlobalConfig>,
2490        skip_auto_discovery: bool,
2491    ) -> Result<Self, ConfigError> {
2492        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
2493    }
2494
2495    /// Validate the configuration against a rule registry.
2496    ///
2497    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
2498    /// enabling conversion to `Config`. Validation warnings are stored in the config
2499    /// and can be displayed to the user.
2500    ///
2501    /// # Example
2502    ///
2503    /// ```ignore
2504    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
2505    /// let validated = loaded.validate(&registry)?;
2506    /// let config: Config = validated.into();
2507    /// ```
2508    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
2509        let warnings = validate_config_sourced_internal(&self, registry);
2510
2511        Ok(SourcedConfig {
2512            global: self.global,
2513            per_file_ignores: self.per_file_ignores,
2514            rules: self.rules,
2515            loaded_files: self.loaded_files,
2516            unknown_keys: self.unknown_keys,
2517            project_root: self.project_root,
2518            validation_warnings: warnings,
2519            _state: PhantomData,
2520        })
2521    }
2522
2523    /// Validate and convert to Config in one step (convenience method).
2524    ///
2525    /// This combines `validate()` and `into()` for callers who want the
2526    /// validation warnings separately.
2527    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
2528        let validated = self.validate(registry)?;
2529        let warnings = validated.validation_warnings.clone();
2530        Ok((validated.into(), warnings))
2531    }
2532
2533    /// Skip validation and convert directly to ConfigValidated state.
2534    ///
2535    /// # Safety
2536    ///
2537    /// This method bypasses validation. Use only when:
2538    /// - You've already validated via `validate_config_sourced()`
2539    /// - You're in test code that doesn't need validation
2540    /// - You're migrating legacy code and will add proper validation later
2541    ///
2542    /// Prefer `validate()` for new code.
2543    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
2544        SourcedConfig {
2545            global: self.global,
2546            per_file_ignores: self.per_file_ignores,
2547            rules: self.rules,
2548            loaded_files: self.loaded_files,
2549            unknown_keys: self.unknown_keys,
2550            project_root: self.project_root,
2551            validation_warnings: Vec::new(),
2552            _state: PhantomData,
2553        }
2554    }
2555}
2556
2557/// Convert a validated configuration to the final Config type.
2558///
2559/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
2560/// ensuring that validation must occur before conversion.
2561impl From<SourcedConfig<ConfigValidated>> for Config {
2562    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
2563        let mut rules = BTreeMap::new();
2564        for (rule_name, sourced_rule_cfg) in sourced.rules {
2565            // Normalize rule name to uppercase for case-insensitive lookup
2566            let normalized_rule_name = rule_name.to_ascii_uppercase();
2567            let mut values = BTreeMap::new();
2568            for (key, sourced_val) in sourced_rule_cfg.values {
2569                values.insert(key, sourced_val.value);
2570            }
2571            rules.insert(normalized_rule_name, RuleConfig { values });
2572        }
2573        #[allow(deprecated)]
2574        let global = GlobalConfig {
2575            enable: sourced.global.enable.value,
2576            disable: sourced.global.disable.value,
2577            exclude: sourced.global.exclude.value,
2578            include: sourced.global.include.value,
2579            respect_gitignore: sourced.global.respect_gitignore.value,
2580            line_length: sourced.global.line_length.value,
2581            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2582            fixable: sourced.global.fixable.value,
2583            unfixable: sourced.global.unfixable.value,
2584            flavor: sourced.global.flavor.value,
2585            force_exclude: sourced.global.force_exclude.value,
2586            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
2587            cache: sourced.global.cache.value,
2588        };
2589        Config {
2590            global,
2591            per_file_ignores: sourced.per_file_ignores.value,
2592            rules,
2593            project_root: sourced.project_root,
2594        }
2595    }
2596}
2597
/// Registry of all known rules and their config schemas.
///
/// Built by `RuleRegistry::from_rules`; both maps are keyed by the rule name
/// after it has been passed through `normalize_key`.
pub struct RuleRegistry {
    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types.
    /// The table is the rule's default config section; rules that provide none get an empty table.
    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
    /// Map of rule name to config key aliases (alias key -> canonical schema key).
    /// Only rules that provide aliases have an entry.
    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
}
2605
2606impl RuleRegistry {
2607    /// Build a registry from a list of rules
2608    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2609        let mut rule_schemas = std::collections::BTreeMap::new();
2610        let mut rule_aliases = std::collections::BTreeMap::new();
2611
2612        for rule in rules {
2613            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2614                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2615                rule_schemas.insert(norm_name.clone(), table);
2616                norm_name
2617            } else {
2618                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2619                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2620                norm_name
2621            };
2622
2623            // Store aliases if the rule provides them
2624            if let Some(aliases) = rule.config_aliases() {
2625                rule_aliases.insert(norm_name, aliases);
2626            }
2627        }
2628
2629        RuleRegistry {
2630            rule_schemas,
2631            rule_aliases,
2632        }
2633    }
2634
2635    /// Get all known rule names
2636    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2637        self.rule_schemas.keys().cloned().collect()
2638    }
2639
2640    /// Get the valid configuration keys for a rule, including both original and normalized variants
2641    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2642        self.rule_schemas.get(rule).map(|schema| {
2643            let mut all_keys = std::collections::BTreeSet::new();
2644
2645            // Add original keys from schema
2646            for key in schema.keys() {
2647                all_keys.insert(key.clone());
2648            }
2649
2650            // Add normalized variants for markdownlint compatibility
2651            for key in schema.keys() {
2652                // Add kebab-case variant
2653                all_keys.insert(key.replace('_', "-"));
2654                // Add snake_case variant
2655                all_keys.insert(key.replace('-', "_"));
2656                // Add normalized variant
2657                all_keys.insert(normalize_key(key));
2658            }
2659
2660            // Add any aliases defined by the rule
2661            if let Some(aliases) = self.rule_aliases.get(rule) {
2662                for alias_key in aliases.keys() {
2663                    all_keys.insert(alias_key.clone());
2664                    // Also add normalized variants of the alias
2665                    all_keys.insert(alias_key.replace('_', "-"));
2666                    all_keys.insert(alias_key.replace('-', "_"));
2667                    all_keys.insert(normalize_key(alias_key));
2668                }
2669            }
2670
2671            all_keys
2672        })
2673    }
2674
2675    /// Get the expected value type for a rule's configuration key, trying variants
2676    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2677        if let Some(schema) = self.rule_schemas.get(rule) {
2678            // Check if this key is an alias
2679            if let Some(aliases) = self.rule_aliases.get(rule)
2680                && let Some(canonical_key) = aliases.get(key)
2681            {
2682                // Use the canonical key for schema lookup
2683                if let Some(value) = schema.get(canonical_key) {
2684                    return Some(value);
2685                }
2686            }
2687
2688            // Try the original key
2689            if let Some(value) = schema.get(key) {
2690                return Some(value);
2691            }
2692
2693            // Try key variants
2694            let key_variants = [
2695                key.replace('-', "_"), // Convert kebab-case to snake_case
2696                key.replace('_', "-"), // Convert snake_case to kebab-case
2697                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2698            ];
2699
2700            for variant in &key_variants {
2701                if let Some(value) = schema.get(variant) {
2702                    return Some(value);
2703                }
2704            }
2705        }
2706        None
2707    }
2708}
2709
/// Represents a config validation warning or error
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem.
    pub message: String,
    /// Name of the rule the warning is attached to, if any.
    pub rule: Option<String>,
    /// Name of the configuration key the warning is attached to, if any.
    pub key: Option<String>,
}
2717
2718/// Internal validation function that works with any SourcedConfig state.
2719/// This is used by both the public `validate_config_sourced` and the typestate `validate()` method.
2720fn validate_config_sourced_internal<S>(
2721    sourced: &SourcedConfig<S>,
2722    registry: &RuleRegistry,
2723) -> Vec<ConfigValidationWarning> {
2724    validate_config_sourced_impl(&sourced.rules, &sourced.unknown_keys, registry)
2725}
2726
2727/// Core validation implementation that doesn't depend on SourcedConfig type parameter.
2728fn validate_config_sourced_impl(
2729    rules: &BTreeMap<String, SourcedRuleConfig>,
2730    unknown_keys: &[(String, String, Option<String>)],
2731    registry: &RuleRegistry,
2732) -> Vec<ConfigValidationWarning> {
2733    let mut warnings = Vec::new();
2734    let known_rules = registry.rule_names();
2735    // 1. Unknown rules
2736    for rule in rules.keys() {
2737        if !known_rules.contains(rule) {
2738            warnings.push(ConfigValidationWarning {
2739                message: format!("Unknown rule in config: {rule}"),
2740                rule: Some(rule.clone()),
2741                key: None,
2742            });
2743        }
2744    }
2745    // 2. Unknown options and type mismatches
2746    for (rule, rule_cfg) in rules {
2747        if let Some(valid_keys) = registry.config_keys_for(rule) {
2748            for key in rule_cfg.values.keys() {
2749                if !valid_keys.contains(key) {
2750                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2751                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2752                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2753                    } else {
2754                        format!("Unknown option for rule {rule}: {key}")
2755                    };
2756                    warnings.push(ConfigValidationWarning {
2757                        message,
2758                        rule: Some(rule.clone()),
2759                        key: Some(key.clone()),
2760                    });
2761                } else {
2762                    // Type check: compare type of value to type of default
2763                    if let Some(expected) = registry.expected_value_for(rule, key) {
2764                        let actual = &rule_cfg.values[key].value;
2765                        if !toml_value_type_matches(expected, actual) {
2766                            warnings.push(ConfigValidationWarning {
2767                                message: format!(
2768                                    "Type mismatch for {}.{}: expected {}, got {}",
2769                                    rule,
2770                                    key,
2771                                    toml_type_name(expected),
2772                                    toml_type_name(actual)
2773                                ),
2774                                rule: Some(rule.clone()),
2775                                key: Some(key.clone()),
2776                            });
2777                        }
2778                    }
2779                }
2780            }
2781        }
2782    }
2783    // 3. Unknown global options (from unknown_keys)
2784    let known_global_keys = vec![
2785        "enable".to_string(),
2786        "disable".to_string(),
2787        "include".to_string(),
2788        "exclude".to_string(),
2789        "respect-gitignore".to_string(),
2790        "line-length".to_string(),
2791        "fixable".to_string(),
2792        "unfixable".to_string(),
2793        "flavor".to_string(),
2794        "force-exclude".to_string(),
2795        "output-format".to_string(),
2796        "cache-dir".to_string(),
2797        "cache".to_string(),
2798    ];
2799
2800    for (section, key, file_path) in unknown_keys {
2801        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2802            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2803                if let Some(path) = file_path {
2804                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2805                } else {
2806                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
2807                }
2808            } else if let Some(path) = file_path {
2809                format!("Unknown global option in {path}: {key}")
2810            } else {
2811                format!("Unknown global option: {key}")
2812            };
2813            warnings.push(ConfigValidationWarning {
2814                message,
2815                rule: None,
2816                key: Some(key.clone()),
2817            });
2818        } else if !key.is_empty() {
2819            // This is an unknown rule section (key is empty means it's a section header)
2820            // No suggestions for rule names - just warn
2821            continue;
2822        } else {
2823            // Unknown rule section
2824            let message = if let Some(path) = file_path {
2825                format!(
2826                    "Unknown rule in {path}: {}",
2827                    section.trim_matches(|c| c == '[' || c == ']')
2828                )
2829            } else {
2830                format!(
2831                    "Unknown rule in config: {}",
2832                    section.trim_matches(|c| c == '[' || c == ']')
2833                )
2834            };
2835            warnings.push(ConfigValidationWarning {
2836                message,
2837                rule: None,
2838                key: None,
2839            });
2840        }
2841    }
2842    warnings
2843}
2844
2845/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking.
2846///
2847/// This is the legacy API that works with `SourcedConfig<ConfigLoaded>`.
2848/// For new code, prefer using `sourced.validate(&registry)` which returns a
2849/// `SourcedConfig<ConfigValidated>` that can be converted to `Config`.
2850pub fn validate_config_sourced(
2851    sourced: &SourcedConfig<ConfigLoaded>,
2852    registry: &RuleRegistry,
2853) -> Vec<ConfigValidationWarning> {
2854    validate_config_sourced_internal(sourced, registry)
2855}
2856
2857/// Validate a config that has already been validated (no-op, returns stored warnings).
2858///
2859/// This exists for API consistency - validated configs already have their warnings stored.
2860pub fn validate_config_sourced_validated(
2861    sourced: &SourcedConfig<ConfigValidated>,
2862    _registry: &RuleRegistry,
2863) -> Vec<ConfigValidationWarning> {
2864    sourced.validation_warnings.clone()
2865}
2866
2867fn toml_type_name(val: &toml::Value) -> &'static str {
2868    match val {
2869        toml::Value::String(_) => "string",
2870        toml::Value::Integer(_) => "integer",
2871        toml::Value::Float(_) => "float",
2872        toml::Value::Boolean(_) => "boolean",
2873        toml::Value::Array(_) => "array",
2874        toml::Value::Table(_) => "table",
2875        toml::Value::Datetime(_) => "datetime",
2876    }
2877}
2878
/// Calculate the Levenshtein (edit) distance between two strings.
///
/// The distance is measured in Unicode scalar values (`char`s), not bytes, so
/// non-ASCII input is handled correctly. Uses the two-row dynamic programming
/// formulation: `prev_row[j]` holds the distance between the already processed
/// prefix of `s1` and the first `j` characters of `s2`.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();

    // Lengths must come from the char vectors: `str::len` counts bytes and would
    // overrun the vectors for multi-byte characters.
    let len1 = s1_chars.len();
    let len2 = s2_chars.len();

    if len1 == 0 {
        return len2;
    }
    if len2 == 0 {
        return len1;
    }

    let mut prev_row: Vec<usize> = (0..=len2).collect();
    let mut curr_row = vec![0; len2 + 1];

    for (i, c1) in s1_chars.iter().enumerate() {
        curr_row[0] = i + 1;
        for (j, c2) in s2_chars.iter().enumerate() {
            let cost = usize::from(c1 != c2);
            curr_row[j + 1] = (prev_row[j + 1] + 1) // deletion
                .min(curr_row[j] + 1) // insertion
                .min(prev_row[j] + cost); // substitution
        }
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    prev_row[len2]
}
2910
2911/// Suggest a similar key from a list of valid keys using fuzzy matching
2912fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
2913    let unknown_lower = unknown.to_lowercase();
2914    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
2915
2916    let mut best_match: Option<(String, usize)> = None;
2917
2918    for valid in valid_keys {
2919        let valid_lower = valid.to_lowercase();
2920        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
2921
2922        if distance <= max_distance {
2923            if let Some((_, best_dist)) = &best_match {
2924                if distance < *best_dist {
2925                    best_match = Some((valid.clone(), distance));
2926                }
2927            } else {
2928                best_match = Some((valid.clone(), distance));
2929            }
2930        }
2931    }
2932
2933    best_match.map(|(key, _)| key)
2934}
2935
2936fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2937    use toml::Value::*;
2938    match (expected, actual) {
2939        (String(_), String(_)) => true,
2940        (Integer(_), Integer(_)) => true,
2941        (Float(_), Float(_)) => true,
2942        (Boolean(_), Boolean(_)) => true,
2943        (Array(_), Array(_)) => true,
2944        (Table(_), Table(_)) => true,
2945        (Datetime(_), Datetime(_)) => true,
2946        // Allow integer for float
2947        (Float(_), Integer(_)) => true,
2948        _ => false,
2949    }
2950}
2951
2952/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2953fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2954    let doc: toml::Value =
2955        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2956    let mut fragment = SourcedConfigFragment::default();
2957    let source = ConfigSource::PyprojectToml;
2958    let file = Some(path.to_string());
2959
2960    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2961    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2962        && let Some(rumdl_table) = rumdl_config.as_table()
2963    {
2964        // Helper function to extract global config from a table
2965        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2966            // Extract global options from the given table
2967            if let Some(enable) = table.get("enable")
2968                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2969            {
2970                // Normalize rule names in the list
2971                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2972                fragment
2973                    .global
2974                    .enable
2975                    .push_override(normalized_values, source, file.clone(), None);
2976            }
2977
2978            if let Some(disable) = table.get("disable")
2979                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2980            {
2981                // Re-enable normalization
2982                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2983                fragment
2984                    .global
2985                    .disable
2986                    .push_override(normalized_values, source, file.clone(), None);
2987            }
2988
2989            if let Some(include) = table.get("include")
2990                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2991            {
2992                fragment
2993                    .global
2994                    .include
2995                    .push_override(values, source, file.clone(), None);
2996            }
2997
2998            if let Some(exclude) = table.get("exclude")
2999                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
3000            {
3001                fragment
3002                    .global
3003                    .exclude
3004                    .push_override(values, source, file.clone(), None);
3005            }
3006
3007            if let Some(respect_gitignore) = table
3008                .get("respect-gitignore")
3009                .or_else(|| table.get("respect_gitignore"))
3010                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
3011            {
3012                fragment
3013                    .global
3014                    .respect_gitignore
3015                    .push_override(value, source, file.clone(), None);
3016            }
3017
3018            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
3019                && let Ok(value) = bool::deserialize(force_exclude.clone())
3020            {
3021                fragment
3022                    .global
3023                    .force_exclude
3024                    .push_override(value, source, file.clone(), None);
3025            }
3026
3027            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
3028                && let Ok(value) = String::deserialize(output_format.clone())
3029            {
3030                if fragment.global.output_format.is_none() {
3031                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
3032                } else {
3033                    fragment
3034                        .global
3035                        .output_format
3036                        .as_mut()
3037                        .unwrap()
3038                        .push_override(value, source, file.clone(), None);
3039                }
3040            }
3041
3042            if let Some(fixable) = table.get("fixable")
3043                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
3044            {
3045                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
3046                fragment
3047                    .global
3048                    .fixable
3049                    .push_override(normalized_values, source, file.clone(), None);
3050            }
3051
3052            if let Some(unfixable) = table.get("unfixable")
3053                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
3054            {
3055                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
3056                fragment
3057                    .global
3058                    .unfixable
3059                    .push_override(normalized_values, source, file.clone(), None);
3060            }
3061
3062            if let Some(flavor) = table.get("flavor")
3063                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
3064            {
3065                fragment.global.flavor.push_override(value, source, file.clone(), None);
3066            }
3067
3068            // Handle line-length special case - this should set the global line_length
3069            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
3070                && let Ok(value) = u64::deserialize(line_length.clone())
3071            {
3072                fragment
3073                    .global
3074                    .line_length
3075                    .push_override(LineLength::new(value as usize), source, file.clone(), None);
3076
3077                // Also add to MD013 rule config for backward compatibility
3078                let norm_md013_key = normalize_key("MD013");
3079                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
3080                let norm_line_length_key = normalize_key("line-length");
3081                let sv = rule_entry
3082                    .values
3083                    .entry(norm_line_length_key)
3084                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
3085                sv.push_override(line_length.clone(), source, file.clone(), None);
3086            }
3087
3088            if let Some(cache_dir) = table.get("cache-dir").or_else(|| table.get("cache_dir"))
3089                && let Ok(value) = String::deserialize(cache_dir.clone())
3090            {
3091                if fragment.global.cache_dir.is_none() {
3092                    fragment.global.cache_dir = Some(SourcedValue::new(value.clone(), source));
3093                } else {
3094                    fragment
3095                        .global
3096                        .cache_dir
3097                        .as_mut()
3098                        .unwrap()
3099                        .push_override(value, source, file.clone(), None);
3100                }
3101            }
3102
3103            if let Some(cache) = table.get("cache")
3104                && let Ok(value) = bool::deserialize(cache.clone())
3105            {
3106                fragment.global.cache.push_override(value, source, file.clone(), None);
3107            }
3108        };
3109
3110        // First, check for [tool.rumdl.global] section
3111        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
3112            extract_global_config(&mut fragment, global_table);
3113        }
3114
3115        // Also extract global options from [tool.rumdl] directly (for flat structure)
3116        extract_global_config(&mut fragment, rumdl_table);
3117
3118        // --- Extract per-file-ignores configurations ---
3119        // Check both hyphenated and underscored versions for compatibility
3120        let per_file_ignores_key = rumdl_table
3121            .get("per-file-ignores")
3122            .or_else(|| rumdl_table.get("per_file_ignores"));
3123
3124        if let Some(per_file_ignores_value) = per_file_ignores_key
3125            && let Some(per_file_table) = per_file_ignores_value.as_table()
3126        {
3127            let mut per_file_map = HashMap::new();
3128            for (pattern, rules_value) in per_file_table {
3129                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
3130                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
3131                    per_file_map.insert(pattern.clone(), normalized_rules);
3132                } else {
3133                    log::warn!(
3134                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
3135                    );
3136                }
3137            }
3138            fragment
3139                .per_file_ignores
3140                .push_override(per_file_map, source, file.clone(), None);
3141        }
3142
3143        // --- Extract rule-specific configurations ---
3144        for (key, value) in rumdl_table {
3145            let norm_rule_key = normalize_key(key);
3146
3147            // Skip keys already handled as global or special cases
3148            if [
3149                "enable",
3150                "disable",
3151                "include",
3152                "exclude",
3153                "respect_gitignore",
3154                "respect-gitignore", // Added kebab-case here too
3155                "force_exclude",
3156                "force-exclude",
3157                "line_length",
3158                "line-length",
3159                "output_format",
3160                "output-format",
3161                "fixable",
3162                "unfixable",
3163                "per-file-ignores",
3164                "per_file_ignores",
3165                "global",
3166                "flavor",
3167                "cache_dir",
3168                "cache-dir",
3169                "cache",
3170            ]
3171            .contains(&norm_rule_key.as_str())
3172            {
3173                continue;
3174            }
3175
3176            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
3177            // AND if the value is actually a TOML table before processing as rule config.
3178            // This prevents misinterpreting other top-level keys under [tool.rumdl]
3179            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
3180            if norm_rule_key_upper.len() == 5
3181                && norm_rule_key_upper.starts_with("MD")
3182                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
3183                && value.is_table()
3184            {
3185                if let Some(rule_config_table) = value.as_table() {
3186                    // Get the entry for this rule (e.g., "md013")
3187                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
3188                    for (rk, rv) in rule_config_table {
3189                        let norm_rk = normalize_key(rk); // Normalize the config key itself
3190
3191                        let toml_val = rv.clone();
3192
3193                        let sv = rule_entry
3194                            .values
3195                            .entry(norm_rk.clone())
3196                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3197                        sv.push_override(toml_val, source, file.clone(), None);
3198                    }
3199                }
3200            } else {
3201                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
3202                // Track unknown keys under [tool.rumdl] for validation
3203                fragment
3204                    .unknown_keys
3205                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
3206            }
3207        }
3208    }
3209
3210    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
3211    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
3212        for (key, value) in tool_table.iter() {
3213            if let Some(rule_name) = key.strip_prefix("rumdl.") {
3214                let norm_rule_name = normalize_key(rule_name);
3215                if norm_rule_name.len() == 5
3216                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
3217                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
3218                    && let Some(rule_table) = value.as_table()
3219                {
3220                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
3221                    for (rk, rv) in rule_table {
3222                        let norm_rk = normalize_key(rk);
3223                        let toml_val = rv.clone();
3224                        let sv = rule_entry
3225                            .values
3226                            .entry(norm_rk.clone())
3227                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
3228                        sv.push_override(toml_val, source, file.clone(), None);
3229                    }
3230                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
3231                    // Track unknown rule sections like [tool.rumdl.MD999]
3232                    fragment.unknown_keys.push((
3233                        format!("[tool.rumdl.{rule_name}]"),
3234                        String::new(),
3235                        Some(path.to_string()),
3236                    ));
3237                }
3238            }
3239        }
3240    }
3241
3242    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
3243    if let Some(doc_table) = doc.as_table() {
3244        for (key, value) in doc_table.iter() {
3245            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
3246                let norm_rule_name = normalize_key(rule_name);
3247                if norm_rule_name.len() == 5
3248                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
3249                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
3250                    && let Some(rule_table) = value.as_table()
3251                {
3252                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
3253                    for (rk, rv) in rule_table {
3254                        let norm_rk = normalize_key(rk);
3255                        let toml_val = rv.clone();
3256                        let sv = rule_entry
3257                            .values
3258                            .entry(norm_rk.clone())
3259                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
3260                        sv.push_override(toml_val, source, file.clone(), None);
3261                    }
3262                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
3263                    // Track unknown rule sections like [tool.rumdl.MD999]
3264                    fragment.unknown_keys.push((
3265                        format!("[tool.rumdl.{rule_name}]"),
3266                        String::new(),
3267                        Some(path.to_string()),
3268                    ));
3269                }
3270            }
3271        }
3272    }
3273
3274    // Only return Some(fragment) if any config was found
3275    let has_any = !fragment.global.enable.value.is_empty()
3276        || !fragment.global.disable.value.is_empty()
3277        || !fragment.global.include.value.is_empty()
3278        || !fragment.global.exclude.value.is_empty()
3279        || !fragment.global.fixable.value.is_empty()
3280        || !fragment.global.unfixable.value.is_empty()
3281        || fragment.global.output_format.is_some()
3282        || fragment.global.cache_dir.is_some()
3283        || !fragment.global.cache.value
3284        || !fragment.per_file_ignores.value.is_empty()
3285        || !fragment.rules.is_empty();
3286    if has_any { Ok(Some(fragment)) } else { Ok(None) }
3287}
3288
3289/// Parses rumdl.toml / .rumdl.toml content.
3290fn parse_rumdl_toml(content: &str, path: &str, source: ConfigSource) -> Result<SourcedConfigFragment, ConfigError> {
3291    let doc = content
3292        .parse::<DocumentMut>()
3293        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
3294    let mut fragment = SourcedConfigFragment::default();
3295    // source parameter provided by caller
3296    let file = Some(path.to_string());
3297
3298    // Define known rules before the loop
3299    let all_rules = rules::all_rules(&Config::default());
3300    let registry = RuleRegistry::from_rules(&all_rules);
3301    let known_rule_names: BTreeSet<String> = registry
3302        .rule_names()
3303        .into_iter()
3304        .map(|s| s.to_ascii_uppercase())
3305        .collect();
3306
3307    // Handle [global] section
3308    if let Some(global_item) = doc.get("global")
3309        && let Some(global_table) = global_item.as_table()
3310    {
3311        for (key, value_item) in global_table.iter() {
3312            let norm_key = normalize_key(key);
3313            match norm_key.as_str() {
3314                "enable" | "disable" | "include" | "exclude" => {
3315                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3316                        // Corrected: Iterate directly over the Formatted<Array>
3317                        let values: Vec<String> = formatted_array
3318                                .iter()
3319                                .filter_map(|item| item.as_str()) // Extract strings
3320                                .map(|s| s.to_string())
3321                                .collect();
3322
3323                        // Normalize rule names for enable/disable
3324                        let final_values = if norm_key == "enable" || norm_key == "disable" {
3325                            // Corrected: Pass &str to normalize_key
3326                            values.into_iter().map(|s| normalize_key(&s)).collect()
3327                        } else {
3328                            values
3329                        };
3330
3331                        match norm_key.as_str() {
3332                            "enable" => fragment
3333                                .global
3334                                .enable
3335                                .push_override(final_values, source, file.clone(), None),
3336                            "disable" => {
3337                                fragment
3338                                    .global
3339                                    .disable
3340                                    .push_override(final_values, source, file.clone(), None)
3341                            }
3342                            "include" => {
3343                                fragment
3344                                    .global
3345                                    .include
3346                                    .push_override(final_values, source, file.clone(), None)
3347                            }
3348                            "exclude" => {
3349                                fragment
3350                                    .global
3351                                    .exclude
3352                                    .push_override(final_values, source, file.clone(), None)
3353                            }
3354                            _ => unreachable!("Outer match guarantees only enable/disable/include/exclude"),
3355                        }
3356                    } else {
3357                        log::warn!(
3358                            "[WARN] Expected array for global key '{}' in {}, found {}",
3359                            key,
3360                            path,
3361                            value_item.type_name()
3362                        );
3363                    }
3364                }
3365                "respect_gitignore" | "respect-gitignore" => {
3366                    // Handle both cases
3367                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
3368                        let val = *formatted_bool.value();
3369                        fragment
3370                            .global
3371                            .respect_gitignore
3372                            .push_override(val, source, file.clone(), None);
3373                    } else {
3374                        log::warn!(
3375                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3376                            key,
3377                            path,
3378                            value_item.type_name()
3379                        );
3380                    }
3381                }
3382                "force_exclude" | "force-exclude" => {
3383                    // Handle both cases
3384                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
3385                        let val = *formatted_bool.value();
3386                        fragment
3387                            .global
3388                            .force_exclude
3389                            .push_override(val, source, file.clone(), None);
3390                    } else {
3391                        log::warn!(
3392                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3393                            key,
3394                            path,
3395                            value_item.type_name()
3396                        );
3397                    }
3398                }
3399                "line_length" | "line-length" => {
3400                    // Handle both cases
3401                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
3402                        let val = LineLength::new(*formatted_int.value() as usize);
3403                        fragment
3404                            .global
3405                            .line_length
3406                            .push_override(val, source, file.clone(), None);
3407                    } else {
3408                        log::warn!(
3409                            "[WARN] Expected integer for global key '{}' in {}, found {}",
3410                            key,
3411                            path,
3412                            value_item.type_name()
3413                        );
3414                    }
3415                }
3416                "output_format" | "output-format" => {
3417                    // Handle both cases
3418                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3419                        let val = formatted_string.value().clone();
3420                        if fragment.global.output_format.is_none() {
3421                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
3422                        } else {
3423                            fragment.global.output_format.as_mut().unwrap().push_override(
3424                                val,
3425                                source,
3426                                file.clone(),
3427                                None,
3428                            );
3429                        }
3430                    } else {
3431                        log::warn!(
3432                            "[WARN] Expected string for global key '{}' in {}, found {}",
3433                            key,
3434                            path,
3435                            value_item.type_name()
3436                        );
3437                    }
3438                }
3439                "cache_dir" | "cache-dir" => {
3440                    // Handle both cases
3441                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3442                        let val = formatted_string.value().clone();
3443                        if fragment.global.cache_dir.is_none() {
3444                            fragment.global.cache_dir = Some(SourcedValue::new(val.clone(), source));
3445                        } else {
3446                            fragment
3447                                .global
3448                                .cache_dir
3449                                .as_mut()
3450                                .unwrap()
3451                                .push_override(val, source, file.clone(), None);
3452                        }
3453                    } else {
3454                        log::warn!(
3455                            "[WARN] Expected string for global key '{}' in {}, found {}",
3456                            key,
3457                            path,
3458                            value_item.type_name()
3459                        );
3460                    }
3461                }
3462                "cache" => {
3463                    if let Some(toml_edit::Value::Boolean(b)) = value_item.as_value() {
3464                        let val = *b.value();
3465                        fragment.global.cache.push_override(val, source, file.clone(), None);
3466                    } else {
3467                        log::warn!(
3468                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3469                            key,
3470                            path,
3471                            value_item.type_name()
3472                        );
3473                    }
3474                }
3475                "fixable" => {
3476                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3477                        let values: Vec<String> = formatted_array
3478                            .iter()
3479                            .filter_map(|item| item.as_str())
3480                            .map(normalize_key)
3481                            .collect();
3482                        fragment
3483                            .global
3484                            .fixable
3485                            .push_override(values, source, file.clone(), None);
3486                    } else {
3487                        log::warn!(
3488                            "[WARN] Expected array for global key '{}' in {}, found {}",
3489                            key,
3490                            path,
3491                            value_item.type_name()
3492                        );
3493                    }
3494                }
3495                "unfixable" => {
3496                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3497                        let values: Vec<String> = formatted_array
3498                            .iter()
3499                            .filter_map(|item| item.as_str())
3500                            .map(normalize_key)
3501                            .collect();
3502                        fragment
3503                            .global
3504                            .unfixable
3505                            .push_override(values, source, file.clone(), None);
3506                    } else {
3507                        log::warn!(
3508                            "[WARN] Expected array for global key '{}' in {}, found {}",
3509                            key,
3510                            path,
3511                            value_item.type_name()
3512                        );
3513                    }
3514                }
3515                "flavor" => {
3516                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3517                        let val = formatted_string.value();
3518                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
3519                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
3520                        } else {
3521                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
3522                        }
3523                    } else {
3524                        log::warn!(
3525                            "[WARN] Expected string for global key '{}' in {}, found {}",
3526                            key,
3527                            path,
3528                            value_item.type_name()
3529                        );
3530                    }
3531                }
3532                _ => {
3533                    // Track unknown global keys for validation
3534                    fragment
3535                        .unknown_keys
3536                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
3537                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
3538                }
3539            }
3540        }
3541    }
3542
3543    // Handle [per-file-ignores] section
3544    if let Some(per_file_item) = doc.get("per-file-ignores")
3545        && let Some(per_file_table) = per_file_item.as_table()
3546    {
3547        let mut per_file_map = HashMap::new();
3548        for (pattern, value_item) in per_file_table.iter() {
3549            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3550                let rules: Vec<String> = formatted_array
3551                    .iter()
3552                    .filter_map(|item| item.as_str())
3553                    .map(normalize_key)
3554                    .collect();
3555                per_file_map.insert(pattern.to_string(), rules);
3556            } else {
3557                let type_name = value_item.type_name();
3558                log::warn!(
3559                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
3560                );
3561            }
3562        }
3563        fragment
3564            .per_file_ignores
3565            .push_override(per_file_map, source, file.clone(), None);
3566    }
3567
3568    // Rule-specific: all other top-level tables
3569    for (key, item) in doc.iter() {
3570        let norm_rule_name = key.to_ascii_uppercase();
3571
3572        // Skip known special sections
3573        if key == "global" || key == "per-file-ignores" {
3574            continue;
3575        }
3576
3577        // Track unknown rule sections (like [MD999])
3578        if !known_rule_names.contains(&norm_rule_name) {
3579            // Only track if it looks like a rule section (starts with MD or is uppercase)
3580            if norm_rule_name.starts_with("MD") || key.chars().all(|c| c.is_uppercase() || c.is_numeric()) {
3581                fragment
3582                    .unknown_keys
3583                    .push((format!("[{key}]"), String::new(), Some(path.to_string())));
3584            }
3585            continue;
3586        }
3587
3588        if let Some(tbl) = item.as_table() {
3589            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
3590            for (rk, rv_item) in tbl.iter() {
3591                let norm_rk = normalize_key(rk);
3592                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
3593                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
3594                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
3595                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
3596                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
3597                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
3598                    Some(toml_edit::Value::Array(formatted_array)) => {
3599                        // Convert toml_edit Array to toml::Value::Array
3600                        let mut values = Vec::new();
3601                        for item in formatted_array.iter() {
3602                            match item {
3603                                toml_edit::Value::String(formatted) => {
3604                                    values.push(toml::Value::String(formatted.value().clone()))
3605                                }
3606                                toml_edit::Value::Integer(formatted) => {
3607                                    values.push(toml::Value::Integer(*formatted.value()))
3608                                }
3609                                toml_edit::Value::Float(formatted) => {
3610                                    values.push(toml::Value::Float(*formatted.value()))
3611                                }
3612                                toml_edit::Value::Boolean(formatted) => {
3613                                    values.push(toml::Value::Boolean(*formatted.value()))
3614                                }
3615                                toml_edit::Value::Datetime(formatted) => {
3616                                    values.push(toml::Value::Datetime(*formatted.value()))
3617                                }
3618                                _ => {
3619                                    log::warn!(
3620                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
3621                                    );
3622                                }
3623                            }
3624                        }
3625                        Some(toml::Value::Array(values))
3626                    }
3627                    Some(toml_edit::Value::InlineTable(_)) => {
3628                        log::warn!(
3629                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
3630                        );
3631                        None
3632                    }
3633                    None => {
3634                        log::warn!(
3635                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
3636                        );
3637                        None
3638                    }
3639                };
3640                if let Some(toml_val) = maybe_toml_val {
3641                    let sv = rule_entry
3642                        .values
3643                        .entry(norm_rk.clone())
3644                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3645                    sv.push_override(toml_val, source, file.clone(), None);
3646                }
3647            }
3648        } else if item.is_value() {
3649            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
3650        }
3651    }
3652
3653    Ok(fragment)
3654}
3655
3656/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
3657fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
3658    // Use the unified loader from markdownlint_config.rs
3659    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
3660        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
3661    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
3662}
3663
// Test-only module; its source lives in the sibling file named by `#[path]`.
#[cfg(test)]
#[path = "config_intelligent_merge_tests.rs"]
mod config_intelligent_merge_tests;