Skip to main content

rumdl_lib/config/
loading.rs

1use indexmap::IndexSet;
2use std::collections::BTreeMap;
3use std::marker::PhantomData;
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, OnceLock};
6
7use super::flavor::ConfigLoaded;
8use super::flavor::ConfigValidated;
9use super::parsers;
10use super::registry::RuleRegistry;
11use super::source_tracking::{
12    ConfigSource, ConfigValidationWarning, SourcedConfig, SourcedConfigFragment, SourcedGlobalConfig, SourcedValue,
13};
14use super::types::{Config, ConfigError, GlobalConfig, MARKDOWNLINT_CONFIG_FILES, RUMDL_CONFIG_FILES, RuleConfig};
15use super::validation::validate_config_sourced_internal;
16
17/// Maximum depth for extends chains to prevent runaway recursion
18const MAX_EXTENDS_DEPTH: usize = 10;
19
/// Turn the raw `extends` string found in a config file into a filesystem path.
///
/// Resolution rules, checked in this order:
/// - A value starting with `~/` is joined onto the user's home directory when the
///   `native` feature is enabled. If the home directory cannot be determined the
///   literal `~` is used as the base. Without the `native` feature the value is
///   returned unchanged.
/// - An absolute path is returned as-is.
/// - Anything else is joined onto the directory containing `config_file_path`
///   (falling back to `.` when that path has no parent).
fn resolve_extends_path(extends_value: &str, config_file_path: &Path) -> PathBuf {
    let Some(suffix) = extends_value.strip_prefix("~/") else {
        let candidate = Path::new(extends_value);
        if candidate.is_absolute() {
            return candidate.to_path_buf();
        }
        // Relative values are anchored at the directory of the referencing config file.
        return match config_file_path.parent() {
            Some(config_dir) => config_dir.join(candidate),
            None => Path::new(".").join(candidate),
        };
    };

    // Home-directory expansion is only available in native builds.
    #[cfg(feature = "native")]
    {
        use etcetera::{BaseStrategy, choose_base_strategy};
        match choose_base_strategy() {
            Ok(strategy) => strategy.home_dir().join(suffix),
            Err(_) => PathBuf::from("~").join(suffix),
        }
    }
    #[cfg(not(feature = "native"))]
    {
        let _ = suffix;
        PathBuf::from(extends_value)
    }
}
50
51/// Determine ConfigSource from a config filename.
52fn source_from_filename(filename: &str) -> ConfigSource {
53    if filename == "pyproject.toml" {
54        ConfigSource::PyprojectToml
55    } else {
56        ConfigSource::ProjectConfig
57    }
58}
59
60/// Load a config file (and any base configs it extends) into a SourcedConfig.
61///
62/// This function handles the recursive `extends` chain:
63/// 1. Parse the config file into a fragment
64/// 2. If the fragment has `extends`, recursively load the base config first
65/// 3. Merge the base config, then merge this fragment on top
66fn load_config_with_extends(
67    sourced_config: &mut SourcedConfig<ConfigLoaded>,
68    config_file_path: &Path,
69    visited: &mut IndexSet<PathBuf>,
70    chain_source: ConfigSource,
71) -> Result<(), ConfigError> {
72    // Canonicalize the path for circular reference detection
73    let canonical = config_file_path
74        .canonicalize()
75        .unwrap_or_else(|_| config_file_path.to_path_buf());
76
77    // Check for circular references
78    if visited.contains(&canonical) {
79        let chain: Vec<String> = visited.iter().map(|p| p.display().to_string()).collect();
80        return Err(ConfigError::CircularExtends {
81            path: config_file_path.display().to_string(),
82            chain,
83        });
84    }
85
86    // Check depth limit
87    if visited.len() >= MAX_EXTENDS_DEPTH {
88        return Err(ConfigError::ExtendsDepthExceeded {
89            path: config_file_path.display().to_string(),
90            max_depth: MAX_EXTENDS_DEPTH,
91        });
92    }
93
94    // Mark as visited
95    visited.insert(canonical);
96
97    let path_str = config_file_path.display().to_string();
98    let filename = config_file_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
99
100    // Read and parse the config file
101    let content = std::fs::read_to_string(config_file_path).map_err(|e| ConfigError::IoError {
102        source: e,
103        path: path_str.clone(),
104    })?;
105
106    let fragment = if filename == "pyproject.toml" {
107        match parsers::parse_pyproject_toml(&content, &path_str, chain_source)? {
108            Some(f) => f,
109            None => return Ok(()), // No [tool.rumdl] section
110        }
111    } else {
112        parsers::parse_rumdl_toml(&content, &path_str, chain_source)?
113    };
114
115    // If this fragment has `extends`, load the base config first
116    if let Some(ref extends_value) = fragment.extends {
117        let base_path = resolve_extends_path(extends_value, config_file_path);
118
119        if !base_path.exists() {
120            return Err(ConfigError::ExtendsNotFound {
121                path: base_path.display().to_string(),
122                from: path_str.clone(),
123            });
124        }
125
126        log::debug!(
127            "[rumdl-config] Config {} extends {}, loading base first",
128            path_str,
129            base_path.display()
130        );
131
132        // Recursively load the base config
133        load_config_with_extends(sourced_config, &base_path, visited, chain_source)?;
134    }
135
136    // Merge this fragment on top (base config was already merged if present)
137    // Strip the `extends` field since it's been consumed
138    let mut fragment_for_merge = fragment;
139    fragment_for_merge.extends = None;
140    sourced_config.merge(fragment_for_merge);
141    sourced_config.loaded_files.push(path_str);
142
143    Ok(())
144}
145
146impl SourcedConfig<ConfigLoaded> {
147    /// Merges another SourcedConfigFragment into this SourcedConfig.
148    /// Uses source precedence to determine which values take effect.
149    pub(super) fn merge(&mut self, fragment: SourcedConfigFragment) {
150        // Merge global config
151        // Enable uses replace semantics (project can enforce rules)
152        self.global.enable.merge_override(
153            fragment.global.enable.value,
154            fragment.global.enable.source,
155            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
156            fragment.global.enable.overrides.first().and_then(|o| o.line),
157        );
158
159        // Disable uses replace semantics (child config overrides parent, matching Ruff's `ignore`)
160        self.global.disable.merge_override(
161            fragment.global.disable.value,
162            fragment.global.disable.source,
163            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
164            fragment.global.disable.overrides.first().and_then(|o| o.line),
165        );
166
167        // Extend-enable uses union semantics (additive across config levels)
168        self.global.extend_enable.merge_union(
169            fragment.global.extend_enable.value,
170            fragment.global.extend_enable.source,
171            fragment
172                .global
173                .extend_enable
174                .overrides
175                .first()
176                .and_then(|o| o.file.clone()),
177            fragment.global.extend_enable.overrides.first().and_then(|o| o.line),
178        );
179
180        // Extend-disable uses union semantics (additive across config levels)
181        self.global.extend_disable.merge_union(
182            fragment.global.extend_disable.value,
183            fragment.global.extend_disable.source,
184            fragment
185                .global
186                .extend_disable
187                .overrides
188                .first()
189                .and_then(|o| o.file.clone()),
190            fragment.global.extend_disable.overrides.first().and_then(|o| o.line),
191        );
192
193        // Conflict resolution: Enable overrides disable
194        // Remove any rules from disable that appear in enable
195        self.global
196            .disable
197            .value
198            .retain(|rule| !self.global.enable.value.contains(rule));
199        self.global.include.merge_override(
200            fragment.global.include.value,
201            fragment.global.include.source,
202            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
203            fragment.global.include.overrides.first().and_then(|o| o.line),
204        );
205        self.global.exclude.merge_override(
206            fragment.global.exclude.value,
207            fragment.global.exclude.source,
208            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
209            fragment.global.exclude.overrides.first().and_then(|o| o.line),
210        );
211        self.global.respect_gitignore.merge_override(
212            fragment.global.respect_gitignore.value,
213            fragment.global.respect_gitignore.source,
214            fragment
215                .global
216                .respect_gitignore
217                .overrides
218                .first()
219                .and_then(|o| o.file.clone()),
220            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
221        );
222        self.global.line_length.merge_override(
223            fragment.global.line_length.value,
224            fragment.global.line_length.source,
225            fragment
226                .global
227                .line_length
228                .overrides
229                .first()
230                .and_then(|o| o.file.clone()),
231            fragment.global.line_length.overrides.first().and_then(|o| o.line),
232        );
233        self.global.fixable.merge_override(
234            fragment.global.fixable.value,
235            fragment.global.fixable.source,
236            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
237            fragment.global.fixable.overrides.first().and_then(|o| o.line),
238        );
239        self.global.unfixable.merge_override(
240            fragment.global.unfixable.value,
241            fragment.global.unfixable.source,
242            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
243            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
244        );
245
246        // Merge flavor
247        self.global.flavor.merge_override(
248            fragment.global.flavor.value,
249            fragment.global.flavor.source,
250            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
251            fragment.global.flavor.overrides.first().and_then(|o| o.line),
252        );
253
254        // Merge force_exclude
255        self.global.force_exclude.merge_override(
256            fragment.global.force_exclude.value,
257            fragment.global.force_exclude.source,
258            fragment
259                .global
260                .force_exclude
261                .overrides
262                .first()
263                .and_then(|o| o.file.clone()),
264            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
265        );
266
267        // Merge output_format if present
268        if let Some(output_format_fragment) = fragment.global.output_format {
269            if let Some(ref mut output_format) = self.global.output_format {
270                output_format.merge_override(
271                    output_format_fragment.value,
272                    output_format_fragment.source,
273                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
274                    output_format_fragment.overrides.first().and_then(|o| o.line),
275                );
276            } else {
277                self.global.output_format = Some(output_format_fragment);
278            }
279        }
280
281        // Merge cache_dir if present
282        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
283            if let Some(ref mut cache_dir) = self.global.cache_dir {
284                cache_dir.merge_override(
285                    cache_dir_fragment.value,
286                    cache_dir_fragment.source,
287                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
288                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
289                );
290            } else {
291                self.global.cache_dir = Some(cache_dir_fragment);
292            }
293        }
294
295        // Merge cache if not default (only override when explicitly set)
296        if fragment.global.cache.source != ConfigSource::Default {
297            self.global.cache.merge_override(
298                fragment.global.cache.value,
299                fragment.global.cache.source,
300                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
301                fragment.global.cache.overrides.first().and_then(|o| o.line),
302            );
303        }
304
305        // Merge per_file_ignores
306        self.per_file_ignores.merge_override(
307            fragment.per_file_ignores.value,
308            fragment.per_file_ignores.source,
309            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
310            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
311        );
312
313        // Merge per_file_flavor
314        self.per_file_flavor.merge_override(
315            fragment.per_file_flavor.value,
316            fragment.per_file_flavor.source,
317            fragment.per_file_flavor.overrides.first().and_then(|o| o.file.clone()),
318            fragment.per_file_flavor.overrides.first().and_then(|o| o.line),
319        );
320
321        // Merge code_block_tools
322        self.code_block_tools.merge_override(
323            fragment.code_block_tools.value,
324            fragment.code_block_tools.source,
325            fragment.code_block_tools.overrides.first().and_then(|o| o.file.clone()),
326            fragment.code_block_tools.overrides.first().and_then(|o| o.line),
327        );
328
329        // Merge rule configs
330        for (rule_name, rule_fragment) in fragment.rules {
331            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
332            let rule_entry = self.rules.entry(norm_rule_name).or_default();
333
334            // Merge severity if present in fragment
335            if let Some(severity_fragment) = rule_fragment.severity {
336                if let Some(ref mut existing_severity) = rule_entry.severity {
337                    existing_severity.merge_override(
338                        severity_fragment.value,
339                        severity_fragment.source,
340                        severity_fragment.overrides.first().and_then(|o| o.file.clone()),
341                        severity_fragment.overrides.first().and_then(|o| o.line),
342                    );
343                } else {
344                    rule_entry.severity = Some(severity_fragment);
345                }
346            }
347
348            // Merge values
349            for (key, sourced_value_fragment) in rule_fragment.values {
350                let sv_entry = rule_entry
351                    .values
352                    .entry(key.clone())
353                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
354                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
355                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
356                sv_entry.merge_override(
357                    sourced_value_fragment.value,  // Use the value from the fragment
358                    sourced_value_fragment.source, // Use the source from the fragment
359                    file_from_fragment,            // Pass the file path from the fragment override
360                    line_from_fragment,            // Pass the line number from the fragment override
361                );
362            }
363        }
364
365        // Merge unknown_keys from fragment
366        for (section, key, file_path) in fragment.unknown_keys {
367            // Deduplicate: only add if not already present
368            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
369                self.unknown_keys.push((section, key, file_path));
370            }
371        }
372    }
373
374    /// Load and merge configurations from files and CLI overrides.
375    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
376        Self::load_with_discovery(config_path, cli_overrides, false)
377    }
378
379    /// Finds project root by walking up from start_dir looking for .git directory.
380    /// Falls back to start_dir if no .git found.
381    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
382        // Convert relative paths to absolute to ensure correct traversal
383        let mut current = if start_dir.is_relative() {
384            std::env::current_dir().map_or_else(|_| start_dir.to_path_buf(), |cwd| cwd.join(start_dir))
385        } else {
386            start_dir.to_path_buf()
387        };
388        const MAX_DEPTH: usize = 100;
389
390        for _ in 0..MAX_DEPTH {
391            if current.join(".git").exists() {
392                log::debug!("[rumdl-config] Found .git at: {}", current.display());
393                return current;
394            }
395
396            match current.parent() {
397                Some(parent) => current = parent.to_path_buf(),
398                None => break,
399            }
400        }
401
402        // No .git found, use start_dir as project root
403        log::debug!(
404            "[rumdl-config] No .git found, using config location as project root: {}",
405            start_dir.display()
406        );
407        start_dir.to_path_buf()
408    }
409
410    /// Discover configuration file by traversing up the directory tree.
411    /// Returns the first configuration file found.
412    /// Discovers config file and returns both the config path and project root.
413    /// Returns: (config_file_path, project_root_path)
414    /// Project root is the directory containing .git, or config parent as fallback.
415    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
416        use std::env;
417
418        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
419
420        let start_dir = match env::current_dir() {
421            Ok(dir) => dir,
422            Err(e) => {
423                log::debug!("[rumdl-config] Failed to get current directory: {e}");
424                return None;
425            }
426        };
427
428        let mut current_dir = start_dir.clone();
429        let mut depth = 0;
430        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
431
432        loop {
433            if depth >= MAX_DEPTH {
434                log::debug!("[rumdl-config] Maximum traversal depth reached");
435                break;
436            }
437
438            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
439
440            // Check for config files in order of precedence (only if not already found)
441            if found_config.is_none() {
442                for config_name in RUMDL_CONFIG_FILES {
443                    let config_path = current_dir.join(config_name);
444
445                    if config_path.exists() {
446                        // For pyproject.toml, verify it contains [tool.rumdl] section
447                        if *config_name == "pyproject.toml" {
448                            if let Ok(content) = std::fs::read_to_string(&config_path) {
449                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
450                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
451                                    // Store config, but continue looking for .git
452                                    found_config = Some((config_path.clone(), current_dir.clone()));
453                                    break;
454                                }
455                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
456                                continue;
457                            }
458                        } else {
459                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
460                            // Store config, but continue looking for .git
461                            found_config = Some((config_path.clone(), current_dir.clone()));
462                            break;
463                        }
464                    }
465                }
466            }
467
468            // Check for .git directory (stop boundary)
469            if current_dir.join(".git").exists() {
470                log::debug!("[rumdl-config] Stopping at .git directory");
471                break;
472            }
473
474            // Move to parent directory
475            match current_dir.parent() {
476                Some(parent) => {
477                    current_dir = parent.to_owned();
478                    depth += 1;
479                }
480                None => {
481                    log::debug!("[rumdl-config] Reached filesystem root");
482                    break;
483                }
484            }
485        }
486
487        // If config found, determine project root by walking up from config location
488        if let Some((config_path, config_dir)) = found_config {
489            let project_root = Self::find_project_root_from(&config_dir);
490            return Some((config_path, project_root));
491        }
492
493        None
494    }
495
496    /// Discover markdownlint configuration file by traversing up the directory tree.
497    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
498    /// Returns the path to the config file if found.
499    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
500        use std::env;
501
502        const MAX_DEPTH: usize = 100;
503
504        let start_dir = match env::current_dir() {
505            Ok(dir) => dir,
506            Err(e) => {
507                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
508                return None;
509            }
510        };
511
512        let mut current_dir = start_dir.clone();
513        let mut depth = 0;
514
515        loop {
516            if depth >= MAX_DEPTH {
517                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
518                break;
519            }
520
521            log::debug!(
522                "[rumdl-config] Searching for markdownlint config in: {}",
523                current_dir.display()
524            );
525
526            // Check for markdownlint config files in order of precedence
527            for config_name in MARKDOWNLINT_CONFIG_FILES {
528                let config_path = current_dir.join(config_name);
529                if config_path.exists() {
530                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
531                    return Some(config_path);
532                }
533            }
534
535            // Check for .git directory (stop boundary)
536            if current_dir.join(".git").exists() {
537                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
538                break;
539            }
540
541            // Move to parent directory
542            match current_dir.parent() {
543                Some(parent) => {
544                    current_dir = parent.to_owned();
545                    depth += 1;
546                }
547                None => {
548                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
549                    break;
550                }
551            }
552        }
553
554        None
555    }
556
557    /// Internal implementation that accepts config directory for testing
558    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
559        let config_dir = config_dir.join("rumdl");
560
561        // Check for config files in precedence order (same as project discovery)
562        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
563
564        log::debug!(
565            "[rumdl-config] Checking for user configuration in: {}",
566            config_dir.display()
567        );
568
569        for filename in USER_CONFIG_FILES {
570            let config_path = config_dir.join(filename);
571
572            if config_path.exists() {
573                // For pyproject.toml, verify it contains [tool.rumdl] section
574                if *filename == "pyproject.toml" {
575                    if let Ok(content) = std::fs::read_to_string(&config_path) {
576                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
577                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
578                            return Some(config_path);
579                        }
580                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
581                        continue;
582                    }
583                } else {
584                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
585                    return Some(config_path);
586                }
587            }
588        }
589
590        log::debug!(
591            "[rumdl-config] No user configuration found in: {}",
592            config_dir.display()
593        );
594        None
595    }
596
597    /// Discover user-level configuration file from platform-specific config directory.
598    /// Returns the first configuration file found in the user config directory.
599    #[cfg(feature = "native")]
600    fn user_configuration_path() -> Option<std::path::PathBuf> {
601        use etcetera::{BaseStrategy, choose_base_strategy};
602
603        match choose_base_strategy() {
604            Ok(strategy) => {
605                let config_dir = strategy.config_dir();
606                Self::user_configuration_path_impl(&config_dir)
607            }
608            Err(e) => {
609                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
610                None
611            }
612        }
613    }
614
615    /// Stub for WASM builds - user config not supported
616    #[cfg(not(feature = "native"))]
617    fn user_configuration_path() -> Option<std::path::PathBuf> {
618        None
619    }
620
621    /// Load an explicit config file (standalone, no user config merging)
622    fn load_explicit_config(sourced_config: &mut Self, path: &str) -> Result<(), ConfigError> {
623        let path_obj = Path::new(path);
624        let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
625        let path_str = path.to_string();
626
627        log::debug!("[rumdl-config] Loading explicit config file: {filename}");
628
629        // Find project root by walking up from config location looking for .git
630        if let Some(config_parent) = path_obj.parent() {
631            let project_root = Self::find_project_root_from(config_parent);
632            log::debug!(
633                "[rumdl-config] Project root (from explicit config): {}",
634                project_root.display()
635            );
636            sourced_config.project_root = Some(project_root);
637        }
638
639        // Known markdownlint config files
640        const MARKDOWNLINT_FILENAMES: &[&str] = &[
641            ".markdownlint-cli2.jsonc",
642            ".markdownlint-cli2.yaml",
643            ".markdownlint-cli2.yml",
644            ".markdownlint.json",
645            ".markdownlint.yaml",
646            ".markdownlint.yml",
647        ];
648
649        if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
650            // Use extends-aware loading for rumdl TOML configs
651            let mut visited = IndexSet::new();
652            let chain_source = source_from_filename(filename);
653            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
654        } else if MARKDOWNLINT_FILENAMES.contains(&filename)
655            || path_str.ends_with(".json")
656            || path_str.ends_with(".jsonc")
657            || path_str.ends_with(".yaml")
658            || path_str.ends_with(".yml")
659        {
660            // Parse as markdownlint config (JSON/YAML) - no extends support
661            let fragment = parsers::load_from_markdownlint(&path_str)?;
662            sourced_config.merge(fragment);
663            sourced_config.loaded_files.push(path_str);
664        } else {
665            // Try TOML with extends support
666            let mut visited = IndexSet::new();
667            let chain_source = source_from_filename(filename);
668            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
669        }
670
671        Ok(())
672    }
673
674    /// Load and merge user-level configuration into this `SourcedConfig`.
675    ///
676    /// Discovers the user config file from the platform config directory
677    /// (or `user_config_dir` if provided for testing). Resolves any `extends`
678    /// chain and merges each fragment with `ConfigSource::UserConfig` precedence.
679    ///
680    /// Called in two contexts:
681    /// - When no project config is found: provides user defaults as the sole base
682    /// - When a markdownlint project config is found: provides rumdl-specific
683    ///   defaults that the markdownlint format cannot express; the markdownlint
684    ///   fragment is merged on top and wins on any overlapping key
685    fn load_user_config(sourced_config: &mut Self, user_config_dir: Option<&Path>) -> Result<(), ConfigError> {
686        let user_config_path = if let Some(dir) = user_config_dir {
687            Self::user_configuration_path_impl(dir)
688        } else {
689            Self::user_configuration_path()
690        };
691
692        if let Some(user_config_path) = user_config_path {
693            let path_str = user_config_path.display().to_string();
694
695            log::debug!("[rumdl-config] Loading user config: {path_str}");
696
697            // User config fallback also supports extends chains.
698            // Use a uniform source across the chain so child overrides are determined by chain order.
699            let mut visited = IndexSet::new();
700            load_config_with_extends(
701                sourced_config,
702                &user_config_path,
703                &mut visited,
704                ConfigSource::UserConfig,
705            )?;
706        } else {
707            log::debug!("[rumdl-config] No user configuration file found");
708        }
709
710        Ok(())
711    }
712
713    /// Internal implementation that accepts user config directory for testing
714    #[doc(hidden)]
715    pub fn load_with_discovery_impl(
716        config_path: Option<&str>,
717        cli_overrides: Option<&SourcedGlobalConfig>,
718        skip_auto_discovery: bool,
719        user_config_dir: Option<&Path>,
720    ) -> Result<Self, ConfigError> {
721        use std::env;
722        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
723
724        let mut sourced_config = SourcedConfig::default();
725
726        // Ruff model: Project config is standalone, user config is fallback only
727        //
728        // Priority order:
729        // 1. If explicit config path provided → use ONLY that (standalone)
730        // 2. Else if project config discovered → use ONLY that (standalone)
731        // 3. Else if user config exists → use it as fallback
732        // 4. CLI overrides always apply last
733        //
734        // This ensures project configs are reproducible across machines and
735        // CI/local runs behave identically.
736
737        // Explicit config path always takes precedence
738        if let Some(path) = config_path {
739            // Explicit config path provided - use ONLY this config (standalone)
740            log::debug!("[rumdl-config] Explicit config_path provided: {path:?}");
741            Self::load_explicit_config(&mut sourced_config, path)?;
742        } else if skip_auto_discovery {
743            log::debug!("[rumdl-config] Skipping config discovery due to --no-config/--isolated flag");
744            // No config loading, just apply CLI overrides at the end
745        } else {
746            // No explicit path - try auto-discovery
747            log::debug!("[rumdl-config] No explicit config_path, searching default locations");
748
749            // Try to discover project config first
750            if let Some((config_file, project_root)) = Self::discover_config_upward() {
751                // Project config found - use ONLY this (standalone, no user config).
752                // Rumdl project configs can express all settings directly, so user config
753                // is not needed and omitting it ensures CI and local runs are identical.
754                log::debug!("[rumdl-config] Found project config: {}", config_file.display());
755                log::debug!("[rumdl-config] Project root: {}", project_root.display());
756
757                sourced_config.project_root = Some(project_root);
758
759                // Use extends-aware loading for discovered configs
760                let mut visited = IndexSet::new();
761                let root_filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
762                let chain_source = source_from_filename(root_filename);
763                load_config_with_extends(&mut sourced_config, &config_file, &mut visited, chain_source)?;
764            } else {
765                // No rumdl project config - try markdownlint config
766                log::debug!("[rumdl-config] No rumdl config found, checking markdownlint config");
767
768                if let Some(markdownlint_path) = Self::discover_markdownlint_config_upward() {
769                    let path_str = markdownlint_path.display().to_string();
770                    log::debug!("[rumdl-config] Found markdownlint config: {path_str}");
771                    // Load user config first as a base so rumdl-specific settings (e.g. flavor,
772                    // cache) take effect. Markdownlint configs cannot express these settings.
773                    // The markdownlint fragment uses ConfigSource::ProjectConfig (precedence 3)
774                    // vs UserConfig (precedence 1), so project settings always win on overlap.
775                    Self::load_user_config(&mut sourced_config, user_config_dir)?;
776                    match parsers::load_from_markdownlint(&path_str) {
777                        Ok(fragment) => {
778                            sourced_config.merge(fragment);
779                            sourced_config.loaded_files.push(path_str);
780                        }
781                        Err(_e) => {
782                            log::debug!("[rumdl-config] Failed to load markdownlint config");
783                        }
784                    }
785                } else {
786                    // No project config at all - use user config as fallback
787                    log::debug!("[rumdl-config] No project config found, using user config as fallback");
788                    Self::load_user_config(&mut sourced_config, user_config_dir)?;
789                }
790            }
791        }
792
793        // Apply CLI overrides (highest precedence)
794        if let Some(cli) = cli_overrides {
795            sourced_config
796                .global
797                .enable
798                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
799            sourced_config
800                .global
801                .disable
802                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
803            sourced_config
804                .global
805                .exclude
806                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
807            sourced_config
808                .global
809                .include
810                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
811            sourced_config.global.respect_gitignore.merge_override(
812                cli.respect_gitignore.value,
813                ConfigSource::Cli,
814                None,
815                None,
816            );
817            sourced_config
818                .global
819                .fixable
820                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
821            sourced_config
822                .global
823                .unfixable
824                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
825            // No rule-specific CLI overrides implemented yet
826        }
827
828        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
829
830        Ok(sourced_config)
831    }
832
833    /// Load and merge configurations from files and CLI overrides.
834    /// If skip_auto_discovery is true, only explicit config paths are loaded.
835    pub fn load_with_discovery(
836        config_path: Option<&str>,
837        cli_overrides: Option<&SourcedGlobalConfig>,
838        skip_auto_discovery: bool,
839    ) -> Result<Self, ConfigError> {
840        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
841    }
842
843    /// Validate the configuration against a rule registry.
844    ///
845    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
846    /// enabling conversion to `Config`. Validation warnings are stored in the config
847    /// and can be displayed to the user.
848    ///
849    /// # Example
850    ///
851    /// ```ignore
852    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
853    /// let validated = loaded.validate(&registry)?;
854    /// let config: Config = validated.into();
855    /// ```
856    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
857        let warnings = validate_config_sourced_internal(&self, registry);
858
859        Ok(SourcedConfig {
860            global: self.global,
861            per_file_ignores: self.per_file_ignores,
862            per_file_flavor: self.per_file_flavor,
863            code_block_tools: self.code_block_tools,
864            rules: self.rules,
865            loaded_files: self.loaded_files,
866            unknown_keys: self.unknown_keys,
867            project_root: self.project_root,
868            validation_warnings: warnings,
869            _state: PhantomData,
870        })
871    }
872
873    /// Validate and convert to Config in one step (convenience method).
874    ///
875    /// This combines `validate()` and `into()` for callers who want the
876    /// validation warnings separately.
877    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
878        let validated = self.validate(registry)?;
879        let warnings = validated.validation_warnings.clone();
880        Ok((validated.into(), warnings))
881    }
882
883    /// Skip validation and convert directly to ConfigValidated state.
884    ///
885    /// # Safety
886    ///
887    /// This method bypasses validation. Use only when:
888    /// - You've already validated via `validate_config_sourced()`
889    /// - You're in test code that doesn't need validation
890    /// - You're migrating legacy code and will add proper validation later
891    ///
892    /// Prefer `validate()` for new code.
893    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
894        SourcedConfig {
895            global: self.global,
896            per_file_ignores: self.per_file_ignores,
897            per_file_flavor: self.per_file_flavor,
898            code_block_tools: self.code_block_tools,
899            rules: self.rules,
900            loaded_files: self.loaded_files,
901            unknown_keys: self.unknown_keys,
902            project_root: self.project_root,
903            validation_warnings: Vec::new(),
904            _state: PhantomData,
905        }
906    }
907
908    /// Discover the nearest config file for a specific directory,
909    /// walking upward to `project_root` (inclusive).
910    ///
911    /// Searches for rumdl config files (`.rumdl.toml`, `rumdl.toml`,
912    /// `.config/rumdl.toml`, `pyproject.toml` with `[tool.rumdl]`) and
913    /// markdownlint config files at each directory level.
914    ///
915    /// Returns the config file path if found. Does NOT use CWD.
916    pub fn discover_config_for_dir(dir: &Path, project_root: &Path) -> Option<PathBuf> {
917        let mut current_dir = dir.to_path_buf();
918
919        loop {
920            // Check rumdl config files first (higher precedence)
921            for config_name in RUMDL_CONFIG_FILES {
922                let config_path = current_dir.join(config_name);
923                if config_path.exists() {
924                    if *config_name == "pyproject.toml" {
925                        if let Ok(content) = std::fs::read_to_string(&config_path)
926                            && (content.contains("[tool.rumdl]") || content.contains("tool.rumdl"))
927                        {
928                            return Some(config_path);
929                        }
930                        continue;
931                    }
932                    return Some(config_path);
933                }
934            }
935
936            // Check markdownlint config files (lower precedence)
937            for config_name in MARKDOWNLINT_CONFIG_FILES {
938                let config_path = current_dir.join(config_name);
939                if config_path.exists() {
940                    return Some(config_path);
941                }
942            }
943
944            // Stop at project root (inclusive - we already checked it)
945            if current_dir == project_root {
946                break;
947            }
948
949            // Move to parent directory
950            match current_dir.parent() {
951                Some(parent) => current_dir = parent.to_path_buf(),
952                None => break,
953            }
954        }
955
956        None
957    }
958
959    /// Load a config from a specific file path, with extends resolution.
960    ///
961    /// Creates a fresh `SourcedConfig`, loads the config file using the
962    /// appropriate parser, and converts to `Config`. Used for per-directory
963    /// config loading where each subdirectory config is standalone.
964    pub fn load_config_for_path(config_path: &Path, project_root: &Path) -> Result<Config, ConfigError> {
965        let mut sourced_config = SourcedConfig {
966            project_root: Some(project_root.to_path_buf()),
967            ..SourcedConfig::default()
968        };
969
970        let filename = config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
971        let path_str = config_path.display().to_string();
972
973        // Determine if this is a markdownlint config or rumdl config
974        let is_markdownlint = MARKDOWNLINT_CONFIG_FILES.contains(&filename)
975            || (filename != "pyproject.toml"
976                && filename != ".rumdl.toml"
977                && filename != "rumdl.toml"
978                && (path_str.ends_with(".json")
979                    || path_str.ends_with(".jsonc")
980                    || path_str.ends_with(".yaml")
981                    || path_str.ends_with(".yml")));
982
983        if is_markdownlint {
984            let fragment = parsers::load_from_markdownlint(&path_str)?;
985            sourced_config.merge(fragment);
986            sourced_config.loaded_files.push(path_str);
987        } else {
988            let mut visited = IndexSet::new();
989            let chain_source = source_from_filename(filename);
990            load_config_with_extends(&mut sourced_config, config_path, &mut visited, chain_source)?;
991        }
992
993        Ok(sourced_config.into_validated_unchecked().into())
994    }
995}
996
997/// Convert a validated configuration to the final Config type.
998///
999/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
1000/// ensuring that validation must occur before conversion.
1001impl From<SourcedConfig<ConfigValidated>> for Config {
1002    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
1003        let mut rules = BTreeMap::new();
1004        for (rule_name, sourced_rule_cfg) in sourced.rules {
1005            // Normalize rule name to uppercase for case-insensitive lookup
1006            let normalized_rule_name = rule_name.to_ascii_uppercase();
1007            let severity = sourced_rule_cfg.severity.map(|sv| sv.value);
1008            let mut values = BTreeMap::new();
1009            for (key, sourced_val) in sourced_rule_cfg.values {
1010                values.insert(key, sourced_val.value);
1011            }
1012            rules.insert(normalized_rule_name, RuleConfig { severity, values });
1013        }
1014        // Enable is "explicit" if it was set by something other than the Default source
1015        let enable_is_explicit = sourced.global.enable.source != ConfigSource::Default;
1016
1017        #[allow(deprecated)]
1018        let global = GlobalConfig {
1019            enable: sourced.global.enable.value,
1020            disable: sourced.global.disable.value,
1021            exclude: sourced.global.exclude.value,
1022            include: sourced.global.include.value,
1023            respect_gitignore: sourced.global.respect_gitignore.value,
1024            line_length: sourced.global.line_length.value,
1025            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
1026            fixable: sourced.global.fixable.value,
1027            unfixable: sourced.global.unfixable.value,
1028            flavor: sourced.global.flavor.value,
1029            force_exclude: sourced.global.force_exclude.value,
1030            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
1031            cache: sourced.global.cache.value,
1032            extend_enable: sourced.global.extend_enable.value,
1033            extend_disable: sourced.global.extend_disable.value,
1034            enable_is_explicit,
1035        };
1036
1037        let mut config = Config {
1038            extends: None,
1039            global,
1040            per_file_ignores: sourced.per_file_ignores.value,
1041            per_file_flavor: sourced.per_file_flavor.value,
1042            code_block_tools: sourced.code_block_tools.value,
1043            rules,
1044            project_root: sourced.project_root,
1045            per_file_ignores_cache: Arc::new(OnceLock::new()),
1046            per_file_flavor_cache: Arc::new(OnceLock::new()),
1047            canonical_project_root_cache: Arc::new(OnceLock::new()),
1048        };
1049
1050        // Apply per-rule `enabled = true/false` to global enable/disable lists
1051        config.apply_per_rule_enabled();
1052
1053        // Enforce the runtime invariant: every rule-name list is canonicalised.
1054        // After this point, downstream consumers (`rules::filter_rules`, the LSP,
1055        // WASM, fix coordinator, per-file-ignores) can match against
1056        // `Rule::name()` with simple string equality regardless of whether the
1057        // user's config used canonical IDs (`"MD033"`) or aliases
1058        // (`"no-inline-html"`).
1059        config.canonicalize_rule_lists();
1060
1061        config
1062    }
1063}