Skip to main content

rumdl_lib/config/
loading.rs

1use std::collections::{BTreeMap, HashSet};
2use std::marker::PhantomData;
3use std::path::{Path, PathBuf};
4use std::sync::{Arc, OnceLock};
5
6use super::flavor::ConfigLoaded;
7use super::flavor::ConfigValidated;
8use super::parsers;
9use super::registry::RuleRegistry;
10use super::source_tracking::{
11    ConfigSource, ConfigValidationWarning, SourcedConfig, SourcedConfigFragment, SourcedGlobalConfig, SourcedValue,
12};
13use super::types::{Config, ConfigError, GlobalConfig, MARKDOWNLINT_CONFIG_FILES, RuleConfig};
14use super::validation::validate_config_sourced_internal;
15
/// Maximum number of config files allowed in a single `extends` chain.
///
/// Loading fails with `ConfigError::ExtendsDepthExceeded` once this many files have
/// already been visited, which prevents runaway recursion.
const MAX_EXTENDS_DEPTH: usize = 10;
18
19/// Resolve an `extends` path relative to the config file that contains it.
20///
21/// - `~/` prefix: expanded to home directory
22/// - Relative paths: resolved against the config file's parent directory
23/// - Absolute paths: used as-is
24fn resolve_extends_path(extends_value: &str, config_file_path: &Path) -> Result<PathBuf, ConfigError> {
25    let path = if extends_value.starts_with("~/") {
26        // Expand tilde to home directory
27        #[cfg(feature = "native")]
28        {
29            use etcetera::{BaseStrategy, choose_base_strategy};
30            let home = choose_base_strategy()
31                .map(|s| s.home_dir().to_path_buf())
32                .unwrap_or_else(|_| PathBuf::from("~"));
33            home.join(&extends_value[2..])
34        }
35        #[cfg(not(feature = "native"))]
36        {
37            // WASM: no home directory support
38            PathBuf::from(extends_value)
39        }
40    } else {
41        let path = PathBuf::from(extends_value);
42        if path.is_absolute() {
43            path
44        } else {
45            // Resolve relative to config file's directory
46            let config_dir = config_file_path.parent().unwrap_or(Path::new("."));
47            config_dir.join(extends_value)
48        }
49    };
50
51    Ok(path)
52}
53
54/// Determine ConfigSource from a config filename.
55fn source_from_filename(filename: &str) -> ConfigSource {
56    if filename == "pyproject.toml" {
57        ConfigSource::PyprojectToml
58    } else {
59        ConfigSource::ProjectConfig
60    }
61}
62
63/// Load a config file (and any base configs it extends) into a SourcedConfig.
64///
65/// This function handles the recursive `extends` chain:
66/// 1. Parse the config file into a fragment
67/// 2. If the fragment has `extends`, recursively load the base config first
68/// 3. Merge the base config, then merge this fragment on top
69fn load_config_with_extends(
70    sourced_config: &mut SourcedConfig<ConfigLoaded>,
71    config_file_path: &Path,
72    visited: &mut HashSet<PathBuf>,
73    chain_source: ConfigSource,
74) -> Result<(), ConfigError> {
75    // Canonicalize the path for circular reference detection
76    let canonical = config_file_path
77        .canonicalize()
78        .unwrap_or_else(|_| config_file_path.to_path_buf());
79
80    // Check for circular references
81    if visited.contains(&canonical) {
82        let chain: Vec<String> = visited.iter().map(|p| p.display().to_string()).collect();
83        return Err(ConfigError::CircularExtends {
84            path: config_file_path.display().to_string(),
85            chain,
86        });
87    }
88
89    // Check depth limit
90    if visited.len() >= MAX_EXTENDS_DEPTH {
91        return Err(ConfigError::ExtendsDepthExceeded {
92            path: config_file_path.display().to_string(),
93            max_depth: MAX_EXTENDS_DEPTH,
94        });
95    }
96
97    // Mark as visited
98    visited.insert(canonical);
99
100    let path_str = config_file_path.display().to_string();
101    let filename = config_file_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
102
103    // Read and parse the config file
104    let content = std::fs::read_to_string(config_file_path).map_err(|e| ConfigError::IoError {
105        source: e,
106        path: path_str.clone(),
107    })?;
108
109    let fragment = if filename == "pyproject.toml" {
110        match parsers::parse_pyproject_toml(&content, &path_str, chain_source)? {
111            Some(f) => f,
112            None => return Ok(()), // No [tool.rumdl] section
113        }
114    } else {
115        parsers::parse_rumdl_toml(&content, &path_str, chain_source)?
116    };
117
118    // If this fragment has `extends`, load the base config first
119    if let Some(ref extends_value) = fragment.extends {
120        let base_path = resolve_extends_path(extends_value, config_file_path)?;
121
122        if !base_path.exists() {
123            return Err(ConfigError::ExtendsNotFound {
124                path: base_path.display().to_string(),
125                from: path_str.clone(),
126            });
127        }
128
129        log::debug!(
130            "[rumdl-config] Config {} extends {}, loading base first",
131            path_str,
132            base_path.display()
133        );
134
135        // Recursively load the base config
136        load_config_with_extends(sourced_config, &base_path, visited, chain_source)?;
137    }
138
139    // Merge this fragment on top (base config was already merged if present)
140    // Strip the `extends` field since it's been consumed
141    let mut fragment_for_merge = fragment;
142    fragment_for_merge.extends = None;
143    sourced_config.merge(fragment_for_merge);
144    sourced_config.loaded_files.push(path_str);
145
146    Ok(())
147}
148
149impl SourcedConfig<ConfigLoaded> {
150    /// Merges another SourcedConfigFragment into this SourcedConfig.
151    /// Uses source precedence to determine which values take effect.
152    pub(super) fn merge(&mut self, fragment: SourcedConfigFragment) {
153        // Merge global config
154        // Enable uses replace semantics (project can enforce rules)
155        self.global.enable.merge_override(
156            fragment.global.enable.value,
157            fragment.global.enable.source,
158            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
159            fragment.global.enable.overrides.first().and_then(|o| o.line),
160        );
161
162        // Disable uses replace semantics (child config overrides parent, matching Ruff's `ignore`)
163        self.global.disable.merge_override(
164            fragment.global.disable.value,
165            fragment.global.disable.source,
166            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
167            fragment.global.disable.overrides.first().and_then(|o| o.line),
168        );
169
170        // Extend-enable uses union semantics (additive across config levels)
171        self.global.extend_enable.merge_union(
172            fragment.global.extend_enable.value,
173            fragment.global.extend_enable.source,
174            fragment
175                .global
176                .extend_enable
177                .overrides
178                .first()
179                .and_then(|o| o.file.clone()),
180            fragment.global.extend_enable.overrides.first().and_then(|o| o.line),
181        );
182
183        // Extend-disable uses union semantics (additive across config levels)
184        self.global.extend_disable.merge_union(
185            fragment.global.extend_disable.value,
186            fragment.global.extend_disable.source,
187            fragment
188                .global
189                .extend_disable
190                .overrides
191                .first()
192                .and_then(|o| o.file.clone()),
193            fragment.global.extend_disable.overrides.first().and_then(|o| o.line),
194        );
195
196        // Conflict resolution: Enable overrides disable
197        // Remove any rules from disable that appear in enable
198        self.global
199            .disable
200            .value
201            .retain(|rule| !self.global.enable.value.contains(rule));
202        self.global.include.merge_override(
203            fragment.global.include.value,
204            fragment.global.include.source,
205            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
206            fragment.global.include.overrides.first().and_then(|o| o.line),
207        );
208        self.global.exclude.merge_override(
209            fragment.global.exclude.value,
210            fragment.global.exclude.source,
211            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
212            fragment.global.exclude.overrides.first().and_then(|o| o.line),
213        );
214        self.global.respect_gitignore.merge_override(
215            fragment.global.respect_gitignore.value,
216            fragment.global.respect_gitignore.source,
217            fragment
218                .global
219                .respect_gitignore
220                .overrides
221                .first()
222                .and_then(|o| o.file.clone()),
223            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
224        );
225        self.global.line_length.merge_override(
226            fragment.global.line_length.value,
227            fragment.global.line_length.source,
228            fragment
229                .global
230                .line_length
231                .overrides
232                .first()
233                .and_then(|o| o.file.clone()),
234            fragment.global.line_length.overrides.first().and_then(|o| o.line),
235        );
236        self.global.fixable.merge_override(
237            fragment.global.fixable.value,
238            fragment.global.fixable.source,
239            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
240            fragment.global.fixable.overrides.first().and_then(|o| o.line),
241        );
242        self.global.unfixable.merge_override(
243            fragment.global.unfixable.value,
244            fragment.global.unfixable.source,
245            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
246            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
247        );
248
249        // Merge flavor
250        self.global.flavor.merge_override(
251            fragment.global.flavor.value,
252            fragment.global.flavor.source,
253            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
254            fragment.global.flavor.overrides.first().and_then(|o| o.line),
255        );
256
257        // Merge force_exclude
258        self.global.force_exclude.merge_override(
259            fragment.global.force_exclude.value,
260            fragment.global.force_exclude.source,
261            fragment
262                .global
263                .force_exclude
264                .overrides
265                .first()
266                .and_then(|o| o.file.clone()),
267            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
268        );
269
270        // Merge output_format if present
271        if let Some(output_format_fragment) = fragment.global.output_format {
272            if let Some(ref mut output_format) = self.global.output_format {
273                output_format.merge_override(
274                    output_format_fragment.value,
275                    output_format_fragment.source,
276                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
277                    output_format_fragment.overrides.first().and_then(|o| o.line),
278                );
279            } else {
280                self.global.output_format = Some(output_format_fragment);
281            }
282        }
283
284        // Merge cache_dir if present
285        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
286            if let Some(ref mut cache_dir) = self.global.cache_dir {
287                cache_dir.merge_override(
288                    cache_dir_fragment.value,
289                    cache_dir_fragment.source,
290                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
291                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
292                );
293            } else {
294                self.global.cache_dir = Some(cache_dir_fragment);
295            }
296        }
297
298        // Merge cache if not default (only override when explicitly set)
299        if fragment.global.cache.source != ConfigSource::Default {
300            self.global.cache.merge_override(
301                fragment.global.cache.value,
302                fragment.global.cache.source,
303                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
304                fragment.global.cache.overrides.first().and_then(|o| o.line),
305            );
306        }
307
308        // Merge per_file_ignores
309        self.per_file_ignores.merge_override(
310            fragment.per_file_ignores.value,
311            fragment.per_file_ignores.source,
312            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
313            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
314        );
315
316        // Merge per_file_flavor
317        self.per_file_flavor.merge_override(
318            fragment.per_file_flavor.value,
319            fragment.per_file_flavor.source,
320            fragment.per_file_flavor.overrides.first().and_then(|o| o.file.clone()),
321            fragment.per_file_flavor.overrides.first().and_then(|o| o.line),
322        );
323
324        // Merge code_block_tools
325        self.code_block_tools.merge_override(
326            fragment.code_block_tools.value,
327            fragment.code_block_tools.source,
328            fragment.code_block_tools.overrides.first().and_then(|o| o.file.clone()),
329            fragment.code_block_tools.overrides.first().and_then(|o| o.line),
330        );
331
332        // Merge rule configs
333        for (rule_name, rule_fragment) in fragment.rules {
334            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
335            let rule_entry = self.rules.entry(norm_rule_name).or_default();
336
337            // Merge severity if present in fragment
338            if let Some(severity_fragment) = rule_fragment.severity {
339                if let Some(ref mut existing_severity) = rule_entry.severity {
340                    existing_severity.merge_override(
341                        severity_fragment.value,
342                        severity_fragment.source,
343                        severity_fragment.overrides.first().and_then(|o| o.file.clone()),
344                        severity_fragment.overrides.first().and_then(|o| o.line),
345                    );
346                } else {
347                    rule_entry.severity = Some(severity_fragment);
348                }
349            }
350
351            // Merge values
352            for (key, sourced_value_fragment) in rule_fragment.values {
353                let sv_entry = rule_entry
354                    .values
355                    .entry(key.clone())
356                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
357                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
358                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
359                sv_entry.merge_override(
360                    sourced_value_fragment.value,  // Use the value from the fragment
361                    sourced_value_fragment.source, // Use the source from the fragment
362                    file_from_fragment,            // Pass the file path from the fragment override
363                    line_from_fragment,            // Pass the line number from the fragment override
364                );
365            }
366        }
367
368        // Merge unknown_keys from fragment
369        for (section, key, file_path) in fragment.unknown_keys {
370            // Deduplicate: only add if not already present
371            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
372                self.unknown_keys.push((section, key, file_path));
373            }
374        }
375    }
376
    /// Load and merge configurations from files and CLI overrides.
    ///
    /// Thin wrapper: forwards `config_path` and `cli_overrides` to `load_with_discovery`,
    /// passing `false` as its third argument.
    /// NOTE(review): that argument presumably means "do not skip auto-discovery" — confirm
    /// against `load_with_discovery`, which is not visible here.
    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
        Self::load_with_discovery(config_path, cli_overrides, false)
    }
381
382    /// Finds project root by walking up from start_dir looking for .git directory.
383    /// Falls back to start_dir if no .git found.
384    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
385        // Convert relative paths to absolute to ensure correct traversal
386        let mut current = if start_dir.is_relative() {
387            std::env::current_dir()
388                .map(|cwd| cwd.join(start_dir))
389                .unwrap_or_else(|_| start_dir.to_path_buf())
390        } else {
391            start_dir.to_path_buf()
392        };
393        const MAX_DEPTH: usize = 100;
394
395        for _ in 0..MAX_DEPTH {
396            if current.join(".git").exists() {
397                log::debug!("[rumdl-config] Found .git at: {}", current.display());
398                return current;
399            }
400
401            match current.parent() {
402                Some(parent) => current = parent.to_path_buf(),
403                None => break,
404            }
405        }
406
407        // No .git found, use start_dir as project root
408        log::debug!(
409            "[rumdl-config] No .git found, using config location as project root: {}",
410            start_dir.display()
411        );
412        start_dir.to_path_buf()
413    }
414
415    /// Discover configuration file by traversing up the directory tree.
416    /// Returns the first configuration file found.
417    /// Discovers config file and returns both the config path and project root.
418    /// Returns: (config_file_path, project_root_path)
419    /// Project root is the directory containing .git, or config parent as fallback.
420    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
421        use std::env;
422
423        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
424        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
425
426        let start_dir = match env::current_dir() {
427            Ok(dir) => dir,
428            Err(e) => {
429                log::debug!("[rumdl-config] Failed to get current directory: {e}");
430                return None;
431            }
432        };
433
434        let mut current_dir = start_dir.clone();
435        let mut depth = 0;
436        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
437
438        loop {
439            if depth >= MAX_DEPTH {
440                log::debug!("[rumdl-config] Maximum traversal depth reached");
441                break;
442            }
443
444            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
445
446            // Check for config files in order of precedence (only if not already found)
447            if found_config.is_none() {
448                for config_name in CONFIG_FILES {
449                    let config_path = current_dir.join(config_name);
450
451                    if config_path.exists() {
452                        // For pyproject.toml, verify it contains [tool.rumdl] section
453                        if *config_name == "pyproject.toml" {
454                            if let Ok(content) = std::fs::read_to_string(&config_path) {
455                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
456                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
457                                    // Store config, but continue looking for .git
458                                    found_config = Some((config_path.clone(), current_dir.clone()));
459                                    break;
460                                }
461                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
462                                continue;
463                            }
464                        } else {
465                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
466                            // Store config, but continue looking for .git
467                            found_config = Some((config_path.clone(), current_dir.clone()));
468                            break;
469                        }
470                    }
471                }
472            }
473
474            // Check for .git directory (stop boundary)
475            if current_dir.join(".git").exists() {
476                log::debug!("[rumdl-config] Stopping at .git directory");
477                break;
478            }
479
480            // Move to parent directory
481            match current_dir.parent() {
482                Some(parent) => {
483                    current_dir = parent.to_owned();
484                    depth += 1;
485                }
486                None => {
487                    log::debug!("[rumdl-config] Reached filesystem root");
488                    break;
489                }
490            }
491        }
492
493        // If config found, determine project root by walking up from config location
494        if let Some((config_path, config_dir)) = found_config {
495            let project_root = Self::find_project_root_from(&config_dir);
496            return Some((config_path, project_root));
497        }
498
499        None
500    }
501
502    /// Discover markdownlint configuration file by traversing up the directory tree.
503    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
504    /// Returns the path to the config file if found.
505    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
506        use std::env;
507
508        const MAX_DEPTH: usize = 100;
509
510        let start_dir = match env::current_dir() {
511            Ok(dir) => dir,
512            Err(e) => {
513                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
514                return None;
515            }
516        };
517
518        let mut current_dir = start_dir.clone();
519        let mut depth = 0;
520
521        loop {
522            if depth >= MAX_DEPTH {
523                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
524                break;
525            }
526
527            log::debug!(
528                "[rumdl-config] Searching for markdownlint config in: {}",
529                current_dir.display()
530            );
531
532            // Check for markdownlint config files in order of precedence
533            for config_name in MARKDOWNLINT_CONFIG_FILES {
534                let config_path = current_dir.join(config_name);
535                if config_path.exists() {
536                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
537                    return Some(config_path);
538                }
539            }
540
541            // Check for .git directory (stop boundary)
542            if current_dir.join(".git").exists() {
543                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
544                break;
545            }
546
547            // Move to parent directory
548            match current_dir.parent() {
549                Some(parent) => {
550                    current_dir = parent.to_owned();
551                    depth += 1;
552                }
553                None => {
554                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
555                    break;
556                }
557            }
558        }
559
560        None
561    }
562
563    /// Internal implementation that accepts config directory for testing
564    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
565        let config_dir = config_dir.join("rumdl");
566
567        // Check for config files in precedence order (same as project discovery)
568        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
569
570        log::debug!(
571            "[rumdl-config] Checking for user configuration in: {}",
572            config_dir.display()
573        );
574
575        for filename in USER_CONFIG_FILES {
576            let config_path = config_dir.join(filename);
577
578            if config_path.exists() {
579                // For pyproject.toml, verify it contains [tool.rumdl] section
580                if *filename == "pyproject.toml" {
581                    if let Ok(content) = std::fs::read_to_string(&config_path) {
582                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
583                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
584                            return Some(config_path);
585                        }
586                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
587                        continue;
588                    }
589                } else {
590                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
591                    return Some(config_path);
592                }
593            }
594        }
595
596        log::debug!(
597            "[rumdl-config] No user configuration found in: {}",
598            config_dir.display()
599        );
600        None
601    }
602
603    /// Discover user-level configuration file from platform-specific config directory.
604    /// Returns the first configuration file found in the user config directory.
605    #[cfg(feature = "native")]
606    fn user_configuration_path() -> Option<std::path::PathBuf> {
607        use etcetera::{BaseStrategy, choose_base_strategy};
608
609        match choose_base_strategy() {
610            Ok(strategy) => {
611                let config_dir = strategy.config_dir();
612                Self::user_configuration_path_impl(&config_dir)
613            }
614            Err(e) => {
615                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
616                None
617            }
618        }
619    }
620
    /// Stub for builds without the `native` feature (e.g. WASM): user config is not
    /// supported there, so this always returns `None`.
    #[cfg(not(feature = "native"))]
    fn user_configuration_path() -> Option<std::path::PathBuf> {
        None
    }
626
627    /// Load an explicit config file (standalone, no user config merging)
628    fn load_explicit_config(sourced_config: &mut Self, path: &str) -> Result<(), ConfigError> {
629        let path_obj = Path::new(path);
630        let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
631        let path_str = path.to_string();
632
633        log::debug!("[rumdl-config] Loading explicit config file: {filename}");
634
635        // Find project root by walking up from config location looking for .git
636        if let Some(config_parent) = path_obj.parent() {
637            let project_root = Self::find_project_root_from(config_parent);
638            log::debug!(
639                "[rumdl-config] Project root (from explicit config): {}",
640                project_root.display()
641            );
642            sourced_config.project_root = Some(project_root);
643        }
644
645        // Known markdownlint config files
646        const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
647
648        if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
649            // Use extends-aware loading for rumdl TOML configs
650            let mut visited = HashSet::new();
651            let chain_source = source_from_filename(filename);
652            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
653        } else if MARKDOWNLINT_FILENAMES.contains(&filename)
654            || path_str.ends_with(".json")
655            || path_str.ends_with(".jsonc")
656            || path_str.ends_with(".yaml")
657            || path_str.ends_with(".yml")
658        {
659            // Parse as markdownlint config (JSON/YAML) - no extends support
660            let fragment = parsers::load_from_markdownlint(&path_str)?;
661            sourced_config.merge(fragment);
662            sourced_config.loaded_files.push(path_str);
663        } else {
664            // Try TOML with extends support
665            let mut visited = HashSet::new();
666            let chain_source = source_from_filename(filename);
667            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
668        }
669
670        Ok(())
671    }
672
673    /// Load user config as fallback when no project config exists
674    fn load_user_config_as_fallback(
675        sourced_config: &mut Self,
676        user_config_dir: Option<&Path>,
677    ) -> Result<(), ConfigError> {
678        let user_config_path = if let Some(dir) = user_config_dir {
679            Self::user_configuration_path_impl(dir)
680        } else {
681            Self::user_configuration_path()
682        };
683
684        if let Some(user_config_path) = user_config_path {
685            let path_str = user_config_path.display().to_string();
686
687            log::debug!("[rumdl-config] Loading user config as fallback: {path_str}");
688
689            // User config fallback also supports extends chains.
690            // Use a uniform source across the chain so child overrides are determined by chain order.
691            let mut visited = HashSet::new();
692            load_config_with_extends(
693                sourced_config,
694                &user_config_path,
695                &mut visited,
696                ConfigSource::UserConfig,
697            )?;
698        } else {
699            log::debug!("[rumdl-config] No user configuration file found");
700        }
701
702        Ok(())
703    }
704
705    /// Internal implementation that accepts user config directory for testing
706    #[doc(hidden)]
707    pub fn load_with_discovery_impl(
708        config_path: Option<&str>,
709        cli_overrides: Option<&SourcedGlobalConfig>,
710        skip_auto_discovery: bool,
711        user_config_dir: Option<&Path>,
712    ) -> Result<Self, ConfigError> {
713        use std::env;
714        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
715
716        let mut sourced_config = SourcedConfig::default();
717
718        // Ruff model: Project config is standalone, user config is fallback only
719        //
720        // Priority order:
721        // 1. If explicit config path provided → use ONLY that (standalone)
722        // 2. Else if project config discovered → use ONLY that (standalone)
723        // 3. Else if user config exists → use it as fallback
724        // 4. CLI overrides always apply last
725        //
726        // This ensures project configs are reproducible across machines and
727        // CI/local runs behave identically.
728
729        // Explicit config path always takes precedence
730        if let Some(path) = config_path {
731            // Explicit config path provided - use ONLY this config (standalone)
732            log::debug!("[rumdl-config] Explicit config_path provided: {path:?}");
733            Self::load_explicit_config(&mut sourced_config, path)?;
734        } else if skip_auto_discovery {
735            log::debug!("[rumdl-config] Skipping config discovery due to --no-config/--isolated flag");
736            // No config loading, just apply CLI overrides at the end
737        } else {
738            // No explicit path - try auto-discovery
739            log::debug!("[rumdl-config] No explicit config_path, searching default locations");
740
741            // Try to discover project config first
742            if let Some((config_file, project_root)) = Self::discover_config_upward() {
743                // Project config found - use ONLY this (standalone, no user config)
744                log::debug!("[rumdl-config] Found project config: {}", config_file.display());
745                log::debug!("[rumdl-config] Project root: {}", project_root.display());
746
747                sourced_config.project_root = Some(project_root);
748
749                // Use extends-aware loading for discovered configs
750                let mut visited = HashSet::new();
751                let root_filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
752                let chain_source = source_from_filename(root_filename);
753                load_config_with_extends(&mut sourced_config, &config_file, &mut visited, chain_source)?;
754            } else {
755                // No rumdl project config - try markdownlint config
756                log::debug!("[rumdl-config] No rumdl config found, checking markdownlint config");
757
758                if let Some(markdownlint_path) = Self::discover_markdownlint_config_upward() {
759                    let path_str = markdownlint_path.display().to_string();
760                    log::debug!("[rumdl-config] Found markdownlint config: {path_str}");
761                    match parsers::load_from_markdownlint(&path_str) {
762                        Ok(fragment) => {
763                            sourced_config.merge(fragment);
764                            sourced_config.loaded_files.push(path_str);
765                        }
766                        Err(_e) => {
767                            log::debug!("[rumdl-config] Failed to load markdownlint config, trying user config");
768                            Self::load_user_config_as_fallback(&mut sourced_config, user_config_dir)?;
769                        }
770                    }
771                } else {
772                    // No project config at all - use user config as fallback
773                    log::debug!("[rumdl-config] No project config found, using user config as fallback");
774                    Self::load_user_config_as_fallback(&mut sourced_config, user_config_dir)?;
775                }
776            }
777        }
778
779        // Apply CLI overrides (highest precedence)
780        if let Some(cli) = cli_overrides {
781            sourced_config
782                .global
783                .enable
784                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
785            sourced_config
786                .global
787                .disable
788                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
789            sourced_config
790                .global
791                .exclude
792                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
793            sourced_config
794                .global
795                .include
796                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
797            sourced_config.global.respect_gitignore.merge_override(
798                cli.respect_gitignore.value,
799                ConfigSource::Cli,
800                None,
801                None,
802            );
803            sourced_config
804                .global
805                .fixable
806                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
807            sourced_config
808                .global
809                .unfixable
810                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
811            // No rule-specific CLI overrides implemented yet
812        }
813
814        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
815
816        Ok(sourced_config)
817    }
818
819    /// Load and merge configurations from files and CLI overrides.
820    /// If skip_auto_discovery is true, only explicit config paths are loaded.
821    pub fn load_with_discovery(
822        config_path: Option<&str>,
823        cli_overrides: Option<&SourcedGlobalConfig>,
824        skip_auto_discovery: bool,
825    ) -> Result<Self, ConfigError> {
826        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
827    }
828
829    /// Validate the configuration against a rule registry.
830    ///
831    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
832    /// enabling conversion to `Config`. Validation warnings are stored in the config
833    /// and can be displayed to the user.
834    ///
835    /// # Example
836    ///
837    /// ```ignore
838    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
839    /// let validated = loaded.validate(&registry)?;
840    /// let config: Config = validated.into();
841    /// ```
842    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
843        let warnings = validate_config_sourced_internal(&self, registry);
844
845        Ok(SourcedConfig {
846            global: self.global,
847            per_file_ignores: self.per_file_ignores,
848            per_file_flavor: self.per_file_flavor,
849            code_block_tools: self.code_block_tools,
850            rules: self.rules,
851            loaded_files: self.loaded_files,
852            unknown_keys: self.unknown_keys,
853            project_root: self.project_root,
854            validation_warnings: warnings,
855            _state: PhantomData,
856        })
857    }
858
859    /// Validate and convert to Config in one step (convenience method).
860    ///
861    /// This combines `validate()` and `into()` for callers who want the
862    /// validation warnings separately.
863    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
864        let validated = self.validate(registry)?;
865        let warnings = validated.validation_warnings.clone();
866        Ok((validated.into(), warnings))
867    }
868
869    /// Skip validation and convert directly to ConfigValidated state.
870    ///
871    /// # Safety
872    ///
873    /// This method bypasses validation. Use only when:
874    /// - You've already validated via `validate_config_sourced()`
875    /// - You're in test code that doesn't need validation
876    /// - You're migrating legacy code and will add proper validation later
877    ///
878    /// Prefer `validate()` for new code.
879    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
880        SourcedConfig {
881            global: self.global,
882            per_file_ignores: self.per_file_ignores,
883            per_file_flavor: self.per_file_flavor,
884            code_block_tools: self.code_block_tools,
885            rules: self.rules,
886            loaded_files: self.loaded_files,
887            unknown_keys: self.unknown_keys,
888            project_root: self.project_root,
889            validation_warnings: Vec::new(),
890            _state: PhantomData,
891        }
892    }
893}
894
895/// Convert a validated configuration to the final Config type.
896///
897/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
898/// ensuring that validation must occur before conversion.
899impl From<SourcedConfig<ConfigValidated>> for Config {
900    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
901        let mut rules = BTreeMap::new();
902        for (rule_name, sourced_rule_cfg) in sourced.rules {
903            // Normalize rule name to uppercase for case-insensitive lookup
904            let normalized_rule_name = rule_name.to_ascii_uppercase();
905            let severity = sourced_rule_cfg.severity.map(|sv| sv.value);
906            let mut values = BTreeMap::new();
907            for (key, sourced_val) in sourced_rule_cfg.values {
908                values.insert(key, sourced_val.value);
909            }
910            rules.insert(normalized_rule_name, RuleConfig { severity, values });
911        }
912        // Enable is "explicit" if it was set by something other than the Default source
913        let enable_is_explicit = sourced.global.enable.source != ConfigSource::Default;
914
915        #[allow(deprecated)]
916        let global = GlobalConfig {
917            enable: sourced.global.enable.value,
918            disable: sourced.global.disable.value,
919            exclude: sourced.global.exclude.value,
920            include: sourced.global.include.value,
921            respect_gitignore: sourced.global.respect_gitignore.value,
922            line_length: sourced.global.line_length.value,
923            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
924            fixable: sourced.global.fixable.value,
925            unfixable: sourced.global.unfixable.value,
926            flavor: sourced.global.flavor.value,
927            force_exclude: sourced.global.force_exclude.value,
928            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
929            cache: sourced.global.cache.value,
930            extend_enable: sourced.global.extend_enable.value,
931            extend_disable: sourced.global.extend_disable.value,
932            enable_is_explicit,
933        };
934
935        // Backward compatibility bridge: per-rule `enabled = true` → extend_enable
936        // For opt-in rules (MD060, MD063, MD072, MD073), if the per-rule config
937        // has `enabled = true`, add the rule to extend_enable and emit a warning.
938        let opt_in_with_enabled = ["MD060", "MD063", "MD072", "MD073"];
939        let mut global = global;
940        for rule_name in opt_in_with_enabled {
941            if let Some(rule_cfg) = rules.get(rule_name)
942                && let Some(toml::Value::Boolean(true)) = rule_cfg.values.get("enabled")
943                && !global.extend_enable.contains(&rule_name.to_string())
944            {
945                log::warn!(
946                    "[DEPRECATED] [{rule_name}] enabled = true is deprecated. \
947                     Use `extend-enable = [\"{rule_name}\"]` in [global] instead.",
948                );
949                global.extend_enable.push(rule_name.to_string());
950            }
951        }
952
953        Config {
954            global,
955            per_file_ignores: sourced.per_file_ignores.value,
956            per_file_flavor: sourced.per_file_flavor.value,
957            code_block_tools: sourced.code_block_tools.value,
958            rules,
959            project_root: sourced.project_root,
960            per_file_ignores_cache: Arc::new(OnceLock::new()),
961            per_file_flavor_cache: Arc::new(OnceLock::new()),
962        }
963    }
964}