Skip to main content

rumdl_lib/config/
loading.rs

1use indexmap::IndexSet;
2use std::collections::BTreeMap;
3use std::marker::PhantomData;
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, OnceLock};
6
7use super::flavor::ConfigLoaded;
8use super::flavor::ConfigValidated;
9use super::parsers;
10use super::registry::RuleRegistry;
11use super::source_tracking::{
12    ConfigSource, ConfigValidationWarning, SourcedConfig, SourcedConfigFragment, SourcedGlobalConfig, SourcedValue,
13};
14use super::types::{Config, ConfigError, GlobalConfig, MARKDOWNLINT_CONFIG_FILES, RuleConfig};
15use super::validation::validate_config_sourced_internal;
16
17/// Maximum depth for extends chains to prevent runaway recursion
18const MAX_EXTENDS_DEPTH: usize = 10;
19
/// Turn the raw `extends` string found in a config file into a filesystem path.
///
/// Resolution rules, checked in this order:
/// - a value starting with `~/` is anchored at the user's home directory
///   (only when built with the `native` feature; otherwise the value is
///   returned verbatim),
/// - an absolute value is returned unchanged,
/// - anything else is joined onto the directory that holds `config_file_path`
///   (falling back to `.` when that path has no parent).
fn resolve_extends_path(extends_value: &str, config_file_path: &Path) -> PathBuf {
    if let Some(home_relative) = extends_value.strip_prefix("~/") {
        #[cfg(feature = "native")]
        {
            use etcetera::{BaseStrategy, choose_base_strategy};
            // If no home directory can be determined, keep a literal `~` as the anchor.
            let home_dir = match choose_base_strategy() {
                Ok(strategy) => strategy.home_dir().to_path_buf(),
                Err(_) => PathBuf::from("~"),
            };
            return home_dir.join(home_relative);
        }
        #[cfg(not(feature = "native"))]
        {
            // No home-directory lookup in this build: hand the value back untouched.
            let _ = home_relative;
            return PathBuf::from(extends_value);
        }
    }

    let requested = Path::new(extends_value);
    if requested.is_absolute() {
        return requested.to_path_buf();
    }

    // Relative values are interpreted from the directory of the extending file.
    match config_file_path.parent() {
        Some(containing_dir) => containing_dir.join(requested),
        None => Path::new(".").join(requested),
    }
}
50
51/// Determine ConfigSource from a config filename.
52fn source_from_filename(filename: &str) -> ConfigSource {
53    if filename == "pyproject.toml" {
54        ConfigSource::PyprojectToml
55    } else {
56        ConfigSource::ProjectConfig
57    }
58}
59
60/// Load a config file (and any base configs it extends) into a SourcedConfig.
61///
62/// This function handles the recursive `extends` chain:
63/// 1. Parse the config file into a fragment
64/// 2. If the fragment has `extends`, recursively load the base config first
65/// 3. Merge the base config, then merge this fragment on top
66fn load_config_with_extends(
67    sourced_config: &mut SourcedConfig<ConfigLoaded>,
68    config_file_path: &Path,
69    visited: &mut IndexSet<PathBuf>,
70    chain_source: ConfigSource,
71) -> Result<(), ConfigError> {
72    // Canonicalize the path for circular reference detection
73    let canonical = config_file_path
74        .canonicalize()
75        .unwrap_or_else(|_| config_file_path.to_path_buf());
76
77    // Check for circular references
78    if visited.contains(&canonical) {
79        let chain: Vec<String> = visited.iter().map(|p| p.display().to_string()).collect();
80        return Err(ConfigError::CircularExtends {
81            path: config_file_path.display().to_string(),
82            chain,
83        });
84    }
85
86    // Check depth limit
87    if visited.len() >= MAX_EXTENDS_DEPTH {
88        return Err(ConfigError::ExtendsDepthExceeded {
89            path: config_file_path.display().to_string(),
90            max_depth: MAX_EXTENDS_DEPTH,
91        });
92    }
93
94    // Mark as visited
95    visited.insert(canonical);
96
97    let path_str = config_file_path.display().to_string();
98    let filename = config_file_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
99
100    // Read and parse the config file
101    let content = std::fs::read_to_string(config_file_path).map_err(|e| ConfigError::IoError {
102        source: e,
103        path: path_str.clone(),
104    })?;
105
106    let fragment = if filename == "pyproject.toml" {
107        match parsers::parse_pyproject_toml(&content, &path_str, chain_source)? {
108            Some(f) => f,
109            None => return Ok(()), // No [tool.rumdl] section
110        }
111    } else {
112        parsers::parse_rumdl_toml(&content, &path_str, chain_source)?
113    };
114
115    // If this fragment has `extends`, load the base config first
116    if let Some(ref extends_value) = fragment.extends {
117        let base_path = resolve_extends_path(extends_value, config_file_path);
118
119        if !base_path.exists() {
120            return Err(ConfigError::ExtendsNotFound {
121                path: base_path.display().to_string(),
122                from: path_str.clone(),
123            });
124        }
125
126        log::debug!(
127            "[rumdl-config] Config {} extends {}, loading base first",
128            path_str,
129            base_path.display()
130        );
131
132        // Recursively load the base config
133        load_config_with_extends(sourced_config, &base_path, visited, chain_source)?;
134    }
135
136    // Merge this fragment on top (base config was already merged if present)
137    // Strip the `extends` field since it's been consumed
138    let mut fragment_for_merge = fragment;
139    fragment_for_merge.extends = None;
140    sourced_config.merge(fragment_for_merge);
141    sourced_config.loaded_files.push(path_str);
142
143    Ok(())
144}
145
146impl SourcedConfig<ConfigLoaded> {
147    /// Merges another SourcedConfigFragment into this SourcedConfig.
148    /// Uses source precedence to determine which values take effect.
149    pub(super) fn merge(&mut self, fragment: SourcedConfigFragment) {
150        // Merge global config
151        // Enable uses replace semantics (project can enforce rules)
152        self.global.enable.merge_override(
153            fragment.global.enable.value,
154            fragment.global.enable.source,
155            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
156            fragment.global.enable.overrides.first().and_then(|o| o.line),
157        );
158
159        // Disable uses replace semantics (child config overrides parent, matching Ruff's `ignore`)
160        self.global.disable.merge_override(
161            fragment.global.disable.value,
162            fragment.global.disable.source,
163            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
164            fragment.global.disable.overrides.first().and_then(|o| o.line),
165        );
166
167        // Extend-enable uses union semantics (additive across config levels)
168        self.global.extend_enable.merge_union(
169            fragment.global.extend_enable.value,
170            fragment.global.extend_enable.source,
171            fragment
172                .global
173                .extend_enable
174                .overrides
175                .first()
176                .and_then(|o| o.file.clone()),
177            fragment.global.extend_enable.overrides.first().and_then(|o| o.line),
178        );
179
180        // Extend-disable uses union semantics (additive across config levels)
181        self.global.extend_disable.merge_union(
182            fragment.global.extend_disable.value,
183            fragment.global.extend_disable.source,
184            fragment
185                .global
186                .extend_disable
187                .overrides
188                .first()
189                .and_then(|o| o.file.clone()),
190            fragment.global.extend_disable.overrides.first().and_then(|o| o.line),
191        );
192
193        // Conflict resolution: Enable overrides disable
194        // Remove any rules from disable that appear in enable
195        self.global
196            .disable
197            .value
198            .retain(|rule| !self.global.enable.value.contains(rule));
199        self.global.include.merge_override(
200            fragment.global.include.value,
201            fragment.global.include.source,
202            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
203            fragment.global.include.overrides.first().and_then(|o| o.line),
204        );
205        self.global.exclude.merge_override(
206            fragment.global.exclude.value,
207            fragment.global.exclude.source,
208            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
209            fragment.global.exclude.overrides.first().and_then(|o| o.line),
210        );
211        self.global.respect_gitignore.merge_override(
212            fragment.global.respect_gitignore.value,
213            fragment.global.respect_gitignore.source,
214            fragment
215                .global
216                .respect_gitignore
217                .overrides
218                .first()
219                .and_then(|o| o.file.clone()),
220            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
221        );
222        self.global.line_length.merge_override(
223            fragment.global.line_length.value,
224            fragment.global.line_length.source,
225            fragment
226                .global
227                .line_length
228                .overrides
229                .first()
230                .and_then(|o| o.file.clone()),
231            fragment.global.line_length.overrides.first().and_then(|o| o.line),
232        );
233        self.global.fixable.merge_override(
234            fragment.global.fixable.value,
235            fragment.global.fixable.source,
236            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
237            fragment.global.fixable.overrides.first().and_then(|o| o.line),
238        );
239        self.global.unfixable.merge_override(
240            fragment.global.unfixable.value,
241            fragment.global.unfixable.source,
242            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
243            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
244        );
245
246        // Merge flavor
247        self.global.flavor.merge_override(
248            fragment.global.flavor.value,
249            fragment.global.flavor.source,
250            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
251            fragment.global.flavor.overrides.first().and_then(|o| o.line),
252        );
253
254        // Merge force_exclude
255        self.global.force_exclude.merge_override(
256            fragment.global.force_exclude.value,
257            fragment.global.force_exclude.source,
258            fragment
259                .global
260                .force_exclude
261                .overrides
262                .first()
263                .and_then(|o| o.file.clone()),
264            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
265        );
266
267        // Merge output_format if present
268        if let Some(output_format_fragment) = fragment.global.output_format {
269            if let Some(ref mut output_format) = self.global.output_format {
270                output_format.merge_override(
271                    output_format_fragment.value,
272                    output_format_fragment.source,
273                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
274                    output_format_fragment.overrides.first().and_then(|o| o.line),
275                );
276            } else {
277                self.global.output_format = Some(output_format_fragment);
278            }
279        }
280
281        // Merge cache_dir if present
282        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
283            if let Some(ref mut cache_dir) = self.global.cache_dir {
284                cache_dir.merge_override(
285                    cache_dir_fragment.value,
286                    cache_dir_fragment.source,
287                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
288                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
289                );
290            } else {
291                self.global.cache_dir = Some(cache_dir_fragment);
292            }
293        }
294
295        // Merge cache if not default (only override when explicitly set)
296        if fragment.global.cache.source != ConfigSource::Default {
297            self.global.cache.merge_override(
298                fragment.global.cache.value,
299                fragment.global.cache.source,
300                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
301                fragment.global.cache.overrides.first().and_then(|o| o.line),
302            );
303        }
304
305        // Merge per_file_ignores
306        self.per_file_ignores.merge_override(
307            fragment.per_file_ignores.value,
308            fragment.per_file_ignores.source,
309            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
310            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
311        );
312
313        // Merge per_file_flavor
314        self.per_file_flavor.merge_override(
315            fragment.per_file_flavor.value,
316            fragment.per_file_flavor.source,
317            fragment.per_file_flavor.overrides.first().and_then(|o| o.file.clone()),
318            fragment.per_file_flavor.overrides.first().and_then(|o| o.line),
319        );
320
321        // Merge code_block_tools
322        self.code_block_tools.merge_override(
323            fragment.code_block_tools.value,
324            fragment.code_block_tools.source,
325            fragment.code_block_tools.overrides.first().and_then(|o| o.file.clone()),
326            fragment.code_block_tools.overrides.first().and_then(|o| o.line),
327        );
328
329        // Merge rule configs
330        for (rule_name, rule_fragment) in fragment.rules {
331            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
332            let rule_entry = self.rules.entry(norm_rule_name).or_default();
333
334            // Merge severity if present in fragment
335            if let Some(severity_fragment) = rule_fragment.severity {
336                if let Some(ref mut existing_severity) = rule_entry.severity {
337                    existing_severity.merge_override(
338                        severity_fragment.value,
339                        severity_fragment.source,
340                        severity_fragment.overrides.first().and_then(|o| o.file.clone()),
341                        severity_fragment.overrides.first().and_then(|o| o.line),
342                    );
343                } else {
344                    rule_entry.severity = Some(severity_fragment);
345                }
346            }
347
348            // Merge values
349            for (key, sourced_value_fragment) in rule_fragment.values {
350                let sv_entry = rule_entry
351                    .values
352                    .entry(key.clone())
353                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
354                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
355                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
356                sv_entry.merge_override(
357                    sourced_value_fragment.value,  // Use the value from the fragment
358                    sourced_value_fragment.source, // Use the source from the fragment
359                    file_from_fragment,            // Pass the file path from the fragment override
360                    line_from_fragment,            // Pass the line number from the fragment override
361                );
362            }
363        }
364
365        // Merge unknown_keys from fragment
366        for (section, key, file_path) in fragment.unknown_keys {
367            // Deduplicate: only add if not already present
368            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
369                self.unknown_keys.push((section, key, file_path));
370            }
371        }
372    }
373
    /// Load and merge configurations from files and CLI overrides.
    ///
    /// Thin convenience wrapper: forwards `config_path` and `cli_overrides` to
    /// `Self::load_with_discovery` with `false` as the third argument.
    /// NOTE(review): that argument is presumably the "skip auto-discovery"
    /// flag (so discovery stays enabled here) — confirm against
    /// `load_with_discovery`, which is defined outside this section.
    ///
    /// # Errors
    ///
    /// Propagates any `ConfigError` returned by `load_with_discovery`.
    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
        Self::load_with_discovery(config_path, cli_overrides, false)
    }
378
379    /// Finds project root by walking up from start_dir looking for .git directory.
380    /// Falls back to start_dir if no .git found.
381    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
382        // Convert relative paths to absolute to ensure correct traversal
383        let mut current = if start_dir.is_relative() {
384            std::env::current_dir().map_or_else(|_| start_dir.to_path_buf(), |cwd| cwd.join(start_dir))
385        } else {
386            start_dir.to_path_buf()
387        };
388        const MAX_DEPTH: usize = 100;
389
390        for _ in 0..MAX_DEPTH {
391            if current.join(".git").exists() {
392                log::debug!("[rumdl-config] Found .git at: {}", current.display());
393                return current;
394            }
395
396            match current.parent() {
397                Some(parent) => current = parent.to_path_buf(),
398                None => break,
399            }
400        }
401
402        // No .git found, use start_dir as project root
403        log::debug!(
404            "[rumdl-config] No .git found, using config location as project root: {}",
405            start_dir.display()
406        );
407        start_dir.to_path_buf()
408    }
409
410    /// Discover configuration file by traversing up the directory tree.
411    /// Returns the first configuration file found.
412    /// Discovers config file and returns both the config path and project root.
413    /// Returns: (config_file_path, project_root_path)
414    /// Project root is the directory containing .git, or config parent as fallback.
415    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
416        use std::env;
417
418        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
419        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
420
421        let start_dir = match env::current_dir() {
422            Ok(dir) => dir,
423            Err(e) => {
424                log::debug!("[rumdl-config] Failed to get current directory: {e}");
425                return None;
426            }
427        };
428
429        let mut current_dir = start_dir.clone();
430        let mut depth = 0;
431        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
432
433        loop {
434            if depth >= MAX_DEPTH {
435                log::debug!("[rumdl-config] Maximum traversal depth reached");
436                break;
437            }
438
439            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
440
441            // Check for config files in order of precedence (only if not already found)
442            if found_config.is_none() {
443                for config_name in CONFIG_FILES {
444                    let config_path = current_dir.join(config_name);
445
446                    if config_path.exists() {
447                        // For pyproject.toml, verify it contains [tool.rumdl] section
448                        if *config_name == "pyproject.toml" {
449                            if let Ok(content) = std::fs::read_to_string(&config_path) {
450                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
451                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
452                                    // Store config, but continue looking for .git
453                                    found_config = Some((config_path.clone(), current_dir.clone()));
454                                    break;
455                                }
456                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
457                                continue;
458                            }
459                        } else {
460                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
461                            // Store config, but continue looking for .git
462                            found_config = Some((config_path.clone(), current_dir.clone()));
463                            break;
464                        }
465                    }
466                }
467            }
468
469            // Check for .git directory (stop boundary)
470            if current_dir.join(".git").exists() {
471                log::debug!("[rumdl-config] Stopping at .git directory");
472                break;
473            }
474
475            // Move to parent directory
476            match current_dir.parent() {
477                Some(parent) => {
478                    current_dir = parent.to_owned();
479                    depth += 1;
480                }
481                None => {
482                    log::debug!("[rumdl-config] Reached filesystem root");
483                    break;
484                }
485            }
486        }
487
488        // If config found, determine project root by walking up from config location
489        if let Some((config_path, config_dir)) = found_config {
490            let project_root = Self::find_project_root_from(&config_dir);
491            return Some((config_path, project_root));
492        }
493
494        None
495    }
496
497    /// Discover markdownlint configuration file by traversing up the directory tree.
498    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
499    /// Returns the path to the config file if found.
500    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
501        use std::env;
502
503        const MAX_DEPTH: usize = 100;
504
505        let start_dir = match env::current_dir() {
506            Ok(dir) => dir,
507            Err(e) => {
508                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
509                return None;
510            }
511        };
512
513        let mut current_dir = start_dir.clone();
514        let mut depth = 0;
515
516        loop {
517            if depth >= MAX_DEPTH {
518                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
519                break;
520            }
521
522            log::debug!(
523                "[rumdl-config] Searching for markdownlint config in: {}",
524                current_dir.display()
525            );
526
527            // Check for markdownlint config files in order of precedence
528            for config_name in MARKDOWNLINT_CONFIG_FILES {
529                let config_path = current_dir.join(config_name);
530                if config_path.exists() {
531                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
532                    return Some(config_path);
533                }
534            }
535
536            // Check for .git directory (stop boundary)
537            if current_dir.join(".git").exists() {
538                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
539                break;
540            }
541
542            // Move to parent directory
543            match current_dir.parent() {
544                Some(parent) => {
545                    current_dir = parent.to_owned();
546                    depth += 1;
547                }
548                None => {
549                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
550                    break;
551                }
552            }
553        }
554
555        None
556    }
557
558    /// Internal implementation that accepts config directory for testing
559    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
560        let config_dir = config_dir.join("rumdl");
561
562        // Check for config files in precedence order (same as project discovery)
563        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
564
565        log::debug!(
566            "[rumdl-config] Checking for user configuration in: {}",
567            config_dir.display()
568        );
569
570        for filename in USER_CONFIG_FILES {
571            let config_path = config_dir.join(filename);
572
573            if config_path.exists() {
574                // For pyproject.toml, verify it contains [tool.rumdl] section
575                if *filename == "pyproject.toml" {
576                    if let Ok(content) = std::fs::read_to_string(&config_path) {
577                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
578                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
579                            return Some(config_path);
580                        }
581                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
582                        continue;
583                    }
584                } else {
585                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
586                    return Some(config_path);
587                }
588            }
589        }
590
591        log::debug!(
592            "[rumdl-config] No user configuration found in: {}",
593            config_dir.display()
594        );
595        None
596    }
597
598    /// Discover user-level configuration file from platform-specific config directory.
599    /// Returns the first configuration file found in the user config directory.
600    #[cfg(feature = "native")]
601    fn user_configuration_path() -> Option<std::path::PathBuf> {
602        use etcetera::{BaseStrategy, choose_base_strategy};
603
604        match choose_base_strategy() {
605            Ok(strategy) => {
606                let config_dir = strategy.config_dir();
607                Self::user_configuration_path_impl(&config_dir)
608            }
609            Err(e) => {
610                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
611                None
612            }
613        }
614    }
615
    /// Stub for WASM builds - user config not supported.
    ///
    /// Compiled only when the `native` feature is disabled; always returns
    /// `None`, so callers see "no user configuration" in such builds.
    #[cfg(not(feature = "native"))]
    fn user_configuration_path() -> Option<std::path::PathBuf> {
        None
    }
621
622    /// Load an explicit config file (standalone, no user config merging)
623    fn load_explicit_config(sourced_config: &mut Self, path: &str) -> Result<(), ConfigError> {
624        let path_obj = Path::new(path);
625        let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
626        let path_str = path.to_string();
627
628        log::debug!("[rumdl-config] Loading explicit config file: {filename}");
629
630        // Find project root by walking up from config location looking for .git
631        if let Some(config_parent) = path_obj.parent() {
632            let project_root = Self::find_project_root_from(config_parent);
633            log::debug!(
634                "[rumdl-config] Project root (from explicit config): {}",
635                project_root.display()
636            );
637            sourced_config.project_root = Some(project_root);
638        }
639
640        // Known markdownlint config files
641        const MARKDOWNLINT_FILENAMES: &[&str] = &[
642            ".markdownlint-cli2.jsonc",
643            ".markdownlint-cli2.yaml",
644            ".markdownlint-cli2.yml",
645            ".markdownlint.json",
646            ".markdownlint.yaml",
647            ".markdownlint.yml",
648        ];
649
650        if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
651            // Use extends-aware loading for rumdl TOML configs
652            let mut visited = IndexSet::new();
653            let chain_source = source_from_filename(filename);
654            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
655        } else if MARKDOWNLINT_FILENAMES.contains(&filename)
656            || path_str.ends_with(".json")
657            || path_str.ends_with(".jsonc")
658            || path_str.ends_with(".yaml")
659            || path_str.ends_with(".yml")
660        {
661            // Parse as markdownlint config (JSON/YAML) - no extends support
662            let fragment = parsers::load_from_markdownlint(&path_str)?;
663            sourced_config.merge(fragment);
664            sourced_config.loaded_files.push(path_str);
665        } else {
666            // Try TOML with extends support
667            let mut visited = IndexSet::new();
668            let chain_source = source_from_filename(filename);
669            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
670        }
671
672        Ok(())
673    }
674
675    /// Load and merge user-level configuration into this `SourcedConfig`.
676    ///
677    /// Discovers the user config file from the platform config directory
678    /// (or `user_config_dir` if provided for testing). Resolves any `extends`
679    /// chain and merges each fragment with `ConfigSource::UserConfig` precedence.
680    ///
681    /// Called in two contexts:
682    /// - When no project config is found: provides user defaults as the sole base
683    /// - When a markdownlint project config is found: provides rumdl-specific
684    ///   defaults that the markdownlint format cannot express; the markdownlint
685    ///   fragment is merged on top and wins on any overlapping key
686    fn load_user_config(sourced_config: &mut Self, user_config_dir: Option<&Path>) -> Result<(), ConfigError> {
687        let user_config_path = if let Some(dir) = user_config_dir {
688            Self::user_configuration_path_impl(dir)
689        } else {
690            Self::user_configuration_path()
691        };
692
693        if let Some(user_config_path) = user_config_path {
694            let path_str = user_config_path.display().to_string();
695
696            log::debug!("[rumdl-config] Loading user config: {path_str}");
697
698            // User config fallback also supports extends chains.
699            // Use a uniform source across the chain so child overrides are determined by chain order.
700            let mut visited = IndexSet::new();
701            load_config_with_extends(
702                sourced_config,
703                &user_config_path,
704                &mut visited,
705                ConfigSource::UserConfig,
706            )?;
707        } else {
708            log::debug!("[rumdl-config] No user configuration file found");
709        }
710
711        Ok(())
712    }
713
714    /// Internal implementation that accepts user config directory for testing
715    #[doc(hidden)]
716    pub fn load_with_discovery_impl(
717        config_path: Option<&str>,
718        cli_overrides: Option<&SourcedGlobalConfig>,
719        skip_auto_discovery: bool,
720        user_config_dir: Option<&Path>,
721    ) -> Result<Self, ConfigError> {
722        use std::env;
723        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
724
725        let mut sourced_config = SourcedConfig::default();
726
727        // Ruff model: Project config is standalone, user config is fallback only
728        //
729        // Priority order:
730        // 1. If explicit config path provided → use ONLY that (standalone)
731        // 2. Else if project config discovered → use ONLY that (standalone)
732        // 3. Else if user config exists → use it as fallback
733        // 4. CLI overrides always apply last
734        //
735        // This ensures project configs are reproducible across machines and
736        // CI/local runs behave identically.
737
738        // Explicit config path always takes precedence
739        if let Some(path) = config_path {
740            // Explicit config path provided - use ONLY this config (standalone)
741            log::debug!("[rumdl-config] Explicit config_path provided: {path:?}");
742            Self::load_explicit_config(&mut sourced_config, path)?;
743        } else if skip_auto_discovery {
744            log::debug!("[rumdl-config] Skipping config discovery due to --no-config/--isolated flag");
745            // No config loading, just apply CLI overrides at the end
746        } else {
747            // No explicit path - try auto-discovery
748            log::debug!("[rumdl-config] No explicit config_path, searching default locations");
749
750            // Try to discover project config first
751            if let Some((config_file, project_root)) = Self::discover_config_upward() {
752                // Project config found - use ONLY this (standalone, no user config).
753                // Rumdl project configs can express all settings directly, so user config
754                // is not needed and omitting it ensures CI and local runs are identical.
755                log::debug!("[rumdl-config] Found project config: {}", config_file.display());
756                log::debug!("[rumdl-config] Project root: {}", project_root.display());
757
758                sourced_config.project_root = Some(project_root);
759
760                // Use extends-aware loading for discovered configs
761                let mut visited = IndexSet::new();
762                let root_filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
763                let chain_source = source_from_filename(root_filename);
764                load_config_with_extends(&mut sourced_config, &config_file, &mut visited, chain_source)?;
765            } else {
766                // No rumdl project config - try markdownlint config
767                log::debug!("[rumdl-config] No rumdl config found, checking markdownlint config");
768
769                if let Some(markdownlint_path) = Self::discover_markdownlint_config_upward() {
770                    let path_str = markdownlint_path.display().to_string();
771                    log::debug!("[rumdl-config] Found markdownlint config: {path_str}");
772                    // Load user config first as a base so rumdl-specific settings (e.g. flavor,
773                    // cache) take effect. Markdownlint configs cannot express these settings.
774                    // The markdownlint fragment uses ConfigSource::ProjectConfig (precedence 3)
775                    // vs UserConfig (precedence 1), so project settings always win on overlap.
776                    Self::load_user_config(&mut sourced_config, user_config_dir)?;
777                    match parsers::load_from_markdownlint(&path_str) {
778                        Ok(fragment) => {
779                            sourced_config.merge(fragment);
780                            sourced_config.loaded_files.push(path_str);
781                        }
782                        Err(_e) => {
783                            log::debug!("[rumdl-config] Failed to load markdownlint config");
784                        }
785                    }
786                } else {
787                    // No project config at all - use user config as fallback
788                    log::debug!("[rumdl-config] No project config found, using user config as fallback");
789                    Self::load_user_config(&mut sourced_config, user_config_dir)?;
790                }
791            }
792        }
793
794        // Apply CLI overrides (highest precedence)
795        if let Some(cli) = cli_overrides {
796            sourced_config
797                .global
798                .enable
799                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
800            sourced_config
801                .global
802                .disable
803                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
804            sourced_config
805                .global
806                .exclude
807                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
808            sourced_config
809                .global
810                .include
811                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
812            sourced_config.global.respect_gitignore.merge_override(
813                cli.respect_gitignore.value,
814                ConfigSource::Cli,
815                None,
816                None,
817            );
818            sourced_config
819                .global
820                .fixable
821                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
822            sourced_config
823                .global
824                .unfixable
825                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
826            // No rule-specific CLI overrides implemented yet
827        }
828
829        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
830
831        Ok(sourced_config)
832    }
833
834    /// Load and merge configurations from files and CLI overrides.
835    /// If skip_auto_discovery is true, only explicit config paths are loaded.
836    pub fn load_with_discovery(
837        config_path: Option<&str>,
838        cli_overrides: Option<&SourcedGlobalConfig>,
839        skip_auto_discovery: bool,
840    ) -> Result<Self, ConfigError> {
841        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
842    }
843
844    /// Validate the configuration against a rule registry.
845    ///
846    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
847    /// enabling conversion to `Config`. Validation warnings are stored in the config
848    /// and can be displayed to the user.
849    ///
850    /// # Example
851    ///
852    /// ```ignore
853    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
854    /// let validated = loaded.validate(&registry)?;
855    /// let config: Config = validated.into();
856    /// ```
857    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
858        let warnings = validate_config_sourced_internal(&self, registry);
859
860        Ok(SourcedConfig {
861            global: self.global,
862            per_file_ignores: self.per_file_ignores,
863            per_file_flavor: self.per_file_flavor,
864            code_block_tools: self.code_block_tools,
865            rules: self.rules,
866            loaded_files: self.loaded_files,
867            unknown_keys: self.unknown_keys,
868            project_root: self.project_root,
869            validation_warnings: warnings,
870            _state: PhantomData,
871        })
872    }
873
874    /// Validate and convert to Config in one step (convenience method).
875    ///
876    /// This combines `validate()` and `into()` for callers who want the
877    /// validation warnings separately.
878    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
879        let validated = self.validate(registry)?;
880        let warnings = validated.validation_warnings.clone();
881        Ok((validated.into(), warnings))
882    }
883
884    /// Skip validation and convert directly to ConfigValidated state.
885    ///
886    /// # Safety
887    ///
888    /// This method bypasses validation. Use only when:
889    /// - You've already validated via `validate_config_sourced()`
890    /// - You're in test code that doesn't need validation
891    /// - You're migrating legacy code and will add proper validation later
892    ///
893    /// Prefer `validate()` for new code.
894    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
895        SourcedConfig {
896            global: self.global,
897            per_file_ignores: self.per_file_ignores,
898            per_file_flavor: self.per_file_flavor,
899            code_block_tools: self.code_block_tools,
900            rules: self.rules,
901            loaded_files: self.loaded_files,
902            unknown_keys: self.unknown_keys,
903            project_root: self.project_root,
904            validation_warnings: Vec::new(),
905            _state: PhantomData,
906        }
907    }
908
909    /// Discover the nearest config file for a specific directory,
910    /// walking upward to `project_root` (inclusive).
911    ///
912    /// Searches for rumdl config files (`.rumdl.toml`, `rumdl.toml`,
913    /// `.config/rumdl.toml`, `pyproject.toml` with `[tool.rumdl]`) and
914    /// markdownlint config files at each directory level.
915    ///
916    /// Returns the config file path if found. Does NOT use CWD.
917    pub fn discover_config_for_dir(dir: &Path, project_root: &Path) -> Option<PathBuf> {
918        const RUMDL_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
919
920        let mut current_dir = dir.to_path_buf();
921
922        loop {
923            // Check rumdl config files first (higher precedence)
924            for config_name in RUMDL_CONFIG_FILES {
925                let config_path = current_dir.join(config_name);
926                if config_path.exists() {
927                    if *config_name == "pyproject.toml" {
928                        if let Ok(content) = std::fs::read_to_string(&config_path)
929                            && (content.contains("[tool.rumdl]") || content.contains("tool.rumdl"))
930                        {
931                            return Some(config_path);
932                        }
933                        continue;
934                    }
935                    return Some(config_path);
936                }
937            }
938
939            // Check markdownlint config files (lower precedence)
940            for config_name in MARKDOWNLINT_CONFIG_FILES {
941                let config_path = current_dir.join(config_name);
942                if config_path.exists() {
943                    return Some(config_path);
944                }
945            }
946
947            // Stop at project root (inclusive - we already checked it)
948            if current_dir == project_root {
949                break;
950            }
951
952            // Move to parent directory
953            match current_dir.parent() {
954                Some(parent) => current_dir = parent.to_path_buf(),
955                None => break,
956            }
957        }
958
959        None
960    }
961
962    /// Load a config from a specific file path, with extends resolution.
963    ///
964    /// Creates a fresh `SourcedConfig`, loads the config file using the
965    /// appropriate parser, and converts to `Config`. Used for per-directory
966    /// config loading where each subdirectory config is standalone.
967    pub fn load_config_for_path(config_path: &Path, project_root: &Path) -> Result<Config, ConfigError> {
968        let mut sourced_config = SourcedConfig {
969            project_root: Some(project_root.to_path_buf()),
970            ..SourcedConfig::default()
971        };
972
973        let filename = config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
974        let path_str = config_path.display().to_string();
975
976        // Determine if this is a markdownlint config or rumdl config
977        let is_markdownlint = MARKDOWNLINT_CONFIG_FILES.contains(&filename)
978            || (filename != "pyproject.toml"
979                && filename != ".rumdl.toml"
980                && filename != "rumdl.toml"
981                && (path_str.ends_with(".json")
982                    || path_str.ends_with(".jsonc")
983                    || path_str.ends_with(".yaml")
984                    || path_str.ends_with(".yml")));
985
986        if is_markdownlint {
987            let fragment = parsers::load_from_markdownlint(&path_str)?;
988            sourced_config.merge(fragment);
989            sourced_config.loaded_files.push(path_str);
990        } else {
991            let mut visited = IndexSet::new();
992            let chain_source = source_from_filename(filename);
993            load_config_with_extends(&mut sourced_config, config_path, &mut visited, chain_source)?;
994        }
995
996        Ok(sourced_config.into_validated_unchecked().into())
997    }
998}
999
1000/// Convert a validated configuration to the final Config type.
1001///
1002/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
1003/// ensuring that validation must occur before conversion.
1004impl From<SourcedConfig<ConfigValidated>> for Config {
1005    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
1006        let mut rules = BTreeMap::new();
1007        for (rule_name, sourced_rule_cfg) in sourced.rules {
1008            // Normalize rule name to uppercase for case-insensitive lookup
1009            let normalized_rule_name = rule_name.to_ascii_uppercase();
1010            let severity = sourced_rule_cfg.severity.map(|sv| sv.value);
1011            let mut values = BTreeMap::new();
1012            for (key, sourced_val) in sourced_rule_cfg.values {
1013                values.insert(key, sourced_val.value);
1014            }
1015            rules.insert(normalized_rule_name, RuleConfig { severity, values });
1016        }
1017        // Enable is "explicit" if it was set by something other than the Default source
1018        let enable_is_explicit = sourced.global.enable.source != ConfigSource::Default;
1019
1020        #[allow(deprecated)]
1021        let global = GlobalConfig {
1022            enable: sourced.global.enable.value,
1023            disable: sourced.global.disable.value,
1024            exclude: sourced.global.exclude.value,
1025            include: sourced.global.include.value,
1026            respect_gitignore: sourced.global.respect_gitignore.value,
1027            line_length: sourced.global.line_length.value,
1028            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
1029            fixable: sourced.global.fixable.value,
1030            unfixable: sourced.global.unfixable.value,
1031            flavor: sourced.global.flavor.value,
1032            force_exclude: sourced.global.force_exclude.value,
1033            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
1034            cache: sourced.global.cache.value,
1035            extend_enable: sourced.global.extend_enable.value,
1036            extend_disable: sourced.global.extend_disable.value,
1037            enable_is_explicit,
1038        };
1039
1040        let mut config = Config {
1041            extends: None,
1042            global,
1043            per_file_ignores: sourced.per_file_ignores.value,
1044            per_file_flavor: sourced.per_file_flavor.value,
1045            code_block_tools: sourced.code_block_tools.value,
1046            rules,
1047            project_root: sourced.project_root,
1048            per_file_ignores_cache: Arc::new(OnceLock::new()),
1049            per_file_flavor_cache: Arc::new(OnceLock::new()),
1050        };
1051
1052        // Apply per-rule `enabled = true/false` to global enable/disable lists
1053        config.apply_per_rule_enabled();
1054
1055        config
1056    }
1057}