Skip to main content

rumdl_lib/config/
loading.rs

1use indexmap::IndexSet;
2use std::collections::BTreeMap;
3use std::marker::PhantomData;
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, OnceLock};
6
7use super::flavor::ConfigLoaded;
8use super::flavor::ConfigValidated;
9use super::parsers;
10use super::registry::RuleRegistry;
11use super::source_tracking::{
12    ConfigSource, ConfigValidationWarning, SourcedConfig, SourcedConfigFragment, SourcedGlobalConfig, SourcedValue,
13};
14use super::types::{Config, ConfigError, GlobalConfig, MARKDOWNLINT_CONFIG_FILES, RuleConfig};
15use super::validation::validate_config_sourced_internal;
16
/// Maximum number of config files allowed in a single `extends` chain.
///
/// Prevents runaway recursion: `load_config_with_extends` returns
/// `ConfigError::ExtendsDepthExceeded` once this many files have already been
/// visited in the current chain.
const MAX_EXTENDS_DEPTH: usize = 10;
19
20/// Resolve an `extends` path relative to the config file that contains it.
21///
22/// - `~/` prefix: expanded to home directory
23/// - Relative paths: resolved against the config file's parent directory
24/// - Absolute paths: used as-is
25fn resolve_extends_path(extends_value: &str, config_file_path: &Path) -> Result<PathBuf, ConfigError> {
26    let path = if let Some(suffix) = extends_value.strip_prefix("~/") {
27        // Expand tilde to home directory
28        #[cfg(feature = "native")]
29        {
30            use etcetera::{BaseStrategy, choose_base_strategy};
31            let home = choose_base_strategy()
32                .map(|s| s.home_dir().to_path_buf())
33                .unwrap_or_else(|_| PathBuf::from("~"));
34            home.join(suffix)
35        }
36        #[cfg(not(feature = "native"))]
37        {
38            let _ = suffix;
39            PathBuf::from(extends_value)
40        }
41    } else {
42        let path = PathBuf::from(extends_value);
43        if path.is_absolute() {
44            path
45        } else {
46            // Resolve relative to config file's directory
47            let config_dir = config_file_path.parent().unwrap_or(Path::new("."));
48            config_dir.join(extends_value)
49        }
50    };
51
52    Ok(path)
53}
54
55/// Determine ConfigSource from a config filename.
56fn source_from_filename(filename: &str) -> ConfigSource {
57    if filename == "pyproject.toml" {
58        ConfigSource::PyprojectToml
59    } else {
60        ConfigSource::ProjectConfig
61    }
62}
63
64/// Load a config file (and any base configs it extends) into a SourcedConfig.
65///
66/// This function handles the recursive `extends` chain:
67/// 1. Parse the config file into a fragment
68/// 2. If the fragment has `extends`, recursively load the base config first
69/// 3. Merge the base config, then merge this fragment on top
70fn load_config_with_extends(
71    sourced_config: &mut SourcedConfig<ConfigLoaded>,
72    config_file_path: &Path,
73    visited: &mut IndexSet<PathBuf>,
74    chain_source: ConfigSource,
75) -> Result<(), ConfigError> {
76    // Canonicalize the path for circular reference detection
77    let canonical = config_file_path
78        .canonicalize()
79        .unwrap_or_else(|_| config_file_path.to_path_buf());
80
81    // Check for circular references
82    if visited.contains(&canonical) {
83        let chain: Vec<String> = visited.iter().map(|p| p.display().to_string()).collect();
84        return Err(ConfigError::CircularExtends {
85            path: config_file_path.display().to_string(),
86            chain,
87        });
88    }
89
90    // Check depth limit
91    if visited.len() >= MAX_EXTENDS_DEPTH {
92        return Err(ConfigError::ExtendsDepthExceeded {
93            path: config_file_path.display().to_string(),
94            max_depth: MAX_EXTENDS_DEPTH,
95        });
96    }
97
98    // Mark as visited
99    visited.insert(canonical);
100
101    let path_str = config_file_path.display().to_string();
102    let filename = config_file_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
103
104    // Read and parse the config file
105    let content = std::fs::read_to_string(config_file_path).map_err(|e| ConfigError::IoError {
106        source: e,
107        path: path_str.clone(),
108    })?;
109
110    let fragment = if filename == "pyproject.toml" {
111        match parsers::parse_pyproject_toml(&content, &path_str, chain_source)? {
112            Some(f) => f,
113            None => return Ok(()), // No [tool.rumdl] section
114        }
115    } else {
116        parsers::parse_rumdl_toml(&content, &path_str, chain_source)?
117    };
118
119    // If this fragment has `extends`, load the base config first
120    if let Some(ref extends_value) = fragment.extends {
121        let base_path = resolve_extends_path(extends_value, config_file_path)?;
122
123        if !base_path.exists() {
124            return Err(ConfigError::ExtendsNotFound {
125                path: base_path.display().to_string(),
126                from: path_str.clone(),
127            });
128        }
129
130        log::debug!(
131            "[rumdl-config] Config {} extends {}, loading base first",
132            path_str,
133            base_path.display()
134        );
135
136        // Recursively load the base config
137        load_config_with_extends(sourced_config, &base_path, visited, chain_source)?;
138    }
139
140    // Merge this fragment on top (base config was already merged if present)
141    // Strip the `extends` field since it's been consumed
142    let mut fragment_for_merge = fragment;
143    fragment_for_merge.extends = None;
144    sourced_config.merge(fragment_for_merge);
145    sourced_config.loaded_files.push(path_str);
146
147    Ok(())
148}
149
150impl SourcedConfig<ConfigLoaded> {
151    /// Merges another SourcedConfigFragment into this SourcedConfig.
152    /// Uses source precedence to determine which values take effect.
153    pub(super) fn merge(&mut self, fragment: SourcedConfigFragment) {
154        // Merge global config
155        // Enable uses replace semantics (project can enforce rules)
156        self.global.enable.merge_override(
157            fragment.global.enable.value,
158            fragment.global.enable.source,
159            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
160            fragment.global.enable.overrides.first().and_then(|o| o.line),
161        );
162
163        // Disable uses replace semantics (child config overrides parent, matching Ruff's `ignore`)
164        self.global.disable.merge_override(
165            fragment.global.disable.value,
166            fragment.global.disable.source,
167            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
168            fragment.global.disable.overrides.first().and_then(|o| o.line),
169        );
170
171        // Extend-enable uses union semantics (additive across config levels)
172        self.global.extend_enable.merge_union(
173            fragment.global.extend_enable.value,
174            fragment.global.extend_enable.source,
175            fragment
176                .global
177                .extend_enable
178                .overrides
179                .first()
180                .and_then(|o| o.file.clone()),
181            fragment.global.extend_enable.overrides.first().and_then(|o| o.line),
182        );
183
184        // Extend-disable uses union semantics (additive across config levels)
185        self.global.extend_disable.merge_union(
186            fragment.global.extend_disable.value,
187            fragment.global.extend_disable.source,
188            fragment
189                .global
190                .extend_disable
191                .overrides
192                .first()
193                .and_then(|o| o.file.clone()),
194            fragment.global.extend_disable.overrides.first().and_then(|o| o.line),
195        );
196
197        // Conflict resolution: Enable overrides disable
198        // Remove any rules from disable that appear in enable
199        self.global
200            .disable
201            .value
202            .retain(|rule| !self.global.enable.value.contains(rule));
203        self.global.include.merge_override(
204            fragment.global.include.value,
205            fragment.global.include.source,
206            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
207            fragment.global.include.overrides.first().and_then(|o| o.line),
208        );
209        self.global.exclude.merge_override(
210            fragment.global.exclude.value,
211            fragment.global.exclude.source,
212            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
213            fragment.global.exclude.overrides.first().and_then(|o| o.line),
214        );
215        self.global.respect_gitignore.merge_override(
216            fragment.global.respect_gitignore.value,
217            fragment.global.respect_gitignore.source,
218            fragment
219                .global
220                .respect_gitignore
221                .overrides
222                .first()
223                .and_then(|o| o.file.clone()),
224            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
225        );
226        self.global.line_length.merge_override(
227            fragment.global.line_length.value,
228            fragment.global.line_length.source,
229            fragment
230                .global
231                .line_length
232                .overrides
233                .first()
234                .and_then(|o| o.file.clone()),
235            fragment.global.line_length.overrides.first().and_then(|o| o.line),
236        );
237        self.global.fixable.merge_override(
238            fragment.global.fixable.value,
239            fragment.global.fixable.source,
240            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
241            fragment.global.fixable.overrides.first().and_then(|o| o.line),
242        );
243        self.global.unfixable.merge_override(
244            fragment.global.unfixable.value,
245            fragment.global.unfixable.source,
246            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
247            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
248        );
249
250        // Merge flavor
251        self.global.flavor.merge_override(
252            fragment.global.flavor.value,
253            fragment.global.flavor.source,
254            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
255            fragment.global.flavor.overrides.first().and_then(|o| o.line),
256        );
257
258        // Merge force_exclude
259        self.global.force_exclude.merge_override(
260            fragment.global.force_exclude.value,
261            fragment.global.force_exclude.source,
262            fragment
263                .global
264                .force_exclude
265                .overrides
266                .first()
267                .and_then(|o| o.file.clone()),
268            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
269        );
270
271        // Merge output_format if present
272        if let Some(output_format_fragment) = fragment.global.output_format {
273            if let Some(ref mut output_format) = self.global.output_format {
274                output_format.merge_override(
275                    output_format_fragment.value,
276                    output_format_fragment.source,
277                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
278                    output_format_fragment.overrides.first().and_then(|o| o.line),
279                );
280            } else {
281                self.global.output_format = Some(output_format_fragment);
282            }
283        }
284
285        // Merge cache_dir if present
286        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
287            if let Some(ref mut cache_dir) = self.global.cache_dir {
288                cache_dir.merge_override(
289                    cache_dir_fragment.value,
290                    cache_dir_fragment.source,
291                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
292                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
293                );
294            } else {
295                self.global.cache_dir = Some(cache_dir_fragment);
296            }
297        }
298
299        // Merge cache if not default (only override when explicitly set)
300        if fragment.global.cache.source != ConfigSource::Default {
301            self.global.cache.merge_override(
302                fragment.global.cache.value,
303                fragment.global.cache.source,
304                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
305                fragment.global.cache.overrides.first().and_then(|o| o.line),
306            );
307        }
308
309        // Merge per_file_ignores
310        self.per_file_ignores.merge_override(
311            fragment.per_file_ignores.value,
312            fragment.per_file_ignores.source,
313            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
314            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
315        );
316
317        // Merge per_file_flavor
318        self.per_file_flavor.merge_override(
319            fragment.per_file_flavor.value,
320            fragment.per_file_flavor.source,
321            fragment.per_file_flavor.overrides.first().and_then(|o| o.file.clone()),
322            fragment.per_file_flavor.overrides.first().and_then(|o| o.line),
323        );
324
325        // Merge code_block_tools
326        self.code_block_tools.merge_override(
327            fragment.code_block_tools.value,
328            fragment.code_block_tools.source,
329            fragment.code_block_tools.overrides.first().and_then(|o| o.file.clone()),
330            fragment.code_block_tools.overrides.first().and_then(|o| o.line),
331        );
332
333        // Merge rule configs
334        for (rule_name, rule_fragment) in fragment.rules {
335            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
336            let rule_entry = self.rules.entry(norm_rule_name).or_default();
337
338            // Merge severity if present in fragment
339            if let Some(severity_fragment) = rule_fragment.severity {
340                if let Some(ref mut existing_severity) = rule_entry.severity {
341                    existing_severity.merge_override(
342                        severity_fragment.value,
343                        severity_fragment.source,
344                        severity_fragment.overrides.first().and_then(|o| o.file.clone()),
345                        severity_fragment.overrides.first().and_then(|o| o.line),
346                    );
347                } else {
348                    rule_entry.severity = Some(severity_fragment);
349                }
350            }
351
352            // Merge values
353            for (key, sourced_value_fragment) in rule_fragment.values {
354                let sv_entry = rule_entry
355                    .values
356                    .entry(key.clone())
357                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
358                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
359                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
360                sv_entry.merge_override(
361                    sourced_value_fragment.value,  // Use the value from the fragment
362                    sourced_value_fragment.source, // Use the source from the fragment
363                    file_from_fragment,            // Pass the file path from the fragment override
364                    line_from_fragment,            // Pass the line number from the fragment override
365                );
366            }
367        }
368
369        // Merge unknown_keys from fragment
370        for (section, key, file_path) in fragment.unknown_keys {
371            // Deduplicate: only add if not already present
372            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
373                self.unknown_keys.push((section, key, file_path));
374            }
375        }
376    }
377
    /// Load and merge configurations from files and CLI overrides.
    ///
    /// Thin wrapper: forwards `config_path` and `cli_overrides` to
    /// `load_with_discovery` with its third argument fixed to `false`.
    /// NOTE(review): that argument is presumably `skip_auto_discovery` (so discovery
    /// stays enabled) — confirm against `load_with_discovery`.
    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
        Self::load_with_discovery(config_path, cli_overrides, false)
    }
382
383    /// Finds project root by walking up from start_dir looking for .git directory.
384    /// Falls back to start_dir if no .git found.
385    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
386        // Convert relative paths to absolute to ensure correct traversal
387        let mut current = if start_dir.is_relative() {
388            std::env::current_dir()
389                .map(|cwd| cwd.join(start_dir))
390                .unwrap_or_else(|_| start_dir.to_path_buf())
391        } else {
392            start_dir.to_path_buf()
393        };
394        const MAX_DEPTH: usize = 100;
395
396        for _ in 0..MAX_DEPTH {
397            if current.join(".git").exists() {
398                log::debug!("[rumdl-config] Found .git at: {}", current.display());
399                return current;
400            }
401
402            match current.parent() {
403                Some(parent) => current = parent.to_path_buf(),
404                None => break,
405            }
406        }
407
408        // No .git found, use start_dir as project root
409        log::debug!(
410            "[rumdl-config] No .git found, using config location as project root: {}",
411            start_dir.display()
412        );
413        start_dir.to_path_buf()
414    }
415
416    /// Discover configuration file by traversing up the directory tree.
417    /// Returns the first configuration file found.
418    /// Discovers config file and returns both the config path and project root.
419    /// Returns: (config_file_path, project_root_path)
420    /// Project root is the directory containing .git, or config parent as fallback.
421    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
422        use std::env;
423
424        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
425        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
426
427        let start_dir = match env::current_dir() {
428            Ok(dir) => dir,
429            Err(e) => {
430                log::debug!("[rumdl-config] Failed to get current directory: {e}");
431                return None;
432            }
433        };
434
435        let mut current_dir = start_dir.clone();
436        let mut depth = 0;
437        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
438
439        loop {
440            if depth >= MAX_DEPTH {
441                log::debug!("[rumdl-config] Maximum traversal depth reached");
442                break;
443            }
444
445            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
446
447            // Check for config files in order of precedence (only if not already found)
448            if found_config.is_none() {
449                for config_name in CONFIG_FILES {
450                    let config_path = current_dir.join(config_name);
451
452                    if config_path.exists() {
453                        // For pyproject.toml, verify it contains [tool.rumdl] section
454                        if *config_name == "pyproject.toml" {
455                            if let Ok(content) = std::fs::read_to_string(&config_path) {
456                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
457                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
458                                    // Store config, but continue looking for .git
459                                    found_config = Some((config_path.clone(), current_dir.clone()));
460                                    break;
461                                }
462                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
463                                continue;
464                            }
465                        } else {
466                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
467                            // Store config, but continue looking for .git
468                            found_config = Some((config_path.clone(), current_dir.clone()));
469                            break;
470                        }
471                    }
472                }
473            }
474
475            // Check for .git directory (stop boundary)
476            if current_dir.join(".git").exists() {
477                log::debug!("[rumdl-config] Stopping at .git directory");
478                break;
479            }
480
481            // Move to parent directory
482            match current_dir.parent() {
483                Some(parent) => {
484                    current_dir = parent.to_owned();
485                    depth += 1;
486                }
487                None => {
488                    log::debug!("[rumdl-config] Reached filesystem root");
489                    break;
490                }
491            }
492        }
493
494        // If config found, determine project root by walking up from config location
495        if let Some((config_path, config_dir)) = found_config {
496            let project_root = Self::find_project_root_from(&config_dir);
497            return Some((config_path, project_root));
498        }
499
500        None
501    }
502
503    /// Discover markdownlint configuration file by traversing up the directory tree.
504    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
505    /// Returns the path to the config file if found.
506    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
507        use std::env;
508
509        const MAX_DEPTH: usize = 100;
510
511        let start_dir = match env::current_dir() {
512            Ok(dir) => dir,
513            Err(e) => {
514                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
515                return None;
516            }
517        };
518
519        let mut current_dir = start_dir.clone();
520        let mut depth = 0;
521
522        loop {
523            if depth >= MAX_DEPTH {
524                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
525                break;
526            }
527
528            log::debug!(
529                "[rumdl-config] Searching for markdownlint config in: {}",
530                current_dir.display()
531            );
532
533            // Check for markdownlint config files in order of precedence
534            for config_name in MARKDOWNLINT_CONFIG_FILES {
535                let config_path = current_dir.join(config_name);
536                if config_path.exists() {
537                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
538                    return Some(config_path);
539                }
540            }
541
542            // Check for .git directory (stop boundary)
543            if current_dir.join(".git").exists() {
544                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
545                break;
546            }
547
548            // Move to parent directory
549            match current_dir.parent() {
550                Some(parent) => {
551                    current_dir = parent.to_owned();
552                    depth += 1;
553                }
554                None => {
555                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
556                    break;
557                }
558            }
559        }
560
561        None
562    }
563
564    /// Internal implementation that accepts config directory for testing
565    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
566        let config_dir = config_dir.join("rumdl");
567
568        // Check for config files in precedence order (same as project discovery)
569        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
570
571        log::debug!(
572            "[rumdl-config] Checking for user configuration in: {}",
573            config_dir.display()
574        );
575
576        for filename in USER_CONFIG_FILES {
577            let config_path = config_dir.join(filename);
578
579            if config_path.exists() {
580                // For pyproject.toml, verify it contains [tool.rumdl] section
581                if *filename == "pyproject.toml" {
582                    if let Ok(content) = std::fs::read_to_string(&config_path) {
583                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
584                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
585                            return Some(config_path);
586                        }
587                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
588                        continue;
589                    }
590                } else {
591                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
592                    return Some(config_path);
593                }
594            }
595        }
596
597        log::debug!(
598            "[rumdl-config] No user configuration found in: {}",
599            config_dir.display()
600        );
601        None
602    }
603
604    /// Discover user-level configuration file from platform-specific config directory.
605    /// Returns the first configuration file found in the user config directory.
606    #[cfg(feature = "native")]
607    fn user_configuration_path() -> Option<std::path::PathBuf> {
608        use etcetera::{BaseStrategy, choose_base_strategy};
609
610        match choose_base_strategy() {
611            Ok(strategy) => {
612                let config_dir = strategy.config_dir();
613                Self::user_configuration_path_impl(&config_dir)
614            }
615            Err(e) => {
616                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
617                None
618            }
619        }
620    }
621
    /// Stub compiled when the `native` feature is disabled (e.g. WASM builds).
    ///
    /// User-level configuration is not supported in that configuration, so this
    /// always returns `None`.
    #[cfg(not(feature = "native"))]
    fn user_configuration_path() -> Option<std::path::PathBuf> {
        None
    }
627
628    /// Load an explicit config file (standalone, no user config merging)
629    fn load_explicit_config(sourced_config: &mut Self, path: &str) -> Result<(), ConfigError> {
630        let path_obj = Path::new(path);
631        let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
632        let path_str = path.to_string();
633
634        log::debug!("[rumdl-config] Loading explicit config file: {filename}");
635
636        // Find project root by walking up from config location looking for .git
637        if let Some(config_parent) = path_obj.parent() {
638            let project_root = Self::find_project_root_from(config_parent);
639            log::debug!(
640                "[rumdl-config] Project root (from explicit config): {}",
641                project_root.display()
642            );
643            sourced_config.project_root = Some(project_root);
644        }
645
646        // Known markdownlint config files
647        const MARKDOWNLINT_FILENAMES: &[&str] = &[
648            ".markdownlint-cli2.jsonc",
649            ".markdownlint-cli2.yaml",
650            ".markdownlint-cli2.yml",
651            ".markdownlint.json",
652            ".markdownlint.yaml",
653            ".markdownlint.yml",
654        ];
655
656        if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
657            // Use extends-aware loading for rumdl TOML configs
658            let mut visited = IndexSet::new();
659            let chain_source = source_from_filename(filename);
660            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
661        } else if MARKDOWNLINT_FILENAMES.contains(&filename)
662            || path_str.ends_with(".json")
663            || path_str.ends_with(".jsonc")
664            || path_str.ends_with(".yaml")
665            || path_str.ends_with(".yml")
666        {
667            // Parse as markdownlint config (JSON/YAML) - no extends support
668            let fragment = parsers::load_from_markdownlint(&path_str)?;
669            sourced_config.merge(fragment);
670            sourced_config.loaded_files.push(path_str);
671        } else {
672            // Try TOML with extends support
673            let mut visited = IndexSet::new();
674            let chain_source = source_from_filename(filename);
675            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
676        }
677
678        Ok(())
679    }
680
681    /// Load user config as fallback when no project config exists
682    fn load_user_config_as_fallback(
683        sourced_config: &mut Self,
684        user_config_dir: Option<&Path>,
685    ) -> Result<(), ConfigError> {
686        let user_config_path = if let Some(dir) = user_config_dir {
687            Self::user_configuration_path_impl(dir)
688        } else {
689            Self::user_configuration_path()
690        };
691
692        if let Some(user_config_path) = user_config_path {
693            let path_str = user_config_path.display().to_string();
694
695            log::debug!("[rumdl-config] Loading user config as fallback: {path_str}");
696
697            // User config fallback also supports extends chains.
698            // Use a uniform source across the chain so child overrides are determined by chain order.
699            let mut visited = IndexSet::new();
700            load_config_with_extends(
701                sourced_config,
702                &user_config_path,
703                &mut visited,
704                ConfigSource::UserConfig,
705            )?;
706        } else {
707            log::debug!("[rumdl-config] No user configuration file found");
708        }
709
710        Ok(())
711    }
712
713    /// Internal implementation that accepts user config directory for testing
714    #[doc(hidden)]
715    pub fn load_with_discovery_impl(
716        config_path: Option<&str>,
717        cli_overrides: Option<&SourcedGlobalConfig>,
718        skip_auto_discovery: bool,
719        user_config_dir: Option<&Path>,
720    ) -> Result<Self, ConfigError> {
721        use std::env;
722        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
723
724        let mut sourced_config = SourcedConfig::default();
725
726        // Ruff model: Project config is standalone, user config is fallback only
727        //
728        // Priority order:
729        // 1. If explicit config path provided → use ONLY that (standalone)
730        // 2. Else if project config discovered → use ONLY that (standalone)
731        // 3. Else if user config exists → use it as fallback
732        // 4. CLI overrides always apply last
733        //
734        // This ensures project configs are reproducible across machines and
735        // CI/local runs behave identically.
736
737        // Explicit config path always takes precedence
738        if let Some(path) = config_path {
739            // Explicit config path provided - use ONLY this config (standalone)
740            log::debug!("[rumdl-config] Explicit config_path provided: {path:?}");
741            Self::load_explicit_config(&mut sourced_config, path)?;
742        } else if skip_auto_discovery {
743            log::debug!("[rumdl-config] Skipping config discovery due to --no-config/--isolated flag");
744            // No config loading, just apply CLI overrides at the end
745        } else {
746            // No explicit path - try auto-discovery
747            log::debug!("[rumdl-config] No explicit config_path, searching default locations");
748
749            // Try to discover project config first
750            if let Some((config_file, project_root)) = Self::discover_config_upward() {
751                // Project config found - use ONLY this (standalone, no user config)
752                log::debug!("[rumdl-config] Found project config: {}", config_file.display());
753                log::debug!("[rumdl-config] Project root: {}", project_root.display());
754
755                sourced_config.project_root = Some(project_root);
756
757                // Use extends-aware loading for discovered configs
758                let mut visited = IndexSet::new();
759                let root_filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
760                let chain_source = source_from_filename(root_filename);
761                load_config_with_extends(&mut sourced_config, &config_file, &mut visited, chain_source)?;
762            } else {
763                // No rumdl project config - try markdownlint config
764                log::debug!("[rumdl-config] No rumdl config found, checking markdownlint config");
765
766                if let Some(markdownlint_path) = Self::discover_markdownlint_config_upward() {
767                    let path_str = markdownlint_path.display().to_string();
768                    log::debug!("[rumdl-config] Found markdownlint config: {path_str}");
769                    match parsers::load_from_markdownlint(&path_str) {
770                        Ok(fragment) => {
771                            sourced_config.merge(fragment);
772                            sourced_config.loaded_files.push(path_str);
773                        }
774                        Err(_e) => {
775                            log::debug!("[rumdl-config] Failed to load markdownlint config, trying user config");
776                            Self::load_user_config_as_fallback(&mut sourced_config, user_config_dir)?;
777                        }
778                    }
779                } else {
780                    // No project config at all - use user config as fallback
781                    log::debug!("[rumdl-config] No project config found, using user config as fallback");
782                    Self::load_user_config_as_fallback(&mut sourced_config, user_config_dir)?;
783                }
784            }
785        }
786
787        // Apply CLI overrides (highest precedence)
788        if let Some(cli) = cli_overrides {
789            sourced_config
790                .global
791                .enable
792                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
793            sourced_config
794                .global
795                .disable
796                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
797            sourced_config
798                .global
799                .exclude
800                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
801            sourced_config
802                .global
803                .include
804                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
805            sourced_config.global.respect_gitignore.merge_override(
806                cli.respect_gitignore.value,
807                ConfigSource::Cli,
808                None,
809                None,
810            );
811            sourced_config
812                .global
813                .fixable
814                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
815            sourced_config
816                .global
817                .unfixable
818                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
819            // No rule-specific CLI overrides implemented yet
820        }
821
822        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
823
824        Ok(sourced_config)
825    }
826
827    /// Load and merge configurations from files and CLI overrides.
828    /// If skip_auto_discovery is true, only explicit config paths are loaded.
829    pub fn load_with_discovery(
830        config_path: Option<&str>,
831        cli_overrides: Option<&SourcedGlobalConfig>,
832        skip_auto_discovery: bool,
833    ) -> Result<Self, ConfigError> {
834        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
835    }
836
837    /// Validate the configuration against a rule registry.
838    ///
839    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
840    /// enabling conversion to `Config`. Validation warnings are stored in the config
841    /// and can be displayed to the user.
842    ///
843    /// # Example
844    ///
845    /// ```ignore
846    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
847    /// let validated = loaded.validate(&registry)?;
848    /// let config: Config = validated.into();
849    /// ```
850    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
851        let warnings = validate_config_sourced_internal(&self, registry);
852
853        Ok(SourcedConfig {
854            global: self.global,
855            per_file_ignores: self.per_file_ignores,
856            per_file_flavor: self.per_file_flavor,
857            code_block_tools: self.code_block_tools,
858            rules: self.rules,
859            loaded_files: self.loaded_files,
860            unknown_keys: self.unknown_keys,
861            project_root: self.project_root,
862            validation_warnings: warnings,
863            _state: PhantomData,
864        })
865    }
866
867    /// Validate and convert to Config in one step (convenience method).
868    ///
869    /// This combines `validate()` and `into()` for callers who want the
870    /// validation warnings separately.
871    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
872        let validated = self.validate(registry)?;
873        let warnings = validated.validation_warnings.clone();
874        Ok((validated.into(), warnings))
875    }
876
877    /// Skip validation and convert directly to ConfigValidated state.
878    ///
879    /// # Safety
880    ///
881    /// This method bypasses validation. Use only when:
882    /// - You've already validated via `validate_config_sourced()`
883    /// - You're in test code that doesn't need validation
884    /// - You're migrating legacy code and will add proper validation later
885    ///
886    /// Prefer `validate()` for new code.
887    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
888        SourcedConfig {
889            global: self.global,
890            per_file_ignores: self.per_file_ignores,
891            per_file_flavor: self.per_file_flavor,
892            code_block_tools: self.code_block_tools,
893            rules: self.rules,
894            loaded_files: self.loaded_files,
895            unknown_keys: self.unknown_keys,
896            project_root: self.project_root,
897            validation_warnings: Vec::new(),
898            _state: PhantomData,
899        }
900    }
901
902    /// Discover the nearest config file for a specific directory,
903    /// walking upward to `project_root` (inclusive).
904    ///
905    /// Searches for rumdl config files (`.rumdl.toml`, `rumdl.toml`,
906    /// `.config/rumdl.toml`, `pyproject.toml` with `[tool.rumdl]`) and
907    /// markdownlint config files at each directory level.
908    ///
909    /// Returns the config file path if found. Does NOT use CWD.
910    pub fn discover_config_for_dir(dir: &Path, project_root: &Path) -> Option<PathBuf> {
911        const RUMDL_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
912
913        let mut current_dir = dir.to_path_buf();
914
915        loop {
916            // Check rumdl config files first (higher precedence)
917            for config_name in RUMDL_CONFIG_FILES {
918                let config_path = current_dir.join(config_name);
919                if config_path.exists() {
920                    if *config_name == "pyproject.toml" {
921                        if let Ok(content) = std::fs::read_to_string(&config_path)
922                            && (content.contains("[tool.rumdl]") || content.contains("tool.rumdl"))
923                        {
924                            return Some(config_path);
925                        }
926                        continue;
927                    }
928                    return Some(config_path);
929                }
930            }
931
932            // Check markdownlint config files (lower precedence)
933            for config_name in MARKDOWNLINT_CONFIG_FILES {
934                let config_path = current_dir.join(config_name);
935                if config_path.exists() {
936                    return Some(config_path);
937                }
938            }
939
940            // Stop at project root (inclusive - we already checked it)
941            if current_dir == project_root {
942                break;
943            }
944
945            // Move to parent directory
946            match current_dir.parent() {
947                Some(parent) => current_dir = parent.to_path_buf(),
948                None => break,
949            }
950        }
951
952        None
953    }
954
955    /// Load a config from a specific file path, with extends resolution.
956    ///
957    /// Creates a fresh `SourcedConfig`, loads the config file using the
958    /// appropriate parser, and converts to `Config`. Used for per-directory
959    /// config loading where each subdirectory config is standalone.
960    pub fn load_config_for_path(config_path: &Path, project_root: &Path) -> Result<Config, ConfigError> {
961        let mut sourced_config = SourcedConfig {
962            project_root: Some(project_root.to_path_buf()),
963            ..SourcedConfig::default()
964        };
965
966        let filename = config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
967        let path_str = config_path.display().to_string();
968
969        // Determine if this is a markdownlint config or rumdl config
970        let is_markdownlint = MARKDOWNLINT_CONFIG_FILES.contains(&filename)
971            || (filename != "pyproject.toml"
972                && filename != ".rumdl.toml"
973                && filename != "rumdl.toml"
974                && (path_str.ends_with(".json")
975                    || path_str.ends_with(".jsonc")
976                    || path_str.ends_with(".yaml")
977                    || path_str.ends_with(".yml")));
978
979        if is_markdownlint {
980            let fragment = parsers::load_from_markdownlint(&path_str)?;
981            sourced_config.merge(fragment);
982            sourced_config.loaded_files.push(path_str);
983        } else {
984            let mut visited = IndexSet::new();
985            let chain_source = source_from_filename(filename);
986            load_config_with_extends(&mut sourced_config, config_path, &mut visited, chain_source)?;
987        }
988
989        Ok(sourced_config.into_validated_unchecked().into())
990    }
991}
992
993/// Convert a validated configuration to the final Config type.
994///
995/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
996/// ensuring that validation must occur before conversion.
997impl From<SourcedConfig<ConfigValidated>> for Config {
998    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
999        let mut rules = BTreeMap::new();
1000        for (rule_name, sourced_rule_cfg) in sourced.rules {
1001            // Normalize rule name to uppercase for case-insensitive lookup
1002            let normalized_rule_name = rule_name.to_ascii_uppercase();
1003            let severity = sourced_rule_cfg.severity.map(|sv| sv.value);
1004            let mut values = BTreeMap::new();
1005            for (key, sourced_val) in sourced_rule_cfg.values {
1006                values.insert(key, sourced_val.value);
1007            }
1008            rules.insert(normalized_rule_name, RuleConfig { severity, values });
1009        }
1010        // Enable is "explicit" if it was set by something other than the Default source
1011        let enable_is_explicit = sourced.global.enable.source != ConfigSource::Default;
1012
1013        #[allow(deprecated)]
1014        let global = GlobalConfig {
1015            enable: sourced.global.enable.value,
1016            disable: sourced.global.disable.value,
1017            exclude: sourced.global.exclude.value,
1018            include: sourced.global.include.value,
1019            respect_gitignore: sourced.global.respect_gitignore.value,
1020            line_length: sourced.global.line_length.value,
1021            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
1022            fixable: sourced.global.fixable.value,
1023            unfixable: sourced.global.unfixable.value,
1024            flavor: sourced.global.flavor.value,
1025            force_exclude: sourced.global.force_exclude.value,
1026            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
1027            cache: sourced.global.cache.value,
1028            extend_enable: sourced.global.extend_enable.value,
1029            extend_disable: sourced.global.extend_disable.value,
1030            enable_is_explicit,
1031        };
1032
1033        let mut config = Config {
1034            extends: None,
1035            global,
1036            per_file_ignores: sourced.per_file_ignores.value,
1037            per_file_flavor: sourced.per_file_flavor.value,
1038            code_block_tools: sourced.code_block_tools.value,
1039            rules,
1040            project_root: sourced.project_root,
1041            per_file_ignores_cache: Arc::new(OnceLock::new()),
1042            per_file_flavor_cache: Arc::new(OnceLock::new()),
1043        };
1044
1045        // Apply per-rule `enabled = true/false` to global enable/disable lists
1046        config.apply_per_rule_enabled();
1047
1048        config
1049    }
1050}