Skip to main content

rumdl_lib/config/
loading.rs

1use indexmap::IndexSet;
2use std::collections::BTreeMap;
3use std::marker::PhantomData;
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, OnceLock};
6
7use super::flavor::ConfigLoaded;
8use super::flavor::ConfigValidated;
9use super::parsers;
10use super::registry::RuleRegistry;
11use super::source_tracking::{
12    ConfigSource, ConfigValidationWarning, SourcedConfig, SourcedConfigFragment, SourcedGlobalConfig, SourcedValue,
13};
14use super::types::{Config, ConfigError, GlobalConfig, MARKDOWNLINT_CONFIG_FILES, RuleConfig};
15use super::validation::validate_config_sourced_internal;
16
/// Maximum number of config files allowed in a single `extends` chain.
///
/// `load_config_with_extends` compares the number of already-visited files against
/// this limit and fails with `ConfigError::ExtendsDepthExceeded` once it is reached,
/// which prevents runaway recursion.
const MAX_EXTENDS_DEPTH: usize = 10;
19
20/// Resolve an `extends` path relative to the config file that contains it.
21///
22/// - `~/` prefix: expanded to home directory
23/// - Relative paths: resolved against the config file's parent directory
24/// - Absolute paths: used as-is
25fn resolve_extends_path(extends_value: &str, config_file_path: &Path) -> Result<PathBuf, ConfigError> {
26    let path = if let Some(suffix) = extends_value.strip_prefix("~/") {
27        // Expand tilde to home directory
28        #[cfg(feature = "native")]
29        {
30            use etcetera::{BaseStrategy, choose_base_strategy};
31            let home = choose_base_strategy()
32                .map(|s| s.home_dir().to_path_buf())
33                .unwrap_or_else(|_| PathBuf::from("~"));
34            home.join(suffix)
35        }
36        #[cfg(not(feature = "native"))]
37        {
38            let _ = suffix;
39            PathBuf::from(extends_value)
40        }
41    } else {
42        let path = PathBuf::from(extends_value);
43        if path.is_absolute() {
44            path
45        } else {
46            // Resolve relative to config file's directory
47            let config_dir = config_file_path.parent().unwrap_or(Path::new("."));
48            config_dir.join(extends_value)
49        }
50    };
51
52    Ok(path)
53}
54
55/// Determine ConfigSource from a config filename.
56fn source_from_filename(filename: &str) -> ConfigSource {
57    if filename == "pyproject.toml" {
58        ConfigSource::PyprojectToml
59    } else {
60        ConfigSource::ProjectConfig
61    }
62}
63
64/// Load a config file (and any base configs it extends) into a SourcedConfig.
65///
66/// This function handles the recursive `extends` chain:
67/// 1. Parse the config file into a fragment
68/// 2. If the fragment has `extends`, recursively load the base config first
69/// 3. Merge the base config, then merge this fragment on top
70fn load_config_with_extends(
71    sourced_config: &mut SourcedConfig<ConfigLoaded>,
72    config_file_path: &Path,
73    visited: &mut IndexSet<PathBuf>,
74    chain_source: ConfigSource,
75) -> Result<(), ConfigError> {
76    // Canonicalize the path for circular reference detection
77    let canonical = config_file_path
78        .canonicalize()
79        .unwrap_or_else(|_| config_file_path.to_path_buf());
80
81    // Check for circular references
82    if visited.contains(&canonical) {
83        let chain: Vec<String> = visited.iter().map(|p| p.display().to_string()).collect();
84        return Err(ConfigError::CircularExtends {
85            path: config_file_path.display().to_string(),
86            chain,
87        });
88    }
89
90    // Check depth limit
91    if visited.len() >= MAX_EXTENDS_DEPTH {
92        return Err(ConfigError::ExtendsDepthExceeded {
93            path: config_file_path.display().to_string(),
94            max_depth: MAX_EXTENDS_DEPTH,
95        });
96    }
97
98    // Mark as visited
99    visited.insert(canonical);
100
101    let path_str = config_file_path.display().to_string();
102    let filename = config_file_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
103
104    // Read and parse the config file
105    let content = std::fs::read_to_string(config_file_path).map_err(|e| ConfigError::IoError {
106        source: e,
107        path: path_str.clone(),
108    })?;
109
110    let fragment = if filename == "pyproject.toml" {
111        match parsers::parse_pyproject_toml(&content, &path_str, chain_source)? {
112            Some(f) => f,
113            None => return Ok(()), // No [tool.rumdl] section
114        }
115    } else {
116        parsers::parse_rumdl_toml(&content, &path_str, chain_source)?
117    };
118
119    // If this fragment has `extends`, load the base config first
120    if let Some(ref extends_value) = fragment.extends {
121        let base_path = resolve_extends_path(extends_value, config_file_path)?;
122
123        if !base_path.exists() {
124            return Err(ConfigError::ExtendsNotFound {
125                path: base_path.display().to_string(),
126                from: path_str.clone(),
127            });
128        }
129
130        log::debug!(
131            "[rumdl-config] Config {} extends {}, loading base first",
132            path_str,
133            base_path.display()
134        );
135
136        // Recursively load the base config
137        load_config_with_extends(sourced_config, &base_path, visited, chain_source)?;
138    }
139
140    // Merge this fragment on top (base config was already merged if present)
141    // Strip the `extends` field since it's been consumed
142    let mut fragment_for_merge = fragment;
143    fragment_for_merge.extends = None;
144    sourced_config.merge(fragment_for_merge);
145    sourced_config.loaded_files.push(path_str);
146
147    Ok(())
148}
149
150impl SourcedConfig<ConfigLoaded> {
151    /// Merges another SourcedConfigFragment into this SourcedConfig.
152    /// Uses source precedence to determine which values take effect.
153    pub(super) fn merge(&mut self, fragment: SourcedConfigFragment) {
154        // Merge global config
155        // Enable uses replace semantics (project can enforce rules)
156        self.global.enable.merge_override(
157            fragment.global.enable.value,
158            fragment.global.enable.source,
159            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
160            fragment.global.enable.overrides.first().and_then(|o| o.line),
161        );
162
163        // Disable uses replace semantics (child config overrides parent, matching Ruff's `ignore`)
164        self.global.disable.merge_override(
165            fragment.global.disable.value,
166            fragment.global.disable.source,
167            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
168            fragment.global.disable.overrides.first().and_then(|o| o.line),
169        );
170
171        // Extend-enable uses union semantics (additive across config levels)
172        self.global.extend_enable.merge_union(
173            fragment.global.extend_enable.value,
174            fragment.global.extend_enable.source,
175            fragment
176                .global
177                .extend_enable
178                .overrides
179                .first()
180                .and_then(|o| o.file.clone()),
181            fragment.global.extend_enable.overrides.first().and_then(|o| o.line),
182        );
183
184        // Extend-disable uses union semantics (additive across config levels)
185        self.global.extend_disable.merge_union(
186            fragment.global.extend_disable.value,
187            fragment.global.extend_disable.source,
188            fragment
189                .global
190                .extend_disable
191                .overrides
192                .first()
193                .and_then(|o| o.file.clone()),
194            fragment.global.extend_disable.overrides.first().and_then(|o| o.line),
195        );
196
197        // Conflict resolution: Enable overrides disable
198        // Remove any rules from disable that appear in enable
199        self.global
200            .disable
201            .value
202            .retain(|rule| !self.global.enable.value.contains(rule));
203        self.global.include.merge_override(
204            fragment.global.include.value,
205            fragment.global.include.source,
206            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
207            fragment.global.include.overrides.first().and_then(|o| o.line),
208        );
209        self.global.exclude.merge_override(
210            fragment.global.exclude.value,
211            fragment.global.exclude.source,
212            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
213            fragment.global.exclude.overrides.first().and_then(|o| o.line),
214        );
215        self.global.respect_gitignore.merge_override(
216            fragment.global.respect_gitignore.value,
217            fragment.global.respect_gitignore.source,
218            fragment
219                .global
220                .respect_gitignore
221                .overrides
222                .first()
223                .and_then(|o| o.file.clone()),
224            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
225        );
226        self.global.line_length.merge_override(
227            fragment.global.line_length.value,
228            fragment.global.line_length.source,
229            fragment
230                .global
231                .line_length
232                .overrides
233                .first()
234                .and_then(|o| o.file.clone()),
235            fragment.global.line_length.overrides.first().and_then(|o| o.line),
236        );
237        self.global.fixable.merge_override(
238            fragment.global.fixable.value,
239            fragment.global.fixable.source,
240            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
241            fragment.global.fixable.overrides.first().and_then(|o| o.line),
242        );
243        self.global.unfixable.merge_override(
244            fragment.global.unfixable.value,
245            fragment.global.unfixable.source,
246            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
247            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
248        );
249
250        // Merge flavor
251        self.global.flavor.merge_override(
252            fragment.global.flavor.value,
253            fragment.global.flavor.source,
254            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
255            fragment.global.flavor.overrides.first().and_then(|o| o.line),
256        );
257
258        // Merge force_exclude
259        self.global.force_exclude.merge_override(
260            fragment.global.force_exclude.value,
261            fragment.global.force_exclude.source,
262            fragment
263                .global
264                .force_exclude
265                .overrides
266                .first()
267                .and_then(|o| o.file.clone()),
268            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
269        );
270
271        // Merge output_format if present
272        if let Some(output_format_fragment) = fragment.global.output_format {
273            if let Some(ref mut output_format) = self.global.output_format {
274                output_format.merge_override(
275                    output_format_fragment.value,
276                    output_format_fragment.source,
277                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
278                    output_format_fragment.overrides.first().and_then(|o| o.line),
279                );
280            } else {
281                self.global.output_format = Some(output_format_fragment);
282            }
283        }
284
285        // Merge cache_dir if present
286        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
287            if let Some(ref mut cache_dir) = self.global.cache_dir {
288                cache_dir.merge_override(
289                    cache_dir_fragment.value,
290                    cache_dir_fragment.source,
291                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
292                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
293                );
294            } else {
295                self.global.cache_dir = Some(cache_dir_fragment);
296            }
297        }
298
299        // Merge cache if not default (only override when explicitly set)
300        if fragment.global.cache.source != ConfigSource::Default {
301            self.global.cache.merge_override(
302                fragment.global.cache.value,
303                fragment.global.cache.source,
304                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
305                fragment.global.cache.overrides.first().and_then(|o| o.line),
306            );
307        }
308
309        // Merge per_file_ignores
310        self.per_file_ignores.merge_override(
311            fragment.per_file_ignores.value,
312            fragment.per_file_ignores.source,
313            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
314            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
315        );
316
317        // Merge per_file_flavor
318        self.per_file_flavor.merge_override(
319            fragment.per_file_flavor.value,
320            fragment.per_file_flavor.source,
321            fragment.per_file_flavor.overrides.first().and_then(|o| o.file.clone()),
322            fragment.per_file_flavor.overrides.first().and_then(|o| o.line),
323        );
324
325        // Merge code_block_tools
326        self.code_block_tools.merge_override(
327            fragment.code_block_tools.value,
328            fragment.code_block_tools.source,
329            fragment.code_block_tools.overrides.first().and_then(|o| o.file.clone()),
330            fragment.code_block_tools.overrides.first().and_then(|o| o.line),
331        );
332
333        // Merge rule configs
334        for (rule_name, rule_fragment) in fragment.rules {
335            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
336            let rule_entry = self.rules.entry(norm_rule_name).or_default();
337
338            // Merge severity if present in fragment
339            if let Some(severity_fragment) = rule_fragment.severity {
340                if let Some(ref mut existing_severity) = rule_entry.severity {
341                    existing_severity.merge_override(
342                        severity_fragment.value,
343                        severity_fragment.source,
344                        severity_fragment.overrides.first().and_then(|o| o.file.clone()),
345                        severity_fragment.overrides.first().and_then(|o| o.line),
346                    );
347                } else {
348                    rule_entry.severity = Some(severity_fragment);
349                }
350            }
351
352            // Merge values
353            for (key, sourced_value_fragment) in rule_fragment.values {
354                let sv_entry = rule_entry
355                    .values
356                    .entry(key.clone())
357                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
358                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
359                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
360                sv_entry.merge_override(
361                    sourced_value_fragment.value,  // Use the value from the fragment
362                    sourced_value_fragment.source, // Use the source from the fragment
363                    file_from_fragment,            // Pass the file path from the fragment override
364                    line_from_fragment,            // Pass the line number from the fragment override
365                );
366            }
367        }
368
369        // Merge unknown_keys from fragment
370        for (section, key, file_path) in fragment.unknown_keys {
371            // Deduplicate: only add if not already present
372            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
373                self.unknown_keys.push((section, key, file_path));
374            }
375        }
376    }
377
    /// Load and merge configurations from files and CLI overrides.
    ///
    /// Thin convenience wrapper: forwards `config_path` and `cli_overrides` unchanged to
    /// `Self::load_with_discovery`, passing `false` as the third argument.
    ///
    /// NOTE(review): the third argument presumably means "skip auto-discovery" (compare the
    /// `skip_auto_discovery` parameter of `load_with_discovery_impl`) — confirm against
    /// `load_with_discovery`, which is not visible here.
    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
        Self::load_with_discovery(config_path, cli_overrides, false)
    }
382
383    /// Finds project root by walking up from start_dir looking for .git directory.
384    /// Falls back to start_dir if no .git found.
385    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
386        // Convert relative paths to absolute to ensure correct traversal
387        let mut current = if start_dir.is_relative() {
388            std::env::current_dir()
389                .map(|cwd| cwd.join(start_dir))
390                .unwrap_or_else(|_| start_dir.to_path_buf())
391        } else {
392            start_dir.to_path_buf()
393        };
394        const MAX_DEPTH: usize = 100;
395
396        for _ in 0..MAX_DEPTH {
397            if current.join(".git").exists() {
398                log::debug!("[rumdl-config] Found .git at: {}", current.display());
399                return current;
400            }
401
402            match current.parent() {
403                Some(parent) => current = parent.to_path_buf(),
404                None => break,
405            }
406        }
407
408        // No .git found, use start_dir as project root
409        log::debug!(
410            "[rumdl-config] No .git found, using config location as project root: {}",
411            start_dir.display()
412        );
413        start_dir.to_path_buf()
414    }
415
416    /// Discover configuration file by traversing up the directory tree.
417    /// Returns the first configuration file found.
418    /// Discovers config file and returns both the config path and project root.
419    /// Returns: (config_file_path, project_root_path)
420    /// Project root is the directory containing .git, or config parent as fallback.
421    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
422        use std::env;
423
424        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
425        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
426
427        let start_dir = match env::current_dir() {
428            Ok(dir) => dir,
429            Err(e) => {
430                log::debug!("[rumdl-config] Failed to get current directory: {e}");
431                return None;
432            }
433        };
434
435        let mut current_dir = start_dir.clone();
436        let mut depth = 0;
437        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
438
439        loop {
440            if depth >= MAX_DEPTH {
441                log::debug!("[rumdl-config] Maximum traversal depth reached");
442                break;
443            }
444
445            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
446
447            // Check for config files in order of precedence (only if not already found)
448            if found_config.is_none() {
449                for config_name in CONFIG_FILES {
450                    let config_path = current_dir.join(config_name);
451
452                    if config_path.exists() {
453                        // For pyproject.toml, verify it contains [tool.rumdl] section
454                        if *config_name == "pyproject.toml" {
455                            if let Ok(content) = std::fs::read_to_string(&config_path) {
456                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
457                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
458                                    // Store config, but continue looking for .git
459                                    found_config = Some((config_path.clone(), current_dir.clone()));
460                                    break;
461                                }
462                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
463                                continue;
464                            }
465                        } else {
466                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
467                            // Store config, but continue looking for .git
468                            found_config = Some((config_path.clone(), current_dir.clone()));
469                            break;
470                        }
471                    }
472                }
473            }
474
475            // Check for .git directory (stop boundary)
476            if current_dir.join(".git").exists() {
477                log::debug!("[rumdl-config] Stopping at .git directory");
478                break;
479            }
480
481            // Move to parent directory
482            match current_dir.parent() {
483                Some(parent) => {
484                    current_dir = parent.to_owned();
485                    depth += 1;
486                }
487                None => {
488                    log::debug!("[rumdl-config] Reached filesystem root");
489                    break;
490                }
491            }
492        }
493
494        // If config found, determine project root by walking up from config location
495        if let Some((config_path, config_dir)) = found_config {
496            let project_root = Self::find_project_root_from(&config_dir);
497            return Some((config_path, project_root));
498        }
499
500        None
501    }
502
503    /// Discover markdownlint configuration file by traversing up the directory tree.
504    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
505    /// Returns the path to the config file if found.
506    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
507        use std::env;
508
509        const MAX_DEPTH: usize = 100;
510
511        let start_dir = match env::current_dir() {
512            Ok(dir) => dir,
513            Err(e) => {
514                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
515                return None;
516            }
517        };
518
519        let mut current_dir = start_dir.clone();
520        let mut depth = 0;
521
522        loop {
523            if depth >= MAX_DEPTH {
524                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
525                break;
526            }
527
528            log::debug!(
529                "[rumdl-config] Searching for markdownlint config in: {}",
530                current_dir.display()
531            );
532
533            // Check for markdownlint config files in order of precedence
534            for config_name in MARKDOWNLINT_CONFIG_FILES {
535                let config_path = current_dir.join(config_name);
536                if config_path.exists() {
537                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
538                    return Some(config_path);
539                }
540            }
541
542            // Check for .git directory (stop boundary)
543            if current_dir.join(".git").exists() {
544                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
545                break;
546            }
547
548            // Move to parent directory
549            match current_dir.parent() {
550                Some(parent) => {
551                    current_dir = parent.to_owned();
552                    depth += 1;
553                }
554                None => {
555                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
556                    break;
557                }
558            }
559        }
560
561        None
562    }
563
564    /// Internal implementation that accepts config directory for testing
565    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
566        let config_dir = config_dir.join("rumdl");
567
568        // Check for config files in precedence order (same as project discovery)
569        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
570
571        log::debug!(
572            "[rumdl-config] Checking for user configuration in: {}",
573            config_dir.display()
574        );
575
576        for filename in USER_CONFIG_FILES {
577            let config_path = config_dir.join(filename);
578
579            if config_path.exists() {
580                // For pyproject.toml, verify it contains [tool.rumdl] section
581                if *filename == "pyproject.toml" {
582                    if let Ok(content) = std::fs::read_to_string(&config_path) {
583                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
584                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
585                            return Some(config_path);
586                        }
587                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
588                        continue;
589                    }
590                } else {
591                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
592                    return Some(config_path);
593                }
594            }
595        }
596
597        log::debug!(
598            "[rumdl-config] No user configuration found in: {}",
599            config_dir.display()
600        );
601        None
602    }
603
604    /// Discover user-level configuration file from platform-specific config directory.
605    /// Returns the first configuration file found in the user config directory.
606    #[cfg(feature = "native")]
607    fn user_configuration_path() -> Option<std::path::PathBuf> {
608        use etcetera::{BaseStrategy, choose_base_strategy};
609
610        match choose_base_strategy() {
611            Ok(strategy) => {
612                let config_dir = strategy.config_dir();
613                Self::user_configuration_path_impl(&config_dir)
614            }
615            Err(e) => {
616                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
617                None
618            }
619        }
620    }
621
622    /// Stub for WASM builds - user config not supported
623    #[cfg(not(feature = "native"))]
624    fn user_configuration_path() -> Option<std::path::PathBuf> {
625        None
626    }
627
628    /// Load an explicit config file (standalone, no user config merging)
629    fn load_explicit_config(sourced_config: &mut Self, path: &str) -> Result<(), ConfigError> {
630        let path_obj = Path::new(path);
631        let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
632        let path_str = path.to_string();
633
634        log::debug!("[rumdl-config] Loading explicit config file: {filename}");
635
636        // Find project root by walking up from config location looking for .git
637        if let Some(config_parent) = path_obj.parent() {
638            let project_root = Self::find_project_root_from(config_parent);
639            log::debug!(
640                "[rumdl-config] Project root (from explicit config): {}",
641                project_root.display()
642            );
643            sourced_config.project_root = Some(project_root);
644        }
645
646        // Known markdownlint config files
647        const MARKDOWNLINT_FILENAMES: &[&str] = &[
648            ".markdownlint-cli2.jsonc",
649            ".markdownlint-cli2.yaml",
650            ".markdownlint-cli2.yml",
651            ".markdownlint.json",
652            ".markdownlint.yaml",
653            ".markdownlint.yml",
654        ];
655
656        if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
657            // Use extends-aware loading for rumdl TOML configs
658            let mut visited = IndexSet::new();
659            let chain_source = source_from_filename(filename);
660            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
661        } else if MARKDOWNLINT_FILENAMES.contains(&filename)
662            || path_str.ends_with(".json")
663            || path_str.ends_with(".jsonc")
664            || path_str.ends_with(".yaml")
665            || path_str.ends_with(".yml")
666        {
667            // Parse as markdownlint config (JSON/YAML) - no extends support
668            let fragment = parsers::load_from_markdownlint(&path_str)?;
669            sourced_config.merge(fragment);
670            sourced_config.loaded_files.push(path_str);
671        } else {
672            // Try TOML with extends support
673            let mut visited = IndexSet::new();
674            let chain_source = source_from_filename(filename);
675            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
676        }
677
678        Ok(())
679    }
680
681    /// Load and merge user-level configuration into this `SourcedConfig`.
682    ///
683    /// Discovers the user config file from the platform config directory
684    /// (or `user_config_dir` if provided for testing). Resolves any `extends`
685    /// chain and merges each fragment with `ConfigSource::UserConfig` precedence.
686    ///
687    /// Called in two contexts:
688    /// - When no project config is found: provides user defaults as the sole base
689    /// - When a markdownlint project config is found: provides rumdl-specific
690    ///   defaults that the markdownlint format cannot express; the markdownlint
691    ///   fragment is merged on top and wins on any overlapping key
692    fn load_user_config(sourced_config: &mut Self, user_config_dir: Option<&Path>) -> Result<(), ConfigError> {
693        let user_config_path = if let Some(dir) = user_config_dir {
694            Self::user_configuration_path_impl(dir)
695        } else {
696            Self::user_configuration_path()
697        };
698
699        if let Some(user_config_path) = user_config_path {
700            let path_str = user_config_path.display().to_string();
701
702            log::debug!("[rumdl-config] Loading user config: {path_str}");
703
704            // User config fallback also supports extends chains.
705            // Use a uniform source across the chain so child overrides are determined by chain order.
706            let mut visited = IndexSet::new();
707            load_config_with_extends(
708                sourced_config,
709                &user_config_path,
710                &mut visited,
711                ConfigSource::UserConfig,
712            )?;
713        } else {
714            log::debug!("[rumdl-config] No user configuration file found");
715        }
716
717        Ok(())
718    }
719
720    /// Internal implementation that accepts user config directory for testing
721    #[doc(hidden)]
722    pub fn load_with_discovery_impl(
723        config_path: Option<&str>,
724        cli_overrides: Option<&SourcedGlobalConfig>,
725        skip_auto_discovery: bool,
726        user_config_dir: Option<&Path>,
727    ) -> Result<Self, ConfigError> {
728        use std::env;
729        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
730
731        let mut sourced_config = SourcedConfig::default();
732
733        // Ruff model: Project config is standalone, user config is fallback only
734        //
735        // Priority order:
736        // 1. If explicit config path provided → use ONLY that (standalone)
737        // 2. Else if project config discovered → use ONLY that (standalone)
738        // 3. Else if user config exists → use it as fallback
739        // 4. CLI overrides always apply last
740        //
741        // This ensures project configs are reproducible across machines and
742        // CI/local runs behave identically.
743
744        // Explicit config path always takes precedence
745        if let Some(path) = config_path {
746            // Explicit config path provided - use ONLY this config (standalone)
747            log::debug!("[rumdl-config] Explicit config_path provided: {path:?}");
748            Self::load_explicit_config(&mut sourced_config, path)?;
749        } else if skip_auto_discovery {
750            log::debug!("[rumdl-config] Skipping config discovery due to --no-config/--isolated flag");
751            // No config loading, just apply CLI overrides at the end
752        } else {
753            // No explicit path - try auto-discovery
754            log::debug!("[rumdl-config] No explicit config_path, searching default locations");
755
756            // Try to discover project config first
757            if let Some((config_file, project_root)) = Self::discover_config_upward() {
758                // Project config found - use ONLY this (standalone, no user config).
759                // Rumdl project configs can express all settings directly, so user config
760                // is not needed and omitting it ensures CI and local runs are identical.
761                log::debug!("[rumdl-config] Found project config: {}", config_file.display());
762                log::debug!("[rumdl-config] Project root: {}", project_root.display());
763
764                sourced_config.project_root = Some(project_root);
765
766                // Use extends-aware loading for discovered configs
767                let mut visited = IndexSet::new();
768                let root_filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
769                let chain_source = source_from_filename(root_filename);
770                load_config_with_extends(&mut sourced_config, &config_file, &mut visited, chain_source)?;
771            } else {
772                // No rumdl project config - try markdownlint config
773                log::debug!("[rumdl-config] No rumdl config found, checking markdownlint config");
774
775                if let Some(markdownlint_path) = Self::discover_markdownlint_config_upward() {
776                    let path_str = markdownlint_path.display().to_string();
777                    log::debug!("[rumdl-config] Found markdownlint config: {path_str}");
778                    // Load user config first as a base so rumdl-specific settings (e.g. flavor,
779                    // cache) take effect. Markdownlint configs cannot express these settings.
780                    // The markdownlint fragment uses ConfigSource::ProjectConfig (precedence 3)
781                    // vs UserConfig (precedence 1), so project settings always win on overlap.
782                    Self::load_user_config(&mut sourced_config, user_config_dir)?;
783                    match parsers::load_from_markdownlint(&path_str) {
784                        Ok(fragment) => {
785                            sourced_config.merge(fragment);
786                            sourced_config.loaded_files.push(path_str);
787                        }
788                        Err(_e) => {
789                            log::debug!("[rumdl-config] Failed to load markdownlint config");
790                        }
791                    }
792                } else {
793                    // No project config at all - use user config as fallback
794                    log::debug!("[rumdl-config] No project config found, using user config as fallback");
795                    Self::load_user_config(&mut sourced_config, user_config_dir)?;
796                }
797            }
798        }
799
800        // Apply CLI overrides (highest precedence)
801        if let Some(cli) = cli_overrides {
802            sourced_config
803                .global
804                .enable
805                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
806            sourced_config
807                .global
808                .disable
809                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
810            sourced_config
811                .global
812                .exclude
813                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
814            sourced_config
815                .global
816                .include
817                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
818            sourced_config.global.respect_gitignore.merge_override(
819                cli.respect_gitignore.value,
820                ConfigSource::Cli,
821                None,
822                None,
823            );
824            sourced_config
825                .global
826                .fixable
827                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
828            sourced_config
829                .global
830                .unfixable
831                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
832            // No rule-specific CLI overrides implemented yet
833        }
834
835        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
836
837        Ok(sourced_config)
838    }
839
840    /// Load and merge configurations from files and CLI overrides.
841    /// If skip_auto_discovery is true, only explicit config paths are loaded.
842    pub fn load_with_discovery(
843        config_path: Option<&str>,
844        cli_overrides: Option<&SourcedGlobalConfig>,
845        skip_auto_discovery: bool,
846    ) -> Result<Self, ConfigError> {
847        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
848    }
849
850    /// Validate the configuration against a rule registry.
851    ///
852    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
853    /// enabling conversion to `Config`. Validation warnings are stored in the config
854    /// and can be displayed to the user.
855    ///
856    /// # Example
857    ///
858    /// ```ignore
859    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
860    /// let validated = loaded.validate(&registry)?;
861    /// let config: Config = validated.into();
862    /// ```
863    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
864        let warnings = validate_config_sourced_internal(&self, registry);
865
866        Ok(SourcedConfig {
867            global: self.global,
868            per_file_ignores: self.per_file_ignores,
869            per_file_flavor: self.per_file_flavor,
870            code_block_tools: self.code_block_tools,
871            rules: self.rules,
872            loaded_files: self.loaded_files,
873            unknown_keys: self.unknown_keys,
874            project_root: self.project_root,
875            validation_warnings: warnings,
876            _state: PhantomData,
877        })
878    }
879
880    /// Validate and convert to Config in one step (convenience method).
881    ///
882    /// This combines `validate()` and `into()` for callers who want the
883    /// validation warnings separately.
884    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
885        let validated = self.validate(registry)?;
886        let warnings = validated.validation_warnings.clone();
887        Ok((validated.into(), warnings))
888    }
889
890    /// Skip validation and convert directly to ConfigValidated state.
891    ///
892    /// # Safety
893    ///
894    /// This method bypasses validation. Use only when:
895    /// - You've already validated via `validate_config_sourced()`
896    /// - You're in test code that doesn't need validation
897    /// - You're migrating legacy code and will add proper validation later
898    ///
899    /// Prefer `validate()` for new code.
900    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
901        SourcedConfig {
902            global: self.global,
903            per_file_ignores: self.per_file_ignores,
904            per_file_flavor: self.per_file_flavor,
905            code_block_tools: self.code_block_tools,
906            rules: self.rules,
907            loaded_files: self.loaded_files,
908            unknown_keys: self.unknown_keys,
909            project_root: self.project_root,
910            validation_warnings: Vec::new(),
911            _state: PhantomData,
912        }
913    }
914
915    /// Discover the nearest config file for a specific directory,
916    /// walking upward to `project_root` (inclusive).
917    ///
918    /// Searches for rumdl config files (`.rumdl.toml`, `rumdl.toml`,
919    /// `.config/rumdl.toml`, `pyproject.toml` with `[tool.rumdl]`) and
920    /// markdownlint config files at each directory level.
921    ///
922    /// Returns the config file path if found. Does NOT use CWD.
923    pub fn discover_config_for_dir(dir: &Path, project_root: &Path) -> Option<PathBuf> {
924        const RUMDL_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
925
926        let mut current_dir = dir.to_path_buf();
927
928        loop {
929            // Check rumdl config files first (higher precedence)
930            for config_name in RUMDL_CONFIG_FILES {
931                let config_path = current_dir.join(config_name);
932                if config_path.exists() {
933                    if *config_name == "pyproject.toml" {
934                        if let Ok(content) = std::fs::read_to_string(&config_path)
935                            && (content.contains("[tool.rumdl]") || content.contains("tool.rumdl"))
936                        {
937                            return Some(config_path);
938                        }
939                        continue;
940                    }
941                    return Some(config_path);
942                }
943            }
944
945            // Check markdownlint config files (lower precedence)
946            for config_name in MARKDOWNLINT_CONFIG_FILES {
947                let config_path = current_dir.join(config_name);
948                if config_path.exists() {
949                    return Some(config_path);
950                }
951            }
952
953            // Stop at project root (inclusive - we already checked it)
954            if current_dir == project_root {
955                break;
956            }
957
958            // Move to parent directory
959            match current_dir.parent() {
960                Some(parent) => current_dir = parent.to_path_buf(),
961                None => break,
962            }
963        }
964
965        None
966    }
967
968    /// Load a config from a specific file path, with extends resolution.
969    ///
970    /// Creates a fresh `SourcedConfig`, loads the config file using the
971    /// appropriate parser, and converts to `Config`. Used for per-directory
972    /// config loading where each subdirectory config is standalone.
973    pub fn load_config_for_path(config_path: &Path, project_root: &Path) -> Result<Config, ConfigError> {
974        let mut sourced_config = SourcedConfig {
975            project_root: Some(project_root.to_path_buf()),
976            ..SourcedConfig::default()
977        };
978
979        let filename = config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
980        let path_str = config_path.display().to_string();
981
982        // Determine if this is a markdownlint config or rumdl config
983        let is_markdownlint = MARKDOWNLINT_CONFIG_FILES.contains(&filename)
984            || (filename != "pyproject.toml"
985                && filename != ".rumdl.toml"
986                && filename != "rumdl.toml"
987                && (path_str.ends_with(".json")
988                    || path_str.ends_with(".jsonc")
989                    || path_str.ends_with(".yaml")
990                    || path_str.ends_with(".yml")));
991
992        if is_markdownlint {
993            let fragment = parsers::load_from_markdownlint(&path_str)?;
994            sourced_config.merge(fragment);
995            sourced_config.loaded_files.push(path_str);
996        } else {
997            let mut visited = IndexSet::new();
998            let chain_source = source_from_filename(filename);
999            load_config_with_extends(&mut sourced_config, config_path, &mut visited, chain_source)?;
1000        }
1001
1002        Ok(sourced_config.into_validated_unchecked().into())
1003    }
1004}
1005
1006/// Convert a validated configuration to the final Config type.
1007///
1008/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
1009/// ensuring that validation must occur before conversion.
1010impl From<SourcedConfig<ConfigValidated>> for Config {
1011    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
1012        let mut rules = BTreeMap::new();
1013        for (rule_name, sourced_rule_cfg) in sourced.rules {
1014            // Normalize rule name to uppercase for case-insensitive lookup
1015            let normalized_rule_name = rule_name.to_ascii_uppercase();
1016            let severity = sourced_rule_cfg.severity.map(|sv| sv.value);
1017            let mut values = BTreeMap::new();
1018            for (key, sourced_val) in sourced_rule_cfg.values {
1019                values.insert(key, sourced_val.value);
1020            }
1021            rules.insert(normalized_rule_name, RuleConfig { severity, values });
1022        }
1023        // Enable is "explicit" if it was set by something other than the Default source
1024        let enable_is_explicit = sourced.global.enable.source != ConfigSource::Default;
1025
1026        #[allow(deprecated)]
1027        let global = GlobalConfig {
1028            enable: sourced.global.enable.value,
1029            disable: sourced.global.disable.value,
1030            exclude: sourced.global.exclude.value,
1031            include: sourced.global.include.value,
1032            respect_gitignore: sourced.global.respect_gitignore.value,
1033            line_length: sourced.global.line_length.value,
1034            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
1035            fixable: sourced.global.fixable.value,
1036            unfixable: sourced.global.unfixable.value,
1037            flavor: sourced.global.flavor.value,
1038            force_exclude: sourced.global.force_exclude.value,
1039            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
1040            cache: sourced.global.cache.value,
1041            extend_enable: sourced.global.extend_enable.value,
1042            extend_disable: sourced.global.extend_disable.value,
1043            enable_is_explicit,
1044        };
1045
1046        let mut config = Config {
1047            extends: None,
1048            global,
1049            per_file_ignores: sourced.per_file_ignores.value,
1050            per_file_flavor: sourced.per_file_flavor.value,
1051            code_block_tools: sourced.code_block_tools.value,
1052            rules,
1053            project_root: sourced.project_root,
1054            per_file_ignores_cache: Arc::new(OnceLock::new()),
1055            per_file_flavor_cache: Arc::new(OnceLock::new()),
1056        };
1057
1058        // Apply per-rule `enabled = true/false` to global enable/disable lists
1059        config.apply_per_rule_enabled();
1060
1061        config
1062    }
1063}