Skip to main content

rumdl_lib/config/
loading.rs

1use indexmap::IndexSet;
2use std::collections::BTreeMap;
3use std::marker::PhantomData;
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, OnceLock};
6
7use super::flavor::ConfigLoaded;
8use super::flavor::ConfigValidated;
9use super::parsers;
10use super::registry::RuleRegistry;
11use super::source_tracking::{
12    ConfigSource, ConfigValidationWarning, SourcedConfig, SourcedConfigFragment, SourcedGlobalConfig, SourcedValue,
13};
14use super::types::{Config, ConfigError, GlobalConfig, MARKDOWNLINT_CONFIG_FILES, RUMDL_CONFIG_FILES, RuleConfig};
15use super::validation::validate_config_sourced_internal;
16
/// Maximum depth for extends chains to prevent runaway recursion.
///
/// `load_config_with_extends` compares the number of already-visited files
/// against this limit and returns `ConfigError::ExtendsDepthExceeded` once the
/// limit has been reached.
const MAX_EXTENDS_DEPTH: usize = 10;
19
/// Turn the raw `extends` string found in a config file into a filesystem path.
///
/// Resolution rules:
/// - A value starting with `~/` is re-rooted at the user's home directory when
///   the `native` feature is enabled; without that feature the value is
///   returned unchanged.
/// - An absolute path is returned unchanged.
/// - Anything else is joined onto the directory that holds `config_file_path`
///   (or onto `.` when that path has no parent).
fn resolve_extends_path(extends_value: &str, config_file_path: &Path) -> PathBuf {
    match extends_value.strip_prefix("~/") {
        Some(home_relative) => {
            #[cfg(feature = "native")]
            {
                use etcetera::{BaseStrategy, choose_base_strategy};
                // When no base strategy can be chosen, keep a literal `~` as the root.
                let home_root = match choose_base_strategy() {
                    Ok(strategy) => strategy.home_dir().to_path_buf(),
                    Err(_) => PathBuf::from("~"),
                };
                home_root.join(home_relative)
            }
            #[cfg(not(feature = "native"))]
            {
                // No home-directory lookup in this build: hand the value back untouched.
                let _ = home_relative;
                PathBuf::from(extends_value)
            }
        }
        None => {
            let candidate = PathBuf::from(extends_value);
            if candidate.is_absolute() {
                return candidate;
            }
            // Relative values are interpreted next to the config file that named them.
            config_file_path.parent().map_or_else(
                || Path::new(".").join(extends_value),
                |config_dir| config_dir.join(extends_value),
            )
        }
    }
}
50
51/// Determine ConfigSource from a config filename.
52fn source_from_filename(filename: &str) -> ConfigSource {
53    if filename == "pyproject.toml" {
54        ConfigSource::PyprojectToml
55    } else {
56        ConfigSource::ProjectConfig
57    }
58}
59
60/// Load a config file (and any base configs it extends) into a SourcedConfig.
61///
62/// This function handles the recursive `extends` chain:
63/// 1. Parse the config file into a fragment
64/// 2. If the fragment has `extends`, recursively load the base config first
65/// 3. Merge the base config, then merge this fragment on top
66fn load_config_with_extends(
67    sourced_config: &mut SourcedConfig<ConfigLoaded>,
68    config_file_path: &Path,
69    visited: &mut IndexSet<PathBuf>,
70    chain_source: ConfigSource,
71) -> Result<(), ConfigError> {
72    // Canonicalize the path for circular reference detection
73    let canonical = config_file_path
74        .canonicalize()
75        .unwrap_or_else(|_| config_file_path.to_path_buf());
76
77    // Check for circular references
78    if visited.contains(&canonical) {
79        let chain: Vec<String> = visited.iter().map(|p| p.display().to_string()).collect();
80        return Err(ConfigError::CircularExtends {
81            path: config_file_path.display().to_string(),
82            chain,
83        });
84    }
85
86    // Check depth limit
87    if visited.len() >= MAX_EXTENDS_DEPTH {
88        return Err(ConfigError::ExtendsDepthExceeded {
89            path: config_file_path.display().to_string(),
90            max_depth: MAX_EXTENDS_DEPTH,
91        });
92    }
93
94    // Mark as visited
95    visited.insert(canonical);
96
97    let path_str = config_file_path.display().to_string();
98    let filename = config_file_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
99
100    // Read and parse the config file
101    let content = std::fs::read_to_string(config_file_path).map_err(|e| ConfigError::IoError {
102        source: e,
103        path: path_str.clone(),
104    })?;
105
106    let fragment = if filename == "pyproject.toml" {
107        match parsers::parse_pyproject_toml(&content, &path_str, chain_source)? {
108            Some(f) => f,
109            None => return Ok(()), // No [tool.rumdl] section
110        }
111    } else {
112        parsers::parse_rumdl_toml(&content, &path_str, chain_source)?
113    };
114
115    // If this fragment has `extends`, load the base config first
116    if let Some(ref extends_value) = fragment.extends {
117        let base_path = resolve_extends_path(extends_value, config_file_path);
118
119        if !base_path.exists() {
120            return Err(ConfigError::ExtendsNotFound {
121                path: base_path.display().to_string(),
122                from: path_str.clone(),
123            });
124        }
125
126        log::debug!(
127            "[rumdl-config] Config {} extends {}, loading base first",
128            path_str,
129            base_path.display()
130        );
131
132        // Recursively load the base config
133        load_config_with_extends(sourced_config, &base_path, visited, chain_source)?;
134    }
135
136    // Merge this fragment on top (base config was already merged if present)
137    // Strip the `extends` field since it's been consumed
138    let mut fragment_for_merge = fragment;
139    fragment_for_merge.extends = None;
140    sourced_config.merge(fragment_for_merge);
141    sourced_config.loaded_files.push(path_str);
142
143    Ok(())
144}
145
146impl SourcedConfig<ConfigLoaded> {
147    /// Merges another SourcedConfigFragment into this SourcedConfig.
148    /// Uses source precedence to determine which values take effect.
149    pub(super) fn merge(&mut self, fragment: SourcedConfigFragment) {
150        // Merge global config
151        // Enable uses replace semantics (project can enforce rules)
152        self.global.enable.merge_override(
153            fragment.global.enable.value,
154            fragment.global.enable.source,
155            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
156            fragment.global.enable.overrides.first().and_then(|o| o.line),
157        );
158
159        // Disable uses replace semantics (child config overrides parent, matching Ruff's `ignore`)
160        self.global.disable.merge_override(
161            fragment.global.disable.value,
162            fragment.global.disable.source,
163            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
164            fragment.global.disable.overrides.first().and_then(|o| o.line),
165        );
166
167        // Extend-enable uses union semantics (additive across config levels)
168        self.global.extend_enable.merge_union(
169            fragment.global.extend_enable.value,
170            fragment.global.extend_enable.source,
171            fragment
172                .global
173                .extend_enable
174                .overrides
175                .first()
176                .and_then(|o| o.file.clone()),
177            fragment.global.extend_enable.overrides.first().and_then(|o| o.line),
178        );
179
180        // Extend-disable uses union semantics (additive across config levels)
181        self.global.extend_disable.merge_union(
182            fragment.global.extend_disable.value,
183            fragment.global.extend_disable.source,
184            fragment
185                .global
186                .extend_disable
187                .overrides
188                .first()
189                .and_then(|o| o.file.clone()),
190            fragment.global.extend_disable.overrides.first().and_then(|o| o.line),
191        );
192
193        // Conflict resolution: Enable overrides disable
194        // Remove any rules from disable that appear in enable
195        self.global
196            .disable
197            .value
198            .retain(|rule| !self.global.enable.value.contains(rule));
199        self.global.include.merge_override(
200            fragment.global.include.value,
201            fragment.global.include.source,
202            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
203            fragment.global.include.overrides.first().and_then(|o| o.line),
204        );
205        self.global.exclude.merge_override(
206            fragment.global.exclude.value,
207            fragment.global.exclude.source,
208            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
209            fragment.global.exclude.overrides.first().and_then(|o| o.line),
210        );
211        self.global.respect_gitignore.merge_override(
212            fragment.global.respect_gitignore.value,
213            fragment.global.respect_gitignore.source,
214            fragment
215                .global
216                .respect_gitignore
217                .overrides
218                .first()
219                .and_then(|o| o.file.clone()),
220            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
221        );
222        self.global.line_length.merge_override(
223            fragment.global.line_length.value,
224            fragment.global.line_length.source,
225            fragment
226                .global
227                .line_length
228                .overrides
229                .first()
230                .and_then(|o| o.file.clone()),
231            fragment.global.line_length.overrides.first().and_then(|o| o.line),
232        );
233        self.global.fixable.merge_override(
234            fragment.global.fixable.value,
235            fragment.global.fixable.source,
236            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
237            fragment.global.fixable.overrides.first().and_then(|o| o.line),
238        );
239        self.global.unfixable.merge_override(
240            fragment.global.unfixable.value,
241            fragment.global.unfixable.source,
242            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
243            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
244        );
245
246        // Merge flavor
247        self.global.flavor.merge_override(
248            fragment.global.flavor.value,
249            fragment.global.flavor.source,
250            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
251            fragment.global.flavor.overrides.first().and_then(|o| o.line),
252        );
253
254        // Merge force_exclude
255        self.global.force_exclude.merge_override(
256            fragment.global.force_exclude.value,
257            fragment.global.force_exclude.source,
258            fragment
259                .global
260                .force_exclude
261                .overrides
262                .first()
263                .and_then(|o| o.file.clone()),
264            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
265        );
266
267        // Merge output_format if present
268        if let Some(output_format_fragment) = fragment.global.output_format {
269            if let Some(ref mut output_format) = self.global.output_format {
270                output_format.merge_override(
271                    output_format_fragment.value,
272                    output_format_fragment.source,
273                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
274                    output_format_fragment.overrides.first().and_then(|o| o.line),
275                );
276            } else {
277                self.global.output_format = Some(output_format_fragment);
278            }
279        }
280
281        // Merge cache_dir if present
282        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
283            if let Some(ref mut cache_dir) = self.global.cache_dir {
284                cache_dir.merge_override(
285                    cache_dir_fragment.value,
286                    cache_dir_fragment.source,
287                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
288                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
289                );
290            } else {
291                self.global.cache_dir = Some(cache_dir_fragment);
292            }
293        }
294
295        // Merge cache if not default (only override when explicitly set)
296        if fragment.global.cache.source != ConfigSource::Default {
297            self.global.cache.merge_override(
298                fragment.global.cache.value,
299                fragment.global.cache.source,
300                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
301                fragment.global.cache.overrides.first().and_then(|o| o.line),
302            );
303        }
304
305        // Merge per_file_ignores
306        self.per_file_ignores.merge_override(
307            fragment.per_file_ignores.value,
308            fragment.per_file_ignores.source,
309            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
310            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
311        );
312
313        // Merge per_file_flavor
314        self.per_file_flavor.merge_override(
315            fragment.per_file_flavor.value,
316            fragment.per_file_flavor.source,
317            fragment.per_file_flavor.overrides.first().and_then(|o| o.file.clone()),
318            fragment.per_file_flavor.overrides.first().and_then(|o| o.line),
319        );
320
321        // Merge code_block_tools
322        self.code_block_tools.merge_override(
323            fragment.code_block_tools.value,
324            fragment.code_block_tools.source,
325            fragment.code_block_tools.overrides.first().and_then(|o| o.file.clone()),
326            fragment.code_block_tools.overrides.first().and_then(|o| o.line),
327        );
328
329        // Merge rule configs
330        for (rule_name, rule_fragment) in fragment.rules {
331            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
332            let rule_entry = self.rules.entry(norm_rule_name).or_default();
333
334            // Merge severity if present in fragment
335            if let Some(severity_fragment) = rule_fragment.severity {
336                if let Some(ref mut existing_severity) = rule_entry.severity {
337                    existing_severity.merge_override(
338                        severity_fragment.value,
339                        severity_fragment.source,
340                        severity_fragment.overrides.first().and_then(|o| o.file.clone()),
341                        severity_fragment.overrides.first().and_then(|o| o.line),
342                    );
343                } else {
344                    rule_entry.severity = Some(severity_fragment);
345                }
346            }
347
348            // Merge values
349            for (key, sourced_value_fragment) in rule_fragment.values {
350                let sv_entry = rule_entry
351                    .values
352                    .entry(key.clone())
353                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
354                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
355                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
356                sv_entry.merge_override(
357                    sourced_value_fragment.value,  // Use the value from the fragment
358                    sourced_value_fragment.source, // Use the source from the fragment
359                    file_from_fragment,            // Pass the file path from the fragment override
360                    line_from_fragment,            // Pass the line number from the fragment override
361                );
362            }
363        }
364
365        // Merge unknown_keys from fragment
366        for (section, key, file_path) in fragment.unknown_keys {
367            // Deduplicate: only add if not already present
368            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
369                self.unknown_keys.push((section, key, file_path));
370            }
371        }
372    }
373
    /// Load and merge configurations from files and CLI overrides.
    ///
    /// Thin convenience wrapper: forwards `config_path` and `cli_overrides`
    /// unchanged to `Self::load_with_discovery` and passes `false` as its third
    /// argument. That function is defined outside this view; see it for what
    /// the flag controls and for the possible errors.
    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
        Self::load_with_discovery(config_path, cli_overrides, false)
    }
378
379    /// Finds project root by walking up from start_dir looking for .git directory.
380    /// Falls back to start_dir if no .git found.
381    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
382        // Convert relative paths to absolute to ensure correct traversal
383        let mut current = if start_dir.is_relative() {
384            std::env::current_dir().map_or_else(|_| start_dir.to_path_buf(), |cwd| cwd.join(start_dir))
385        } else {
386            start_dir.to_path_buf()
387        };
388        const MAX_DEPTH: usize = 100;
389
390        for _ in 0..MAX_DEPTH {
391            if current.join(".git").exists() {
392                log::debug!("[rumdl-config] Found .git at: {}", current.display());
393                return current;
394            }
395
396            match current.parent() {
397                Some(parent) => current = parent.to_path_buf(),
398                None => break,
399            }
400        }
401
402        // No .git found, use start_dir as project root
403        log::debug!(
404            "[rumdl-config] No .git found, using config location as project root: {}",
405            start_dir.display()
406        );
407        start_dir.to_path_buf()
408    }
409
410    /// Discover configuration file by traversing up the directory tree.
411    /// Returns the first configuration file found.
412    /// Discovers config file and returns both the config path and project root.
413    /// Returns: (config_file_path, project_root_path)
414    /// Project root is the directory containing .git, or config parent as fallback.
415    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
416        use std::env;
417
418        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
419
420        let start_dir = match env::current_dir() {
421            Ok(dir) => dir,
422            Err(e) => {
423                log::debug!("[rumdl-config] Failed to get current directory: {e}");
424                return None;
425            }
426        };
427
428        let mut current_dir = start_dir.clone();
429        let mut depth = 0;
430        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
431
432        loop {
433            if depth >= MAX_DEPTH {
434                log::debug!("[rumdl-config] Maximum traversal depth reached");
435                break;
436            }
437
438            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
439
440            // Check for config files in order of precedence (only if not already found)
441            if found_config.is_none() {
442                for config_name in RUMDL_CONFIG_FILES {
443                    let config_path = current_dir.join(config_name);
444
445                    if config_path.exists() {
446                        // For pyproject.toml, verify it contains [tool.rumdl] section
447                        if *config_name == "pyproject.toml" {
448                            if let Ok(content) = std::fs::read_to_string(&config_path) {
449                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
450                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
451                                    // Store config, but continue looking for .git
452                                    found_config = Some((config_path.clone(), current_dir.clone()));
453                                    break;
454                                }
455                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
456                                continue;
457                            }
458                        } else {
459                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
460                            // Store config, but continue looking for .git
461                            found_config = Some((config_path.clone(), current_dir.clone()));
462                            break;
463                        }
464                    }
465                }
466            }
467
468            // Check for .git directory (stop boundary)
469            if current_dir.join(".git").exists() {
470                log::debug!("[rumdl-config] Stopping at .git directory");
471                break;
472            }
473
474            // Move to parent directory
475            match current_dir.parent() {
476                Some(parent) => {
477                    current_dir = parent.to_owned();
478                    depth += 1;
479                }
480                None => {
481                    log::debug!("[rumdl-config] Reached filesystem root");
482                    break;
483                }
484            }
485        }
486
487        // If config found, determine project root by walking up from config location
488        if let Some((config_path, config_dir)) = found_config {
489            let project_root = Self::find_project_root_from(&config_dir);
490            return Some((config_path, project_root));
491        }
492
493        None
494    }
495
496    /// Discover markdownlint configuration file by traversing up the directory tree.
497    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
498    /// Returns the path to the config file if found.
499    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
500        use std::env;
501
502        const MAX_DEPTH: usize = 100;
503
504        let start_dir = match env::current_dir() {
505            Ok(dir) => dir,
506            Err(e) => {
507                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
508                return None;
509            }
510        };
511
512        let mut current_dir = start_dir.clone();
513        let mut depth = 0;
514
515        loop {
516            if depth >= MAX_DEPTH {
517                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
518                break;
519            }
520
521            log::debug!(
522                "[rumdl-config] Searching for markdownlint config in: {}",
523                current_dir.display()
524            );
525
526            // Check for markdownlint config files in order of precedence
527            for config_name in MARKDOWNLINT_CONFIG_FILES {
528                let config_path = current_dir.join(config_name);
529                if config_path.exists() {
530                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
531                    return Some(config_path);
532                }
533            }
534
535            // Check for .git directory (stop boundary)
536            if current_dir.join(".git").exists() {
537                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
538                break;
539            }
540
541            // Move to parent directory
542            match current_dir.parent() {
543                Some(parent) => {
544                    current_dir = parent.to_owned();
545                    depth += 1;
546                }
547                None => {
548                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
549                    break;
550                }
551            }
552        }
553
554        None
555    }
556
557    /// Internal implementation that accepts config directory for testing
558    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
559        let config_dir = config_dir.join("rumdl");
560
561        // Check for config files in precedence order (same as project discovery)
562        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
563
564        log::debug!(
565            "[rumdl-config] Checking for user configuration in: {}",
566            config_dir.display()
567        );
568
569        for filename in USER_CONFIG_FILES {
570            let config_path = config_dir.join(filename);
571
572            if config_path.exists() {
573                // For pyproject.toml, verify it contains [tool.rumdl] section
574                if *filename == "pyproject.toml" {
575                    if let Ok(content) = std::fs::read_to_string(&config_path) {
576                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
577                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
578                            return Some(config_path);
579                        }
580                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
581                        continue;
582                    }
583                } else {
584                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
585                    return Some(config_path);
586                }
587            }
588        }
589
590        log::debug!(
591            "[rumdl-config] No user configuration found in: {}",
592            config_dir.display()
593        );
594        None
595    }
596
597    /// Discover user-level configuration file from platform-specific config directory.
598    /// Returns the first configuration file found in the user config directory.
599    #[cfg(feature = "native")]
600    fn user_configuration_path() -> Option<std::path::PathBuf> {
601        use etcetera::{BaseStrategy, choose_base_strategy};
602
603        match choose_base_strategy() {
604            Ok(strategy) => {
605                let config_dir = strategy.config_dir();
606                Self::user_configuration_path_impl(&config_dir)
607            }
608            Err(e) => {
609                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
610                None
611            }
612        }
613    }
614
615    /// Stub for WASM builds - user config not supported
616    #[cfg(not(feature = "native"))]
617    fn user_configuration_path() -> Option<std::path::PathBuf> {
618        None
619    }
620
621    /// Internal implementation that accepts the home directory for testing.
622    ///
623    /// Probes `<home>/.rumdl.toml` then `<home>/rumdl.toml`, returning the first match.
624    ///
625    /// `pyproject.toml` is intentionally **not** searched in `$HOME`, even though
626    /// `user_configuration_path_impl` does check it inside the platform config dir.
627    /// The asymmetry is deliberate: a `pyproject.toml` directly in `$HOME` almost
628    /// always belongs to unrelated python tooling (poetry/uv/pip's user-level config),
629    /// and silently picking it up as a rumdl config would surprise users. The
630    /// platform config dir (`~/.config/rumdl/`) is rumdl-scoped, so the same
631    /// concern doesn't apply there.
632    fn home_configuration_path_impl(home_dir: &Path) -> Option<std::path::PathBuf> {
633        const HOME_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml"];
634
635        log::debug!(
636            "[rumdl-config] Checking for home-directory configuration in: {}",
637            home_dir.display()
638        );
639
640        for filename in HOME_CONFIG_FILES {
641            let config_path = home_dir.join(filename);
642            if config_path.exists() {
643                log::debug!(
644                    "[rumdl-config] Found home-directory configuration at: {}",
645                    config_path.display()
646                );
647                return Some(config_path);
648            }
649        }
650
651        log::debug!(
652            "[rumdl-config] No home-directory configuration found in: {}",
653            home_dir.display()
654        );
655        None
656    }
657
658    /// Discover a home-directory configuration file (`~/.rumdl.toml` or `~/rumdl.toml`).
659    ///
660    /// This is a final fallback after the platform user-config directory
661    /// (`user_configuration_path`). It honors the classic Unix dotfile convention so
662    /// users who keep tool config in `$HOME` rather than `$XDG_CONFIG_HOME` are picked up.
663    #[cfg(feature = "native")]
664    fn home_configuration_path() -> Option<std::path::PathBuf> {
665        use etcetera::{BaseStrategy, choose_base_strategy};
666
667        match choose_base_strategy() {
668            Ok(strategy) => Self::home_configuration_path_impl(strategy.home_dir()),
669            Err(e) => {
670                log::debug!("[rumdl-config] Failed to determine home directory: {e}");
671                None
672            }
673        }
674    }
675
676    /// Stub for WASM builds - home config not supported
677    #[cfg(not(feature = "native"))]
678    fn home_configuration_path() -> Option<std::path::PathBuf> {
679        None
680    }
681
682    /// Load an explicit config file (standalone, no user config merging)
683    fn load_explicit_config(sourced_config: &mut Self, path: &str) -> Result<(), ConfigError> {
684        let path_obj = Path::new(path);
685        let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
686        let path_str = path.to_string();
687
688        log::debug!("[rumdl-config] Loading explicit config file: {filename}");
689
690        // Find project root by walking up from config location looking for .git
691        if let Some(config_parent) = path_obj.parent() {
692            let project_root = Self::find_project_root_from(config_parent);
693            log::debug!(
694                "[rumdl-config] Project root (from explicit config): {}",
695                project_root.display()
696            );
697            sourced_config.project_root = Some(project_root);
698        }
699
700        // Known markdownlint config files
701        const MARKDOWNLINT_FILENAMES: &[&str] = &[
702            ".markdownlint-cli2.jsonc",
703            ".markdownlint-cli2.yaml",
704            ".markdownlint-cli2.yml",
705            ".markdownlint.json",
706            ".markdownlint.yaml",
707            ".markdownlint.yml",
708        ];
709
710        if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
711            // Use extends-aware loading for rumdl TOML configs
712            let mut visited = IndexSet::new();
713            let chain_source = source_from_filename(filename);
714            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
715        } else if MARKDOWNLINT_FILENAMES.contains(&filename)
716            || path_str.ends_with(".json")
717            || path_str.ends_with(".jsonc")
718            || path_str.ends_with(".yaml")
719            || path_str.ends_with(".yml")
720        {
721            // Parse as markdownlint config (JSON/YAML) - no extends support
722            let fragment = parsers::load_from_markdownlint(&path_str)?;
723            sourced_config.merge(fragment);
724            sourced_config.loaded_files.push(path_str);
725        } else {
726            // Try TOML with extends support
727            let mut visited = IndexSet::new();
728            let chain_source = source_from_filename(filename);
729            load_config_with_extends(sourced_config, path_obj, &mut visited, chain_source)?;
730        }
731
732        Ok(())
733    }
734
735    /// Load and merge user-level configuration into this `SourcedConfig`.
736    ///
737    /// Discovers the user config file in this order, taking the first match:
738    /// 1. Platform user-config directory (XDG on Linux, `~/Library/Application Support`
739    ///    on macOS, `%APPDATA%` on Windows). Override with `user_config_dir` for tests.
740    /// 2. Home-directory dotfile (`~/.rumdl.toml`, then `~/rumdl.toml`). Override with
741    ///    `home_dir` for tests. Honors the classic Unix dotfile convention.
742    ///
743    /// Resolves any `extends` chain and merges each fragment with
744    /// `ConfigSource::UserConfig` precedence.
745    ///
746    /// Called in two contexts:
747    /// - When no project config is found: provides user defaults as the sole base
748    /// - When a markdownlint project config is found: provides rumdl-specific
749    ///   defaults that the markdownlint format cannot express; the markdownlint
750    ///   fragment is merged on top and wins on any overlapping key
751    fn load_user_config(
752        sourced_config: &mut Self,
753        user_config_dir: Option<&Path>,
754        home_dir: Option<&Path>,
755    ) -> Result<(), ConfigError> {
756        let user_config_path = if let Some(dir) = user_config_dir {
757            Self::user_configuration_path_impl(dir)
758        } else {
759            Self::user_configuration_path()
760        };
761
762        let user_config_path = user_config_path.or_else(|| match home_dir {
763            Some(home) => Self::home_configuration_path_impl(home),
764            None => Self::home_configuration_path(),
765        });
766
767        if let Some(user_config_path) = user_config_path {
768            let path_str = user_config_path.display().to_string();
769
770            log::debug!("[rumdl-config] Loading user config: {path_str}");
771
772            // User config fallback also supports extends chains.
773            // Use a uniform source across the chain so child overrides are determined by chain order.
774            let mut visited = IndexSet::new();
775            load_config_with_extends(
776                sourced_config,
777                &user_config_path,
778                &mut visited,
779                ConfigSource::UserConfig,
780            )?;
781        } else {
782            log::debug!("[rumdl-config] No user configuration file found");
783        }
784
785        Ok(())
786    }
787
788    /// Internal implementation that accepts user config directory and home directory for testing
789    #[doc(hidden)]
790    pub fn load_with_discovery_impl(
791        config_path: Option<&str>,
792        cli_overrides: Option<&SourcedGlobalConfig>,
793        skip_auto_discovery: bool,
794        user_config_dir: Option<&Path>,
795        home_dir: Option<&Path>,
796    ) -> Result<Self, ConfigError> {
797        use std::env;
798        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
799
800        let mut sourced_config = SourcedConfig::default();
801
802        // Ruff model: Project config is standalone, user config is fallback only
803        //
804        // Priority order:
805        // 1. If explicit config path provided → use ONLY that (standalone)
806        // 2. Else if project config discovered → use ONLY that (standalone)
807        // 3. Else if user config exists → use it as fallback
808        // 4. CLI overrides always apply last
809        //
810        // This ensures project configs are reproducible across machines and
811        // CI/local runs behave identically.
812
813        // Explicit config path always takes precedence
814        if let Some(path) = config_path {
815            // Explicit config path provided - use ONLY this config (standalone)
816            log::debug!("[rumdl-config] Explicit config_path provided: {path:?}");
817            Self::load_explicit_config(&mut sourced_config, path)?;
818        } else if skip_auto_discovery {
819            log::debug!("[rumdl-config] Skipping config discovery due to --no-config/--isolated flag");
820            // No config loading, just apply CLI overrides at the end
821        } else {
822            // No explicit path - try auto-discovery
823            log::debug!("[rumdl-config] No explicit config_path, searching default locations");
824
825            // Try to discover project config first
826            if let Some((config_file, project_root)) = Self::discover_config_upward() {
827                // Project config found - use ONLY this (standalone, no user config).
828                // Rumdl project configs can express all settings directly, so user config
829                // is not needed and omitting it ensures CI and local runs are identical.
830                log::debug!("[rumdl-config] Found project config: {}", config_file.display());
831                log::debug!("[rumdl-config] Project root: {}", project_root.display());
832
833                sourced_config.project_root = Some(project_root);
834
835                // Use extends-aware loading for discovered configs
836                let mut visited = IndexSet::new();
837                let root_filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
838                let chain_source = source_from_filename(root_filename);
839                load_config_with_extends(&mut sourced_config, &config_file, &mut visited, chain_source)?;
840            } else {
841                // No rumdl project config - try markdownlint config
842                log::debug!("[rumdl-config] No rumdl config found, checking markdownlint config");
843
844                if let Some(markdownlint_path) = Self::discover_markdownlint_config_upward() {
845                    let path_str = markdownlint_path.display().to_string();
846                    log::debug!("[rumdl-config] Found markdownlint config: {path_str}");
847                    // Load user config first as a base so rumdl-specific settings (e.g. flavor,
848                    // cache) take effect. Markdownlint configs cannot express these settings.
849                    // The markdownlint fragment uses ConfigSource::ProjectConfig (precedence 3)
850                    // vs UserConfig (precedence 1), so project settings always win on overlap.
851                    Self::load_user_config(&mut sourced_config, user_config_dir, home_dir)?;
852                    match parsers::load_from_markdownlint(&path_str) {
853                        Ok(fragment) => {
854                            sourced_config.merge(fragment);
855                            sourced_config.loaded_files.push(path_str);
856                        }
857                        Err(_e) => {
858                            log::debug!("[rumdl-config] Failed to load markdownlint config");
859                        }
860                    }
861                } else {
862                    // No project config at all - use user config as fallback
863                    log::debug!("[rumdl-config] No project config found, using user config as fallback");
864                    Self::load_user_config(&mut sourced_config, user_config_dir, home_dir)?;
865                }
866            }
867        }
868
869        // Apply CLI overrides (highest precedence)
870        if let Some(cli) = cli_overrides {
871            sourced_config
872                .global
873                .enable
874                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
875            sourced_config
876                .global
877                .disable
878                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
879            sourced_config
880                .global
881                .exclude
882                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
883            sourced_config
884                .global
885                .include
886                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
887            sourced_config.global.respect_gitignore.merge_override(
888                cli.respect_gitignore.value,
889                ConfigSource::Cli,
890                None,
891                None,
892            );
893            sourced_config
894                .global
895                .fixable
896                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
897            sourced_config
898                .global
899                .unfixable
900                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
901            // No rule-specific CLI overrides implemented yet
902        }
903
904        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
905
906        Ok(sourced_config)
907    }
908
909    /// Load and merge configurations from files and CLI overrides.
910    /// If skip_auto_discovery is true, only explicit config paths are loaded.
911    pub fn load_with_discovery(
912        config_path: Option<&str>,
913        cli_overrides: Option<&SourcedGlobalConfig>,
914        skip_auto_discovery: bool,
915    ) -> Result<Self, ConfigError> {
916        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None, None)
917    }
918
919    /// Validate the configuration against a rule registry.
920    ///
921    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
922    /// enabling conversion to `Config`. Validation warnings are stored in the config
923    /// and can be displayed to the user.
924    ///
925    /// # Example
926    ///
927    /// ```ignore
928    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
929    /// let validated = loaded.validate(&registry)?;
930    /// let config: Config = validated.into();
931    /// ```
932    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
933        let warnings = validate_config_sourced_internal(&self, registry);
934
935        Ok(SourcedConfig {
936            global: self.global,
937            per_file_ignores: self.per_file_ignores,
938            per_file_flavor: self.per_file_flavor,
939            code_block_tools: self.code_block_tools,
940            rules: self.rules,
941            loaded_files: self.loaded_files,
942            unknown_keys: self.unknown_keys,
943            project_root: self.project_root,
944            validation_warnings: warnings,
945            _state: PhantomData,
946        })
947    }
948
949    /// Validate and convert to Config in one step (convenience method).
950    ///
951    /// This combines `validate()` and `into()` for callers who want the
952    /// validation warnings separately.
953    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
954        let validated = self.validate(registry)?;
955        let warnings = validated.validation_warnings.clone();
956        Ok((validated.into(), warnings))
957    }
958
959    /// Skip validation and convert directly to ConfigValidated state.
960    ///
961    /// # Safety
962    ///
963    /// This method bypasses validation. Use only when:
964    /// - You've already validated via `validate_config_sourced()`
965    /// - You're in test code that doesn't need validation
966    /// - You're migrating legacy code and will add proper validation later
967    ///
968    /// Prefer `validate()` for new code.
969    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
970        SourcedConfig {
971            global: self.global,
972            per_file_ignores: self.per_file_ignores,
973            per_file_flavor: self.per_file_flavor,
974            code_block_tools: self.code_block_tools,
975            rules: self.rules,
976            loaded_files: self.loaded_files,
977            unknown_keys: self.unknown_keys,
978            project_root: self.project_root,
979            validation_warnings: Vec::new(),
980            _state: PhantomData,
981        }
982    }
983
984    /// Discover the nearest config file for a specific directory,
985    /// walking upward to `project_root` (inclusive).
986    ///
987    /// Searches for rumdl config files (`.rumdl.toml`, `rumdl.toml`,
988    /// `.config/rumdl.toml`, `pyproject.toml` with `[tool.rumdl]`) and
989    /// markdownlint config files at each directory level.
990    ///
991    /// Returns the config file path if found. Does NOT use CWD.
992    pub fn discover_config_for_dir(dir: &Path, project_root: &Path) -> Option<PathBuf> {
993        let mut current_dir = dir.to_path_buf();
994
995        loop {
996            // Check rumdl config files first (higher precedence)
997            for config_name in RUMDL_CONFIG_FILES {
998                let config_path = current_dir.join(config_name);
999                if config_path.exists() {
1000                    if *config_name == "pyproject.toml" {
1001                        if let Ok(content) = std::fs::read_to_string(&config_path)
1002                            && (content.contains("[tool.rumdl]") || content.contains("tool.rumdl"))
1003                        {
1004                            return Some(config_path);
1005                        }
1006                        continue;
1007                    }
1008                    return Some(config_path);
1009                }
1010            }
1011
1012            // Check markdownlint config files (lower precedence)
1013            for config_name in MARKDOWNLINT_CONFIG_FILES {
1014                let config_path = current_dir.join(config_name);
1015                if config_path.exists() {
1016                    return Some(config_path);
1017                }
1018            }
1019
1020            // Stop at project root (inclusive - we already checked it)
1021            if current_dir == project_root {
1022                break;
1023            }
1024
1025            // Move to parent directory
1026            match current_dir.parent() {
1027                Some(parent) => current_dir = parent.to_path_buf(),
1028                None => break,
1029            }
1030        }
1031
1032        None
1033    }
1034
1035    /// Load a config from a specific file path, with extends resolution.
1036    ///
1037    /// Creates a fresh `SourcedConfig`, loads the config file using the
1038    /// appropriate parser, and converts to `Config`. Used for per-directory
1039    /// config loading where each subdirectory config is standalone.
1040    pub fn load_config_for_path(config_path: &Path, project_root: &Path) -> Result<Config, ConfigError> {
1041        let mut sourced_config = SourcedConfig {
1042            project_root: Some(project_root.to_path_buf()),
1043            ..SourcedConfig::default()
1044        };
1045
1046        let filename = config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1047        let path_str = config_path.display().to_string();
1048
1049        // Determine if this is a markdownlint config or rumdl config
1050        let is_markdownlint = MARKDOWNLINT_CONFIG_FILES.contains(&filename)
1051            || (filename != "pyproject.toml"
1052                && filename != ".rumdl.toml"
1053                && filename != "rumdl.toml"
1054                && (path_str.ends_with(".json")
1055                    || path_str.ends_with(".jsonc")
1056                    || path_str.ends_with(".yaml")
1057                    || path_str.ends_with(".yml")));
1058
1059        if is_markdownlint {
1060            let fragment = parsers::load_from_markdownlint(&path_str)?;
1061            sourced_config.merge(fragment);
1062            sourced_config.loaded_files.push(path_str);
1063        } else {
1064            let mut visited = IndexSet::new();
1065            let chain_source = source_from_filename(filename);
1066            load_config_with_extends(&mut sourced_config, config_path, &mut visited, chain_source)?;
1067        }
1068
1069        Ok(sourced_config.into_validated_unchecked().into())
1070    }
1071}
1072
1073/// Convert a validated configuration to the final Config type.
1074///
1075/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
1076/// ensuring that validation must occur before conversion.
1077impl From<SourcedConfig<ConfigValidated>> for Config {
1078    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
1079        let mut rules = BTreeMap::new();
1080        for (rule_name, sourced_rule_cfg) in sourced.rules {
1081            // Normalize rule name to uppercase for case-insensitive lookup
1082            let normalized_rule_name = rule_name.to_ascii_uppercase();
1083            let severity = sourced_rule_cfg.severity.map(|sv| sv.value);
1084            let mut values = BTreeMap::new();
1085            for (key, sourced_val) in sourced_rule_cfg.values {
1086                values.insert(key, sourced_val.value);
1087            }
1088            rules.insert(normalized_rule_name, RuleConfig { severity, values });
1089        }
1090        // Enable is "explicit" if it was set by something other than the Default source
1091        let enable_is_explicit = sourced.global.enable.source != ConfigSource::Default;
1092
1093        #[allow(deprecated)]
1094        let global = GlobalConfig {
1095            enable: sourced.global.enable.value,
1096            disable: sourced.global.disable.value,
1097            exclude: sourced.global.exclude.value,
1098            include: sourced.global.include.value,
1099            respect_gitignore: sourced.global.respect_gitignore.value,
1100            line_length: sourced.global.line_length.value,
1101            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
1102            fixable: sourced.global.fixable.value,
1103            unfixable: sourced.global.unfixable.value,
1104            flavor: sourced.global.flavor.value,
1105            force_exclude: sourced.global.force_exclude.value,
1106            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
1107            cache: sourced.global.cache.value,
1108            extend_enable: sourced.global.extend_enable.value,
1109            extend_disable: sourced.global.extend_disable.value,
1110            enable_is_explicit,
1111        };
1112
1113        let mut config = Config {
1114            extends: None,
1115            global,
1116            per_file_ignores: sourced.per_file_ignores.value,
1117            per_file_flavor: sourced.per_file_flavor.value,
1118            code_block_tools: sourced.code_block_tools.value,
1119            rules,
1120            project_root: sourced.project_root,
1121            per_file_ignores_cache: Arc::new(OnceLock::new()),
1122            per_file_flavor_cache: Arc::new(OnceLock::new()),
1123            canonical_project_root_cache: Arc::new(OnceLock::new()),
1124        };
1125
1126        // Apply per-rule `enabled = true/false` to global enable/disable lists
1127        config.apply_per_rule_enabled();
1128
1129        // Enforce the runtime invariant: every rule-name list is canonicalised.
1130        // After this point, downstream consumers (`rules::filter_rules`, the LSP,
1131        // WASM, fix coordinator, per-file-ignores) can match against
1132        // `Rule::name()` with simple string equality regardless of whether the
1133        // user's config used canonical IDs (`"MD033"`) or aliases
1134        // (`"no-inline-html"`).
1135        config.canonicalize_rule_lists();
1136
1137        config
1138    }
1139}