Skip to main content

marque_config/
lib.rs

1//! marque-config — layered configuration loading.
2//!
3//! Precedence (highest wins): CLI flags → env vars → `.marque.local.toml` → `.marque.toml`
4//!
5//! # Hard-fail validators (T023)
6//!
7//! The loader refuses to produce a `Config` if any of these conditions hold:
8//! - `.marque.toml` contains a `[user]` section (FR-010, SC-006) → exit 65
9//! - `[capco] version` mismatches `marque_ism::SCHEMA_VERSION` (FR-011) → exit 65
10//! - `confidence_threshold` outside `[0.0, 1.0]` → exit 65
11
12use marque_rules::Severity;
13use serde::{Deserialize, Serialize};
14use std::collections::HashMap;
15use std::path::PathBuf;
16use thiserror::Error;
17
/// Process exit status used for invalid configuration data
/// (`EX_DATAERR`, value 65), as required by `contracts/cli.md`.
pub const EX_DATAERR: i32 = 65;
20
21#[derive(Debug, Error)]
22pub enum ConfigError {
23    #[error("failed to read config file {path}: {source}")]
24    ReadError {
25        path: PathBuf,
26        source: std::io::Error,
27    },
28
29    #[error("failed to parse config: {0}")]
30    ParseError(#[from] toml::de::Error),
31
32    /// `.marque.toml` contains a `[user]` section (FR-010, SC-006).
33    #[error(
34        "committed config file {path} contains a [user] section — classifier identity \
35         must live only in .marque.local.toml or env vars (FR-010)"
36    )]
37    UserSectionInCommitted { path: PathBuf },
38
39    /// Schema version in config doesn't match compiled schema.
40    #[error(
41        "schema version mismatch: config says {config_version:?} but marque was compiled \
42         against {compiled_version:?} (FR-011). Update [capco] version in .marque.toml."
43    )]
44    SchemaVersionMismatch {
45        config_version: String,
46        compiled_version: &'static str,
47    },
48
49    /// Confidence threshold out of range.
50    #[error("confidence_threshold {value} is outside [0.0, 1.0]")]
51    ThresholdOutOfRange { value: f32 },
52
53    /// Environment variable could not be parsed into the expected type.
54    #[error("environment variable {var} has invalid value {raw:?}: {reason}")]
55    InvalidEnvVar {
56        var: &'static str,
57        raw: String,
58        reason: &'static str,
59    },
60
61    /// Rule severity string in config is not one of the recognized values.
62    #[error(
63        "rule {rule:?} has unrecognized severity {value:?} — expected one of \
64         \"off\", \"warn\", \"error\", \"fix\""
65    )]
66    UnknownSeverity { rule: String, value: String },
67}
68
69impl ConfigError {
70    /// Returns the exit code for this error per `contracts/cli.md`.
71    pub fn exit_code(&self) -> i32 {
72        match self {
73            Self::ReadError { .. } => 74, // EX_IOERR
74            Self::ParseError(_) => EX_DATAERR,
75            Self::UserSectionInCommitted { .. } => EX_DATAERR,
76            Self::SchemaVersionMismatch { .. } => EX_DATAERR,
77            Self::ThresholdOutOfRange { .. } => EX_DATAERR,
78            Self::InvalidEnvVar { .. } => EX_DATAERR,
79            Self::UnknownSeverity { .. } => EX_DATAERR,
80        }
81    }
82}
83
84/// Resolved, merged configuration ready for engine use.
85#[derive(Debug, Clone)]
86pub struct Config {
87    pub user: UserConfig,
88    pub rules: RuleConfig,
89    /// Organization-specific typo corrections from `[corrections]` in `.marque.toml`.
90    ///
91    /// **Do not mutate after passing to `Engine::new`** — the engine caches
92    /// this as an `Arc<HashMap>` at construction time. Post-construction
93    /// mutation leaves the cached copy stale.
94    pub corrections: HashMap<String, String>,
95    pub capco: CapcoConfig,
96    /// Fix confidence threshold. Fixes with confidence >= this value are auto-applied.
97    /// Default: 0.95 per spec.
98    confidence_threshold: f32,
99}
100
101impl Default for Config {
102    fn default() -> Self {
103        Self {
104            user: UserConfig::default(),
105            rules: RuleConfig::default(),
106            corrections: HashMap::new(),
107            capco: CapcoConfig::default(),
108            confidence_threshold: 0.95,
109        }
110    }
111}
112
113impl Config {
114    /// Returns the confidence threshold for auto-applying fixes.
115    pub fn confidence_threshold(&self) -> f32 {
116        self.confidence_threshold
117    }
118
119    /// Set confidence threshold (validated at load time).
120    pub fn set_confidence_threshold(&mut self, value: f32) -> Result<(), ConfigError> {
121        if !(0.0..=1.0).contains(&value) || value.is_nan() {
122            return Err(ConfigError::ThresholdOutOfRange { value });
123        }
124        self.confidence_threshold = value;
125        Ok(())
126    }
127}
128
/// Classifier identity. Sourced from the local config file or environment
/// variables only — never from the committed project file.
#[derive(Debug, Clone, Default)]
pub struct UserConfig {
    /// Classifier identifier, if one has been configured.
    pub classifier_id: Option<String>,
    /// Classification authority, if one has been configured.
    pub classification_authority: Option<String>,
    /// Default classification reason, if one has been configured.
    pub default_reason: Option<String>,
    /// Default "derived from" value, if one has been configured.
    pub derived_from_default: Option<String>,
}
137
/// Severity overrides keyed by rule.
#[derive(Debug, Clone, Default)]
pub struct RuleConfig {
    /// Rule ID → severity string as written in config
    /// (`"fix"`, `"warn"`, `"error"` or `"off"`).
    pub overrides: HashMap<String, String>,
}
144
/// Settings specific to CAPCO marking validation.
#[derive(Debug, Clone)]
pub struct CapcoConfig {
    /// The ISM schema version this configuration is pinned to. Loading fails
    /// unless it equals the version compiled into `marque-ism`.
    pub version: String,
}
151
152impl Default for CapcoConfig {
153    fn default() -> Self {
154        Self {
155            version: marque_ism::generated::values::SCHEMA_VERSION.to_owned(),
156        }
157    }
158}
159
160// ---------------------------------------------------------------------------
161// TOML-deserialisable file format
162// ---------------------------------------------------------------------------
163
164#[derive(Debug, Deserialize, Serialize, Default)]
165struct ConfigFile {
166    #[serde(default)]
167    user: Option<UserConfigFile>,
168    #[serde(default)]
169    rules: HashMap<String, String>,
170    #[serde(default)]
171    corrections: HashMap<String, String>,
172    #[serde(default)]
173    capco: CapcoConfigFile,
174    #[serde(default)]
175    confidence_threshold: Option<f32>,
176}
177
178#[derive(Debug, Deserialize, Serialize, Default)]
179struct UserConfigFile {
180    classifier_id: Option<String>,
181    classification_authority: Option<String>,
182    default_reason: Option<String>,
183    derived_from_default: Option<String>,
184}
185
186#[derive(Debug, Deserialize, Serialize, Default)]
187struct CapcoConfigFile {
188    version: Option<String>,
189}
190
191// ---------------------------------------------------------------------------
192// Config loading
193// ---------------------------------------------------------------------------
194
195/// Load and merge configuration from standard locations.
196///
197/// Search order (first found wins for each layer):
198/// 1. `.marque.toml` discovered by walking upward from `start` per
199///    `contracts/cli.md`. The walk stops at the **first** of:
200///    - a directory containing `.marque.toml`
201///    - a directory containing `.git/` (git repository root)
202///    - the filesystem root
203///
204///    If the walk finds a `.marque.toml`, that directory is the project root
205///    for both Layer 1 (committed) and Layer 2 (local). If the walk finds a
206///    git root or filesystem root first, no project config is loaded —
207///    Layer 3 (env vars) still runs.
208/// 2. `.marque.local.toml` **only in the same directory** as the discovered
209///    `.marque.toml`. The local-config search is never independently walked,
210///    so a stray `.marque.local.toml` in a parent directory cannot silently
211///    attach to a child project's config.
212/// 3. Environment variables (`MARQUE_CLASSIFIER_ID`, `MARQUE_CONFIDENCE_THRESHOLD`,
213///    `MARQUE_LOG`).
214///
215/// Hard-fail validators run after merging all layers.
216pub fn load(start: &std::path::Path) -> Result<Config, ConfigError> {
217    let mut config = Config::default();
218
219    // Layer 1+2: walk upward for the project config.
220    if let Some(project_dir) = discover_project_dir(start) {
221        // Layer 1: project config
222        let project_config = project_dir.join(".marque.toml");
223        let raw = std::fs::read_to_string(&project_config).map_err(|e| ConfigError::ReadError {
224            path: project_config.clone(),
225            source: e,
226        })?;
227        let file: ConfigFile = toml::from_str(&raw)?;
228
229        // T023: refuse [user] section in committed config (FR-010, SC-006)
230        if file.user.is_some() {
231            return Err(ConfigError::UserSectionInCommitted {
232                path: project_config,
233            });
234        }
235
236        merge_project_into(&mut config, file)?;
237
238        // Layer 2: user-local config in the SAME directory only.
239        let local_config = project_dir.join(".marque.local.toml");
240        if local_config.exists() {
241            let raw =
242                std::fs::read_to_string(&local_config).map_err(|e| ConfigError::ReadError {
243                    path: local_config.clone(),
244                    source: e,
245                })?;
246            let file: ConfigFile = toml::from_str(&raw)?;
247            merge_user_into(&mut config, file);
248        }
249    }
250
251    // Layer 3: environment variables
252    apply_env(&mut config)?;
253
254    // T023: validate schema version (FR-011)
255    validate_schema_version(&config)?;
256
257    Ok(config)
258}
259
260/// Load configuration from an explicit `.marque.toml` path, bypassing the
261/// upward walk. Used by `--config <PATH>` per `contracts/cli.md`:
262/// "short-circuits the walk and uses the specified path as the project
263/// config; the local-config search still applies, only in the directory
264/// containing the supplied path."
265pub fn load_with_explicit_config(project_config: &std::path::Path) -> Result<Config, ConfigError> {
266    let mut config = Config::default();
267
268    // Layer 1: explicit project config — required to exist.
269    let raw = std::fs::read_to_string(project_config).map_err(|e| ConfigError::ReadError {
270        path: project_config.to_path_buf(),
271        source: e,
272    })?;
273    let file: ConfigFile = toml::from_str(&raw)?;
274
275    if file.user.is_some() {
276        return Err(ConfigError::UserSectionInCommitted {
277            path: project_config.to_path_buf(),
278        });
279    }
280
281    merge_project_into(&mut config, file)?;
282
283    // Layer 2: local config in the same directory as the explicit path.
284    if let Some(parent) = project_config.parent() {
285        let local_config = parent.join(".marque.local.toml");
286        if local_config.exists() {
287            let raw =
288                std::fs::read_to_string(&local_config).map_err(|e| ConfigError::ReadError {
289                    path: local_config.clone(),
290                    source: e,
291                })?;
292            let file: ConfigFile = toml::from_str(&raw)?;
293            merge_user_into(&mut config, file);
294        }
295    }
296
297    apply_env(&mut config)?;
298    validate_schema_version(&config)?;
299    Ok(config)
300}
301
/// Walk upward from `start` looking for a directory containing `.marque.toml`.
///
/// Returns `Some(dir)` if a `.marque.toml` is found before hitting either a
/// git repository root (a directory containing `.git`) or the filesystem
/// root. Returns `None` otherwise — falling back to built-in defaults is the
/// caller's responsibility.
///
/// The walk treats `.git` as a hard stop *only when* the directory does not
/// also contain `.marque.toml`. A repo with `.marque.toml` at its root is
/// the common case and must succeed.
///
/// A relative `start` (for example `.` or `sub/dir`) is anchored at the
/// current working directory first. Without that, popping components off a
/// relative path runs out before any real ancestor is visited. A path that
/// contains `..` is canonicalized so that popping a component always moves
/// toward the root; if canonicalization fails the path is walked as given.
/// The returned directory is therefore absolute whenever the working
/// directory can be determined.
fn discover_project_dir(start: &std::path::Path) -> Option<std::path::PathBuf> {
    use std::path::{Component, PathBuf};

    let anchored = if start.is_absolute() {
        start.to_path_buf()
    } else {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(start),
            // No usable working directory: fall back to the path as given.
            Err(_) => start.to_path_buf(),
        }
    };

    let has_parent_refs = anchored
        .components()
        .any(|c| matches!(c, Component::ParentDir));
    let mut current: PathBuf = if has_parent_refs {
        // `..` cannot be resolved lexically in the presence of symlinks.
        anchored.canonicalize().unwrap_or(anchored)
    } else {
        // Re-collecting drops interior `.` components and doubled separators.
        anchored.components().collect()
    };

    loop {
        if current.join(".marque.toml").is_file() {
            return Some(current);
        }
        // Hit a git repo root that did not contain .marque.toml — stop.
        // The check is for `.git` as either a file (git worktree pointer)
        // or a directory (normal repo).
        if current.join(".git").exists() {
            return None;
        }
        if !current.pop() {
            // Filesystem root — nothing more to walk.
            return None;
        }
    }
}
330
331fn merge_project_into(config: &mut Config, file: ConfigFile) -> Result<(), ConfigError> {
332    // H-6: validate every severity override at load time. A typo like
333    // `banner-abbreviation = "err"` must fail loudly, not silently fall back
334    // to the rule default.
335    for (rule, value) in &file.rules {
336        if Severity::parse_config(value).is_none() {
337            return Err(ConfigError::UnknownSeverity {
338                rule: rule.clone(),
339                value: value.clone(),
340            });
341        }
342    }
343    config.rules.overrides.extend(file.rules);
344    config.corrections.extend(file.corrections);
345    if let Some(v) = file.capco.version {
346        config.capco.version = v;
347    }
348    if let Some(threshold) = file.confidence_threshold {
349        config.set_confidence_threshold(threshold)?;
350    }
351    Ok(())
352}
353
354fn merge_user_into(config: &mut Config, file: ConfigFile) {
355    // L-2: an empty string is semantically equivalent to "not set". Without
356    // this guard, a .marque.local.toml entry of `classifier_id = ""` would
357    // silently overwrite a populated value from another layer with an empty
358    // string. For a security tool where classifier identity ends up in the
359    // audit record, that is a meaningful correctness hole.
360    fn non_empty(s: Option<String>) -> Option<String> {
361        s.filter(|v| !v.trim().is_empty())
362    }
363
364    if let Some(user) = file.user {
365        if let Some(v) = non_empty(user.classifier_id) {
366            config.user.classifier_id = Some(v);
367        }
368        if let Some(v) = non_empty(user.classification_authority) {
369            config.user.classification_authority = Some(v);
370        }
371        if let Some(v) = non_empty(user.default_reason) {
372            config.user.default_reason = Some(v);
373        }
374        if let Some(v) = non_empty(user.derived_from_default) {
375            config.user.derived_from_default = Some(v);
376        }
377    }
378}
379
380fn apply_env(config: &mut Config) -> Result<(), ConfigError> {
381    // L-2 parity: apply the same non-empty guard as merge_user_into so that
382    // `MARQUE_CLASSIFIER_ID=""` does not silently overwrite a populated
383    // local-config value with an empty string.
384    if let Ok(id) = std::env::var("MARQUE_CLASSIFIER_ID") {
385        if !id.trim().is_empty() {
386            config.user.classifier_id = Some(id);
387        }
388    }
389    // C-2: propagate parse failures. `MARQUE_CONFIDENCE_THRESHOLD=0.9o` must
390    // hard-fail, not silently apply the default.
391    if let Ok(raw) = std::env::var("MARQUE_CONFIDENCE_THRESHOLD") {
392        let threshold = raw.parse::<f32>().map_err(|_| ConfigError::InvalidEnvVar {
393            var: "MARQUE_CONFIDENCE_THRESHOLD",
394            raw: raw.clone(),
395            reason: "expected a floating-point number in [0.0, 1.0]",
396        })?;
397        config.set_confidence_threshold(threshold)?;
398    }
399    // MARQUE_LOG is handled by the tracing subscriber, not by config loading.
400    Ok(())
401}
402
403/// T023: validate schema version matches compiled marque-ism (FR-011).
404///
405/// Exact match required — the config must use the canonical form (e.g., "ISM-v2022-DEC").
406fn validate_schema_version(config: &Config) -> Result<(), ConfigError> {
407    let compiled = marque_ism::generated::values::SCHEMA_VERSION;
408    let config_ver = &config.capco.version;
409
410    if config_ver != compiled {
411        return Err(ConfigError::SchemaVersionMismatch {
412            config_version: config_ver.clone(),
413            compiled_version: compiled,
414        });
415    }
416    Ok(())
417}
418
419// ---------------------------------------------------------------------------
420// Tests
421// ---------------------------------------------------------------------------
422
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::{Path, PathBuf};

    /// Builds a `ConfigFile` whose `[rules]` table holds the given
    /// `(rule, severity)` pairs and whose other fields are defaulted.
    fn config_file_with_rules(rules: &[(&str, &str)]) -> ConfigFile {
        let mut file = ConfigFile::default();
        for (k, v) in rules {
            file.rules.insert((*k).to_owned(), (*v).to_owned());
        }
        file
    }

    // ---------------------------------------------------------------------
    // Threshold validation
    // ---------------------------------------------------------------------

    #[test]
    fn set_confidence_threshold_accepts_boundaries() {
        let mut c = Config::default();
        assert!(c.set_confidence_threshold(0.0).is_ok());
        assert!(c.set_confidence_threshold(1.0).is_ok());
        assert!(c.set_confidence_threshold(0.5).is_ok());
    }

    #[test]
    fn set_confidence_threshold_rejects_out_of_range() {
        let mut c = Config::default();
        assert!(matches!(
            c.set_confidence_threshold(-0.1),
            Err(ConfigError::ThresholdOutOfRange { .. })
        ));
        assert!(matches!(
            c.set_confidence_threshold(1.1),
            Err(ConfigError::ThresholdOutOfRange { .. })
        ));
    }

    #[test]
    fn set_confidence_threshold_rejects_nan() {
        let mut c = Config::default();
        assert!(matches!(
            c.set_confidence_threshold(f32::NAN),
            Err(ConfigError::ThresholdOutOfRange { .. })
        ));
    }

    // ---------------------------------------------------------------------
    // Severity validation in merge_project_into
    // ---------------------------------------------------------------------

    #[test]
    fn merge_project_accepts_valid_severity_strings() {
        let mut c = Config::default();
        let file = config_file_with_rules(&[
            ("E001", "fix"),
            ("E002", "warn"),
            ("E003", "error"),
            ("E004", "off"),
        ]);
        assert!(merge_project_into(&mut c, file).is_ok());
        assert_eq!(c.rules.overrides.len(), 4);
    }

    #[test]
    fn merge_project_rejects_unknown_severity() {
        let mut c = Config::default();
        let file = config_file_with_rules(&[("E001", "err")]);
        let err = merge_project_into(&mut c, file).unwrap_err();
        match err {
            ConfigError::UnknownSeverity { rule, value } => {
                assert_eq!(rule, "E001");
                assert_eq!(value, "err");
            }
            other => panic!("expected UnknownSeverity, got {other:?}"),
        }
    }

    #[test]
    fn merge_project_rejects_severity_is_case_sensitive() {
        // Severity::parse_config is case-sensitive by design — uppercase must fail.
        let mut c = Config::default();
        let file = config_file_with_rules(&[("E001", "FIX")]);
        assert!(matches!(
            merge_project_into(&mut c, file),
            Err(ConfigError::UnknownSeverity { .. })
        ));
    }

    #[test]
    fn merge_project_rejects_empty_severity() {
        let mut c = Config::default();
        let file = config_file_with_rules(&[("E001", "")]);
        assert!(matches!(
            merge_project_into(&mut c, file),
            Err(ConfigError::UnknownSeverity { .. })
        ));
    }

    #[test]
    fn exit_code_matches_contract() {
        assert_eq!(
            ConfigError::ThresholdOutOfRange { value: 2.0 }.exit_code(),
            EX_DATAERR
        );
        assert_eq!(
            ConfigError::UnknownSeverity {
                rule: "E001".into(),
                value: "err".into(),
            }
            .exit_code(),
            EX_DATAERR
        );
        assert_eq!(
            ConfigError::InvalidEnvVar {
                var: "MARQUE_CONFIDENCE_THRESHOLD",
                raw: "bananas".into(),
                reason: "not a float",
            }
            .exit_code(),
            EX_DATAERR
        );
    }

    // ---------------------------------------------------------------------
    // D.1: discover_project_dir upward-walk semantics
    // ---------------------------------------------------------------------

    /// Scratch directory under the system temp dir, unique per test name and
    /// process id. Removed on drop, so a failing assertion (which unwinds)
    /// does not leave the fixture behind.
    struct TmpDir(PathBuf);

    impl TmpDir {
        /// Creates a fresh, empty directory, clearing any leftover from an
        /// earlier run that used the same name and pid.
        fn new(name: &str) -> Self {
            let dir = std::env::temp_dir()
                .join(format!("marque-config-test-{name}-{}", std::process::id()));
            let _ = fs::remove_dir_all(&dir);
            fs::create_dir_all(&dir).expect("create tmpdir");
            Self(dir)
        }

        /// Path of the scratch directory.
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TmpDir {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not mask the test result.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    #[test]
    fn discover_finds_marque_toml_in_start_dir() {
        let tmp = TmpDir::new("discover-here");
        let dir = tmp.path();
        fs::write(dir.join(".marque.toml"), b"").unwrap();
        assert_eq!(super::discover_project_dir(dir), Some(dir.to_path_buf()));
    }

    #[test]
    fn discover_walks_upward_for_marque_toml() {
        // tmp/root/.marque.toml; start from tmp/root/sub/deeper.
        let tmp = TmpDir::new("discover-walk");
        let root = tmp.path();
        fs::write(root.join(".marque.toml"), b"").unwrap();
        let sub = root.join("sub").join("deeper");
        fs::create_dir_all(&sub).unwrap();
        assert_eq!(super::discover_project_dir(&sub), Some(root.to_path_buf()));
    }

    #[test]
    fn discover_stops_at_git_root_without_marque_toml() {
        // tmp/root/.git/ + tmp/root/sub/ — start from sub, walk should hit
        // .git in root and return None (no project config above this point).
        let tmp = TmpDir::new("discover-git-stop");
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        let sub = root.join("sub");
        fs::create_dir_all(&sub).unwrap();
        assert_eq!(super::discover_project_dir(&sub), None);
    }

    #[test]
    fn discover_returns_marque_toml_at_git_root_when_both_present() {
        // The common case: a repo whose root has both .git and .marque.toml.
        // The walk must NOT stop at .git before checking .marque.toml.
        let tmp = TmpDir::new("discover-both");
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::write(root.join(".marque.toml"), b"").unwrap();
        let sub = root.join("crates").join("foo");
        fs::create_dir_all(&sub).unwrap();
        assert_eq!(super::discover_project_dir(&sub), Some(root.to_path_buf()));
    }

    #[test]
    fn load_walks_upward_to_find_project_config() {
        // tmp/root/.marque.toml + tmp/root/sub/, load from sub.
        let tmp = TmpDir::new("load-walk");
        let root = tmp.path();
        fs::write(
            root.join(".marque.toml"),
            br#"
[rules]
E001 = "warn"
"#,
        )
        .unwrap();
        let sub = root.join("sub");
        fs::create_dir_all(&sub).unwrap();
        let config = super::load(&sub).expect("load should succeed");
        assert_eq!(config.rules.overrides.get("E001"), Some(&"warn".to_owned()));
    }

    #[test]
    fn load_returns_defaults_when_walk_finds_no_marque_toml() {
        // tmp/root/.git but no .marque.toml — load returns defaults.
        let tmp = TmpDir::new("load-defaults");
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        let sub = root.join("sub");
        fs::create_dir_all(&sub).unwrap();
        let config = super::load(&sub).expect("load should succeed with defaults");
        assert!(config.rules.overrides.is_empty());
    }

    #[test]
    fn load_local_config_only_in_same_dir_as_marque_toml() {
        // tmp/root/.marque.toml + tmp/root/.marque.local.toml
        // tmp/root/sub/.marque.local.toml (should NOT be loaded)
        let tmp = TmpDir::new("load-local-same-dir");
        let root = tmp.path();
        fs::write(
            root.join(".marque.toml"),
            br#"
[capco]
"#,
        )
        .unwrap();
        fs::write(
            root.join(".marque.local.toml"),
            br#"
[user]
classifier_id = "from-root"
"#,
        )
        .unwrap();
        let sub = root.join("sub");
        fs::create_dir_all(&sub).unwrap();
        // A stray local config in `sub` should NOT be loaded — the local
        // search is anchored to the directory of the project config.
        fs::write(
            sub.join(".marque.local.toml"),
            br#"
[user]
classifier_id = "from-sub"
"#,
        )
        .unwrap();
        let config = super::load(&sub).expect("load should succeed");
        assert_eq!(
            config.user.classifier_id.as_deref(),
            Some("from-root"),
            "local config must be the one alongside .marque.toml, not in sub"
        );
    }
}