Skip to main content

nodex_core/
config.rs

1use serde::{Deserialize, Serialize};
2use std::collections::BTreeMap;
3use std::path::Path;
4
5use crate::error::{Error, Result};
6
7/// Root configuration deserialized from `nodex.toml`.
8#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9pub struct Config {
10    #[serde(default)]
11    pub scope: ScopeConfig,
12    #[serde(default)]
13    pub kinds: KindsConfig,
14    #[serde(default)]
15    pub statuses: StatusesConfig,
16    #[serde(default)]
17    pub identity: IdentityConfig,
18    #[serde(default)]
19    pub schema: SchemaConfig,
20    #[serde(default)]
21    pub rules: RulesConfig,
22    #[serde(default)]
23    pub parser: ParserConfig,
24    #[serde(default)]
25    pub detection: DetectionConfig,
26    #[serde(default)]
27    pub output: OutputConfig,
28    #[serde(default)]
29    pub report: ReportConfig,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct ScopeConfig {
34    #[serde(default)]
35    pub include: Vec<String>,
36    #[serde(default)]
37    pub exclude: Vec<String>,
38    #[serde(default)]
39    pub conditional_exclude: Vec<ConditionalExclude>,
40}
41
42impl Default for ScopeConfig {
43    fn default() -> Self {
44        Self {
45            include: vec!["**/*.md".to_string()],
46            exclude: vec![],
47            conditional_exclude: vec![],
48        }
49    }
50}
51
52/// When a file matching `parent_glob` satisfies `condition` (today the
53/// only supported condition is `status_terminal`), every other file in
54/// the parent's directory is dropped from scan scope. The parent itself
55/// stays in scope so it still parses into the graph.
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct ConditionalExclude {
58    pub parent_glob: String,
59    #[serde(default = "default_condition")]
60    pub condition: String,
61}
62
/// Serde default for `ConditionalExclude::condition`.
fn default_condition() -> String {
    String::from("status_terminal")
}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct KindsConfig {
69    #[serde(default = "default_kinds")]
70    pub allowed: Vec<String>,
71}
72
73impl Default for KindsConfig {
74    fn default() -> Self {
75        Self {
76            allowed: default_kinds(),
77        }
78    }
79}
80
/// Kinds accepted when `kinds.allowed` is not configured.
fn default_kinds() -> Vec<String> {
    Vec::from(["generic", "guide", "readme"].map(String::from))
}
87
88#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct StatusesConfig {
90    #[serde(default = "default_statuses")]
91    pub allowed: Vec<String>,
92    #[serde(default = "default_terminal")]
93    pub terminal: Vec<String>,
94}
95
96impl Default for StatusesConfig {
97    fn default() -> Self {
98        Self {
99            allowed: default_statuses(),
100            terminal: default_terminal(),
101        }
102    }
103}
104
/// Status values accepted when `statuses.allowed` is not configured.
fn default_statuses() -> Vec<String> {
    const NAMES: [&str; 5] = ["active", "superseded", "archived", "deprecated", "abandoned"];
    Vec::from(NAMES.map(String::from))
}
117
/// Status values treated as terminal when `statuses.terminal` is not configured.
fn default_terminal() -> Vec<String> {
    const NAMES: [&str; 4] = ["superseded", "archived", "deprecated", "abandoned"];
    Vec::from(NAMES.map(String::from))
}
124
125#[derive(Debug, Clone, Default, Serialize, Deserialize)]
126pub struct IdentityConfig {
127    #[serde(default)]
128    pub kind_rules: Vec<KindRule>,
129    #[serde(default)]
130    pub id_rules: Vec<IdRule>,
131}
132
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct KindRule {
135    pub glob: String,
136    pub kind: String,
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct IdRule {
141    #[serde(default)]
142    pub kind: String,
143    #[serde(default)]
144    pub glob: Option<String>,
145    pub template: String,
146}
147
148/// Document-schema constraints.
149///
150/// Top-level entries (`required`, `types`, `enums`, `cross_field`)
151/// apply to **every** document. Per-kind tightening is expressed in
152/// `overrides`; rules combine the global set with the first matching
153/// override so kinds inherit a project-wide baseline without ceremony.
154#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct SchemaConfig {
156    #[serde(default = "default_required")]
157    pub required: Vec<String>,
158    #[serde(default)]
159    pub types: BTreeMap<String, FieldType>,
160    #[serde(default)]
161    pub enums: BTreeMap<String, Vec<String>>,
162    #[serde(default)]
163    pub cross_field: Vec<CrossFieldSpec>,
164    #[serde(default)]
165    pub overrides: Vec<SchemaOverride>,
166}
167
168impl Default for SchemaConfig {
169    fn default() -> Self {
170        Self {
171            required: default_required(),
172            types: BTreeMap::new(),
173            enums: BTreeMap::new(),
174            cross_field: vec![],
175            overrides: vec![],
176        }
177    }
178}
179
/// Fields required when `schema.required` is not configured.
fn default_required() -> Vec<String> {
    Vec::from(["id", "title", "kind", "status"].map(String::from))
}
186
187/// Per-kind schema constraints.
188///
189/// Every field except `kinds` and `required` defaults to an empty
190/// collection, and each corresponding rule short-circuits when empty.
191/// Projects that never configure these keep today's behaviour verbatim.
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct SchemaOverride {
194    pub kinds: Vec<String>,
195    pub required: Vec<String>,
196    #[serde(default)]
197    pub types: BTreeMap<String, FieldType>,
198    #[serde(default)]
199    pub enums: BTreeMap<String, Vec<String>>,
200    #[serde(default)]
201    pub cross_field: Vec<CrossFieldSpec>,
202}
203
204/// Accepted frontmatter field types. Covers the scalars that actually
205/// appear in document frontmatter. Add a variant when a real need arises —
206/// the `match` statement in the validator will force every consumer to
207/// acknowledge the new type.
208#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
209#[serde(rename_all = "snake_case")]
210pub enum FieldType {
211    String,
212    Integer,
213    Bool,
214    Date,
215}
216
217/// Conditional field requirement: "when LHS predicate holds, `require` must be present".
218///
219/// v1 parser accepts only `"<field>=<value>"` equality. Extending to new
220/// predicates (e.g. `in`, `matches`) happens by versioning the `when`
221/// string into a richer type, without invalidating existing configs.
222#[derive(Debug, Clone, Serialize, Deserialize)]
223pub struct CrossFieldSpec {
224    pub when: String,
225    pub require: String,
226}
227
228#[derive(Debug, Clone, Default, Serialize, Deserialize)]
229pub struct RulesConfig {
230    #[serde(default)]
231    pub naming: Vec<NamingRule>,
232}
233
234#[derive(Debug, Clone, Serialize, Deserialize)]
235pub struct NamingRule {
236    pub glob: String,
237    pub pattern: String,
238    #[serde(default)]
239    pub sequential: bool,
240    #[serde(default)]
241    pub unique: bool,
242}
243
244#[derive(Debug, Clone, Default, Serialize, Deserialize)]
245pub struct ParserConfig {
246    #[serde(default)]
247    pub link_patterns: Vec<LinkPattern>,
248}
249
250#[derive(Debug, Clone, Serialize, Deserialize)]
251pub struct LinkPattern {
252    pub pattern: String,
253    pub relation: String,
254}
255
256#[derive(Debug, Clone, Serialize, Deserialize)]
257pub struct DetectionConfig {
258    #[serde(default = "default_stale_days")]
259    pub stale_days: u32,
260    #[serde(default = "default_orphan_grace_days")]
261    pub orphan_grace_days: u32,
262    /// Kinds whose nodes are skipped by orphan detection regardless of incoming-edge count.
263    ///
264    /// Use for kinds that are leaf-by-design (entry-point skills, package READMEs, runbook
265    /// procedures, architecture overviews) where a missing inbound edge is the expected
266    /// shape rather than a defect. Per-node `orphan_ok: true` remains available for the
267    /// per-instance opt-out within tracked kinds.
268    #[serde(default)]
269    pub orphan_ok_kinds: Vec<String>,
270}
271
272impl Default for DetectionConfig {
273    fn default() -> Self {
274        Self {
275            stale_days: default_stale_days(),
276            orphan_grace_days: default_orphan_grace_days(),
277            orphan_ok_kinds: Vec::new(),
278        }
279    }
280}
281
/// Serde default for `detection.stale_days`.
fn default_stale_days() -> u32 {
    const STALE_DAYS: u32 = 180;
    STALE_DAYS
}
285
/// Serde default for `detection.orphan_grace_days`.
fn default_orphan_grace_days() -> u32 {
    const GRACE_DAYS: u32 = 14;
    GRACE_DAYS
}
289
290#[derive(Debug, Clone, Serialize, Deserialize)]
291pub struct OutputConfig {
292    #[serde(default = "default_output_dir")]
293    pub dir: String,
294}
295
296impl Default for OutputConfig {
297    fn default() -> Self {
298        Self {
299            dir: default_output_dir(),
300        }
301    }
302}
303
/// Serde default for `output.dir`.
fn default_output_dir() -> String {
    String::from("_index")
}
307
308#[derive(Debug, Clone, Serialize, Deserialize)]
309pub struct ReportConfig {
310    #[serde(default = "default_report_title")]
311    pub title: String,
312    #[serde(default = "default_god_node_display_limit")]
313    pub god_node_display_limit: usize,
314    #[serde(default = "default_display_limit")]
315    pub orphan_display_limit: usize,
316    #[serde(default = "default_display_limit")]
317    pub stale_display_limit: usize,
318}
319
320impl Default for ReportConfig {
321    fn default() -> Self {
322        Self {
323            title: default_report_title(),
324            god_node_display_limit: default_god_node_display_limit(),
325            orphan_display_limit: default_display_limit(),
326            stale_display_limit: default_display_limit(),
327        }
328    }
329}
330
/// Serde default for `report.title`.
fn default_report_title() -> String {
    String::from("Document Graph")
}
334
/// Serde default for `report.god_node_display_limit`.
fn default_god_node_display_limit() -> usize {
    const LIMIT: usize = 10;
    LIMIT
}
338
/// Serde default shared by `report.orphan_display_limit` and
/// `report.stale_display_limit`.
fn default_display_limit() -> usize {
    const LIMIT: usize = 20;
    LIMIT
}
342
343impl Config {
344    /// Load config from a `nodex.toml` file. Returns default config if not found.
345    ///
346    /// Config is validated for internal consistency before it is returned,
347    /// so downstream code can assume that `enums` / `cross_field` references
348    /// are well-formed.
349    pub fn load(root: &Path) -> Result<Self> {
350        let path = root.join("nodex.toml");
351        if !path.exists() {
352            return Ok(Self::default());
353        }
354        let content = std::fs::read_to_string(&path).map_err(|e| Error::Io {
355            path: path.clone(),
356            source: e,
357        })?;
358        let config: Self =
359            toml::from_str(&content).map_err(|e| Error::Config(format!("{path:?}: {e}")))?;
360        config.validate()?;
361        Ok(config)
362    }
363
364    /// Validate internal consistency. Called automatically by `load()`.
365    ///
366    /// Rejects definitions that would otherwise only surface as
367    /// confusing runtime behaviour:
368    /// - `enums` on collection-valued built-in fields (`tags`,
369    ///   `supersedes`, `implements`, `related`) — these cannot be
370    ///   validated against a scalar set, so silent ignore would trap
371    ///   users who typed the obvious syntax and saw no effect.
372    /// - `enums.status` / `enums.kind` values that are not in the
373    ///   corresponding global `allowed` list.
374    /// - `cross_field.when` expressions that don't parse.
375    /// - `cross_field.when`'s LHS and `cross_field.require` referring
376    ///   to a field name that is not a built-in scalar and is not
377    ///   declared in the override's `types` / `enums` / `required`.
378    pub fn validate(&self) -> Result<()> {
379        // Refuse structurally-broken configs: empty `kinds.allowed`
380        // means every document would be kind-less (inference falls
381        // back to "generic") yet no kind would ever be valid — either
382        // the user is mis-configured or they meant "accept all kinds"
383        // (which is the default when the key is omitted entirely).
384        if self.kinds.allowed.is_empty() {
385            return Err(Error::Config(
386                "kinds.allowed must not be empty; omit the key to accept the defaults, \
387                 or list every kind your project uses"
388                    .to_string(),
389            ));
390        }
391
392        // Same rationale as `kinds.allowed`: an empty `statuses.allowed`
393        // would make every status value invalid and break scaffolding,
394        // which picks the first allowed status for the initial value.
395        if self.statuses.allowed.is_empty() {
396            return Err(Error::Config(
397                "statuses.allowed must not be empty; omit the key to accept the defaults, \
398                 or list every status your project uses"
399                    .to_string(),
400            ));
401        }
402
403        // `nodex lifecycle <action>` writes a fixed target status per
404        // action (supersede → "superseded", archive → "archived", …).
405        // If the project's `statuses.allowed` omits any of those, a
406        // lifecycle transition would silently produce a document that
407        // then fails enum validation. Surface the mismatch at load time
408        // instead, with a message pointing at the exact missing values.
409        let missing: Vec<&str> = crate::lifecycle::LIFECYCLE_TARGET_STATUSES
410            .iter()
411            .copied()
412            .filter(|s| !self.statuses.allowed.iter().any(|a| a == s))
413            .collect();
414        if !missing.is_empty() {
415            return Err(Error::Config(format!(
416                "statuses.allowed is missing lifecycle target status(es): {missing:?}; \
417                 add them to `statuses.allowed` or omit the key to accept the defaults"
418            )));
419        }
420
421        // `FALLBACK_KIND` is what `parser::identity::infer_kind`
422        // assigns when no `identity.kind_rules` glob matches a
423        // document's path, and what `migrate` injects when scaffolding
424        // frontmatter onto a bare file. Leaving this out of
425        // `kinds.allowed` was the exact defect that let `migrate` /
426        // `parse_document` write documents their own config then
427        // rejected. Require its presence at load; projects that want
428        // every document strongly classified can still write
429        // exhaustive `kind_rules`, in which case the fallback simply
430        // never fires.
431        if !self
432            .kinds
433            .allowed
434            .iter()
435            .any(|k| k == crate::parser::identity::FALLBACK_KIND)
436        {
437            return Err(Error::Config(format!(
438                "kinds.allowed is missing the fallback kind {:?}; \
439                 either include it, or omit `kinds.allowed` to accept the defaults",
440                crate::parser::identity::FALLBACK_KIND
441            )));
442        }
443
444        // Every `detection.orphan_ok_kinds` entry must reference a kind
445        // the project actually accepts. Otherwise a typo ("skll" instead
446        // of "skill") loads cleanly and the runtime exempts nothing —
447        // the silent-skip pattern the config-driven rule explicitly
448        // forbids. Same shape as the `enums.status` / `enums.kind`
449        // subset-of-global-allowed checks below.
450        for k in &self.detection.orphan_ok_kinds {
451            if !self.kinds.allowed.iter().any(|a| a == k) {
452                return Err(Error::Config(format!(
453                    "detection.orphan_ok_kinds contains {k:?} which is not in \
454                     kinds.allowed; add it to kinds.allowed or remove the exemption"
455                )));
456            }
457        }
458
459        // `output.dir` is joined to the project root whenever build /
460        // report / cache writes their artefacts, so a value like
461        // `"../escape"` or `"/etc/out"` would silently write files
462        // outside the project. `path_guard::reject_traversal` already
463        // enforces this invariant for user-supplied paths on rename /
464        // scaffold / migrate; extend it to the config surface.
465        if !self.output.dir.is_empty() {
466            crate::path_guard::reject_traversal(std::path::Path::new(&self.output.dir)).map_err(
467                |_| {
468                    Error::Config(format!(
469                        "output.dir {:?} escapes the project root; \
470                         use a relative path without `..` or a leading `/`",
471                        self.output.dir
472                    ))
473                },
474            )?;
475        }
476
477        self.validate_block(
478            "schema",
479            &self.schema.required,
480            &self.schema.types,
481            &self.schema.enums,
482            &self.schema.cross_field,
483        )?;
484
485        // Validate naming rules at load time rather than silently
486        // skipping invalid patterns at check time — a typo in a glob
487        // or regex would otherwise validate zero files forever.
488        for (idx, nr) in self.rules.naming.iter().enumerate() {
489            if globset::Glob::new(&nr.glob).is_err() {
490                return Err(Error::Config(format!(
491                    "rules.naming[{idx}].glob {:?} is not a valid glob",
492                    nr.glob
493                )));
494            }
495            if regex::Regex::new(&nr.pattern).is_err() {
496                return Err(Error::Config(format!(
497                    "rules.naming[{idx}].pattern {:?} is not a valid regex",
498                    nr.pattern
499                )));
500            }
501        }
502
503        for (idx, ov) in self.schema.overrides.iter().enumerate() {
504            let ctx = format!("schema.overrides[{idx}] (kinds={:?})", ov.kinds);
505            self.validate_block(&ctx, &ov.required, &ov.types, &ov.enums, &ov.cross_field)?;
506            // Reject cross_field entries that duplicate a global entry.
507            // `cross_field_for` accumulates global + override — if a
508            // user copy-pastes the same rule into both slots, every
509            // matching node would get two violations. Fail loud at
510            // load time rather than debug silently.
511            for cf in &ov.cross_field {
512                if self
513                    .schema
514                    .cross_field
515                    .iter()
516                    .any(|g| g.when == cf.when && g.require == cf.require)
517                {
518                    return Err(Error::Config(format!(
519                        "{ctx}: cross_field {{ when={:?}, require={:?} }} \
520                         is already declared in [schema].cross_field — \
521                         remove the override copy or change its predicate",
522                        cf.when, cf.require
523                    )));
524                }
525            }
526        }
527        Ok(())
528    }
529
530    /// Validate one schema block (the global [schema] or one override).
531    /// Extracted so both share the same rules.
532    fn validate_block(
533        &self,
534        ctx: &str,
535        required: &[String],
536        types: &BTreeMap<String, FieldType>,
537        enums: &BTreeMap<String, Vec<String>>,
538        cross_field: &[CrossFieldSpec],
539    ) -> Result<()> {
540        for (field, allowed) in enums {
541            if is_collection_builtin(field) {
542                return Err(Error::Config(format!(
543                    "{ctx}: enums.{field} — collection-valued built-in \
544                     fields cannot have a scalar enum constraint"
545                )));
546            }
547            let global = match field.as_str() {
548                "status" => Some((&self.statuses.allowed, "statuses.allowed")),
549                "kind" => Some((&self.kinds.allowed, "kinds.allowed")),
550                _ => None,
551            };
552            if let Some((global, key)) = global {
553                for value in allowed {
554                    if !global.contains(value) {
555                        return Err(Error::Config(format!(
556                            "{ctx}: enums.{field} contains {value:?} \
557                             which is not in {key}"
558                        )));
559                    }
560                }
561            }
562
563            // A narrowing enum on `status` — whether at the global
564            // `[schema]` level or inside a `[[schema.overrides]]` block —
565            // must still cover the four lifecycle target statuses.
566            // Otherwise `nodex lifecycle <action>` on a matching document
567            // would write a status value that immediately fails its own
568            // enum validation, producing a config the tool can mutate
569            // only by violating itself.
570            if field == "status" {
571                let missing: Vec<&str> = crate::lifecycle::LIFECYCLE_TARGET_STATUSES
572                    .iter()
573                    .copied()
574                    .filter(|s| !allowed.iter().any(|a| a == s))
575                    .collect();
576                if !missing.is_empty() {
577                    return Err(Error::Config(format!(
578                        "{ctx}: enums.status narrows below the lifecycle target set; \
579                         missing {missing:?}. Either include all four \
580                         (superseded, archived, deprecated, abandoned) or drop \
581                         the enum constraint on status"
582                    )));
583                }
584            }
585
586            // If the same field also declares a non-string `types`
587            // constraint, every enum value has to parse as that type.
588            // Otherwise `scaffold`'s default ("first allowed enum
589            // value") writes a document that immediately fails
590            // `field_type` on the next `check` — observed with
591            // `types = { priority = "integer" }` combined with
592            // `enums = { priority = ["low", "medium", "high"] }`.
593            if let Some(ty) = types.get(field)
594                && let Some(bad) = allowed.iter().find(|v| !value_matches_field_type(v, *ty))
595            {
596                return Err(Error::Config(format!(
597                    "{ctx}: enums.{field} value {bad:?} is not a valid \
598                     {ty:?}; either drop the enum or widen types.{field}"
599                )));
600            }
601        }
602
603        for cf in cross_field {
604            let predicate = parse_when(&cf.when).map_err(|e| {
605                Error::Config(format!("{ctx}: cross_field.when {:?}: {e}", cf.when))
606            })?;
607            let WhenPredicate::Equals { field, .. } = &predicate;
608            ensure_field_known(field, required, types, enums, ctx, "cross_field.when")?;
609            ensure_field_known(
610                &cf.require,
611                required,
612                types,
613                enums,
614                ctx,
615                "cross_field.require",
616            )?;
617        }
618        Ok(())
619    }
620
621    /// Merged view: return every field-type constraint that applies to
622    /// a given kind (global + first matching override). Scaffold and
623    /// rules use this so every declared constraint is honoured once.
624    pub fn types_for(&self, kind: &str) -> BTreeMap<String, FieldType> {
625        let mut out = self.schema.types.clone();
626        if let Some(ov) = self.schema_override_for(kind) {
627            for (k, v) in &ov.types {
628                out.insert(k.clone(), *v);
629            }
630        }
631        out
632    }
633
634    /// Merged view: every enum constraint that applies to a given kind.
635    pub fn enums_for(&self, kind: &str) -> BTreeMap<String, Vec<String>> {
636        let mut out = self.schema.enums.clone();
637        if let Some(ov) = self.schema_override_for(kind) {
638            for (k, v) in &ov.enums {
639                out.insert(k.clone(), v.clone());
640            }
641        }
642        out
643    }
644
645    /// Merged view: every cross-field constraint that applies to a
646    /// given kind. Global and override entries accumulate; an override
647    /// never silently drops a global rule.
648    pub fn cross_field_for(&self, kind: &str) -> Vec<CrossFieldSpec> {
649        let mut out = self.schema.cross_field.clone();
650        if let Some(ov) = self.schema_override_for(kind) {
651            out.extend_from_slice(&ov.cross_field);
652        }
653        out
654    }
655
656    /// Check whether a status string is terminal.
657    pub fn is_terminal(&self, status: &str) -> bool {
658        self.statuses.terminal.iter().any(|t| t == status)
659    }
660
661    /// Whether nodes of the given kind are exempt from orphan detection.
662    ///
663    /// Driven by `detection.orphan_ok_kinds`. Pairs with the per-instance
664    /// `node.orphan_ok` opt-out so callers can express both "this entire
665    /// kind is leaf-by-design" and "this specific document is exceptional".
666    /// Named to mirror the field and the per-node flag, paralleling
667    /// `is_terminal` ↔ `statuses.terminal`.
668    pub fn is_orphan_ok_kind(&self, kind: &str) -> bool {
669        self.detection.orphan_ok_kinds.iter().any(|k| k == kind)
670    }
671
672    /// Get required fields for a given kind. Falls back to the global
673    /// `schema.required` list when no override matches.
674    pub fn required_for(&self, kind: &str) -> &[String] {
675        for ov in &self.schema.overrides {
676            if ov.kinds.iter().any(|k| k == kind) {
677                return &ov.required;
678            }
679        }
680        &self.schema.required
681    }
682
683    /// Find the schema override that applies to a given kind, if any.
684    pub fn schema_override_for(&self, kind: &str) -> Option<&SchemaOverride> {
685        self.schema
686            .overrides
687            .iter()
688            .find(|ov| ov.kinds.iter().any(|k| k == kind))
689    }
690
691    /// The status value that tool-level actions (`scaffold`, `migrate`)
692    /// should write when they create a new document of a given kind.
693    ///
694    /// Walks from the narrowest declaration to the broadest: per-kind
695    /// override's `enums.status`, then the global `schema.enums.status`,
696    /// then `statuses.allowed`. The first hit's `first()` wins.
697    /// `Config::validate` guarantees each of these is either absent or
698    /// non-empty, and that any `enums.status` covers the four lifecycle
699    /// targets — so the result is always in-vocabulary and the invariant
700    /// holding migrate / scaffold together with `check` never breaks.
701    pub fn initial_status_for(&self, kind: &str) -> &str {
702        if let Some(ov) = self.schema_override_for(kind)
703            && let Some(allowed) = ov.enums.get("status")
704            && let Some(first) = allowed.first()
705        {
706            return first.as_str();
707        }
708        if let Some(allowed) = self.schema.enums.get("status")
709            && let Some(first) = allowed.first()
710        {
711            return first.as_str();
712        }
713        self.statuses
714            .allowed
715            .first()
716            .map(String::as_str)
717            .expect("statuses.allowed non-empty — enforced by Config::validate")
718    }
719}
720
/// A `cross_field.when` expression after parsing (see `parse_when`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum WhenPredicate {
    /// `<field>=<value>`: holds when `field` is exactly equal to `value`.
    Equals { field: String, value: String },
}
727
/// Names of the built-in scalar fields on `Node`.
///
/// The list lives here rather than on `Node` so config validation has
/// the canonical names without depending on the whole model module.
/// Collection fields (`tags`, `supersedes`, …) are deliberately left
/// out: they cannot be members of a scalar enum.
#[rustfmt::skip]
pub const BUILTIN_SCALAR_FIELDS: &[&str] = &[
    "id", "title", "kind", "status",
    "created", "updated", "reviewed",
    "owner", "superseded_by",
];
743
/// Names of the built-in collection-valued fields.
///
/// There is no single scalar value to check on these, so config
/// validation rejects any `enums` entry keyed on one of them.
#[rustfmt::skip]
pub const BUILTIN_COLLECTION_FIELDS: &[&str] = &[
    "tags",
    "supersedes",
    "implements",
    "related",
];
747
748/// True when `field` is one of the built-in `Node` fields of any kind.
749pub fn is_builtin_node_field(field: &str) -> bool {
750    BUILTIN_SCALAR_FIELDS.contains(&field) || BUILTIN_COLLECTION_FIELDS.contains(&field)
751}
752
753/// True when `field` is a built-in collection-valued field.
754pub fn is_collection_builtin(field: &str) -> bool {
755    BUILTIN_COLLECTION_FIELDS.contains(&field)
756}
757
758/// True when the raw frontmatter-style string `value` is a valid
759/// member of the declared `FieldType`. Used by `Config::validate` to
760/// reject configs that pair a typed field with an enum containing
761/// values that can never satisfy the type.
762fn value_matches_field_type(value: &str, ty: FieldType) -> bool {
763    match ty {
764        FieldType::String => true,
765        FieldType::Integer => value.parse::<i64>().is_ok(),
766        FieldType::Bool => matches!(value, "true" | "false"),
767        FieldType::Date => chrono::NaiveDate::parse_from_str(value, "%Y-%m-%d").is_ok(),
768    }
769}
770
771/// Reject field names in `cross_field.when` / `cross_field.require`
772/// that are not built-in and not explicitly declared in the current
773/// schema block. Keeps typos from turning into silently-skipped checks.
774fn ensure_field_known(
775    field: &str,
776    required: &[String],
777    types: &BTreeMap<String, FieldType>,
778    enums: &BTreeMap<String, Vec<String>>,
779    ctx: &str,
780    slot: &str,
781) -> Result<()> {
782    if is_builtin_node_field(field)
783        || required.iter().any(|r| r == field)
784        || types.contains_key(field)
785        || enums.contains_key(field)
786    {
787        return Ok(());
788    }
789    Err(Error::Config(format!(
790        "{ctx}: {slot} references unknown field {field:?}; declare it \
791         in required / types / enums or use a built-in name"
792    )))
793}
794
795/// Parse a `cross_field.when` expression. v1 accepts only `field=value`.
796///
797/// Rejects `==` and any form where the value starts with `=`, so a typo
798/// can never silently turn into a predicate that matches nothing. Also
799/// rejects empty LHS / RHS and expressions with multiple top-level `=`.
800pub fn parse_when(raw: &str) -> std::result::Result<WhenPredicate, String> {
801    let trimmed = raw.trim();
802    let parts: Vec<&str> = trimmed.splitn(3, '=').collect();
803    if parts.len() != 2 {
804        return Err(format!(
805            "expected exactly one '=' in <field>=<value>; values with \
806             embedded '=' are not supported in v1 (got {raw:?})"
807        ));
808    }
809    let field = parts[0].trim();
810    let value = parts[1].trim();
811    if field.is_empty() || value.is_empty() {
812        return Err("expected non-empty <field>=<value>".to_string());
813    }
814    if value.starts_with('=') {
815        return Err("value must not start with '=' (use a single '=' separator)".to_string());
816    }
817    Ok(WhenPredicate::Equals {
818        field: field.to_string(),
819        value: value.to_string(),
820    })
821}
822
#[cfg(test)]
mod tests {
    use super::*;

    /// Validates `config`, expecting it to fail, and returns the message
    /// carried by the resulting `Error::Config`. Panics when validation
    /// succeeds or fails with any other error variant.
    fn config_error_message(config: &Config) -> String {
        match config.validate().unwrap_err() {
            Error::Config(msg) => msg,
            _ => panic!("expected Config error"),
        }
    }

    /// Builds a `Config` whose only schema override is `ov`, retargeted at
    /// the single kind `kind`. Everything else keeps its default.
    fn override_with(kind: &str, mut ov: SchemaOverride) -> Config {
        ov.kinds = vec![kind.into()];
        let schema = SchemaConfig {
            overrides: vec![ov],
            ..Default::default()
        };
        Config {
            schema,
            ..Config::default()
        }
    }

    #[test]
    fn parse_when_accepts_simple_equality() {
        let expected = WhenPredicate::Equals {
            field: "status".into(),
            value: "superseded".into(),
        };
        assert_eq!(parse_when("status=superseded").unwrap(), expected);
    }

    #[test]
    fn parse_when_trims_whitespace() {
        let WhenPredicate::Equals { field, value } =
            parse_when("  status  =  superseded  ").unwrap();
        assert_eq!(field, "status");
        assert_eq!(value, "superseded");
    }

    #[test]
    fn parse_when_rejects_double_equals() {
        let parsed = parse_when("status==foo");
        assert!(parsed.is_err());
    }

    #[test]
    fn parse_when_rejects_empty_sides() {
        // Missing field, missing value, and a completely empty expression.
        for raw in ["=foo", "field=", ""] {
            assert!(parse_when(raw).is_err());
        }
    }

    #[test]
    fn parse_when_rejects_triple_equals() {
        let parsed = parse_when("a=b=c");
        assert!(parsed.is_err());
    }

    #[test]
    fn validate_rejects_enum_on_collection_field() {
        let tags_enum = SchemaOverride {
            kinds: Vec::new(),
            required: Vec::new(),
            types: BTreeMap::new(),
            enums: BTreeMap::from([("tags".to_string(), vec!["foo".into()])]),
            cross_field: Vec::new(),
        };
        let msg = config_error_message(&override_with("adr", tags_enum));
        assert!(msg.contains("collection-valued"), "{msg}");
    }

    #[test]
    fn validate_rejects_enum_value_outside_global_allowed() {
        // All four lifecycle target statuses (superseded / archived /
        // deprecated / abandoned) are listed in `statuses.allowed` on
        // purpose: otherwise the lifecycle-coverage check would fire
        // first and this test would no longer isolate the "enum value
        // outside allowed" check.
        let statuses = StatusesConfig {
            allowed: vec![
                "active".into(),
                "superseded".into(),
                "archived".into(),
                "deprecated".into(),
                "abandoned".into(),
            ],
            terminal: Vec::new(),
        };
        let status_enum = SchemaOverride {
            kinds: vec!["adr".into()],
            required: Vec::new(),
            types: BTreeMap::new(),
            enums: BTreeMap::from([(
                "status".to_string(),
                vec!["active".into(), "bogus".into()],
            )]),
            cross_field: Vec::new(),
        };
        let config = Config {
            statuses,
            schema: SchemaConfig {
                overrides: vec![status_enum],
                ..Default::default()
            },
            ..Config::default()
        };
        let msg = config_error_message(&config);
        assert!(msg.contains("bogus"));
        assert!(msg.contains("statuses.allowed"));
    }

    #[test]
    fn validate_rejects_cross_field_unknown_field() {
        // `statuz` is a deliberate misspelling of `status`.
        let typo_rule = CrossFieldSpec {
            when: "statuz=superseded".into(),
            require: "superseded_by".into(),
        };
        let config = override_with(
            "adr",
            SchemaOverride {
                kinds: Vec::new(),
                required: Vec::new(),
                types: BTreeMap::new(),
                enums: BTreeMap::new(),
                cross_field: vec![typo_rule],
            },
        );
        let msg = config_error_message(&config);
        assert!(msg.contains("unknown field"), "{msg}");
    }

    #[test]
    fn validate_error_includes_override_context() {
        let two_kind_override = SchemaOverride {
            kinds: vec!["adr".into(), "guide".into()],
            required: Vec::new(),
            types: BTreeMap::new(),
            enums: BTreeMap::from([("tags".to_string(), vec!["x".into()])]),
            cross_field: Vec::new(),
        };
        let config = Config {
            schema: SchemaConfig {
                overrides: vec![two_kind_override],
                ..Default::default()
            },
            ..Config::default()
        };
        let msg = config_error_message(&config);
        assert!(msg.contains("overrides[0]"));
        assert!(msg.contains("\"adr\""));
    }

    #[test]
    fn validate_accepts_empty_schema() {
        let config = Config::default();
        config.validate().unwrap();
    }

    #[test]
    fn validate_rejects_statuses_allowed_missing_lifecycle_target() {
        // "archived" is left out. If that were accepted,
        // `nodex lifecycle archive` would write a status value the rest
        // of the project's config treats as invalid, so loading must
        // fail fast instead.
        let statuses = StatusesConfig {
            allowed: vec![
                "active".into(),
                "superseded".into(),
                "deprecated".into(),
                "abandoned".into(),
            ],
            terminal: vec!["superseded".into()],
        };
        let config = Config {
            statuses,
            ..Config::default()
        };
        let msg = config_error_message(&config);
        assert!(msg.contains("archived"), "message was: {msg}");
        assert!(msg.contains("lifecycle"), "message was: {msg}");
    }

    #[test]
    fn validate_rejects_override_status_enum_missing_lifecycle_target() {
        // The override narrows `status` to fewer values than the four
        // lifecycle targets. `nodex lifecycle archive` on a matching
        // kind would then write a status that this very enum rejects —
        // the tool mutating itself into invalidity. Must be refused at
        // load.
        let narrowed = SchemaOverride {
            kinds: vec!["adr".into()],
            required: Vec::new(),
            types: BTreeMap::new(),
            enums: BTreeMap::from([(
                "status".to_string(),
                vec!["active".into(), "superseded".into()],
            )]),
            cross_field: Vec::new(),
        };
        let config = Config {
            schema: SchemaConfig {
                overrides: vec![narrowed],
                ..Default::default()
            },
            ..Config::default()
        };
        let msg = config_error_message(&config);
        assert!(msg.contains("archived"), "message was: {msg}");
        assert!(msg.contains("lifecycle"), "message was: {msg}");
    }

    #[test]
    fn validate_rejects_output_dir_escaping_root() {
        // Every build / report / cache write joins `output.dir` onto the
        // project root, so a traversal value would silently place
        // `_index/*` outside the project (seen: `../escape` leaked
        // `graph.json` / `cache.json` / `backlinks.json` into the parent
        // directory). Must be refused at load.
        for bad in ["../escape", "/etc/nodex", "docs/../../out"] {
            let output = OutputConfig {
                dir: bad.to_string(),
            };
            let config = Config {
                output,
                ..Config::default()
            };
            let outcome = config.validate();
            if let Err(Error::Config(msg)) = &outcome {
                assert!(
                    msg.contains("output.dir") && msg.contains("escapes"),
                    "for {bad:?} got unexpected message: {msg}"
                );
            } else {
                panic!("value {bad:?} should have been rejected, got {outcome:?}");
            }
        }
    }

    #[test]
    fn validate_rejects_kinds_allowed_missing_fallback_kind() {
        // Documents whose path matches no `identity.kind_rules` glob get
        // the fallback kind ("generic") from `migrate` /
        // `parse_document`. When `kinds.allowed` leaves it out, that
        // assignment immediately fails FieldEnumRule — the tool writing
        // a document its own config rejects. Must be refused at load.
        let kinds = KindsConfig {
            allowed: vec!["adr".into()],
        };
        let config = Config {
            kinds,
            ..Config::default()
        };
        let msg = config_error_message(&config);
        assert!(msg.contains("generic"), "message was: {msg}");
        assert!(msg.contains("fallback"), "message was: {msg}");
    }

    #[test]
    fn validate_rejects_enum_value_failing_its_declared_type() {
        // `types = { priority = "integer" }` together with
        // `enums = { priority = ["low", "medium", "high"] }` used to be
        // accepted, which made `scaffold` emit an immediately-invalid
        // document (it wrote the first enum value, then FieldTypeRule
        // flagged it). The two constraints may coexist, but every enum
        // value has to parse as the declared type.
        let typed_enum = SchemaOverride {
            kinds: vec!["adr".into()],
            required: Vec::new(),
            types: BTreeMap::from([("priority".to_string(), FieldType::Integer)]),
            enums: BTreeMap::from([(
                "priority".to_string(),
                vec!["low".into(), "medium".into(), "high".into()],
            )]),
            cross_field: Vec::new(),
        };
        let config = Config {
            schema: SchemaConfig {
                overrides: vec![typed_enum],
                ..Default::default()
            },
            ..Config::default()
        };
        let msg = config_error_message(&config);
        assert!(msg.contains("priority"), "message was: {msg}");
        assert!(msg.contains("\"low\""), "message was: {msg}");
    }

    #[test]
    fn global_cross_field_applies_without_override() {
        let global_rule = CrossFieldSpec {
            when: "status=superseded".into(),
            require: "superseded_by".into(),
        };
        let config = Config {
            schema: SchemaConfig {
                cross_field: vec![global_rule],
                ..Default::default()
            },
            ..Config::default()
        };
        config.validate().unwrap();

        let collected = config.cross_field_for("adr");
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0].require, "superseded_by");
    }

    #[test]
    fn validate_rejects_cross_field_duplicate_across_global_and_override() {
        // The same rule is declared once globally and once inside an
        // override; a closure builds it twice without relying on `Clone`.
        let rule = || CrossFieldSpec {
            when: "status=superseded".into(),
            require: "superseded_by".into(),
        };
        let duplicate_override = SchemaOverride {
            kinds: vec!["adr".into()],
            required: Vec::new(),
            types: BTreeMap::new(),
            enums: BTreeMap::new(),
            cross_field: vec![rule()],
        };
        let config = Config {
            schema: SchemaConfig {
                cross_field: vec![rule()],
                overrides: vec![duplicate_override],
                ..Default::default()
            },
            ..Config::default()
        };
        let msg = config_error_message(&config);
        assert!(msg.contains("already declared in [schema].cross_field"));
    }

    #[test]
    fn validate_rejects_orphan_ok_kind_outside_kinds_allowed() {
        // A kind named in `detection.orphan_ok_kinds` but absent from
        // `kinds.allowed` would make the user believe a kind is exempt
        // from orphan detection while the runtime silently exempts
        // nothing. Must be refused at load.
        let kinds = KindsConfig {
            allowed: vec!["generic".into(), "guide".into(), "readme".into()],
        };
        let detection = DetectionConfig {
            orphan_ok_kinds: vec!["skll".into()],
            ..DetectionConfig::default()
        };
        let config = Config {
            kinds,
            detection,
            ..Config::default()
        };
        let msg = config_error_message(&config);
        assert!(msg.contains("orphan_ok_kinds"), "message was: {msg}");
        assert!(msg.contains("\"skll\""), "message was: {msg}");
        assert!(msg.contains("kinds.allowed"), "message was: {msg}");
    }

    #[test]
    fn is_orphan_ok_kind_matches_configured_entries() {
        let kinds = KindsConfig {
            allowed: vec!["generic".into(), "skill".into()],
        };
        let detection = DetectionConfig {
            orphan_ok_kinds: vec!["skill".into()],
            ..DetectionConfig::default()
        };
        let config = Config {
            kinds,
            detection,
            ..Config::default()
        };
        config.validate().unwrap();

        assert!(config.is_orphan_ok_kind("skill"));
        assert!(!config.is_orphan_ok_kind("generic"));
    }

    #[test]
    fn parse_when_error_mentions_quoting_unsupported() {
        let message = parse_when("status==foo").unwrap_err();
        let mentions_embedded = message.contains("embedded '='");
        assert!(mentions_embedded || message.contains("exactly one"));
    }
}