Skip to main content

flodl_cli/
config.rs

//! fdl.yaml configuration loading and discovery.
//!
//! Walks up from CWD to find the project manifest, parses YAML/JSON,
//! and loads sub-command configs from registered command directories.
5
6use std::collections::BTreeMap;
7use std::path::{Path, PathBuf};
8
9use serde::{Deserialize, Serialize};
10
11// ── Root project config ─────────────────────────────────────────────────
12
13/// Root fdl.yaml at project root.
14#[derive(Debug, Default, Deserialize)]
15pub struct ProjectConfig {
16    #[serde(default)]
17    pub description: Option<String>,
18    /// Commands defined at this level. Each value is a [`CommandSpec`] that
19    /// encodes the kind of command (inline `run` script, `path` pointer to
20    /// a child fdl.yml, or inline preset reusing the parent entry).
21    #[serde(default)]
22    pub commands: BTreeMap<String, CommandSpec>,
23}
24
25// ── Sub-command config ──────────────────────────────────────────────────
26
27/// Sub-command fdl.yaml (e.g., ddp-bench/fdl.yaml).
28///
29/// Identical shape to [`ProjectConfig`] but with an executable `entry:`
30/// and optional structured config sections (ddp/training/output) that
31/// inline preset commands can override.
32#[derive(Debug, Default, Deserialize)]
33pub struct CommandConfig {
34    #[serde(default)]
35    pub description: Option<String>,
36    #[serde(default)]
37    pub entry: Option<String>,
38    /// Docker compose service name. When set, entry is wrapped in
39    /// `docker compose run --rm <service> bash -c "cd <workdir> && <entry> <args>"`.
40    #[serde(default)]
41    pub docker: Option<String>,
42    #[serde(default)]
43    pub ddp: Option<DdpConfig>,
44    #[serde(default)]
45    pub training: Option<TrainingConfig>,
46    #[serde(default)]
47    pub output: Option<OutputConfig>,
48    /// Nested commands — inline presets of this config's entry, standalone
49    /// `run` scripts, or `path` pointers to child fdl.yml files.
50    #[serde(default)]
51    pub commands: BTreeMap<String, CommandSpec>,
52    /// Help-only placeholder name for the first-positional slot when
53    /// `commands:` holds presets. Defaults to "preset". Pure UX — it
54    /// does not affect dispatch (presets are always looked up by name).
55    /// Useful to match domain vocabulary, e.g. `arg-name: recipe` or
56    /// `arg-name: target`.
57    #[serde(default, rename = "arg-name")]
58    pub arg_name: Option<String>,
59    /// Inline interim schema (before `<entry> --fdl-schema` is implemented).
60    /// Drives help rendering, validation, and completions.
61    #[serde(default)]
62    pub schema: Option<Schema>,
63}
64
65// ── Unified command specification ───────────────────────────────────────
66
67/// A command at any nesting level. Three mutually-exclusive kinds are
68/// recognised at resolve time:
69///
70/// - **Path** (`path` set, or by default when the map is empty/null): the
71///   command is a pointer to a child `fdl.yml`. By convention the path is
72///   `./<command-name>/` when omitted.
73/// - **Run** (`run` set): the command is a self-contained shell script
74///   that is executed as-is. Optional `docker:` service routes it through
75///   `docker compose`.
76/// - **Preset**: neither `path` nor `run` is set. The command merges its
77///   `ddp` / `training` / `output` / `options` fields over the enclosing
78///   `CommandConfig` defaults and invokes that config's `entry:`.
79#[derive(Debug, Default, Clone)]
80pub struct CommandSpec {
81    pub description: Option<String>,
82    /// Inline shell command. Mutex with `path`.
83    pub run: Option<String>,
84    /// Pointer to a child directory containing its own `fdl.yml`. Absolute
85    /// or relative to the declaring config's directory. Mutex with `run`.
86    /// `None` + no other fields = "use the convention path
87    /// `./<command-name>/`".
88    pub path: Option<String>,
89    /// Docker compose service for `run`-kind commands.
90    pub docker: Option<String>,
91    /// Preset overrides. Only consulted when neither `run` nor `path` is set.
92    pub ddp: Option<DdpConfig>,
93    pub training: Option<TrainingConfig>,
94    pub output: Option<OutputConfig>,
95    pub options: BTreeMap<String, serde_json::Value>,
96}
97
/// What kind of command this is, as resolved from a [`CommandSpec`] by
/// [`CommandSpec::kind`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommandKind {
    /// `run: "…"` — execute the inline shell command (optionally in Docker).
    Run,
    /// `path: "…"` or convention default — load `<path>/fdl.yml` and
    /// recurse.
    Path,
    /// Neither `run` nor `path`. Merges preset fields onto the enclosing
    /// `CommandConfig` defaults and invokes that config's `entry:`.
    Preset,
}
110
111impl CommandSpec {
112    /// Classify this command. Returns an error when both `run` and `path`
113    /// are declared — always a mistake, caught loudly rather than silently
114    /// picking one. Also rejects `docker:` without `run:`: the docker
115    /// service wraps the inline run-script, so pairing it with a `path:`
116    /// pointer or a preset entry is always silent-noop territory.
117    pub fn kind(&self) -> Result<CommandKind, String> {
118        if self.docker.is_some() && self.run.is_none() {
119            return Err(
120                "command declares `docker:` without `run:`; \
121                 `docker:` only wraps inline run-scripts"
122                    .to_string(),
123            );
124        }
125        match (self.run.as_deref(), self.path.as_deref()) {
126            (Some(_), Some(_)) => Err(
127                "command declares both `run:` and `path:`; \
128                 only one is allowed"
129                    .to_string(),
130            ),
131            (Some(_), None) => Ok(CommandKind::Run),
132            (None, Some(_)) => Ok(CommandKind::Path),
133            (None, None) => {
134                // No kind-selecting field. If preset fields are present,
135                // treat as Preset; otherwise, fall through to Path (the
136                // convention-default: `./<name>/fdl.yml`).
137                if self.ddp.is_some()
138                    || self.training.is_some()
139                    || self.output.is_some()
140                    || !self.options.is_empty()
141                {
142                    Ok(CommandKind::Preset)
143                } else {
144                    Ok(CommandKind::Path)
145                }
146            }
147        }
148    }
149
150    /// Resolve the effective directory for a `Path`-kind command declared
151    /// in `parent_dir`. Applies the `./<name>/` convention when `path` is
152    /// unset.
153    pub fn resolve_path(&self, name: &str, parent_dir: &Path) -> PathBuf {
154        match &self.path {
155            Some(p) => parent_dir.join(p),
156            None => parent_dir.join(name),
157        }
158    }
159}
160
161// Custom Deserialize so that `commands: { name: ~ }` (YAML null) and
162// `commands: { name: }` (empty value) both deserialize to a default
163// `CommandSpec`. Without this, serde_yaml errors on null because a
164// struct expects a map.
165impl<'de> Deserialize<'de> for CommandSpec {
166    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
167    where
168        D: serde::Deserializer<'de>,
169    {
170        #[derive(Deserialize)]
171        struct Inner {
172            #[serde(default)]
173            description: Option<String>,
174            #[serde(default)]
175            run: Option<String>,
176            #[serde(default)]
177            path: Option<String>,
178            #[serde(default)]
179            docker: Option<String>,
180            #[serde(default)]
181            ddp: Option<DdpConfig>,
182            #[serde(default)]
183            training: Option<TrainingConfig>,
184            #[serde(default)]
185            output: Option<OutputConfig>,
186            #[serde(default)]
187            options: BTreeMap<String, serde_json::Value>,
188        }
189
190        let raw = serde_yaml::Value::deserialize(deserializer)?;
191        if matches!(raw, serde_yaml::Value::Null) {
192            return Ok(Self::default());
193        }
194        let inner: Inner =
195            serde_yaml::from_value(raw).map_err(serde::de::Error::custom)?;
196        Ok(Self {
197            description: inner.description,
198            run: inner.run,
199            path: inner.path,
200            docker: inner.docker,
201            ddp: inner.ddp,
202            training: inner.training,
203            output: inner.output,
204            options: inner.options,
205        })
206    }
207}
208
209// ── Schema (interim hand-written, future `<entry> --fdl-schema`) ────────
210
211/// The schema declared inline in a sub-command's fdl.yaml. Maps 1:1 to
212/// what `<entry> --fdl-schema` will later emit as JSON.
213#[derive(Debug, Clone, Default, Deserialize, Serialize)]
214pub struct Schema {
215    #[serde(default, skip_serializing_if = "Vec::is_empty")]
216    pub args: Vec<ArgSpec>,
217    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
218    pub options: BTreeMap<String, OptionSpec>,
219    /// When true, the fdl layer rejects options not declared in the
220    /// schema before the sub-command's entry ever runs. Two validation
221    /// points:
222    ///
223    /// 1. *Load time* — preset `options:` maps are checked against the
224    ///    enclosing `schema.options` (see [`validate_presets_strict`]).
225    ///    A typo like `options: { batchsize: 32 }` when the schema
226    ///    declares `batch-size` is a loud load error.
227    /// 2. *Dispatch time* — the user's extra argv tail is tokenized
228    ///    against the schema (see [`validate_tail`]). Unknown flags
229    ///    error out with a "did you mean" suggestion instead of being
230    ///    silently forwarded.
231    ///
232    /// **Validation NOT gated by `strict`** — always-on for declared
233    /// items, so positive assertions from the schema always hold:
234    /// - `choices:` on options: the user's value and any preset YAML
235    ///   value must be in the list.
236    /// - `choices:` on positional args: the user's value must be in
237    ///   the list (when strict is off, this may mis-fire if unknown
238    ///   flags push orphan values into positional slots — opt into
239    ///   strict for clean positional handling).
240    ///
241    /// `strict` is purely about **unknown** options/args, not about
242    /// validating declared contracts.
243    #[serde(default, skip_serializing_if = "is_false")]
244    pub strict: bool,
245}
246
247/// A flag option, `--name` / `-x`.
248#[derive(Debug, Clone, Deserialize, Serialize)]
249pub struct OptionSpec {
250    #[serde(rename = "type")]
251    pub ty: String,
252    #[serde(default, skip_serializing_if = "Option::is_none")]
253    pub description: Option<String>,
254    #[serde(default, skip_serializing_if = "Option::is_none")]
255    pub default: Option<serde_json::Value>,
256    #[serde(default, skip_serializing_if = "Option::is_none")]
257    pub choices: Option<Vec<serde_json::Value>>,
258    /// Single-letter short alias.
259    #[serde(default, skip_serializing_if = "Option::is_none")]
260    pub short: Option<String>,
261    #[serde(default, skip_serializing_if = "Option::is_none")]
262    pub env: Option<String>,
263    /// Shell snippet producing completion values.
264    /// Consumed by `fdl completions <shell>` (follow-up rollout task).
265    #[serde(default, skip_serializing_if = "Option::is_none")]
266    #[allow(dead_code)]
267    pub completer: Option<String>,
268}
269
270/// A positional argument.
271#[derive(Debug, Clone, Deserialize, Serialize)]
272pub struct ArgSpec {
273    pub name: String,
274    #[serde(rename = "type")]
275    pub ty: String,
276    #[serde(default, skip_serializing_if = "Option::is_none")]
277    pub description: Option<String>,
278    #[serde(default = "default_required")]
279    pub required: bool,
280    #[serde(default, skip_serializing_if = "is_false")]
281    pub variadic: bool,
282    #[serde(default, skip_serializing_if = "Option::is_none")]
283    pub default: Option<serde_json::Value>,
284    #[serde(default, skip_serializing_if = "Option::is_none")]
285    pub choices: Option<Vec<serde_json::Value>>,
286    /// Shell snippet producing completion values.
287    /// Consumed by `fdl completions <shell>` (follow-up rollout task).
288    #[serde(default, skip_serializing_if = "Option::is_none")]
289    #[allow(dead_code)]
290    pub completer: Option<String>,
291}
292
/// `skip_serializing_if` predicate: true when the flag is `false`, so
/// unset booleans are left out of serialized output.
fn is_false(b: &bool) -> bool {
    !*b
}
296
/// Serde default for `ArgSpec::required`: positionals are required
/// unless the config says otherwise.
fn default_required() -> bool {
    true
}
300
/// Long flags reserved at the fdl level — no sub-command option may
/// shadow them. Kept in sync with main.rs dispatch.
const RESERVED_LONGS: &[&str] = &[
    "help", "version", "quiet", "env",
];
/// Short flags reserved at the fdl level — no sub-command option may
/// declare one of these as its `short:` alias.
const RESERVED_SHORTS: &[&str] = &[
    "h", "V", "q", "v", "e",
];
/// Type names accepted for `type:` on schema options and positionals.
const VALID_TYPES: &[&str] = &[
    "string", "int", "float", "bool", "path",
    "list[string]", "list[int]", "list[float]", "list[path]",
];
313
314/// Check a schema for collisions and structural issues.
315///
316/// Loud-at-load-time: ambiguity caught here is cheaper to fix than mysterious
317/// pass-through behavior at runtime.
318pub fn validate_schema(schema: &Schema) -> Result<(), String> {
319    // Options: check types, shorts, reserved flags.
320    let mut short_seen: BTreeMap<String, String> = BTreeMap::new();
321    for (long, spec) in &schema.options {
322        if !VALID_TYPES.contains(&spec.ty.as_str()) {
323            return Err(format!(
324                "option --{}: unknown type '{}' (valid: {})",
325                long,
326                spec.ty,
327                VALID_TYPES.join(", ")
328            ));
329        }
330        if RESERVED_LONGS.contains(&long.as_str()) {
331            return Err(format!(
332                "option --{long} shadows a reserved fdl-level flag"
333            ));
334        }
335        if let Some(s) = &spec.short {
336            if s.chars().count() != 1 {
337                return Err(format!(
338                    "option --{long}: `short: \"{s}\"` must be a single character"
339                ));
340            }
341            if RESERVED_SHORTS.contains(&s.as_str()) {
342                return Err(format!(
343                    "option --{long}: short -{s} shadows a reserved fdl-level flag"
344                ));
345            }
346            if let Some(prev) = short_seen.insert(s.clone(), long.clone()) {
347                return Err(format!(
348                    "options --{prev} and --{long} both declare short -{s}"
349                ));
350            }
351        }
352    }
353
354    // Args: check types, variadic-only-at-end, no-required-after-optional.
355    let mut seen_optional = false;
356    let mut name_seen: BTreeMap<String, ()> = BTreeMap::new();
357    for (i, arg) in schema.args.iter().enumerate() {
358        if !VALID_TYPES.contains(&arg.ty.as_str()) {
359            return Err(format!(
360                "arg <{}>: unknown type '{}' (valid: {})",
361                arg.name,
362                arg.ty,
363                VALID_TYPES.join(", ")
364            ));
365        }
366        if name_seen.insert(arg.name.clone(), ()).is_some() {
367            return Err(format!("duplicate positional name <{}>", arg.name));
368        }
369        if arg.variadic && i != schema.args.len() - 1 {
370            return Err(format!(
371                "arg <{}>: variadic positional must be the last one",
372                arg.name
373            ));
374        }
375        let is_optional = !arg.required || arg.default.is_some();
376        if arg.required && arg.default.is_some() {
377            return Err(format!(
378                "arg <{}>: `required: true` with a default is a contradiction",
379                arg.name
380            ));
381        }
382        if seen_optional && arg.required && arg.default.is_none() {
383            return Err(format!(
384                "arg <{}>: required positional cannot follow an optional one",
385                arg.name
386            ));
387        }
388        if is_optional {
389            seen_optional = true;
390        }
391    }
392
393    Ok(())
394}
395
396// ── Structured config sections ──────────────────────────────────────────
397
398/// DDP configuration. Maps 1:1 to flodl DdpConfig / DdpRunConfig.
399#[derive(Debug, Clone, Default, Deserialize)]
400pub struct DdpConfig {
401    pub mode: Option<String>,
402    pub policy: Option<String>,
403    pub backend: Option<String>,
404    /// "auto" or integer.
405    pub anchor: Option<serde_json::Value>,
406    pub max_anchor: Option<u32>,
407    pub overhead_target: Option<f64>,
408    pub divergence_threshold: Option<f64>,
409    /// null (unlimited) or integer.
410    pub max_batch_diff: Option<serde_json::Value>,
411    pub speed_hint: Option<SpeedHint>,
412    pub partition_ratios: Option<Vec<f64>>,
413    /// "auto" or bool.
414    pub progressive: Option<serde_json::Value>,
415    pub max_grad_norm: Option<f64>,
416    pub lr_scale_ratio: Option<f64>,
417    pub snapshot_timeout: Option<u32>,
418    pub checkpoint_every: Option<u32>,
419    pub timeline: Option<bool>,
420}
421
422#[derive(Debug, Clone, Default, Deserialize)]
423pub struct SpeedHint {
424    pub slow_rank: usize,
425    pub ratio: f64,
426}
427
428/// Training scalars.
429#[derive(Debug, Clone, Default, Deserialize)]
430pub struct TrainingConfig {
431    pub epochs: Option<u32>,
432    pub batch_size: Option<u32>,
433    pub batches_per_epoch: Option<u32>,
434    pub lr: Option<f64>,
435    pub seed: Option<u64>,
436}
437
438/// Output settings.
439#[derive(Debug, Clone, Default, Deserialize)]
440pub struct OutputConfig {
441    pub dir: Option<String>,
442    pub timeline: Option<bool>,
443    pub monitor: Option<u16>,
444}
445
446
447// ── Config discovery ────────────────────────────────────────────────────
448
/// Config file names recognised in a directory, in lookup order.
const CONFIG_NAMES: &[&str] = &["fdl.yaml", "fdl.yml", "fdl.json"];
/// Suffixes marking a committed template of a config file
/// (e.g. `fdl.yaml.example`), in lookup order.
const EXAMPLE_SUFFIXES: &[&str] = &[".example", ".dist"];
451
452/// Walk up from `start` looking for fdl.yaml.
453///
454/// If only an `.example` (or `.dist`) variant exists, offers to copy it
455/// to the real config path. This lets the repo commit `fdl.yaml.example`
456/// while `.gitignore`-ing `fdl.yaml` so users can customize locally.
457pub fn find_config(start: &Path) -> Option<PathBuf> {
458    let mut dir = start.to_path_buf();
459    loop {
460        // First pass: look for the real config.
461        for name in CONFIG_NAMES {
462            let candidate = dir.join(name);
463            if candidate.is_file() {
464                return Some(candidate);
465            }
466        }
467        // Second pass: look for .example/.dist variants.
468        for name in CONFIG_NAMES {
469            for suffix in EXAMPLE_SUFFIXES {
470                let example = dir.join(format!("{name}{suffix}"));
471                if example.is_file() {
472                    let target = dir.join(name);
473                    if try_copy_example(&example, &target) {
474                        return Some(target);
475                    }
476                    // User declined: use the example directly.
477                    return Some(example);
478                }
479            }
480        }
481        if !dir.pop() {
482            return None;
483        }
484    }
485}
486
/// Prompt the user to copy an example config to the real path.
///
/// Writes the prompt to stderr and reads one line from stdin. An empty
/// line, `y` or `yes` (any case) triggers the copy; anything else
/// declines. NOTE(review): end-of-input on stdin also reads as an empty
/// answer and therefore counts as "yes" — confirm this is wanted for
/// non-interactive runs.
///
/// Returns true if the copy succeeded. A read error, a declined prompt
/// or a failed copy all return false; a failed copy is also reported on
/// stderr.
fn try_copy_example(example: &Path, target: &Path) -> bool {
    let example_name = example.file_name().unwrap_or_default().to_string_lossy();
    let target_name = target.file_name().unwrap_or_default().to_string_lossy();
    eprintln!(
        "fdl: found {example_name} but no {target_name}. \
         Copy it to create your local config? [Y/n] "
    );

    let mut input = String::new();
    if std::io::stdin().read_line(&mut input).is_err() {
        return false;
    }

    let answer = input.trim().to_lowercase();
    let accepted = answer.is_empty() || answer == "y" || answer == "yes";
    if !accepted {
        return false;
    }

    match std::fs::copy(example, target) {
        Ok(_) => {
            eprintln!("fdl: created {target_name} (edit to customize)");
            true
        }
        Err(e) => {
            eprintln!("fdl: failed to copy: {e}");
            false
        }
    }
}
516
517/// Load a project config from a specific path.
518pub fn load_project(path: &Path) -> Result<ProjectConfig, String> {
519    load_project_with_env(path, None)
520}
521
522/// Load a project config with an optional environment overlay.
523///
524/// When `env` is `Some`, looks for a sibling `fdl.<env>.{yml,yaml,json}` next
525/// to `base_path` and deep-merges it over the base before deserialization.
526/// Missing overlay files are a hard error — the user asked for this env, so
527/// silently ignoring it would be worse than a clear message.
528pub fn load_project_with_env(
529    base_path: &Path,
530    env: Option<&str>,
531) -> Result<ProjectConfig, String> {
532    let merged = load_merged_value(base_path, env)?;
533    serde_yaml::from_value::<ProjectConfig>(merged)
534        .map_err(|e| format!("{}: {}", base_path.display(), e))
535}
536
537/// Load the raw merged [`serde_yaml::Value`] for a config + optional env
538/// overlay. Exposed so callers like `fdl config show` can inspect the
539/// resolved view before it is deserialized into a strongly-typed struct.
540pub fn load_merged_value(
541    base_path: &Path,
542    env: Option<&str>,
543) -> Result<serde_yaml::Value, String> {
544    let layers = resolve_config_layers(base_path, env)?;
545    Ok(crate::overlay::merge_layers(
546        layers.into_iter().map(|(_, v)| v).collect::<Vec<_>>(),
547    ))
548}
549
550/// Resolve every layer contributing to a config, in merge order, with
551/// `inherit-from:` chains expanded. Paired with the base file + optional
552/// env overlay, the result is `[chain(base)..., chain(env_overlay)...]`
553/// de-duplicated by canonical path (kept-first).
554///
555/// Used by `fdl config show` for per-leaf source annotation, and
556/// internally by [`load_merged_value`] / [`load_command_with_env`] so
557/// every consumer picks up `inherit-from:` uniformly.
558pub fn resolve_config_layers(
559    base_path: &Path,
560    env: Option<&str>,
561) -> Result<Vec<(PathBuf, serde_yaml::Value)>, String> {
562    let mut layers = crate::overlay::resolve_chain(base_path)?;
563    if let Some(name) = env {
564        match crate::overlay::find_env_file(base_path, name) {
565            Some(p) => {
566                let env_chain = crate::overlay::resolve_chain(&p)?;
567                layers.extend(env_chain);
568            }
569            None => {
570                return Err(format!(
571                    "environment `{name}` not found (expected fdl.{name}.yml next to {})",
572                    base_path.display()
573                ));
574            }
575        }
576    }
577    // Dedup by canonical path, keeping first occurrence. An env overlay
578    // whose chain loops back to a file already in the base chain (same
579    // file via a different inheritance route) collapses cleanly.
580    let mut seen = std::collections::HashSet::new();
581    layers.retain(|(path, _)| seen.insert(path.clone()));
582    Ok(layers)
583}
584
585/// Source path list for a base config + env overlay, in merge order. Used
586/// by `fdl config show` to annotate which layer a value came from.
587pub fn config_layer_sources(base_path: &Path, env: Option<&str>) -> Vec<PathBuf> {
588    resolve_config_layers(base_path, env)
589        .map(|ls| ls.into_iter().map(|(p, _)| p).collect())
590        .unwrap_or_else(|_| vec![base_path.to_path_buf()])
591}
592
593/// Load a command config from a sub-directory.
594///
595/// Applies the same `.example`/`.dist` fallback as [`find_config`]. If a
596/// `schema:` block is present, validates it before returning.
597pub fn load_command(dir: &Path) -> Result<CommandConfig, String> {
598    load_command_with_env(dir, None)
599}
600
601/// Load a sub-command config with an optional environment overlay.
602///
603/// Applies the same `.example`/`.dist` fallback as [`find_config`] to locate
604/// the base file, then deep-merges a sibling `fdl.<env>.yml` overlay if one
605/// exists. A *missing* overlay is silently accepted here (different from
606/// [`load_project_with_env`]) — envs declared at the project root don't
607/// have to exist for every sub-command.
608pub fn load_command_with_env(dir: &Path, env: Option<&str>) -> Result<CommandConfig, String> {
609    // Resolve the base config path (with .example fallback, same as before).
610    let mut base_path: Option<PathBuf> = None;
611    for name in CONFIG_NAMES {
612        let path = dir.join(name);
613        if path.is_file() {
614            base_path = Some(path);
615            break;
616        }
617    }
618    if base_path.is_none() {
619        for name in CONFIG_NAMES {
620            for suffix in EXAMPLE_SUFFIXES {
621                let example = dir.join(format!("{name}{suffix}"));
622                if example.is_file() {
623                    let target = dir.join(name);
624                    let src = if try_copy_example(&example, &target) {
625                        target
626                    } else {
627                        example
628                    };
629                    base_path = Some(src);
630                    break;
631                }
632            }
633            if base_path.is_some() {
634                break;
635            }
636        }
637    }
638    let base_path = base_path
639        .ok_or_else(|| format!("no fdl.yml found in {}", dir.display()))?;
640
641    // Layered load: base chain + optional env overlay chain. Both sides
642    // run through `resolve_chain` so `inherit-from:` composes the same
643    // way for nested commands as for the project root.
644    let mut layers = crate::overlay::resolve_chain(&base_path)?;
645    if let Some(name) = env {
646        if let Some(p) = crate::overlay::find_env_file(&base_path, name) {
647            layers.extend(crate::overlay::resolve_chain(&p)?);
648        }
649    }
650    let mut seen = std::collections::HashSet::new();
651    layers.retain(|(path, _)| seen.insert(path.clone()));
652    let merged = crate::overlay::merge_layers(
653        layers.into_iter().map(|(_, v)| v).collect::<Vec<_>>(),
654    );
655    let mut cfg: CommandConfig = serde_yaml::from_value(merged)
656        .map_err(|e| format!("{}: {}", base_path.display(), e))?;
657
658    if let Some(schema) = &cfg.schema {
659        validate_schema(schema)
660            .map_err(|e| format!("schema error in {}/fdl.yml: {e}", dir.display()))?;
661        // Preset validation (choice values + strict unknown-key rejection)
662        // is intentionally deferred to the exec path. Load-time validation
663        // would block `fdl <cmd> --help` whenever ANY preset in the config
664        // has a typo — worse UX than letting help render and erroring only
665        // when the broken preset is actually invoked.
666    }
667
668    // Cache precedence: a valid, fresh cached schema (written by `fdl <cmd>
669    // --refresh-schema`) wins over the inline YAML schema. This lets a
670    // binary become the source of truth for its own surface once it opts
671    // into the `--fdl-schema` contract. A cache that is older than the
672    // command's fdl.yml is treated as stale and skipped — the inline
673    // schema (if any) reasserts until the user refreshes.
674    let cmd_name = dir
675        .file_name()
676        .and_then(|n| n.to_str())
677        .unwrap_or("_");
678    let cache = crate::schema_cache::cache_path(dir, cmd_name);
679    // Reference mtimes: config files that, when edited, might invalidate
680    // the cached schema (e.g. changing `entry:` to point somewhere else).
681    let refs: Vec<std::path::PathBuf> = CONFIG_NAMES
682        .iter()
683        .map(|n| dir.join(n))
684        .filter(|p| p.exists())
685        .collect();
686    if !crate::schema_cache::is_stale(&cache, &refs) {
687        if let Some(cached) = crate::schema_cache::read_cache(&cache) {
688            cfg.schema = Some(cached);
689        }
690    }
691
692    Ok(cfg)
693}
694
695// ── Strict-mode validation ──────────────────────────────────────────────
696
/// Reserved flags that strict mode always tolerates in the user's tail.
/// These are fdl-level universals (help/version) or opt-ins every
/// FdlArgs-derived binary exposes (--fdl-schema) — keeping them out of
/// the `schema.options` map means strict mode has to allowlist them
/// separately or spuriously reject legal invocations.
///
/// Each entry is `(long, short, takes_value)`.
const STRICT_UNIVERSAL_LONGS: &[(&str, Option<char>, bool)] = &[
    ("help", Some('h'), false),
    ("version", Some('V'), false),
    ("fdl-schema", None, false),
    ("refresh-schema", None, false),
];
709
710/// Convert a [`Schema`] into an [`ArgsSpec`](crate::args::parser::ArgsSpec) suitable for strict-mode
711/// tail validation. Positional `required` flags are intentionally
712/// dropped: the binary itself will enforce them after parsing, and
713/// treating them as required here would turn "missing positional" into
714/// a double-errored mess.
715pub fn schema_to_args_spec(schema: &Schema) -> crate::args::parser::ArgsSpec {
716    use crate::args::parser::{ArgsSpec, OptionDecl, PositionalDecl};
717
718    let mut options: Vec<OptionDecl> = schema
719        .options
720        .iter()
721        .map(|(long, spec)| OptionDecl {
722            long: long.clone(),
723            short: spec
724                .short
725                .as_deref()
726                .and_then(|s| s.chars().next()),
727            takes_value: spec.ty != "bool",
728            // Every value-taking option is allowed to appear bare in
729            // strict mode. fdl does not second-guess whether the binary
730            // would accept a bare `--foo`; that stays in the binary's
731            // court.
732            allows_bare: true,
733            repeatable: spec.ty.starts_with("list["),
734            choices: spec
735                .choices
736                .as_ref()
737                .map(|cs| strict_choices_to_strings(cs)),
738        })
739        .collect();
740
741    // Always-allowed universals — help/version/fdl-schema/refresh-schema
742    // are not in the user's schema but must not trigger "unknown flag".
743    for (long, short, takes_value) in STRICT_UNIVERSAL_LONGS {
744        options.push(OptionDecl {
745            long: (*long).to_string(),
746            short: *short,
747            takes_value: *takes_value,
748            allows_bare: true,
749            repeatable: false,
750            choices: None,
751        });
752    }
753
754    // Positionals: drop the `required` bit. Strict mode is scoped to
755    // option names/values only; arity is the binary's concern.
756    let positionals: Vec<PositionalDecl> = schema
757        .args
758        .iter()
759        .map(|a| PositionalDecl {
760            name: a.name.clone(),
761            required: false,
762            variadic: a.variadic,
763            choices: a
764                .choices
765                .as_ref()
766                .map(|cs| strict_choices_to_strings(cs)),
767        })
768        .collect();
769
770    ArgsSpec {
771        options,
772        positionals,
773        // Non-strict schemas accept user-forwarded flags the author
774        // didn't declare — the binary re-parses the tail anyway.
775        // Strict schemas reject anything not declared.
776        lenient_unknowns: !schema.strict,
777    }
778}
779
780fn strict_choices_to_strings(cs: &[serde_json::Value]) -> Vec<String> {
781    cs.iter()
782        .map(|v| match v {
783            serde_json::Value::String(s) => s.clone(),
784            other => other.to_string(),
785        })
786        .collect()
787}
788
789/// Validate the user's extra argv tail against a schema. Always called
790/// before `run::exec_command` — the parser's lenient-unknowns mode is
791/// keyed off `schema.strict` so choice validation on declared flags
792/// fires regardless, while unknown-flag rejection stays opt-in.
793///
794/// The tokenizer from [`crate::args::parser`] is reused so "did you
795/// mean" suggestions, cluster, and equals handling come for free.
796pub fn validate_tail(tail: &[String], schema: &Schema) -> Result<(), String> {
797    let spec = schema_to_args_spec(schema);
798    let mut argv = Vec::with_capacity(tail.len() + 1);
799    argv.push("fdl".to_string());
800    argv.extend(tail.iter().cloned());
801    crate::args::parser::parse(&spec, &argv).map(|_| ())
802}
803
804/// Validate a single preset that's about to be invoked. Combines the
805/// always-on `choices:` check and, if `schema.strict`, the unknown-key
806/// rejection — scoped to just this preset, not the whole `commands:`
807/// map. Called from the exec path so typos in a sibling preset don't
808/// block `--help` for a correct one.
809pub fn validate_preset_for_exec(
810    preset_name: &str,
811    spec: &CommandSpec,
812    schema: &Schema,
813) -> Result<(), String> {
814    for (key, value) in &spec.options {
815        let Some(opt) = schema.options.get(key) else {
816            if schema.strict {
817                return Err(format!(
818                    "preset `{preset_name}` pins option `{key}` which is not declared in schema.options"
819                ));
820            }
821            continue;
822        };
823        let Some(choices) = &opt.choices else {
824            continue;
825        };
826        if !choices.iter().any(|c| values_equal(c, value)) {
827            let allowed: Vec<String> = choices
828                .iter()
829                .map(|c| match c {
830                    serde_json::Value::String(s) => s.clone(),
831                    other => other.to_string(),
832                })
833                .collect();
834            return Err(format!(
835                "preset `{preset_name}` sets option `{key}` to `{}` -- allowed: {}",
836                display_json(value),
837                allowed.join(", "),
838            ));
839        }
840    }
841    Ok(())
842}
843
844/// Always-on: validate preset YAML `options:` values against declared
845/// `choices:` in the schema. An option YAML value whose key matches a
846/// declared option with a `choices:` list must be one of those choices.
847/// Keys not declared in the schema are ignored here — those are the
848/// concern of [`validate_presets_strict`] (opt-in).
849///
850/// Used for whole-map validation (e.g. from a future `fdl config lint`
851/// subcommand). The dispatch path uses [`validate_preset_for_exec`] so
852/// sibling-preset typos don't block correct invocations.
853pub fn validate_preset_values(
854    commands: &BTreeMap<String, CommandSpec>,
855    schema: &Schema,
856) -> Result<(), String> {
857    for (preset_name, spec) in commands {
858        match spec.kind() {
859            Ok(CommandKind::Preset) => {}
860            _ => continue,
861        }
862        for (key, value) in &spec.options {
863            let Some(opt) = schema.options.get(key) else {
864                continue; // unknown key — strict's problem, not ours
865            };
866            let Some(choices) = &opt.choices else {
867                continue; // no choices declared — anything goes
868            };
869            if !choices.iter().any(|c| values_equal(c, value)) {
870                let allowed: Vec<String> = choices
871                    .iter()
872                    .map(|c| match c {
873                        serde_json::Value::String(s) => s.clone(),
874                        other => other.to_string(),
875                    })
876                    .collect();
877                return Err(format!(
878                    "preset `{preset_name}` sets option `{key}` to `{}` -- allowed: {}",
879                    display_json(value),
880                    allowed.join(", "),
881                ));
882            }
883        }
884    }
885    Ok(())
886}
887
888/// Compare two JSON values for equality, treating YAML's loose-typed
889/// representation (a preset might write `batch-size: 32` as an int
890/// while the schema's choices list contains `"32"` as a string).
891fn values_equal(a: &serde_json::Value, b: &serde_json::Value) -> bool {
892    if a == b {
893        return true;
894    }
895    // Cross-type string ↔ number comparison for YAML-friendly matching.
896    match (a, b) {
897        (serde_json::Value::String(s), other) | (other, serde_json::Value::String(s)) => {
898            s == &other.to_string()
899        }
900        _ => false,
901    }
902}
903
904fn display_json(v: &serde_json::Value) -> String {
905    match v {
906        serde_json::Value::String(s) => s.clone(),
907        other => other.to_string(),
908    }
909}
910
911/// At load time, reject preset `options:` keys that are not declared in
912/// the enclosing schema. Runs only when `schema.strict == true`, and
913/// only against entries resolved to [`CommandKind::Preset`] — `run:` and
914/// `path:` kinds don't share the parent schema.
915pub fn validate_presets_strict(
916    commands: &BTreeMap<String, CommandSpec>,
917    schema: &Schema,
918) -> Result<(), String> {
919    for (preset_name, spec) in commands {
920        match spec.kind() {
921            Ok(CommandKind::Preset) => {}
922            _ => continue,
923        }
924        for key in spec.options.keys() {
925            if !schema.options.contains_key(key) {
926                return Err(format!(
927                    "preset `{preset_name}` pins option `{key}` which is not declared in schema.options"
928                ));
929            }
930        }
931    }
932    Ok(())
933}
934
935// ── Merge ───────────────────────────────────────────────────────────────
936
937/// Merge the enclosing `CommandConfig` defaults with a named preset's
938/// overrides. Preset values win. Used when dispatching an inline preset
939/// command (neither `run` nor `path`).
940pub fn merge_preset(root: &CommandConfig, preset: &CommandSpec) -> ResolvedConfig {
941    ResolvedConfig {
942        ddp: merge_ddp(&root.ddp, &preset.ddp),
943        training: merge_training(&root.training, &preset.training),
944        output: merge_output(&root.output, &preset.output),
945        options: preset.options.clone(),
946    }
947}
948
949/// Resolved config from root defaults only (no job).
950pub fn defaults_only(root: &CommandConfig) -> ResolvedConfig {
951    ResolvedConfig {
952        ddp: root.ddp.clone().unwrap_or_default(),
953        training: root.training.clone().unwrap_or_default(),
954        output: root.output.clone().unwrap_or_default(),
955        options: BTreeMap::new(),
956    }
957}
958
/// Fully resolved configuration ready for arg translation.
///
/// Produced by [`merge_preset`] (root defaults overlaid with one preset)
/// or by [`defaults_only`] (root defaults alone).
pub struct ResolvedConfig {
    /// Effective `ddp` section after merging.
    pub ddp: DdpConfig,
    /// Effective `training` section after merging.
    pub training: TrainingConfig,
    /// Effective `output` section after merging.
    pub output: OutputConfig,
    /// Option values pinned by the preset, keyed by option name. Empty when
    /// the config was built by [`defaults_only`].
    pub options: BTreeMap<String, serde_json::Value>,
}
966
/// Resolve one optional field across two optional config sections.
///
/// `$base` and `$over` are `Option<Section>` values (or references to
/// them) and `$field` names an `Option<_>` field of that section. The
/// override's value wins when it is set; otherwise the base's value is
/// used; otherwise the result is `None`. The chosen value is cloned.
macro_rules! merge_field {
    ($base:expr, $over:expr, $field:ident) => {
        $over
            .as_ref()
            .and_then(|o| o.$field.as_ref())
            .or_else(|| $base.as_ref().and_then(|b| b.$field.as_ref()))
            .cloned()
    };
}
975
976fn merge_ddp(base: &Option<DdpConfig>, over: &Option<DdpConfig>) -> DdpConfig {
977    DdpConfig {
978        mode: merge_field!(base, over, mode),
979        policy: merge_field!(base, over, policy),
980        backend: merge_field!(base, over, backend),
981        anchor: merge_field!(base, over, anchor),
982        max_anchor: merge_field!(base, over, max_anchor),
983        overhead_target: merge_field!(base, over, overhead_target),
984        divergence_threshold: merge_field!(base, over, divergence_threshold),
985        max_batch_diff: merge_field!(base, over, max_batch_diff),
986        speed_hint: merge_field!(base, over, speed_hint),
987        partition_ratios: merge_field!(base, over, partition_ratios),
988        progressive: merge_field!(base, over, progressive),
989        max_grad_norm: merge_field!(base, over, max_grad_norm),
990        lr_scale_ratio: merge_field!(base, over, lr_scale_ratio),
991        snapshot_timeout: merge_field!(base, over, snapshot_timeout),
992        checkpoint_every: merge_field!(base, over, checkpoint_every),
993        timeline: merge_field!(base, over, timeline),
994    }
995}
996
997fn merge_training(base: &Option<TrainingConfig>, over: &Option<TrainingConfig>) -> TrainingConfig {
998    TrainingConfig {
999        epochs: merge_field!(base, over, epochs),
1000        batch_size: merge_field!(base, over, batch_size),
1001        batches_per_epoch: merge_field!(base, over, batches_per_epoch),
1002        lr: merge_field!(base, over, lr),
1003        seed: merge_field!(base, over, seed),
1004    }
1005}
1006
1007fn merge_output(base: &Option<OutputConfig>, over: &Option<OutputConfig>) -> OutputConfig {
1008    OutputConfig {
1009        dir: merge_field!(base, over, dir),
1010        timeline: merge_field!(base, over, timeline),
1011        monitor: merge_field!(base, over, monitor),
1012    }
1013}
1014
1015#[cfg(test)]
1016mod tests {
1017    use super::*;
1018
1019    /// Resolve the project root (where fdl.yml / fdl.yml.example live) starting
1020    /// from CARGO_MANIFEST_DIR. The CLI crate sits one level down.
1021    fn project_root() -> PathBuf {
1022        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
1023            .parent()
1024            .expect("flodl-cli parent must be project root")
1025            .to_path_buf()
1026    }
1027
1028    fn load_example() -> ProjectConfig {
1029        let path = project_root().join("fdl.yml.example");
1030        assert!(
1031            path.is_file(),
1032            "fdl.yml.example missing at {} -- the CLI depends on it as the canonical config template",
1033            path.display()
1034        );
1035        load_project(&path).expect("fdl.yml.example must parse as a valid ProjectConfig")
1036    }
1037
1038    fn opt(ty: &str) -> OptionSpec {
1039        OptionSpec {
1040            ty: ty.into(),
1041            description: None,
1042            default: None,
1043            choices: None,
1044            short: None,
1045            env: None,
1046            completer: None,
1047        }
1048    }
1049
1050    fn arg(name: &str, ty: &str) -> ArgSpec {
1051        ArgSpec {
1052            name: name.into(),
1053            ty: ty.into(),
1054            description: None,
1055            required: true,
1056            variadic: false,
1057            default: None,
1058            choices: None,
1059            completer: None,
1060        }
1061    }
1062
1063    #[test]
1064    fn validate_schema_accepts_minimal_valid() {
1065        let mut s = Schema::default();
1066        s.options.insert("model".into(), opt("string"));
1067        s.options.insert("epochs".into(), opt("int"));
1068        s.args.push(arg("run-id", "string"));
1069        validate_schema(&s).expect("minimal valid schema must pass");
1070    }
1071
1072    #[test]
1073    fn validate_schema_rejects_unknown_option_type() {
1074        let mut s = Schema::default();
1075        s.options.insert("bad".into(), opt("integer"));
1076        let err = validate_schema(&s).expect_err("unknown type should fail");
1077        assert!(err.contains("unknown type"), "err was: {err}");
1078    }
1079
1080    #[test]
1081    fn validate_schema_rejects_reserved_long() {
1082        let mut s = Schema::default();
1083        s.options.insert("help".into(), opt("bool"));
1084        let err = validate_schema(&s).expect_err("reserved --help must fail");
1085        assert!(err.contains("reserved"), "err was: {err}");
1086    }
1087
1088    #[test]
1089    fn validate_schema_rejects_reserved_short() {
1090        let mut s = Schema::default();
1091        let mut o = opt("string");
1092        o.short = Some("h".into());
1093        s.options.insert("host".into(), o);
1094        let err = validate_schema(&s).expect_err("short -h must fail");
1095        assert!(err.contains("reserved"), "err was: {err}");
1096    }
1097
1098    #[test]
1099    fn validate_schema_rejects_duplicate_short() {
1100        let mut s = Schema::default();
1101        let mut a = opt("string");
1102        a.short = Some("m".into());
1103        let mut b = opt("string");
1104        b.short = Some("m".into());
1105        s.options.insert("model".into(), a);
1106        s.options.insert("mode".into(), b);
1107        let err = validate_schema(&s).expect_err("duplicate -m must fail");
1108        assert!(err.contains("both declare short"), "err was: {err}");
1109    }
1110
1111    #[test]
1112    fn validate_schema_rejects_non_last_variadic() {
1113        let mut s = Schema::default();
1114        let mut first = arg("files", "string");
1115        first.variadic = true;
1116        s.args.push(first);
1117        s.args.push(arg("trailer", "string"));
1118        let err = validate_schema(&s).expect_err("variadic-not-last must fail");
1119        assert!(err.contains("variadic"), "err was: {err}");
1120    }
1121
1122    #[test]
1123    fn validate_schema_rejects_required_after_optional() {
1124        let mut s = Schema::default();
1125        let mut first = arg("maybe", "string");
1126        first.required = false;
1127        s.args.push(first);
1128        s.args.push(arg("need", "string"));
1129        let err = validate_schema(&s).expect_err("required-after-optional must fail");
1130        assert!(err.contains("cannot follow"), "err was: {err}");
1131    }
1132
1133    // ── Tail validation (always-on) + strict unknown-rejection ─────
1134
1135    fn schema_with_model_option(strict: bool) -> Schema {
1136        let mut s = Schema {
1137            strict,
1138            ..Schema::default()
1139        };
1140        let mut model = opt("string");
1141        model.short = Some("m".into());
1142        model.choices = Some(vec![
1143            serde_json::json!("mlp"),
1144            serde_json::json!("resnet"),
1145        ]);
1146        s.options.insert("model".into(), model);
1147        s.options.insert("epochs".into(), opt("int"));
1148        // A bool flag, no value.
1149        s.options.insert("validate".into(), opt("bool"));
1150        s
1151    }
1152
1153    fn strict_schema_with_model_option() -> Schema {
1154        schema_with_model_option(true)
1155    }
1156
1157    #[test]
1158    fn validate_tail_accepts_known_long_flag() {
1159        let schema = strict_schema_with_model_option();
1160        let tail = vec!["--epochs".into(), "3".into()];
1161        validate_tail(&tail, &schema).expect("known flag must pass");
1162    }
1163
1164    #[test]
1165    fn validate_tail_accepts_known_short_flag() {
1166        let schema = strict_schema_with_model_option();
1167        let tail = vec!["-m".into(), "mlp".into()];
1168        validate_tail(&tail, &schema).expect("known short must pass");
1169    }
1170
1171    #[test]
1172    fn validate_tail_accepts_bool_flag() {
1173        let schema = strict_schema_with_model_option();
1174        let tail = vec!["--validate".into()];
1175        validate_tail(&tail, &schema).expect("bool flag must pass");
1176    }
1177
1178    #[test]
1179    fn validate_tail_strict_rejects_unknown_long_flag() {
1180        let schema = strict_schema_with_model_option();
1181        let tail = vec!["--nope".into()];
1182        let err = validate_tail(&tail, &schema)
1183            .expect_err("unknown long flag must error in strict mode");
1184        assert!(err.contains("--nope"), "err was: {err}");
1185    }
1186
1187    #[test]
1188    fn validate_tail_strict_suggests_did_you_mean() {
1189        // "--epoch" is one char off "--epochs" — edit distance ≤ 2.
1190        let schema = strict_schema_with_model_option();
1191        let tail = vec!["--epoch".into(), "3".into()];
1192        let err = validate_tail(&tail, &schema).expect_err("typo must error");
1193        assert!(err.contains("did you mean"), "err was: {err}");
1194        assert!(err.contains("--epochs"), "suggestion missing: {err}");
1195    }
1196
1197    #[test]
1198    fn validate_tail_strict_rejects_unknown_short_flag() {
1199        let schema = strict_schema_with_model_option();
1200        let tail = vec!["-z".into()];
1201        let err = validate_tail(&tail, &schema)
1202            .expect_err("unknown short must error in strict mode");
1203        assert!(err.contains("-z"), "err was: {err}");
1204    }
1205
1206    #[test]
1207    fn validate_tail_rejects_bad_choice_always_strict() {
1208        let schema = strict_schema_with_model_option();
1209        let tail = vec!["--model".into(), "lenet".into()];
1210        let err = validate_tail(&tail, &schema)
1211            .expect_err("out-of-set choice must error");
1212        assert!(err.contains("lenet"), "err was: {err}");
1213        assert!(err.contains("allowed"), "err should list allowed values: {err}");
1214    }
1215
1216    #[test]
1217    fn validate_tail_rejects_bad_choice_even_when_not_strict() {
1218        // The main change in this rollout: `choices:` is a positive
1219        // assertion by the author, so it must be enforced regardless
1220        // of `schema.strict`. Only *unknown* flags relax without
1221        // strict.
1222        let schema = schema_with_model_option(false);
1223        let tail = vec!["--model".into(), "lenet".into()];
1224        let err = validate_tail(&tail, &schema)
1225            .expect_err("out-of-set choice must error without strict");
1226        assert!(err.contains("lenet"), "err was: {err}");
1227        assert!(err.contains("allowed"), "err should list allowed values: {err}");
1228    }
1229
1230    #[test]
1231    fn validate_tail_non_strict_tolerates_unknown_flag() {
1232        // Without strict, unknown flags are legitimate pass-through
1233        // candidates (the binary handles them itself).
1234        let schema = schema_with_model_option(false);
1235        let tail = vec!["--fancy-passthrough".into(), "value".into()];
1236        validate_tail(&tail, &schema)
1237            .expect("unknown flag must be tolerated when strict is off");
1238    }
1239
1240    #[test]
1241    fn validate_tail_non_strict_still_checks_known_short_choices() {
1242        // The declared short `-m` has choices; a bad value fails even
1243        // when strict is off. Unknown options would be tolerated, but
1244        // once the user reaches a declared option, its contract holds.
1245        let schema = schema_with_model_option(false);
1246        let tail = vec!["-m".into(), "lenet".into()];
1247        let err = validate_tail(&tail, &schema)
1248            .expect_err("out-of-set choice via short must error");
1249        assert!(err.contains("lenet"), "err was: {err}");
1250    }
1251
1252    #[test]
1253    fn validate_tail_allows_reserved_help() {
1254        // Reserved universal flags must pass even though they are not
1255        // declared in the schema. Defense-in-depth against edge cases
1256        // where `--help` somehow reaches dispatch.
1257        let schema = strict_schema_with_model_option();
1258        let tail = vec!["--help".into()];
1259        validate_tail(&tail, &schema).expect("--help must be allowed");
1260    }
1261
1262    #[test]
1263    fn validate_tail_allows_reserved_fdl_schema() {
1264        // `fdl ddp-bench --fdl-schema` is forwarded to the binary.
1265        let schema = strict_schema_with_model_option();
1266        let tail = vec!["--fdl-schema".into()];
1267        validate_tail(&tail, &schema).expect("--fdl-schema must be allowed");
1268    }
1269
1270    #[test]
1271    fn validate_tail_passthrough_after_double_dash() {
1272        // `--` terminates flag parsing. Tokens after it are positionals
1273        // and must never trigger "unknown flag" errors.
1274        let schema = strict_schema_with_model_option();
1275        let tail = vec!["--".into(), "--arbitrary".into(), "anything".into()];
1276        validate_tail(&tail, &schema).expect("passthrough must work");
1277    }
1278
1279    #[test]
1280    fn validate_presets_strict_rejects_unknown_option() {
1281        let schema = strict_schema_with_model_option();
1282        let mut commands = BTreeMap::new();
1283        let mut bad_options = BTreeMap::new();
1284        bad_options.insert("batchsize".into(), serde_json::json!(32));
1285        commands.insert(
1286            "quick".into(),
1287            CommandSpec {
1288                options: bad_options,
1289                ..Default::default()
1290            },
1291        );
1292        let err = validate_presets_strict(&commands, &schema)
1293            .expect_err("preset pinning undeclared option must error");
1294        assert!(err.contains("quick"), "err should name the preset: {err}");
1295        assert!(err.contains("batchsize"), "err should name the key: {err}");
1296    }
1297
1298    #[test]
1299    fn validate_presets_strict_accepts_known_options() {
1300        let schema = strict_schema_with_model_option();
1301        let mut commands = BTreeMap::new();
1302        let mut good_options = BTreeMap::new();
1303        good_options.insert("model".into(), serde_json::json!("mlp"));
1304        good_options.insert("epochs".into(), serde_json::json!(5));
1305        commands.insert(
1306            "quick".into(),
1307            CommandSpec {
1308                options: good_options,
1309                ..Default::default()
1310            },
1311        );
1312        validate_presets_strict(&commands, &schema)
1313            .expect("presets with declared options must pass");
1314    }
1315
1316    #[test]
1317    fn validate_presets_strict_ignores_run_and_path_kinds() {
1318        // Only Preset-kind entries share the parent schema. Run/Path
1319        // siblings are independent, so strict must not touch them.
1320        let schema = strict_schema_with_model_option();
1321        let mut commands = BTreeMap::new();
1322        commands.insert(
1323            "helper".into(),
1324            CommandSpec {
1325                run: Some("echo hi".into()),
1326                ..Default::default()
1327            },
1328        );
1329        commands.insert(
1330            "nested".into(),
1331            CommandSpec {
1332                path: Some("./nested/".into()),
1333                ..Default::default()
1334            },
1335        );
1336        validate_presets_strict(&commands, &schema)
1337            .expect("run/path siblings must be ignored by preset strict check");
1338    }
1339
1340    // ── Preset value validation (always-on `choices:`) ──────────────
1341
1342    #[test]
1343    fn validate_preset_values_rejects_bad_choice_even_without_strict() {
1344        // Schema has `choices:` on model; a preset pinning model to
1345        // something outside the list must fail at load, strict or not.
1346        let schema = schema_with_model_option(false);
1347        let mut commands = BTreeMap::new();
1348        let mut opts = BTreeMap::new();
1349        opts.insert("model".into(), serde_json::json!("lenet"));
1350        commands.insert(
1351            "quick".into(),
1352            CommandSpec {
1353                options: opts,
1354                ..Default::default()
1355            },
1356        );
1357        let err = validate_preset_values(&commands, &schema)
1358            .expect_err("out-of-choices preset must error");
1359        assert!(err.contains("quick"), "preset name missing: {err}");
1360        assert!(err.contains("model"), "option name missing: {err}");
1361        assert!(err.contains("lenet"), "bad value missing: {err}");
1362        assert!(err.contains("allowed"), "allowed list missing: {err}");
1363    }
1364
1365    #[test]
1366    fn validate_preset_values_accepts_in_choices_preset() {
1367        let schema = schema_with_model_option(false);
1368        let mut commands = BTreeMap::new();
1369        let mut opts = BTreeMap::new();
1370        opts.insert("model".into(), serde_json::json!("mlp"));
1371        commands.insert(
1372            "quick".into(),
1373            CommandSpec {
1374                options: opts,
1375                ..Default::default()
1376            },
1377        );
1378        validate_preset_values(&commands, &schema)
1379            .expect("in-choices preset must pass");
1380    }
1381
1382    #[test]
1383    fn validate_preset_values_ignores_undeclared_keys() {
1384        // Unknown keys aren't our concern here — that's for
1385        // `validate_presets_strict`, which only runs under strict.
1386        let schema = schema_with_model_option(false);
1387        let mut commands = BTreeMap::new();
1388        let mut opts = BTreeMap::new();
1389        opts.insert("extra".into(), serde_json::json!("whatever"));
1390        commands.insert(
1391            "quick".into(),
1392            CommandSpec {
1393                options: opts,
1394                ..Default::default()
1395            },
1396        );
1397        validate_preset_values(&commands, &schema)
1398            .expect("undeclared key must be ignored by value validator");
1399    }
1400
1401    #[test]
1402    fn validate_preset_values_ignores_options_without_choices() {
1403        // `epochs` is declared as int with no `choices:`, so any value
1404        // passes the choice check (type validation is a separate pass).
1405        let schema = schema_with_model_option(false);
1406        let mut commands = BTreeMap::new();
1407        let mut opts = BTreeMap::new();
1408        opts.insert("epochs".into(), serde_json::json!(999));
1409        commands.insert(
1410            "quick".into(),
1411            CommandSpec {
1412                options: opts,
1413                ..Default::default()
1414            },
1415        );
1416        validate_preset_values(&commands, &schema)
1417            .expect("no-choices option must accept any value");
1418    }
1419
1420    #[test]
1421    fn validate_schema_rejects_required_with_default() {
1422        let mut s = Schema::default();
1423        let mut a = arg("x", "string");
1424        a.default = Some(serde_json::json!("foo"));
1425        s.args.push(a);
1426        let err = validate_schema(&s).expect_err("required+default must fail");
1427        assert!(err.contains("contradiction"), "err was: {err}");
1428    }
1429
1430    /// Regression guard: fdl.yml.example must keep a working `doc` command.
1431    /// The fdl.doc pipeline (api-ref for the port skill, rustdoc warning
1432    /// enforcement in CI) depends on this entry existing and producing output.
1433    #[test]
1434    fn fdl_yml_example_has_doc_script() {
1435        let cfg = load_example();
1436        let doc = cfg.commands.get("doc").unwrap_or_else(|| {
1437            panic!(
1438                "fdl.yml.example is missing a `doc` command; the rustdoc pipeline \
1439                 depends on `fdl doc` being defined"
1440            )
1441        });
1442        let cmd = doc
1443            .run
1444            .as_deref()
1445            .expect("fdl.yml.example `doc` command must be a `run:` entry");
1446        assert!(
1447            !cmd.trim().is_empty(),
1448            "fdl.yml.example `doc` command has an empty `run:` command"
1449        );
1450        assert!(
1451            cmd.contains("cargo doc"),
1452            "fdl.yml.example `doc` command must invoke `cargo doc`, got: {cmd}"
1453        );
1454        // Must assert some output was produced -- otherwise rustdoc can
1455        // silently succeed without writing anything useful (e.g. when the
1456        // target crate fails to resolve). Keeping the exact check liberal:
1457        // any mention of target/doc as a produced artifact counts.
1458        assert!(
1459            cmd.contains("target/doc"),
1460            "fdl.yml.example `doc` command must verify output was produced \
1461             (expected a `test -f target/doc/...` check), got: {cmd}"
1462        );
1463    }
1464
1465    #[test]
1466    fn command_spec_kind_mutex_run_and_path() {
1467        let spec = CommandSpec {
1468            run: Some("echo".into()),
1469            path: Some("x/".into()),
1470            ..Default::default()
1471        };
1472        let err = spec.kind().expect_err("run + path must fail");
1473        assert!(err.contains("both"), "err was: {err}");
1474    }
1475
1476    #[test]
1477    fn command_spec_kind_path_convention() {
1478        let spec = CommandSpec::default();
1479        assert_eq!(spec.kind().unwrap(), CommandKind::Path);
1480    }
1481
1482    #[test]
1483    fn command_spec_kind_preset_when_preset_fields_set() {
1484        let spec = CommandSpec {
1485            training: Some(TrainingConfig {
1486                epochs: Some(1),
1487                ..Default::default()
1488            }),
1489            ..Default::default()
1490        };
1491        assert_eq!(spec.kind().unwrap(), CommandKind::Preset);
1492    }
1493
1494    #[test]
1495    fn command_spec_kind_preset_when_only_options_set() {
1496        // `options:` alone is enough to make a preset — not every preset
1497        // overrides the structured ddp/training/output blocks.
1498        let mut options = BTreeMap::new();
1499        options.insert("model".into(), serde_json::json!("linear"));
1500        let spec = CommandSpec {
1501            options,
1502            ..Default::default()
1503        };
1504        assert_eq!(spec.kind().unwrap(), CommandKind::Preset);
1505    }
1506
1507    #[test]
1508    fn command_spec_kind_path_explicit() {
1509        // Explicit `path:` is a Path even if preset fields are also set;
1510        // the presence of `path:` is the kind-selecting field.
1511        let spec = CommandSpec {
1512            path: Some("./sub/".into()),
1513            ..Default::default()
1514        };
1515        assert_eq!(spec.kind().unwrap(), CommandKind::Path);
1516    }
1517
1518    #[test]
1519    fn command_spec_kind_rejects_docker_without_run() {
1520        // `docker:` is meaningful only as a wrapper around an inline
1521        // `run:` script. Pairing it with path/preset is a silent noop
1522        // at dispatch time, so we reject at load.
1523        let spec = CommandSpec {
1524            docker: Some("cuda".into()),
1525            ..Default::default()
1526        };
1527        let err = spec
1528            .kind()
1529            .expect_err("docker without run must fail");
1530        assert!(err.contains("docker"), "err was: {err}");
1531    }
1532
1533    #[test]
1534    fn command_spec_kind_allows_docker_with_run() {
1535        let spec = CommandSpec {
1536            run: Some("cargo test".into()),
1537            docker: Some("dev".into()),
1538            ..Default::default()
1539        };
1540        assert_eq!(spec.kind().unwrap(), CommandKind::Run);
1541    }
1542
1543    #[test]
1544    fn command_spec_deserialize_from_null() {
1545        let yaml = "cmd: ~";
1546        let map: BTreeMap<String, CommandSpec> =
1547            serde_yaml::from_str(yaml).expect("null must deserialize to default");
1548        let spec = map.get("cmd").expect("cmd missing");
1549        assert!(spec.run.is_none() && spec.path.is_none());
1550        assert_eq!(spec.kind().unwrap(), CommandKind::Path);
1551    }
1552
1553    #[test]
1554    fn command_config_arg_name_deserializes_kebab_case() {
1555        // YAML uses `arg-name:`, Rust field is `arg_name`.
1556        let yaml = "arg-name: recipe\nentry: echo\n";
1557        let cfg: CommandConfig =
1558            serde_yaml::from_str(yaml).expect("arg-name must parse");
1559        assert_eq!(cfg.arg_name.as_deref(), Some("recipe"));
1560    }
1561
1562    #[test]
1563    fn command_config_arg_name_defaults_to_none() {
1564        let cfg: CommandConfig =
1565            serde_yaml::from_str("entry: echo\n").expect("minimal cfg must parse");
1566        assert!(cfg.arg_name.is_none());
1567    }
1568
1569    // ── resolve_config_layers: inherit-from + env composition ────────────
1570    //
1571    // Integration coverage for how `inherit-from:` chains compose with env
1572    // overlays at the config-module boundary. The overlay module already
1573    // tests `resolve_chain` in isolation; here we verify the concat+dedup
1574    // behaviour that config.rs layers on top.
1575
1576    /// Minimal tempdir helper — matches the pattern used across the crate.
1577    struct TempDir(PathBuf);
1578    impl TempDir {
1579        fn new() -> Self {
1580            use std::sync::atomic::{AtomicU64, Ordering};
1581            static N: AtomicU64 = AtomicU64::new(0);
1582            let dir = std::env::temp_dir().join(format!(
1583                "fdl-cfg-test-{}-{}",
1584                std::process::id(),
1585                N.fetch_add(1, Ordering::Relaxed)
1586            ));
1587            std::fs::create_dir_all(&dir).unwrap();
1588            Self(dir)
1589        }
1590    }
1591    impl Drop for TempDir {
1592        fn drop(&mut self) {
1593            let _ = std::fs::remove_dir_all(&self.0);
1594        }
1595    }
1596
1597    fn filenames(layers: &[(PathBuf, serde_yaml::Value)]) -> Vec<String> {
1598        layers
1599            .iter()
1600            .map(|(p, _)| {
1601                p.file_name()
1602                    .and_then(|n| n.to_str())
1603                    .unwrap_or("?")
1604                    .to_string()
1605            })
1606            .collect()
1607    }
1608
1609    #[test]
1610    fn resolve_config_layers_base_only() {
1611        let tmp = TempDir::new();
1612        let base = tmp.0.join("fdl.yml");
1613        std::fs::write(&base, "a: 1\n").unwrap();
1614        let layers = resolve_config_layers(&base, None).unwrap();
1615        assert_eq!(filenames(&layers), vec!["fdl.yml"]);
1616    }
1617
1618    #[test]
1619    fn resolve_config_layers_base_with_env_overlay() {
1620        let tmp = TempDir::new();
1621        let base = tmp.0.join("fdl.yml");
1622        let env = tmp.0.join("fdl.ci.yml");
1623        std::fs::write(&base, "a: 1\n").unwrap();
1624        std::fs::write(&env, "b: 2\n").unwrap();
1625        let layers = resolve_config_layers(&base, Some("ci")).unwrap();
1626        assert_eq!(filenames(&layers), vec!["fdl.yml", "fdl.ci.yml"]);
1627    }
1628
1629    #[test]
1630    fn resolve_config_layers_env_inherits_from_mixin() {
1631        // fdl.ci.yml inherits from fdl.cloud.yml (standalone mix-in, not
1632        // derived from base). Combined chain: [base, cloud, ci].
1633        let tmp = TempDir::new();
1634        let base = tmp.0.join("fdl.yml");
1635        let cloud = tmp.0.join("fdl.cloud.yml");
1636        let ci = tmp.0.join("fdl.ci.yml");
1637        std::fs::write(&base, "a: 1\n").unwrap();
1638        std::fs::write(&cloud, "b: 2\n").unwrap();
1639        std::fs::write(&ci, "inherit-from: fdl.cloud.yml\nc: 3\n").unwrap();
1640        let layers = resolve_config_layers(&base, Some("ci")).unwrap();
1641        assert_eq!(
1642            filenames(&layers),
1643            vec!["fdl.yml", "fdl.cloud.yml", "fdl.ci.yml"]
1644        );
1645    }
1646
1647    #[test]
1648    fn resolve_config_layers_dedups_when_env_inherits_from_base() {
1649        // fdl.ci.yml inherits from fdl.yml directly. Base is already in
1650        // the layer list, so env's chain collapses into it — the final
1651        // list must not have fdl.yml twice.
1652        let tmp = TempDir::new();
1653        let base = tmp.0.join("fdl.yml");
1654        let ci = tmp.0.join("fdl.ci.yml");
1655        std::fs::write(&base, "a: 1\n").unwrap();
1656        std::fs::write(&ci, "inherit-from: fdl.yml\nb: 2\n").unwrap();
1657        let layers = resolve_config_layers(&base, Some("ci")).unwrap();
1658        assert_eq!(filenames(&layers), vec!["fdl.yml", "fdl.ci.yml"]);
1659    }
1660
1661    #[test]
1662    fn resolve_config_layers_merged_value_matches_chain() {
1663        // End-to-end: the merge result should reflect the chain order
1664        // (base < cloud < ci), with each subsequent layer overriding.
1665        let tmp = TempDir::new();
1666        let base = tmp.0.join("fdl.yml");
1667        let cloud = tmp.0.join("fdl.cloud.yml");
1668        let ci = tmp.0.join("fdl.ci.yml");
1669        std::fs::write(&base, "value: base\nkeep_base: yes\n").unwrap();
1670        std::fs::write(&cloud, "value: cloud\nkeep_cloud: yes\n").unwrap();
1671        std::fs::write(
1672            &ci,
1673            "inherit-from: fdl.cloud.yml\nvalue: ci\nkeep_ci: yes\n",
1674        )
1675        .unwrap();
1676        let merged = load_merged_value(&base, Some("ci")).unwrap();
1677        let m = merged.as_mapping().unwrap();
1678        // Last writer wins on `value`.
1679        assert_eq!(
1680            m.get(serde_yaml::Value::String("value".into())).unwrap(),
1681            &serde_yaml::Value::String("ci".into())
1682        );
1683        // Each layer's unique key survives.
1684        assert!(m.contains_key(serde_yaml::Value::String("keep_base".into())));
1685        assert!(m.contains_key(serde_yaml::Value::String("keep_cloud".into())));
1686        assert!(m.contains_key(serde_yaml::Value::String("keep_ci".into())));
1687    }
1688
1689    #[test]
1690    fn resolve_config_layers_missing_env_errors() {
1691        let tmp = TempDir::new();
1692        let base = tmp.0.join("fdl.yml");
1693        std::fs::write(&base, "a: 1\n").unwrap();
1694        let err = resolve_config_layers(&base, Some("nope")).unwrap_err();
1695        assert!(err.contains("nope"));
1696        assert!(err.contains("not found"));
1697    }
1698
1699    #[test]
1700    fn resolve_config_layers_base_inherit_from_chain() {
1701        // Base itself uses inherit-from: shared-defaults.yml. The
1702        // defaults live in a sibling file and are merged UNDER the base.
1703        let tmp = TempDir::new();
1704        let defaults = tmp.0.join("shared.yml");
1705        let base = tmp.0.join("fdl.yml");
1706        std::fs::write(&defaults, "policy: default\n").unwrap();
1707        std::fs::write(&base, "inherit-from: shared.yml\npolicy: override\n").unwrap();
1708        let layers = resolve_config_layers(&base, None).unwrap();
1709        assert_eq!(filenames(&layers), vec!["shared.yml", "fdl.yml"]);
1710    }
1711}