Skip to main content

flodl_cli/
config.rs

1//! fdl.yaml configuration loading and discovery.
2//!
3//! Walks up from CWD to find the project manifest, parses YAML/JSON,
4//! and loads sub-command configs from registered command directories.
5
6use std::collections::BTreeMap;
7use std::path::{Path, PathBuf};
8
9use serde::{Deserialize, Serialize};
10
11// ── Root project config ─────────────────────────────────────────────────
12
13/// Root fdl.yaml at project root.
14#[derive(Debug, Default, Deserialize)]
15pub struct ProjectConfig {
16    #[serde(default)]
17    pub description: Option<String>,
18    /// Commands defined at this level. Each value is a [`CommandSpec`] that
19    /// encodes the kind of command (inline `run` script, `path` pointer to
20    /// a child fdl.yml, or inline preset reusing the parent entry).
21    #[serde(default)]
22    pub commands: BTreeMap<String, CommandSpec>,
23}
24
25// ── Sub-command config ──────────────────────────────────────────────────
26
27/// Sub-command fdl.yaml (e.g., ddp-bench/fdl.yaml).
28///
29/// Identical shape to [`ProjectConfig`] but with an executable `entry:`
30/// and optional structured config sections (ddp/training/output) that
31/// inline preset commands can override.
32#[derive(Debug, Default, Deserialize)]
33pub struct CommandConfig {
34    #[serde(default)]
35    pub description: Option<String>,
36    #[serde(default)]
37    pub entry: Option<String>,
38    /// Docker compose service name. When set, entry is wrapped in
39    /// `docker compose run --rm <service> bash -c "cd <workdir> && <entry> <args>"`.
40    #[serde(default)]
41    pub docker: Option<String>,
42    #[serde(default)]
43    pub ddp: Option<DdpConfig>,
44    #[serde(default)]
45    pub training: Option<TrainingConfig>,
46    #[serde(default)]
47    pub output: Option<OutputConfig>,
48    /// Nested commands — inline presets of this config's entry, standalone
49    /// `run` scripts, or `path` pointers to child fdl.yml files.
50    #[serde(default)]
51    pub commands: BTreeMap<String, CommandSpec>,
52    /// Help-only placeholder name for the first-positional slot when
53    /// `commands:` holds presets. Defaults to "preset". Pure UX — it
54    /// does not affect dispatch (presets are always looked up by name).
55    /// Useful to match domain vocabulary, e.g. `arg-name: recipe` or
56    /// `arg-name: target`.
57    #[serde(default, rename = "arg-name")]
58    pub arg_name: Option<String>,
59    /// Inline interim schema (before `<entry> --fdl-schema` is implemented).
60    /// Drives help rendering, validation, and completions.
61    #[serde(default)]
62    pub schema: Option<Schema>,
63}
64
65// ── Unified command specification ───────────────────────────────────────
66
67/// A command at any nesting level. Three mutually-exclusive kinds are
68/// recognised at resolve time:
69///
70/// - **Path** (`path` set, or by default when the map is empty/null): the
71///   command is a pointer to a child `fdl.yml`. By convention the path is
72///   `./<command-name>/` when omitted.
73/// - **Run** (`run` set): the command is a self-contained shell script
74///   that is executed as-is. Optional `docker:` service routes it through
75///   `docker compose`.
76/// - **Preset**: neither `path` nor `run` is set. The command merges its
77///   `ddp` / `training` / `output` / `options` fields over the enclosing
78///   `CommandConfig` defaults and invokes that config's `entry:`.
79#[derive(Debug, Default, Clone)]
80pub struct CommandSpec {
81    pub description: Option<String>,
82    /// Inline shell command. Mutex with `path`.
83    pub run: Option<String>,
84    /// Pointer to a child directory containing its own `fdl.yml`. Absolute
85    /// or relative to the declaring config's directory. Mutex with `run`.
86    /// `None` + no other fields = "use the convention path
87    /// `./<command-name>/`".
88    pub path: Option<String>,
89    /// Docker compose service for `run`-kind commands.
90    pub docker: Option<String>,
91    /// Preset overrides. Only consulted when neither `run` nor `path` is set.
92    pub ddp: Option<DdpConfig>,
93    pub training: Option<TrainingConfig>,
94    pub output: Option<OutputConfig>,
95    pub options: BTreeMap<String, serde_json::Value>,
96}
97
/// Classification of a [`CommandSpec`], as produced by
/// [`CommandSpec::kind`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommandKind {
    /// The spec has `run: "…"`: execute the inline shell command,
    /// optionally inside Docker.
    Run,
    /// The spec has `path: "…"`, or falls back to the convention default:
    /// load `<path>/fdl.yml` and recurse.
    Path,
    /// The spec has neither `run` nor `path`: merge its preset fields onto
    /// the enclosing `CommandConfig` defaults and invoke that config's
    /// `entry:`.
    Preset,
}
110
111impl CommandSpec {
112    /// Classify this command. Returns an error when both `run` and `path`
113    /// are declared — always a mistake, caught loudly rather than silently
114    /// picking one. Also rejects `docker:` without `run:`: the docker
115    /// service wraps the inline run-script, so pairing it with a `path:`
116    /// pointer or a preset entry is always silent-noop territory.
117    pub fn kind(&self) -> Result<CommandKind, String> {
118        if self.docker.is_some() && self.run.is_none() {
119            return Err(
120                "command declares `docker:` without `run:`; \
121                 `docker:` only wraps inline run-scripts"
122                    .to_string(),
123            );
124        }
125        match (self.run.as_deref(), self.path.as_deref()) {
126            (Some(_), Some(_)) => Err(
127                "command declares both `run:` and `path:`; \
128                 only one is allowed"
129                    .to_string(),
130            ),
131            (Some(_), None) => Ok(CommandKind::Run),
132            (None, Some(_)) => Ok(CommandKind::Path),
133            (None, None) => {
134                // No kind-selecting field. If preset fields are present,
135                // treat as Preset; otherwise, fall through to Path (the
136                // convention-default: `./<name>/fdl.yml`).
137                if self.ddp.is_some()
138                    || self.training.is_some()
139                    || self.output.is_some()
140                    || !self.options.is_empty()
141                {
142                    Ok(CommandKind::Preset)
143                } else {
144                    Ok(CommandKind::Path)
145                }
146            }
147        }
148    }
149
150    /// Resolve the effective directory for a `Path`-kind command declared
151    /// in `parent_dir`. Applies the `./<name>/` convention when `path` is
152    /// unset.
153    pub fn resolve_path(&self, name: &str, parent_dir: &Path) -> PathBuf {
154        match &self.path {
155            Some(p) => parent_dir.join(p),
156            None => parent_dir.join(name),
157        }
158    }
159}
160
161// Custom Deserialize so that `commands: { name: ~ }` (YAML null) and
162// `commands: { name: }` (empty value) both deserialize to a default
163// `CommandSpec`. Without this, serde_yaml errors on null because a
164// struct expects a map.
165impl<'de> Deserialize<'de> for CommandSpec {
166    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
167    where
168        D: serde::Deserializer<'de>,
169    {
170        #[derive(Deserialize)]
171        struct Inner {
172            #[serde(default)]
173            description: Option<String>,
174            #[serde(default)]
175            run: Option<String>,
176            #[serde(default)]
177            path: Option<String>,
178            #[serde(default)]
179            docker: Option<String>,
180            #[serde(default)]
181            ddp: Option<DdpConfig>,
182            #[serde(default)]
183            training: Option<TrainingConfig>,
184            #[serde(default)]
185            output: Option<OutputConfig>,
186            #[serde(default)]
187            options: BTreeMap<String, serde_json::Value>,
188        }
189
190        let raw = serde_yaml::Value::deserialize(deserializer)?;
191        if matches!(raw, serde_yaml::Value::Null) {
192            return Ok(Self::default());
193        }
194        let inner: Inner =
195            serde_yaml::from_value(raw).map_err(serde::de::Error::custom)?;
196        Ok(Self {
197            description: inner.description,
198            run: inner.run,
199            path: inner.path,
200            docker: inner.docker,
201            ddp: inner.ddp,
202            training: inner.training,
203            output: inner.output,
204            options: inner.options,
205        })
206    }
207}
208
209// ── Schema (interim hand-written, future `<entry> --fdl-schema`) ────────
210
211/// The schema declared inline in a sub-command's fdl.yaml. Maps 1:1 to
212/// what `<entry> --fdl-schema` will later emit as JSON.
213#[derive(Debug, Clone, Default, Deserialize, Serialize)]
214pub struct Schema {
215    #[serde(default, skip_serializing_if = "Vec::is_empty")]
216    pub args: Vec<ArgSpec>,
217    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
218    pub options: BTreeMap<String, OptionSpec>,
219    /// When true, the fdl layer rejects options not declared in the
220    /// schema before the sub-command's entry ever runs. Two validation
221    /// points:
222    ///
223    /// 1. *Load time* — preset `options:` maps are checked against the
224    ///    enclosing `schema.options` (see [`validate_presets_strict`]).
225    ///    A typo like `options: { batchsize: 32 }` when the schema
226    ///    declares `batch-size` is a loud load error.
227    /// 2. *Dispatch time* — the user's extra argv tail is tokenized
228    ///    against the schema (see [`validate_tail`]). Unknown flags
229    ///    error out with a "did you mean" suggestion instead of being
230    ///    silently forwarded.
231    ///
232    /// **Validation NOT gated by `strict`** — always-on for declared
233    /// items, so positive assertions from the schema always hold:
234    /// - `choices:` on options: the user's value and any preset YAML
235    ///   value must be in the list.
236    /// - `choices:` on positional args: the user's value must be in
237    ///   the list (when strict is off, this may mis-fire if unknown
238    ///   flags push orphan values into positional slots — opt into
239    ///   strict for clean positional handling).
240    ///
241    /// `strict` is purely about **unknown** options/args, not about
242    /// validating declared contracts.
243    #[serde(default, skip_serializing_if = "is_false")]
244    pub strict: bool,
245}
246
247/// A flag option, `--name` / `-x`.
248#[derive(Debug, Clone, Deserialize, Serialize)]
249pub struct OptionSpec {
250    #[serde(rename = "type")]
251    pub ty: String,
252    #[serde(default, skip_serializing_if = "Option::is_none")]
253    pub description: Option<String>,
254    #[serde(default, skip_serializing_if = "Option::is_none")]
255    pub default: Option<serde_json::Value>,
256    #[serde(default, skip_serializing_if = "Option::is_none")]
257    pub choices: Option<Vec<serde_json::Value>>,
258    /// Single-letter short alias.
259    #[serde(default, skip_serializing_if = "Option::is_none")]
260    pub short: Option<String>,
261    #[serde(default, skip_serializing_if = "Option::is_none")]
262    pub env: Option<String>,
263    /// Shell snippet producing completion values.
264    /// Consumed by `fdl completions <shell>` (follow-up rollout task).
265    #[serde(default, skip_serializing_if = "Option::is_none")]
266    #[allow(dead_code)]
267    pub completer: Option<String>,
268}
269
270/// A positional argument.
271#[derive(Debug, Clone, Deserialize, Serialize)]
272pub struct ArgSpec {
273    pub name: String,
274    #[serde(rename = "type")]
275    pub ty: String,
276    #[serde(default, skip_serializing_if = "Option::is_none")]
277    pub description: Option<String>,
278    #[serde(default = "default_required")]
279    pub required: bool,
280    #[serde(default, skip_serializing_if = "is_false")]
281    pub variadic: bool,
282    #[serde(default, skip_serializing_if = "Option::is_none")]
283    pub default: Option<serde_json::Value>,
284    #[serde(default, skip_serializing_if = "Option::is_none")]
285    pub choices: Option<Vec<serde_json::Value>>,
286    /// Shell snippet producing completion values.
287    /// Consumed by `fdl completions <shell>` (follow-up rollout task).
288    #[serde(default, skip_serializing_if = "Option::is_none")]
289    #[allow(dead_code)]
290    pub completer: Option<String>,
291}
292
/// Serde `skip_serializing_if` predicate: true exactly when the flag is
/// `false`, so false booleans are left out of serialized output.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}
296
/// Serde default for `ArgSpec::required`: a positional is required unless
/// the file says otherwise.
fn default_required() -> bool {
    true
}
300
/// Long flags owned by the fdl level; a sub-command option must not shadow
/// any of them. Kept in sync with main.rs dispatch.
const RESERVED_LONGS: &[&str] = &["help", "version", "quiet", "env"];

/// Short flags owned by the fdl level; a sub-command option's `short:`
/// must not shadow any of them.
const RESERVED_SHORTS: &[&str] = &["h", "V", "q", "v", "e"];

/// Type names accepted in the `type:` field of schema options and args.
const VALID_TYPES: &[&str] = &[
    "string",
    "int",
    "float",
    "bool",
    "path",
    "list[string]",
    "list[int]",
    "list[float]",
    "list[path]",
];
313
314/// Check a schema for collisions and structural issues.
315///
316/// Loud-at-load-time: ambiguity caught here is cheaper to fix than mysterious
317/// pass-through behavior at runtime.
318pub fn validate_schema(schema: &Schema) -> Result<(), String> {
319    // Options: check types, shorts, reserved flags.
320    let mut short_seen: BTreeMap<String, String> = BTreeMap::new();
321    for (long, spec) in &schema.options {
322        if !VALID_TYPES.contains(&spec.ty.as_str()) {
323            return Err(format!(
324                "option --{}: unknown type '{}' (valid: {})",
325                long,
326                spec.ty,
327                VALID_TYPES.join(", ")
328            ));
329        }
330        if RESERVED_LONGS.contains(&long.as_str()) {
331            return Err(format!(
332                "option --{long} shadows a reserved fdl-level flag"
333            ));
334        }
335        if let Some(s) = &spec.short {
336            if s.chars().count() != 1 {
337                return Err(format!(
338                    "option --{long}: `short: \"{s}\"` must be a single character"
339                ));
340            }
341            if RESERVED_SHORTS.contains(&s.as_str()) {
342                return Err(format!(
343                    "option --{long}: short -{s} shadows a reserved fdl-level flag"
344                ));
345            }
346            if let Some(prev) = short_seen.insert(s.clone(), long.clone()) {
347                return Err(format!(
348                    "options --{prev} and --{long} both declare short -{s}"
349                ));
350            }
351        }
352    }
353
354    // Args: check types, variadic-only-at-end, no-required-after-optional.
355    let mut seen_optional = false;
356    let mut name_seen: BTreeMap<String, ()> = BTreeMap::new();
357    for (i, arg) in schema.args.iter().enumerate() {
358        if !VALID_TYPES.contains(&arg.ty.as_str()) {
359            return Err(format!(
360                "arg <{}>: unknown type '{}' (valid: {})",
361                arg.name,
362                arg.ty,
363                VALID_TYPES.join(", ")
364            ));
365        }
366        if name_seen.insert(arg.name.clone(), ()).is_some() {
367            return Err(format!("duplicate positional name <{}>", arg.name));
368        }
369        if arg.variadic && i != schema.args.len() - 1 {
370            return Err(format!(
371                "arg <{}>: variadic positional must be the last one",
372                arg.name
373            ));
374        }
375        let is_optional = !arg.required || arg.default.is_some();
376        if arg.required && arg.default.is_some() {
377            return Err(format!(
378                "arg <{}>: `required: true` with a default is a contradiction",
379                arg.name
380            ));
381        }
382        if seen_optional && arg.required && arg.default.is_none() {
383            return Err(format!(
384                "arg <{}>: required positional cannot follow an optional one",
385                arg.name
386            ));
387        }
388        if is_optional {
389            seen_optional = true;
390        }
391    }
392
393    Ok(())
394}
395
396// ── Structured config sections ──────────────────────────────────────────
397
398/// DDP configuration. Maps 1:1 to flodl DdpConfig / DdpRunConfig.
399#[derive(Debug, Clone, Default, Deserialize)]
400pub struct DdpConfig {
401    pub mode: Option<String>,
402    pub policy: Option<String>,
403    pub backend: Option<String>,
404    /// "auto" or integer.
405    pub anchor: Option<serde_json::Value>,
406    pub max_anchor: Option<u32>,
407    pub overhead_target: Option<f64>,
408    pub divergence_threshold: Option<f64>,
409    /// null (unlimited) or integer.
410    pub max_batch_diff: Option<serde_json::Value>,
411    pub speed_hint: Option<SpeedHint>,
412    pub partition_ratios: Option<Vec<f64>>,
413    /// "auto" or bool.
414    pub progressive: Option<serde_json::Value>,
415    pub max_grad_norm: Option<f64>,
416    pub lr_scale_ratio: Option<f64>,
417    pub snapshot_timeout: Option<u32>,
418    pub checkpoint_every: Option<u32>,
419    pub timeline: Option<bool>,
420}
421
422#[derive(Debug, Clone, Default, Deserialize)]
423pub struct SpeedHint {
424    pub slow_rank: usize,
425    pub ratio: f64,
426}
427
428/// Training scalars.
429#[derive(Debug, Clone, Default, Deserialize)]
430pub struct TrainingConfig {
431    pub epochs: Option<u32>,
432    pub batch_size: Option<u32>,
433    pub batches_per_epoch: Option<u32>,
434    pub lr: Option<f64>,
435    pub seed: Option<u64>,
436}
437
438/// Output settings.
439#[derive(Debug, Clone, Default, Deserialize)]
440pub struct OutputConfig {
441    pub dir: Option<String>,
442    pub timeline: Option<bool>,
443    pub monitor: Option<u16>,
444}
445
446
447// ── Config discovery ────────────────────────────────────────────────────
448
/// Config file names probed in each directory, in priority order.
const CONFIG_NAMES: &[&str] = &[
    "fdl.yaml",
    "fdl.yml",
    "fdl.json",
];

/// Suffixes that mark a committed template variant of a config file
/// (e.g. `fdl.yaml.example`), in priority order.
const EXAMPLE_SUFFIXES: &[&str] = &[
    ".example",
    ".dist",
];
451
452/// Walk up from `start` looking for fdl.yaml.
453///
454/// If only an `.example` (or `.dist`) variant exists, offers to copy it
455/// to the real config path. This lets the repo commit `fdl.yaml.example`
456/// while `.gitignore`-ing `fdl.yaml` so users can customize locally.
457pub fn find_config(start: &Path) -> Option<PathBuf> {
458    let mut dir = start.to_path_buf();
459    loop {
460        // First pass: look for the real config.
461        for name in CONFIG_NAMES {
462            let candidate = dir.join(name);
463            if candidate.is_file() {
464                return Some(candidate);
465            }
466        }
467        // Second pass: look for .example/.dist variants.
468        for name in CONFIG_NAMES {
469            for suffix in EXAMPLE_SUFFIXES {
470                let example = dir.join(format!("{name}{suffix}"));
471                if example.is_file() {
472                    let target = dir.join(name);
473                    if try_copy_example(&example, &target) {
474                        return Some(target);
475                    }
476                    // User declined: use the example directly.
477                    return Some(example);
478                }
479            }
480        }
481        if !dir.pop() {
482            return None;
483        }
484    }
485}
486
/// Ask on stderr whether `example` should be copied to `target`, read one
/// line from stdin, and perform the copy when the answer is empty, "y" or
/// "yes" (case-insensitive, surrounding whitespace ignored).
///
/// Returns true only when the copy succeeded. A stdin read error, any
/// other answer, or a failed copy all yield false; a failed copy is also
/// reported on stderr.
fn try_copy_example(example: &Path, target: &Path) -> bool {
    let example_name = example.file_name().unwrap_or_default().to_string_lossy();
    let target_name = target.file_name().unwrap_or_default().to_string_lossy();
    eprintln!(
        "fdl: found {example_name} but no {target_name}. \
         Copy it to create your local config? [Y/n] "
    );

    let mut line = String::new();
    if std::io::stdin().read_line(&mut line).is_err() {
        return false;
    }

    // An empty answer counts as the default "yes".
    let accepted = matches!(line.trim().to_lowercase().as_str(), "" | "y" | "yes");
    if !accepted {
        return false;
    }

    if let Err(e) = std::fs::copy(example, target) {
        eprintln!("fdl: failed to copy: {e}");
        return false;
    }
    eprintln!("fdl: created {target_name} (edit to customize)");
    true
}
516
517/// Load a project config from a specific path.
518pub fn load_project(path: &Path) -> Result<ProjectConfig, String> {
519    load_project_with_env(path, None)
520}
521
522/// Load a project config with an optional environment overlay.
523///
524/// When `env` is `Some`, looks for a sibling `fdl.<env>.{yml,yaml,json}` next
525/// to `base_path` and deep-merges it over the base before deserialization.
526/// Missing overlay files are a hard error — the user asked for this env, so
527/// silently ignoring it would be worse than a clear message.
528pub fn load_project_with_env(
529    base_path: &Path,
530    env: Option<&str>,
531) -> Result<ProjectConfig, String> {
532    let merged = load_merged_value(base_path, env)?;
533    serde_yaml::from_value::<ProjectConfig>(merged)
534        .map_err(|e| format!("{}: {}", base_path.display(), e))
535}
536
537/// Load the raw merged [`serde_yaml::Value`] for a config + optional env
538/// overlay. Exposed so callers like `fdl config show` can inspect the
539/// resolved view before it is deserialized into a strongly-typed struct.
540pub fn load_merged_value(
541    base_path: &Path,
542    env: Option<&str>,
543) -> Result<serde_yaml::Value, String> {
544    let layers = resolve_config_layers(base_path, env)?;
545    Ok(crate::overlay::merge_layers(
546        layers.into_iter().map(|(_, v)| v).collect::<Vec<_>>(),
547    ))
548}
549
550/// Resolve every layer contributing to a config, in merge order, with
551/// `inherit-from:` chains expanded. Paired with the base file + optional
552/// env overlay, the result is `[chain(base)..., chain(env_overlay)...]`
553/// de-duplicated by canonical path (kept-first).
554///
555/// Used by `fdl config show` for per-leaf source annotation, and
556/// internally by [`load_merged_value`] / [`load_command_with_env`] so
557/// every consumer picks up `inherit-from:` uniformly.
558pub fn resolve_config_layers(
559    base_path: &Path,
560    env: Option<&str>,
561) -> Result<Vec<(PathBuf, serde_yaml::Value)>, String> {
562    let mut layers = crate::overlay::resolve_chain(base_path)?;
563    if let Some(name) = env {
564        match crate::overlay::find_env_file(base_path, name) {
565            Some(p) => {
566                let env_chain = crate::overlay::resolve_chain(&p)?;
567                layers.extend(env_chain);
568            }
569            None => {
570                return Err(format!(
571                    "environment `{name}` not found (expected fdl.{name}.yml next to {})",
572                    base_path.display()
573                ));
574            }
575        }
576    }
577    // Dedup by canonical path, keeping first occurrence. An env overlay
578    // whose chain loops back to a file already in the base chain (same
579    // file via a different inheritance route) collapses cleanly.
580    let mut seen = std::collections::HashSet::new();
581    layers.retain(|(path, _)| seen.insert(path.clone()));
582    Ok(layers)
583}
584
585/// Source path list for a base config + env overlay, in merge order. Used
586/// by `fdl config show` to annotate which layer a value came from.
587pub fn config_layer_sources(base_path: &Path, env: Option<&str>) -> Vec<PathBuf> {
588    resolve_config_layers(base_path, env)
589        .map(|ls| ls.into_iter().map(|(p, _)| p).collect())
590        .unwrap_or_else(|_| vec![base_path.to_path_buf()])
591}
592
593/// Load a command config from a sub-directory.
594///
595/// Applies the same `.example`/`.dist` fallback as [`find_config`]. If a
596/// `schema:` block is present, validates it before returning.
597pub fn load_command(dir: &Path) -> Result<CommandConfig, String> {
598    load_command_with_env(dir, None)
599}
600
601/// Load a sub-command config with an optional environment overlay.
602///
603/// Applies the same `.example`/`.dist` fallback as [`find_config`] to locate
604/// the base file, then deep-merges a sibling `fdl.<env>.yml` overlay if one
605/// exists. A *missing* overlay is silently accepted here (different from
606/// [`load_project_with_env`]) — envs declared at the project root don't
607/// have to exist for every sub-command.
608pub fn load_command_with_env(dir: &Path, env: Option<&str>) -> Result<CommandConfig, String> {
609    // Resolve the base config path (with .example fallback, same as before).
610    let mut base_path: Option<PathBuf> = None;
611    for name in CONFIG_NAMES {
612        let path = dir.join(name);
613        if path.is_file() {
614            base_path = Some(path);
615            break;
616        }
617    }
618    if base_path.is_none() {
619        for name in CONFIG_NAMES {
620            for suffix in EXAMPLE_SUFFIXES {
621                let example = dir.join(format!("{name}{suffix}"));
622                if example.is_file() {
623                    let target = dir.join(name);
624                    let src = if try_copy_example(&example, &target) {
625                        target
626                    } else {
627                        example
628                    };
629                    base_path = Some(src);
630                    break;
631                }
632            }
633            if base_path.is_some() {
634                break;
635            }
636        }
637    }
638    let base_path = base_path
639        .ok_or_else(|| format!("no fdl.yml found in {}", dir.display()))?;
640
641    // Layered load: base chain + optional env overlay chain. Both sides
642    // run through `resolve_chain` so `inherit-from:` composes the same
643    // way for nested commands as for the project root.
644    let mut layers = crate::overlay::resolve_chain(&base_path)?;
645    if let Some(name) = env {
646        if let Some(p) = crate::overlay::find_env_file(&base_path, name) {
647            layers.extend(crate::overlay::resolve_chain(&p)?);
648        }
649    }
650    let mut seen = std::collections::HashSet::new();
651    layers.retain(|(path, _)| seen.insert(path.clone()));
652    let merged = crate::overlay::merge_layers(
653        layers.into_iter().map(|(_, v)| v).collect::<Vec<_>>(),
654    );
655    let mut cfg: CommandConfig = serde_yaml::from_value(merged)
656        .map_err(|e| format!("{}: {}", base_path.display(), e))?;
657
658    if let Some(schema) = &cfg.schema {
659        validate_schema(schema)
660            .map_err(|e| format!("schema error in {}/fdl.yml: {e}", dir.display()))?;
661        // Preset validation (choice values + strict unknown-key rejection)
662        // is intentionally deferred to the exec path. Load-time validation
663        // would block `fdl <cmd> --help` whenever ANY preset in the config
664        // has a typo — worse UX than letting help render and erroring only
665        // when the broken preset is actually invoked.
666    }
667
668    // Cache precedence: a valid, fresh cached schema (written by `fdl <cmd>
669    // --refresh-schema` or auto-probed below) wins over the inline YAML
670    // schema. This lets a binary become the source of truth for its own
671    // surface once it opts into the `--fdl-schema` contract. A cache that
672    // is older than the command's fdl.yml is treated as stale and skipped
673    // — the inline schema (if any) reasserts until a refresh happens.
674    let cmd_name = dir
675        .file_name()
676        .and_then(|n| n.to_str())
677        .unwrap_or("_");
678    let cache = crate::schema_cache::cache_path(dir, cmd_name);
679    // Reference mtimes: config files that, when edited, might invalidate
680    // the cached schema (e.g. changing `entry:` to point somewhere else).
681    let refs: Vec<std::path::PathBuf> = CONFIG_NAMES
682        .iter()
683        .map(|n| dir.join(n))
684        .filter(|p| p.exists())
685        .collect();
686    if !crate::schema_cache::is_stale(&cache, &refs) {
687        if let Some(cached) = crate::schema_cache::read_cache(&cache) {
688            cfg.schema = Some(cached);
689        }
690    } else if let Some(entry) = cfg.entry.as_deref() {
691        // Auto-probe non-cargo entries when the cache is stale or missing.
692        // Cargo entries are deliberately skipped — `cargo run --fdl-schema`
693        // triggers a full compile which is unacceptable latency for `-h`.
694        // Scripts and pre-built binaries are expected to handle the flag
695        // cheaply (emit JSON and exit), so probing them on demand is safe.
696        // Probe failures are swallowed: an entry that doesn't implement
697        // `--fdl-schema` simply falls through to the inline schema (or no
698        // schema) — help still renders.
699        if !crate::schema_cache::is_cargo_entry(entry) {
700            if let Ok(probed) = crate::schema_cache::probe(entry, dir) {
701                // Best-effort cache write: if the dir is read-only, the
702                // schema still applies to this invocation, we just re-probe
703                // next time. Non-fatal.
704                let _ = crate::schema_cache::write_cache(&cache, &probed);
705                cfg.schema = Some(probed);
706            }
707        }
708    }
709
710    Ok(cfg)
711}
712
713// ── Strict-mode validation ──────────────────────────────────────────────
714
/// Flags that strict mode always accepts in the user's tail.
///
/// They are either fdl-level universals (help/version) or opt-ins exposed
/// by every FdlArgs-derived binary (`--fdl-schema`). Since they are kept
/// out of `schema.options`, strict mode must allowlist them separately or
/// it would spuriously reject legal invocations.
///
/// Each entry is `(long, short, takes_value)`.
const STRICT_UNIVERSAL_LONGS: &[(&str, Option<char>, bool)] = &[
    ("help", Some('h'), false),
    ("version", Some('V'), false),
    ("fdl-schema", None, false),
    ("refresh-schema", None, false),
];
727
728/// Convert a [`Schema`] into an [`ArgsSpec`](crate::args::parser::ArgsSpec) suitable for strict-mode
729/// tail validation. Positional `required` flags are intentionally
730/// dropped: the binary itself will enforce them after parsing, and
731/// treating them as required here would turn "missing positional" into
732/// a double-errored mess.
733pub fn schema_to_args_spec(schema: &Schema) -> crate::args::parser::ArgsSpec {
734    use crate::args::parser::{ArgsSpec, OptionDecl, PositionalDecl};
735
736    let mut options: Vec<OptionDecl> = schema
737        .options
738        .iter()
739        .map(|(long, spec)| OptionDecl {
740            long: long.clone(),
741            short: spec
742                .short
743                .as_deref()
744                .and_then(|s| s.chars().next()),
745            takes_value: spec.ty != "bool",
746            // Every value-taking option is allowed to appear bare in
747            // strict mode. fdl does not second-guess whether the binary
748            // would accept a bare `--foo`; that stays in the binary's
749            // court.
750            allows_bare: true,
751            repeatable: spec.ty.starts_with("list["),
752            choices: spec
753                .choices
754                .as_ref()
755                .map(|cs| strict_choices_to_strings(cs)),
756        })
757        .collect();
758
759    // Always-allowed universals — help/version/fdl-schema/refresh-schema
760    // are not in the user's schema but must not trigger "unknown flag".
761    for (long, short, takes_value) in STRICT_UNIVERSAL_LONGS {
762        options.push(OptionDecl {
763            long: (*long).to_string(),
764            short: *short,
765            takes_value: *takes_value,
766            allows_bare: true,
767            repeatable: false,
768            choices: None,
769        });
770    }
771
772    // Positionals: drop the `required` bit. Strict mode is scoped to
773    // option names/values only; arity is the binary's concern.
774    let positionals: Vec<PositionalDecl> = schema
775        .args
776        .iter()
777        .map(|a| PositionalDecl {
778            name: a.name.clone(),
779            required: false,
780            variadic: a.variadic,
781            choices: a
782                .choices
783                .as_ref()
784                .map(|cs| strict_choices_to_strings(cs)),
785        })
786        .collect();
787
788    ArgsSpec {
789        options,
790        positionals,
791        // Non-strict schemas accept user-forwarded flags the author
792        // didn't declare — the binary re-parses the tail anyway.
793        // Strict schemas reject anything not declared.
794        lenient_unknowns: !schema.strict,
795    }
796}
797
798fn strict_choices_to_strings(cs: &[serde_json::Value]) -> Vec<String> {
799    cs.iter()
800        .map(|v| match v {
801            serde_json::Value::String(s) => s.clone(),
802            other => other.to_string(),
803        })
804        .collect()
805}
806
807/// Validate the user's extra argv tail against a schema. Always called
808/// before `run::exec_command` — the parser's lenient-unknowns mode is
809/// keyed off `schema.strict` so choice validation on declared flags
810/// fires regardless, while unknown-flag rejection stays opt-in.
811///
812/// The tokenizer from [`crate::args::parser`] is reused so "did you
813/// mean" suggestions, cluster, and equals handling come for free.
814pub fn validate_tail(tail: &[String], schema: &Schema) -> Result<(), String> {
815    let spec = schema_to_args_spec(schema);
816    let mut argv = Vec::with_capacity(tail.len() + 1);
817    argv.push("fdl".to_string());
818    argv.extend(tail.iter().cloned());
819    crate::args::parser::parse(&spec, &argv).map(|_| ())
820}
821
822/// Validate a single preset that's about to be invoked. Combines the
823/// always-on `choices:` check and, if `schema.strict`, the unknown-key
824/// rejection — scoped to just this preset, not the whole `commands:`
825/// map. Called from the exec path so typos in a sibling preset don't
826/// block `--help` for a correct one.
827pub fn validate_preset_for_exec(
828    preset_name: &str,
829    spec: &CommandSpec,
830    schema: &Schema,
831) -> Result<(), String> {
832    for (key, value) in &spec.options {
833        let Some(opt) = schema.options.get(key) else {
834            if schema.strict {
835                return Err(format!(
836                    "preset `{preset_name}` pins option `{key}` which is not declared in schema.options"
837                ));
838            }
839            continue;
840        };
841        let Some(choices) = &opt.choices else {
842            continue;
843        };
844        if !choices.iter().any(|c| values_equal(c, value)) {
845            let allowed: Vec<String> = choices
846                .iter()
847                .map(|c| match c {
848                    serde_json::Value::String(s) => s.clone(),
849                    other => other.to_string(),
850                })
851                .collect();
852            return Err(format!(
853                "preset `{preset_name}` sets option `{key}` to `{}` -- allowed: {}",
854                display_json(value),
855                allowed.join(", "),
856            ));
857        }
858    }
859    Ok(())
860}
861
862/// Always-on: validate preset YAML `options:` values against declared
863/// `choices:` in the schema. An option YAML value whose key matches a
864/// declared option with a `choices:` list must be one of those choices.
865/// Keys not declared in the schema are ignored here — those are the
866/// concern of [`validate_presets_strict`] (opt-in).
867///
868/// Used for whole-map validation (e.g. from a future `fdl config lint`
869/// subcommand). The dispatch path uses [`validate_preset_for_exec`] so
870/// sibling-preset typos don't block correct invocations.
871pub fn validate_preset_values(
872    commands: &BTreeMap<String, CommandSpec>,
873    schema: &Schema,
874) -> Result<(), String> {
875    for (preset_name, spec) in commands {
876        match spec.kind() {
877            Ok(CommandKind::Preset) => {}
878            _ => continue,
879        }
880        for (key, value) in &spec.options {
881            let Some(opt) = schema.options.get(key) else {
882                continue; // unknown key — strict's problem, not ours
883            };
884            let Some(choices) = &opt.choices else {
885                continue; // no choices declared — anything goes
886            };
887            if !choices.iter().any(|c| values_equal(c, value)) {
888                let allowed: Vec<String> = choices
889                    .iter()
890                    .map(|c| match c {
891                        serde_json::Value::String(s) => s.clone(),
892                        other => other.to_string(),
893                    })
894                    .collect();
895                return Err(format!(
896                    "preset `{preset_name}` sets option `{key}` to `{}` -- allowed: {}",
897                    display_json(value),
898                    allowed.join(", "),
899                ));
900            }
901        }
902    }
903    Ok(())
904}
905
906/// Compare two JSON values for equality, treating YAML's loose-typed
907/// representation (a preset might write `batch-size: 32` as an int
908/// while the schema's choices list contains `"32"` as a string).
909fn values_equal(a: &serde_json::Value, b: &serde_json::Value) -> bool {
910    if a == b {
911        return true;
912    }
913    // Cross-type string ↔ number comparison for YAML-friendly matching.
914    match (a, b) {
915        (serde_json::Value::String(s), other) | (other, serde_json::Value::String(s)) => {
916            s == &other.to_string()
917        }
918        _ => false,
919    }
920}
921
922fn display_json(v: &serde_json::Value) -> String {
923    match v {
924        serde_json::Value::String(s) => s.clone(),
925        other => other.to_string(),
926    }
927}
928
929/// At load time, reject preset `options:` keys that are not declared in
930/// the enclosing schema. Runs only when `schema.strict == true`, and
931/// only against entries resolved to [`CommandKind::Preset`] — `run:` and
932/// `path:` kinds don't share the parent schema.
933pub fn validate_presets_strict(
934    commands: &BTreeMap<String, CommandSpec>,
935    schema: &Schema,
936) -> Result<(), String> {
937    for (preset_name, spec) in commands {
938        match spec.kind() {
939            Ok(CommandKind::Preset) => {}
940            _ => continue,
941        }
942        for key in spec.options.keys() {
943            if !schema.options.contains_key(key) {
944                return Err(format!(
945                    "preset `{preset_name}` pins option `{key}` which is not declared in schema.options"
946                ));
947            }
948        }
949    }
950    Ok(())
951}
952
953// ── Merge ───────────────────────────────────────────────────────────────
954
955/// Merge the enclosing `CommandConfig` defaults with a named preset's
956/// overrides. Preset values win. Used when dispatching an inline preset
957/// command (neither `run` nor `path`).
958pub fn merge_preset(root: &CommandConfig, preset: &CommandSpec) -> ResolvedConfig {
959    ResolvedConfig {
960        ddp: merge_ddp(&root.ddp, &preset.ddp),
961        training: merge_training(&root.training, &preset.training),
962        output: merge_output(&root.output, &preset.output),
963        options: preset.options.clone(),
964    }
965}
966
967/// Resolved config from root defaults only (no job).
968pub fn defaults_only(root: &CommandConfig) -> ResolvedConfig {
969    ResolvedConfig {
970        ddp: root.ddp.clone().unwrap_or_default(),
971        training: root.training.clone().unwrap_or_default(),
972        output: root.output.clone().unwrap_or_default(),
973        options: BTreeMap::new(),
974    }
975}
976
977/// Fully resolved configuration ready for arg translation.
978pub struct ResolvedConfig {
979    pub ddp: DdpConfig,
980    pub training: TrainingConfig,
981    pub output: OutputConfig,
982    pub options: BTreeMap<String, serde_json::Value>,
983}
984
/// Resolve one optional field across two optional config sections.
///
/// `$base` and `$over` are `&Option<Section>` expressions and `$field`
/// names an `Option<_>` field of the section. Yields the override's value
/// when it is set, otherwise the base's value, otherwise `None`.
macro_rules! merge_field {
    ($base:expr, $over:expr, $field:ident) => {
        $over
            .as_ref()
            .and_then(|o| o.$field.as_ref())
            .or_else(|| $base.as_ref().and_then(|b| b.$field.as_ref()))
            // Only the value that was actually chosen gets cloned.
            .cloned()
    };
}
993
994fn merge_ddp(base: &Option<DdpConfig>, over: &Option<DdpConfig>) -> DdpConfig {
995    DdpConfig {
996        mode: merge_field!(base, over, mode),
997        policy: merge_field!(base, over, policy),
998        backend: merge_field!(base, over, backend),
999        anchor: merge_field!(base, over, anchor),
1000        max_anchor: merge_field!(base, over, max_anchor),
1001        overhead_target: merge_field!(base, over, overhead_target),
1002        divergence_threshold: merge_field!(base, over, divergence_threshold),
1003        max_batch_diff: merge_field!(base, over, max_batch_diff),
1004        speed_hint: merge_field!(base, over, speed_hint),
1005        partition_ratios: merge_field!(base, over, partition_ratios),
1006        progressive: merge_field!(base, over, progressive),
1007        max_grad_norm: merge_field!(base, over, max_grad_norm),
1008        lr_scale_ratio: merge_field!(base, over, lr_scale_ratio),
1009        snapshot_timeout: merge_field!(base, over, snapshot_timeout),
1010        checkpoint_every: merge_field!(base, over, checkpoint_every),
1011        timeline: merge_field!(base, over, timeline),
1012    }
1013}
1014
1015fn merge_training(base: &Option<TrainingConfig>, over: &Option<TrainingConfig>) -> TrainingConfig {
1016    TrainingConfig {
1017        epochs: merge_field!(base, over, epochs),
1018        batch_size: merge_field!(base, over, batch_size),
1019        batches_per_epoch: merge_field!(base, over, batches_per_epoch),
1020        lr: merge_field!(base, over, lr),
1021        seed: merge_field!(base, over, seed),
1022    }
1023}
1024
1025fn merge_output(base: &Option<OutputConfig>, over: &Option<OutputConfig>) -> OutputConfig {
1026    OutputConfig {
1027        dir: merge_field!(base, over, dir),
1028        timeline: merge_field!(base, over, timeline),
1029        monitor: merge_field!(base, over, monitor),
1030    }
1031}
1032
1033#[cfg(test)]
1034mod tests {
1035    use super::*;
1036
1037    /// Resolve the project root (where fdl.yml / fdl.yml.example live) starting
1038    /// from CARGO_MANIFEST_DIR. The CLI crate sits one level down.
1039    fn project_root() -> PathBuf {
1040        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
1041            .parent()
1042            .expect("flodl-cli parent must be project root")
1043            .to_path_buf()
1044    }
1045
1046    fn load_example() -> ProjectConfig {
1047        let path = project_root().join("fdl.yml.example");
1048        assert!(
1049            path.is_file(),
1050            "fdl.yml.example missing at {} -- the CLI depends on it as the canonical config template",
1051            path.display()
1052        );
1053        load_project(&path).expect("fdl.yml.example must parse as a valid ProjectConfig")
1054    }
1055
1056    fn opt(ty: &str) -> OptionSpec {
1057        OptionSpec {
1058            ty: ty.into(),
1059            description: None,
1060            default: None,
1061            choices: None,
1062            short: None,
1063            env: None,
1064            completer: None,
1065        }
1066    }
1067
1068    fn arg(name: &str, ty: &str) -> ArgSpec {
1069        ArgSpec {
1070            name: name.into(),
1071            ty: ty.into(),
1072            description: None,
1073            required: true,
1074            variadic: false,
1075            default: None,
1076            choices: None,
1077            completer: None,
1078        }
1079    }
1080
1081    #[test]
1082    fn validate_schema_accepts_minimal_valid() {
1083        let mut s = Schema::default();
1084        s.options.insert("model".into(), opt("string"));
1085        s.options.insert("epochs".into(), opt("int"));
1086        s.args.push(arg("run-id", "string"));
1087        validate_schema(&s).expect("minimal valid schema must pass");
1088    }
1089
1090    #[test]
1091    fn validate_schema_rejects_unknown_option_type() {
1092        let mut s = Schema::default();
1093        s.options.insert("bad".into(), opt("integer"));
1094        let err = validate_schema(&s).expect_err("unknown type should fail");
1095        assert!(err.contains("unknown type"), "err was: {err}");
1096    }
1097
1098    #[test]
1099    fn validate_schema_rejects_reserved_long() {
1100        let mut s = Schema::default();
1101        s.options.insert("help".into(), opt("bool"));
1102        let err = validate_schema(&s).expect_err("reserved --help must fail");
1103        assert!(err.contains("reserved"), "err was: {err}");
1104    }
1105
1106    #[test]
1107    fn validate_schema_rejects_reserved_short() {
1108        let mut s = Schema::default();
1109        let mut o = opt("string");
1110        o.short = Some("h".into());
1111        s.options.insert("host".into(), o);
1112        let err = validate_schema(&s).expect_err("short -h must fail");
1113        assert!(err.contains("reserved"), "err was: {err}");
1114    }
1115
1116    #[test]
1117    fn validate_schema_rejects_duplicate_short() {
1118        let mut s = Schema::default();
1119        let mut a = opt("string");
1120        a.short = Some("m".into());
1121        let mut b = opt("string");
1122        b.short = Some("m".into());
1123        s.options.insert("model".into(), a);
1124        s.options.insert("mode".into(), b);
1125        let err = validate_schema(&s).expect_err("duplicate -m must fail");
1126        assert!(err.contains("both declare short"), "err was: {err}");
1127    }
1128
1129    #[test]
1130    fn validate_schema_rejects_non_last_variadic() {
1131        let mut s = Schema::default();
1132        let mut first = arg("files", "string");
1133        first.variadic = true;
1134        s.args.push(first);
1135        s.args.push(arg("trailer", "string"));
1136        let err = validate_schema(&s).expect_err("variadic-not-last must fail");
1137        assert!(err.contains("variadic"), "err was: {err}");
1138    }
1139
1140    #[test]
1141    fn validate_schema_rejects_required_after_optional() {
1142        let mut s = Schema::default();
1143        let mut first = arg("maybe", "string");
1144        first.required = false;
1145        s.args.push(first);
1146        s.args.push(arg("need", "string"));
1147        let err = validate_schema(&s).expect_err("required-after-optional must fail");
1148        assert!(err.contains("cannot follow"), "err was: {err}");
1149    }
1150
1151    // ── Tail validation (always-on) + strict unknown-rejection ─────
1152
1153    fn schema_with_model_option(strict: bool) -> Schema {
1154        let mut s = Schema {
1155            strict,
1156            ..Schema::default()
1157        };
1158        let mut model = opt("string");
1159        model.short = Some("m".into());
1160        model.choices = Some(vec![
1161            serde_json::json!("mlp"),
1162            serde_json::json!("resnet"),
1163        ]);
1164        s.options.insert("model".into(), model);
1165        s.options.insert("epochs".into(), opt("int"));
1166        // A bool flag, no value.
1167        s.options.insert("validate".into(), opt("bool"));
1168        s
1169    }
1170
1171    fn strict_schema_with_model_option() -> Schema {
1172        schema_with_model_option(true)
1173    }
1174
1175    #[test]
1176    fn validate_tail_accepts_known_long_flag() {
1177        let schema = strict_schema_with_model_option();
1178        let tail = vec!["--epochs".into(), "3".into()];
1179        validate_tail(&tail, &schema).expect("known flag must pass");
1180    }
1181
1182    #[test]
1183    fn validate_tail_accepts_known_short_flag() {
1184        let schema = strict_schema_with_model_option();
1185        let tail = vec!["-m".into(), "mlp".into()];
1186        validate_tail(&tail, &schema).expect("known short must pass");
1187    }
1188
1189    #[test]
1190    fn validate_tail_accepts_bool_flag() {
1191        let schema = strict_schema_with_model_option();
1192        let tail = vec!["--validate".into()];
1193        validate_tail(&tail, &schema).expect("bool flag must pass");
1194    }
1195
1196    #[test]
1197    fn validate_tail_strict_rejects_unknown_long_flag() {
1198        let schema = strict_schema_with_model_option();
1199        let tail = vec!["--nope".into()];
1200        let err = validate_tail(&tail, &schema)
1201            .expect_err("unknown long flag must error in strict mode");
1202        assert!(err.contains("--nope"), "err was: {err}");
1203    }
1204
1205    #[test]
1206    fn validate_tail_strict_suggests_did_you_mean() {
1207        // "--epoch" is one char off "--epochs" — edit distance ≤ 2.
1208        let schema = strict_schema_with_model_option();
1209        let tail = vec!["--epoch".into(), "3".into()];
1210        let err = validate_tail(&tail, &schema).expect_err("typo must error");
1211        assert!(err.contains("did you mean"), "err was: {err}");
1212        assert!(err.contains("--epochs"), "suggestion missing: {err}");
1213    }
1214
1215    #[test]
1216    fn validate_tail_strict_rejects_unknown_short_flag() {
1217        let schema = strict_schema_with_model_option();
1218        let tail = vec!["-z".into()];
1219        let err = validate_tail(&tail, &schema)
1220            .expect_err("unknown short must error in strict mode");
1221        assert!(err.contains("-z"), "err was: {err}");
1222    }
1223
1224    #[test]
1225    fn validate_tail_rejects_bad_choice_always_strict() {
1226        let schema = strict_schema_with_model_option();
1227        let tail = vec!["--model".into(), "lenet".into()];
1228        let err = validate_tail(&tail, &schema)
1229            .expect_err("out-of-set choice must error");
1230        assert!(err.contains("lenet"), "err was: {err}");
1231        assert!(err.contains("allowed"), "err should list allowed values: {err}");
1232    }
1233
1234    #[test]
1235    fn validate_tail_rejects_bad_choice_even_when_not_strict() {
1236        // The main change in this rollout: `choices:` is a positive
1237        // assertion by the author, so it must be enforced regardless
1238        // of `schema.strict`. Only *unknown* flags relax without
1239        // strict.
1240        let schema = schema_with_model_option(false);
1241        let tail = vec!["--model".into(), "lenet".into()];
1242        let err = validate_tail(&tail, &schema)
1243            .expect_err("out-of-set choice must error without strict");
1244        assert!(err.contains("lenet"), "err was: {err}");
1245        assert!(err.contains("allowed"), "err should list allowed values: {err}");
1246    }
1247
1248    #[test]
1249    fn validate_tail_non_strict_tolerates_unknown_flag() {
1250        // Without strict, unknown flags are legitimate pass-through
1251        // candidates (the binary handles them itself).
1252        let schema = schema_with_model_option(false);
1253        let tail = vec!["--fancy-passthrough".into(), "value".into()];
1254        validate_tail(&tail, &schema)
1255            .expect("unknown flag must be tolerated when strict is off");
1256    }
1257
1258    #[test]
1259    fn validate_tail_non_strict_still_checks_known_short_choices() {
1260        // The declared short `-m` has choices; a bad value fails even
1261        // when strict is off. Unknown options would be tolerated, but
1262        // once the user reaches a declared option, its contract holds.
1263        let schema = schema_with_model_option(false);
1264        let tail = vec!["-m".into(), "lenet".into()];
1265        let err = validate_tail(&tail, &schema)
1266            .expect_err("out-of-set choice via short must error");
1267        assert!(err.contains("lenet"), "err was: {err}");
1268    }
1269
1270    #[test]
1271    fn validate_tail_allows_reserved_help() {
1272        // Reserved universal flags must pass even though they are not
1273        // declared in the schema. Defense-in-depth against edge cases
1274        // where `--help` somehow reaches dispatch.
1275        let schema = strict_schema_with_model_option();
1276        let tail = vec!["--help".into()];
1277        validate_tail(&tail, &schema).expect("--help must be allowed");
1278    }
1279
1280    #[test]
1281    fn validate_tail_allows_reserved_fdl_schema() {
1282        // `fdl ddp-bench --fdl-schema` is forwarded to the binary.
1283        let schema = strict_schema_with_model_option();
1284        let tail = vec!["--fdl-schema".into()];
1285        validate_tail(&tail, &schema).expect("--fdl-schema must be allowed");
1286    }
1287
1288    #[test]
1289    fn validate_tail_passthrough_after_double_dash() {
1290        // `--` terminates flag parsing. Tokens after it are positionals
1291        // and must never trigger "unknown flag" errors.
1292        let schema = strict_schema_with_model_option();
1293        let tail = vec!["--".into(), "--arbitrary".into(), "anything".into()];
1294        validate_tail(&tail, &schema).expect("passthrough must work");
1295    }
1296
1297    #[test]
1298    fn validate_presets_strict_rejects_unknown_option() {
1299        let schema = strict_schema_with_model_option();
1300        let mut commands = BTreeMap::new();
1301        let mut bad_options = BTreeMap::new();
1302        bad_options.insert("batchsize".into(), serde_json::json!(32));
1303        commands.insert(
1304            "quick".into(),
1305            CommandSpec {
1306                options: bad_options,
1307                ..Default::default()
1308            },
1309        );
1310        let err = validate_presets_strict(&commands, &schema)
1311            .expect_err("preset pinning undeclared option must error");
1312        assert!(err.contains("quick"), "err should name the preset: {err}");
1313        assert!(err.contains("batchsize"), "err should name the key: {err}");
1314    }
1315
1316    #[test]
1317    fn validate_presets_strict_accepts_known_options() {
1318        let schema = strict_schema_with_model_option();
1319        let mut commands = BTreeMap::new();
1320        let mut good_options = BTreeMap::new();
1321        good_options.insert("model".into(), serde_json::json!("mlp"));
1322        good_options.insert("epochs".into(), serde_json::json!(5));
1323        commands.insert(
1324            "quick".into(),
1325            CommandSpec {
1326                options: good_options,
1327                ..Default::default()
1328            },
1329        );
1330        validate_presets_strict(&commands, &schema)
1331            .expect("presets with declared options must pass");
1332    }
1333
1334    #[test]
1335    fn validate_presets_strict_ignores_run_and_path_kinds() {
1336        // Only Preset-kind entries share the parent schema. Run/Path
1337        // siblings are independent, so strict must not touch them.
1338        let schema = strict_schema_with_model_option();
1339        let mut commands = BTreeMap::new();
1340        commands.insert(
1341            "helper".into(),
1342            CommandSpec {
1343                run: Some("echo hi".into()),
1344                ..Default::default()
1345            },
1346        );
1347        commands.insert(
1348            "nested".into(),
1349            CommandSpec {
1350                path: Some("./nested/".into()),
1351                ..Default::default()
1352            },
1353        );
1354        validate_presets_strict(&commands, &schema)
1355            .expect("run/path siblings must be ignored by preset strict check");
1356    }
1357
1358    // ── Preset value validation (always-on `choices:`) ──────────────
1359
1360    #[test]
1361    fn validate_preset_values_rejects_bad_choice_even_without_strict() {
1362        // Schema has `choices:` on model; a preset pinning model to
1363        // something outside the list must fail at load, strict or not.
1364        let schema = schema_with_model_option(false);
1365        let mut commands = BTreeMap::new();
1366        let mut opts = BTreeMap::new();
1367        opts.insert("model".into(), serde_json::json!("lenet"));
1368        commands.insert(
1369            "quick".into(),
1370            CommandSpec {
1371                options: opts,
1372                ..Default::default()
1373            },
1374        );
1375        let err = validate_preset_values(&commands, &schema)
1376            .expect_err("out-of-choices preset must error");
1377        assert!(err.contains("quick"), "preset name missing: {err}");
1378        assert!(err.contains("model"), "option name missing: {err}");
1379        assert!(err.contains("lenet"), "bad value missing: {err}");
1380        assert!(err.contains("allowed"), "allowed list missing: {err}");
1381    }
1382
1383    #[test]
1384    fn validate_preset_values_accepts_in_choices_preset() {
1385        let schema = schema_with_model_option(false);
1386        let mut commands = BTreeMap::new();
1387        let mut opts = BTreeMap::new();
1388        opts.insert("model".into(), serde_json::json!("mlp"));
1389        commands.insert(
1390            "quick".into(),
1391            CommandSpec {
1392                options: opts,
1393                ..Default::default()
1394            },
1395        );
1396        validate_preset_values(&commands, &schema)
1397            .expect("in-choices preset must pass");
1398    }
1399
1400    #[test]
1401    fn validate_preset_values_ignores_undeclared_keys() {
1402        // Unknown keys aren't our concern here — that's for
1403        // `validate_presets_strict`, which only runs under strict.
1404        let schema = schema_with_model_option(false);
1405        let mut commands = BTreeMap::new();
1406        let mut opts = BTreeMap::new();
1407        opts.insert("extra".into(), serde_json::json!("whatever"));
1408        commands.insert(
1409            "quick".into(),
1410            CommandSpec {
1411                options: opts,
1412                ..Default::default()
1413            },
1414        );
1415        validate_preset_values(&commands, &schema)
1416            .expect("undeclared key must be ignored by value validator");
1417    }
1418
1419    #[test]
1420    fn validate_preset_values_ignores_options_without_choices() {
1421        // `epochs` is declared as int with no `choices:`, so any value
1422        // passes the choice check (type validation is a separate pass).
1423        let schema = schema_with_model_option(false);
1424        let mut commands = BTreeMap::new();
1425        let mut opts = BTreeMap::new();
1426        opts.insert("epochs".into(), serde_json::json!(999));
1427        commands.insert(
1428            "quick".into(),
1429            CommandSpec {
1430                options: opts,
1431                ..Default::default()
1432            },
1433        );
1434        validate_preset_values(&commands, &schema)
1435            .expect("no-choices option must accept any value");
1436    }
1437
1438    #[test]
1439    fn validate_schema_rejects_required_with_default() {
1440        let mut s = Schema::default();
1441        let mut a = arg("x", "string");
1442        a.default = Some(serde_json::json!("foo"));
1443        s.args.push(a);
1444        let err = validate_schema(&s).expect_err("required+default must fail");
1445        assert!(err.contains("contradiction"), "err was: {err}");
1446    }
1447
1448    /// Regression guard: fdl.yml.example must keep a working `doc` command.
1449    /// The fdl.doc pipeline (api-ref for the port skill, rustdoc warning
1450    /// enforcement in CI) depends on this entry existing and producing output.
1451    #[test]
1452    fn fdl_yml_example_has_doc_script() {
1453        let cfg = load_example();
1454        let doc = cfg.commands.get("doc").unwrap_or_else(|| {
1455            panic!(
1456                "fdl.yml.example is missing a `doc` command; the rustdoc pipeline \
1457                 depends on `fdl doc` being defined"
1458            )
1459        });
1460        let cmd = doc
1461            .run
1462            .as_deref()
1463            .expect("fdl.yml.example `doc` command must be a `run:` entry");
1464        assert!(
1465            !cmd.trim().is_empty(),
1466            "fdl.yml.example `doc` command has an empty `run:` command"
1467        );
1468        assert!(
1469            cmd.contains("cargo doc"),
1470            "fdl.yml.example `doc` command must invoke `cargo doc`, got: {cmd}"
1471        );
1472        // Must assert some output was produced -- otherwise rustdoc can
1473        // silently succeed without writing anything useful (e.g. when the
1474        // target crate fails to resolve). Keeping the exact check liberal:
1475        // any mention of target/doc as a produced artifact counts.
1476        assert!(
1477            cmd.contains("target/doc"),
1478            "fdl.yml.example `doc` command must verify output was produced \
1479             (expected a `test -f target/doc/...` check), got: {cmd}"
1480        );
1481    }
1482
1483    #[test]
1484    fn command_spec_kind_mutex_run_and_path() {
1485        let spec = CommandSpec {
1486            run: Some("echo".into()),
1487            path: Some("x/".into()),
1488            ..Default::default()
1489        };
1490        let err = spec.kind().expect_err("run + path must fail");
1491        assert!(err.contains("both"), "err was: {err}");
1492    }
1493
1494    #[test]
1495    fn command_spec_kind_path_convention() {
1496        let spec = CommandSpec::default();
1497        assert_eq!(spec.kind().unwrap(), CommandKind::Path);
1498    }
1499
1500    #[test]
1501    fn command_spec_kind_preset_when_preset_fields_set() {
1502        let spec = CommandSpec {
1503            training: Some(TrainingConfig {
1504                epochs: Some(1),
1505                ..Default::default()
1506            }),
1507            ..Default::default()
1508        };
1509        assert_eq!(spec.kind().unwrap(), CommandKind::Preset);
1510    }
1511
1512    #[test]
1513    fn command_spec_kind_preset_when_only_options_set() {
1514        // `options:` alone is enough to make a preset — not every preset
1515        // overrides the structured ddp/training/output blocks.
1516        let mut options = BTreeMap::new();
1517        options.insert("model".into(), serde_json::json!("linear"));
1518        let spec = CommandSpec {
1519            options,
1520            ..Default::default()
1521        };
1522        assert_eq!(spec.kind().unwrap(), CommandKind::Preset);
1523    }
1524
1525    #[test]
1526    fn command_spec_kind_path_explicit() {
1527        // Explicit `path:` is a Path even if preset fields are also set;
1528        // the presence of `path:` is the kind-selecting field.
1529        let spec = CommandSpec {
1530            path: Some("./sub/".into()),
1531            ..Default::default()
1532        };
1533        assert_eq!(spec.kind().unwrap(), CommandKind::Path);
1534    }
1535
1536    #[test]
1537    fn command_spec_kind_rejects_docker_without_run() {
1538        // `docker:` is meaningful only as a wrapper around an inline
1539        // `run:` script. Pairing it with path/preset is a silent noop
1540        // at dispatch time, so we reject at load.
1541        let spec = CommandSpec {
1542            docker: Some("cuda".into()),
1543            ..Default::default()
1544        };
1545        let err = spec
1546            .kind()
1547            .expect_err("docker without run must fail");
1548        assert!(err.contains("docker"), "err was: {err}");
1549    }
1550
1551    #[test]
1552    fn command_spec_kind_allows_docker_with_run() {
1553        let spec = CommandSpec {
1554            run: Some("cargo test".into()),
1555            docker: Some("dev".into()),
1556            ..Default::default()
1557        };
1558        assert_eq!(spec.kind().unwrap(), CommandKind::Run);
1559    }
1560
1561    #[test]
1562    fn command_spec_deserialize_from_null() {
1563        let yaml = "cmd: ~";
1564        let map: BTreeMap<String, CommandSpec> =
1565            serde_yaml::from_str(yaml).expect("null must deserialize to default");
1566        let spec = map.get("cmd").expect("cmd missing");
1567        assert!(spec.run.is_none() && spec.path.is_none());
1568        assert_eq!(spec.kind().unwrap(), CommandKind::Path);
1569    }
1570
1571    #[test]
1572    fn command_config_arg_name_deserializes_kebab_case() {
1573        // YAML uses `arg-name:`, Rust field is `arg_name`.
1574        let yaml = "arg-name: recipe\nentry: echo\n";
1575        let cfg: CommandConfig =
1576            serde_yaml::from_str(yaml).expect("arg-name must parse");
1577        assert_eq!(cfg.arg_name.as_deref(), Some("recipe"));
1578    }
1579
1580    #[test]
1581    fn command_config_arg_name_defaults_to_none() {
1582        let cfg: CommandConfig =
1583            serde_yaml::from_str("entry: echo\n").expect("minimal cfg must parse");
1584        assert!(cfg.arg_name.is_none());
1585    }
1586
1587    // ── resolve_config_layers: inherit-from + env composition ────────────
1588    //
1589    // Integration coverage for how `inherit-from:` chains compose with env
1590    // overlays at the config-module boundary. The overlay module already
1591    // tests `resolve_chain` in isolation; here we verify the concat+dedup
1592    // behaviour that config.rs layers on top.
1593
1594    /// Minimal tempdir helper — matches the pattern used across the crate.
1595    struct TempDir(PathBuf);
1596    impl TempDir {
1597        fn new() -> Self {
1598            use std::sync::atomic::{AtomicU64, Ordering};
1599            static N: AtomicU64 = AtomicU64::new(0);
1600            let dir = std::env::temp_dir().join(format!(
1601                "fdl-cfg-test-{}-{}",
1602                std::process::id(),
1603                N.fetch_add(1, Ordering::Relaxed)
1604            ));
1605            std::fs::create_dir_all(&dir).unwrap();
1606            Self(dir)
1607        }
1608    }
1609    impl Drop for TempDir {
1610        fn drop(&mut self) {
1611            let _ = std::fs::remove_dir_all(&self.0);
1612        }
1613    }
1614
1615    fn filenames(layers: &[(PathBuf, serde_yaml::Value)]) -> Vec<String> {
1616        layers
1617            .iter()
1618            .map(|(p, _)| {
1619                p.file_name()
1620                    .and_then(|n| n.to_str())
1621                    .unwrap_or("?")
1622                    .to_string()
1623            })
1624            .collect()
1625    }
1626
1627    #[test]
1628    fn resolve_config_layers_base_only() {
1629        let tmp = TempDir::new();
1630        let base = tmp.0.join("fdl.yml");
1631        std::fs::write(&base, "a: 1\n").unwrap();
1632        let layers = resolve_config_layers(&base, None).unwrap();
1633        assert_eq!(filenames(&layers), vec!["fdl.yml"]);
1634    }
1635
1636    #[test]
1637    fn resolve_config_layers_base_with_env_overlay() {
1638        let tmp = TempDir::new();
1639        let base = tmp.0.join("fdl.yml");
1640        let env = tmp.0.join("fdl.ci.yml");
1641        std::fs::write(&base, "a: 1\n").unwrap();
1642        std::fs::write(&env, "b: 2\n").unwrap();
1643        let layers = resolve_config_layers(&base, Some("ci")).unwrap();
1644        assert_eq!(filenames(&layers), vec!["fdl.yml", "fdl.ci.yml"]);
1645    }
1646
1647    #[test]
1648    fn resolve_config_layers_env_inherits_from_mixin() {
1649        // fdl.ci.yml inherits from fdl.cloud.yml (standalone mix-in, not
1650        // derived from base). Combined chain: [base, cloud, ci].
1651        let tmp = TempDir::new();
1652        let base = tmp.0.join("fdl.yml");
1653        let cloud = tmp.0.join("fdl.cloud.yml");
1654        let ci = tmp.0.join("fdl.ci.yml");
1655        std::fs::write(&base, "a: 1\n").unwrap();
1656        std::fs::write(&cloud, "b: 2\n").unwrap();
1657        std::fs::write(&ci, "inherit-from: fdl.cloud.yml\nc: 3\n").unwrap();
1658        let layers = resolve_config_layers(&base, Some("ci")).unwrap();
1659        assert_eq!(
1660            filenames(&layers),
1661            vec!["fdl.yml", "fdl.cloud.yml", "fdl.ci.yml"]
1662        );
1663    }
1664
1665    #[test]
1666    fn resolve_config_layers_dedups_when_env_inherits_from_base() {
1667        // fdl.ci.yml inherits from fdl.yml directly. Base is already in
1668        // the layer list, so env's chain collapses into it — the final
1669        // list must not have fdl.yml twice.
1670        let tmp = TempDir::new();
1671        let base = tmp.0.join("fdl.yml");
1672        let ci = tmp.0.join("fdl.ci.yml");
1673        std::fs::write(&base, "a: 1\n").unwrap();
1674        std::fs::write(&ci, "inherit-from: fdl.yml\nb: 2\n").unwrap();
1675        let layers = resolve_config_layers(&base, Some("ci")).unwrap();
1676        assert_eq!(filenames(&layers), vec!["fdl.yml", "fdl.ci.yml"]);
1677    }
1678
1679    #[test]
1680    fn resolve_config_layers_merged_value_matches_chain() {
1681        // End-to-end: the merge result should reflect the chain order
1682        // (base < cloud < ci), with each subsequent layer overriding.
1683        let tmp = TempDir::new();
1684        let base = tmp.0.join("fdl.yml");
1685        let cloud = tmp.0.join("fdl.cloud.yml");
1686        let ci = tmp.0.join("fdl.ci.yml");
1687        std::fs::write(&base, "value: base\nkeep_base: yes\n").unwrap();
1688        std::fs::write(&cloud, "value: cloud\nkeep_cloud: yes\n").unwrap();
1689        std::fs::write(
1690            &ci,
1691            "inherit-from: fdl.cloud.yml\nvalue: ci\nkeep_ci: yes\n",
1692        )
1693        .unwrap();
1694        let merged = load_merged_value(&base, Some("ci")).unwrap();
1695        let m = merged.as_mapping().unwrap();
1696        // Last writer wins on `value`.
1697        assert_eq!(
1698            m.get(serde_yaml::Value::String("value".into())).unwrap(),
1699            &serde_yaml::Value::String("ci".into())
1700        );
1701        // Each layer's unique key survives.
1702        assert!(m.contains_key(serde_yaml::Value::String("keep_base".into())));
1703        assert!(m.contains_key(serde_yaml::Value::String("keep_cloud".into())));
1704        assert!(m.contains_key(serde_yaml::Value::String("keep_ci".into())));
1705    }
1706
1707    #[test]
1708    fn resolve_config_layers_missing_env_errors() {
1709        let tmp = TempDir::new();
1710        let base = tmp.0.join("fdl.yml");
1711        std::fs::write(&base, "a: 1\n").unwrap();
1712        let err = resolve_config_layers(&base, Some("nope")).unwrap_err();
1713        assert!(err.contains("nope"));
1714        assert!(err.contains("not found"));
1715    }
1716
1717    #[test]
1718    fn resolve_config_layers_base_inherit_from_chain() {
1719        // Base itself uses inherit-from: shared-defaults.yml. The
1720        // defaults live in a sibling file and are merged UNDER the base.
1721        let tmp = TempDir::new();
1722        let defaults = tmp.0.join("shared.yml");
1723        let base = tmp.0.join("fdl.yml");
1724        std::fs::write(&defaults, "policy: default\n").unwrap();
1725        std::fs::write(&base, "inherit-from: shared.yml\npolicy: override\n").unwrap();
1726        let layers = resolve_config_layers(&base, None).unwrap();
1727        assert_eq!(filenames(&layers), vec!["shared.yml", "fdl.yml"]);
1728    }
1729
1730    #[test]
1731    fn load_command_auto_probes_non_cargo_entry_and_writes_cache() {
1732        // Script-kind entry + missing cache: load_command should invoke
1733        // `<entry> --fdl-schema`, apply the result to cfg.schema, and
1734        // write it to .fdl/schema-cache/<name>.json for next time.
1735        let tmp = TempDir::new();
1736        let cmd_dir = tmp.0.join("mybench");
1737        std::fs::create_dir_all(&cmd_dir).unwrap();
1738
1739        let script = cmd_dir.join("emit.sh");
1740        let body = "#!/bin/sh\n\
1741                    if [ \"$1\" = \"--fdl-schema\" ]; then\n\
1742                      cat <<'JSON'\n\
1743                    { \"options\": { \"rounds\": { \"type\": \"int\", \"description\": \"N\" } } }\n\
1744                    JSON\n\
1745                      exit 0\n\
1746                    fi\n";
1747        std::fs::write(&script, body).unwrap();
1748        #[cfg(unix)]
1749        {
1750            use std::os::unix::fs::PermissionsExt;
1751            std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();
1752        }
1753
1754        std::fs::write(cmd_dir.join("fdl.yml"), "entry: sh emit.sh\n").unwrap();
1755
1756        let cfg = load_command(&cmd_dir).expect("load ok");
1757        let schema = cfg.schema.expect("auto-probe must populate schema");
1758        assert!(schema.options.contains_key("rounds"));
1759
1760        // Second load reads the freshly-written cache (same content).
1761        let cached_path = crate::schema_cache::cache_path(&cmd_dir, "mybench");
1762        assert!(cached_path.is_file(), "cache file should exist");
1763    }
1764
1765    #[test]
1766    fn load_command_skips_auto_probe_for_cargo_entries() {
1767        // Cargo entries are deliberately not probed — a `cargo run
1768        // --fdl-schema` would compile the whole crate before help
1769        // renders. Missing cache + cargo entry ⇒ no schema, help
1770        // still renders (just without options).
1771        let tmp = TempDir::new();
1772        let cmd_dir = tmp.0.join("cargo-cmd");
1773        std::fs::create_dir_all(&cmd_dir).unwrap();
1774        std::fs::write(cmd_dir.join("fdl.yml"), "entry: cargo run --\n").unwrap();
1775
1776        let cfg = load_command(&cmd_dir).expect("load ok");
1777        assert!(
1778            cfg.schema.is_none(),
1779            "cargo entry must not be auto-probed (compile latency would ruin --help)"
1780        );
1781        let cached = crate::schema_cache::cache_path(&cmd_dir, "cargo-cmd");
1782        assert!(!cached.exists(), "no cache should be written for cargo entries");
1783    }
1784
1785    #[test]
1786    fn load_command_auto_probe_failure_falls_through_silently() {
1787        // An entry that ignores --fdl-schema (or errors) must not break
1788        // help rendering. cfg.schema stays None, no cache written.
1789        let tmp = TempDir::new();
1790        let cmd_dir = tmp.0.join("silent");
1791        std::fs::create_dir_all(&cmd_dir).unwrap();
1792        // `/bin/true` ignores any args and exits 0 with empty stdout; probe
1793        // will reject "no JSON object" and Err — we want that swallowed.
1794        // Quoted so YAML doesn't parse the bareword `true` as a boolean.
1795        std::fs::write(cmd_dir.join("fdl.yml"), "entry: \"/bin/true\"\n").unwrap();
1796
1797        let cfg = load_command(&cmd_dir).expect("load must succeed despite probe error");
1798        assert!(cfg.schema.is_none());
1799    }
1800}