Skip to main content

flodl_cli/
schema_cache.rs

1//! `--fdl-schema` binary contract: probe, validate, and cache.
2//!
3//! A sub-command binary that opts into the contract exposes a single
4//! `--fdl-schema` flag printing a JSON schema describing its CLI surface.
5//! `flodl-cli` caches the output under `<cmd_dir>/.fdl/schema-cache/<cmd>.json`
6//! and prefers it over any inline YAML schema declared in `fdl.yaml`.
7//!
8//! **Cargo entries** (`entry: cargo run ...`) are *not* auto-probed: invoking
9//! them forces a full compile, which is unacceptable latency for `fdl --help`.
10//! For those, users run `fdl <cmd> --refresh-schema` explicitly after a build.
11//!
12//! Cache invalidation is mtime-based: the cache file's mtime is compared
13//! against `fdl.yml` in the command dir. A cache older than its fdl.yml is
14//! considered stale. Users can also force-refresh.
15//!
16//! See `docs/design/run-config.md` — "2. Option schemas and the `--fdl-schema`
17//! contract" — for the JSON shape.
18
19use std::fs;
20use std::path::{Path, PathBuf};
21use std::process::{Command, Stdio};
22use std::time::SystemTime;
23
24use crate::config::{self, Schema};
25
/// Location of the schema cache directory, expressed relative to a
/// command dir.
const CACHE_DIR: &str = ".fdl/schema-cache";

/// Build the cache file path for `cmd_name` under `cmd_dir`, i.e.
/// `<cmd_dir>/.fdl/schema-cache/<cmd_name>.json`.
///
/// Pure path arithmetic: nothing on disk is read or created.
pub fn cache_path(cmd_dir: &Path, cmd_name: &str) -> PathBuf {
    let mut target = cmd_dir.join(CACHE_DIR);
    target.push(format!("{cmd_name}.json"));
    target
}
33
34/// Read a schema cache file, returning `Some` only if it parses cleanly
35/// and survives validation. Parse or validation errors are treated as
36/// "no cache" (the caller falls through to the inline/YAML schema).
37pub fn read_cache(path: &Path) -> Option<Schema> {
38    let content = fs::read_to_string(path).ok()?;
39    let schema: Schema = serde_json::from_str(&content).ok()?;
40    config::validate_schema(&schema).ok()?;
41    Some(schema)
42}
43
44/// Consider a cache "stale" if it is older than the command's fdl.yml
45/// (config changes), or older than a sentinel binary path when supplied.
46///
47/// Missing cache ⇒ stale (return true). Missing reference mtime ⇒ treat
48/// the cache as fresh (conservative: don't refresh what we can't justify).
49pub fn is_stale(cache: &Path, reference_mtimes: &[PathBuf]) -> bool {
50    let Some(cache_mtime) = mtime(cache) else {
51        return true;
52    };
53    reference_mtimes
54        .iter()
55        .filter_map(|p| mtime(p))
56        .any(|ref_m| ref_m > cache_mtime)
57}
58
/// Last-modification time of `path`, or `None` when its metadata cannot
/// be read or the modification time is unavailable.
fn mtime(path: &Path) -> Option<SystemTime> {
    fs::metadata(path).and_then(|meta| meta.modified()).ok()
}
62
63/// Serialize a schema to the cache file, creating parent dirs as needed.
64pub fn write_cache(path: &Path, schema: &Schema) -> Result<(), String> {
65    if let Some(parent) = path.parent() {
66        fs::create_dir_all(parent)
67            .map_err(|e| format!("cannot create {}: {}", parent.display(), e))?;
68    }
69    let json = serde_json::to_string_pretty(schema)
70        .map_err(|e| format!("schema serialize: {e}"))?;
71    fs::write(path, json).map_err(|e| format!("cannot write {}: {}", path.display(), e))
72}
73
74/// Probe a binary for its schema by running `<entry> --fdl-schema` via the
75/// shell and parsing stdout as JSON.
76///
77/// `cmd_dir` is the directory containing the `fdl.yml` that declared the
78/// entry — it serves as the cwd for the shell unless the entry is wrapped
79/// through docker (then the wrap walks up to the nearest
80/// `docker-compose.yml` for compose's cwd).
81///
82/// `docker_service` carries the `docker:` field from the resolved
83/// command config. When set AND we're not already inside a container,
84/// the invocation is wrapped as
85/// `docker compose run --rm <service> bash -c '<entry> --fdl-schema'`
86/// so cargo entries that need libtorch get probed inside the dev
87/// container instead of failing silently on the host. When unset, the
88/// entry runs directly on the host.
89///
90/// On failure returns a string error rather than panicking — callers
91/// almost always want to fall back to the inline schema (or none).
92pub fn probe(entry: &str, cmd_dir: &Path, docker_service: Option<&str>) -> Result<Schema, String> {
93    if entry.trim().is_empty() {
94        return Err("entry is empty".into());
95    }
96
97    let inner = format!("{entry} --fdl-schema");
98    let (invocation, run_cwd) = match docker_service {
99        Some(svc) if !inside_docker() => {
100            let compose_root = find_docker_compose_root(cmd_dir).ok_or_else(|| {
101                format!(
102                    "cannot probe schema: docker:{svc} declared but no \
103                     docker-compose.yml found above {}",
104                    cmd_dir.display()
105                )
106            })?;
107            let wrapped = format!(
108                "docker compose run --rm {svc} bash -c {}",
109                posix_quote(&inner)
110            );
111            (wrapped, compose_root)
112        }
113        _ => (inner, cmd_dir.to_path_buf()),
114    };
115
116    let (shell, flag) = if cfg!(target_os = "windows") {
117        ("cmd", "/C")
118    } else {
119        ("sh", "-c")
120    };
121    let output = Command::new(shell)
122        .args([flag, &invocation])
123        .current_dir(&run_cwd)
124        .stdout(Stdio::piped())
125        .stderr(Stdio::piped())
126        .output()
127        .map_err(|e| format!("spawn `{invocation}`: {e}"))?;
128
129    if !output.status.success() {
130        let stderr = String::from_utf8_lossy(&output.stderr);
131        return Err(format!(
132            "`{invocation}` exited with {}: {}",
133            output.status,
134            stderr.trim()
135        ));
136    }
137
138    // Tolerate leading lines of cargo chatter by locating the first `{`.
139    let stdout = String::from_utf8_lossy(&output.stdout);
140    let start = stdout
141        .find('{')
142        .ok_or_else(|| "no JSON object in --fdl-schema output".to_string())?;
143    let schema: Schema = serde_json::from_str(&stdout[start..])
144        .map_err(|e| format!("--fdl-schema did not emit valid JSON: {e}"))?;
145    config::validate_schema(&schema)
146        .map_err(|e| format!("--fdl-schema output failed validation: {e}"))?;
147    Ok(schema)
148}
149
/// Heuristic for "this entry goes through cargo": after skipping leading
/// whitespace, the entry begins with the literal `cargo ` (trailing
/// space included).
///
/// Cargo entries compile-on-run, so they are never auto-probed; probing
/// them must be explicit (`fdl <cmd> --refresh-schema`).
pub fn is_cargo_entry(entry: &str) -> bool {
    entry.trim_start().strip_prefix("cargo ").is_some()
}
155
/// Report whether this process appears to run inside a Docker container,
/// judged by whether metadata for `/.dockerenv` can be read. Mirrors the
/// `/.dockerenv` heuristic used elsewhere in the crate.
fn inside_docker() -> bool {
    fs::metadata("/.dockerenv").is_ok()
}
161
/// Climb from `start` looking for a directory containing
/// `docker-compose.yml` (the compose root used as cwd for `docker
/// compose` invocations).
///
/// `start` itself is checked first, then each parent in turn. A relative
/// `start` is anchored to the process's current directory before
/// climbing, so the search can continue above the cwd and the result is
/// a path that is usable as a working directory. If the current
/// directory cannot be determined, the relative path is searched as
/// given.
///
/// Returns `None` if no such directory is found before hitting the
/// filesystem root.
fn find_docker_compose_root(start: &Path) -> Option<PathBuf> {
    // Without anchoring, a relative `start` stops climbing at the cwd,
    // and a hit in the cwd itself would be reported as the empty path,
    // which cannot be used as a child process's working directory.
    let anchored = if start.is_absolute() {
        start.to_path_buf()
    } else {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(start),
            Err(_) => start.to_path_buf(),
        }
    };

    anchored
        .ancestors()
        .find(|dir| dir.join("docker-compose.yml").exists())
        .map(Path::to_path_buf)
}
177
/// Quote `s` so a POSIX shell (here: `bash -c`) receives it as exactly
/// one argument.
///
/// * Empty input becomes `''`.
/// * Input made only of ASCII alphanumerics and `_ - . / : = + @ ,` is
///   returned untouched.
/// * Anything else is wrapped in single quotes, with each embedded `'`
///   rewritten as `'\''` (close quote, escaped quote, reopen quote).
///
/// Local copy to avoid pulling `run.rs` into `schema_cache.rs` for one
/// helper.
fn posix_quote(s: &str) -> String {
    const BARE_PUNCT: &str = "_-./:=+@,";
    let is_bare = |c: char| c.is_ascii_alphanumeric() || BARE_PUNCT.contains(c);

    if s.is_empty() {
        "''".to_string()
    } else if s.chars().all(is_bare) {
        s.to_string()
    } else {
        format!("'{}'", s.replace('\'', "'\\''"))
    }
}
204
/// Unit tests for the cache helpers and the `--fdl-schema` probe.
///
/// The probe tests execute small `#!/bin/sh` scripts, so they are only
/// compiled on unix targets.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{Duration, Instant};

    /// Scoped test directory under `std::env::temp_dir()` that cleans up on drop.
    /// Zero-external-dep replacement for `tempfile::tempdir()`.
    struct TestDir {
        path: PathBuf,
    }

    impl TestDir {
        /// Create a fresh directory named from `tag`, the pid, a
        /// timestamp, and a process-wide sequence number.
        fn new(tag: &str) -> Self {
            // Tests run in parallel within one process and share the
            // same tag and pid; on a coarse clock the nanosecond stamp
            // alone could repeat, so a counter guarantees uniqueness.
            static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
            let seq = NEXT_ID.fetch_add(1, Ordering::Relaxed);
            let nanos = std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .map(|d| d.as_nanos())
                .unwrap_or(0);
            let pid = std::process::id();
            let path =
                std::env::temp_dir().join(format!("fdl-test-{tag}-{pid}-{nanos}-{seq}"));
            fs::create_dir_all(&path).expect("create test dir");
            Self { path }
        }

        fn path(&self) -> &Path {
            &self.path
        }
    }

    impl Drop for TestDir {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover temp dir must not fail a test.
            let _ = fs::remove_dir_all(&self.path);
        }
    }

    /// Write `body` to `<dir>/<name>`, mark it executable, and return
    /// the script's path.
    #[cfg(unix)]
    fn write_script(dir: &Path, name: &str, body: &str) -> PathBuf {
        use std::os::unix::fs::PermissionsExt;
        let script = dir.join(name);
        fs::write(&script, body).expect("write script");
        fs::set_permissions(&script, fs::Permissions::from_mode(0o755)).expect("chmod +x");
        script
    }

    /// A small valid schema with a single `model` option.
    fn minimal_schema() -> Schema {
        let mut options = BTreeMap::new();
        options.insert(
            "model".into(),
            config::OptionSpec {
                ty: "string".into(),
                description: Some("pick a model".into()),
                default: Some(serde_json::json!("mlp")),
                choices: Some(vec![
                    serde_json::json!("mlp"),
                    serde_json::json!("resnet"),
                ]),
                short: Some("m".into()),
                env: None,
                completer: None,
            },
        );
        Schema {
            args: Vec::new(),
            options,
            strict: false,
        }
    }

    #[test]
    fn cache_roundtrip_preserves_schema() {
        let tmp = TestDir::new("sc");
        let path = cache_path(tmp.path(), "ddp-bench");
        let schema = minimal_schema();
        write_cache(&path, &schema).expect("write cache");

        let read = read_cache(&path).expect("round-trip parses");
        let orig_model = schema.options.get("model").unwrap();
        let round_model = read.options.get("model").unwrap();
        assert_eq!(orig_model.ty, round_model.ty);
        assert_eq!(orig_model.short, round_model.short);
        assert_eq!(orig_model.choices, round_model.choices);
    }

    #[test]
    fn read_cache_rejects_invalid_json() {
        let tmp = TestDir::new("sc");
        let path = tmp.path().join("bad.json");
        fs::write(&path, "not json at all").unwrap();
        assert!(read_cache(&path).is_none());
    }

    #[test]
    fn read_cache_rejects_validation_failure() {
        // A schema that clears validation at struct level but fails
        // semantic validation: shadowed fdl-level flag `--help`.
        let tmp = TestDir::new("sc");
        let path = tmp.path().join("bad_sem.json");
        let body = r#"{
            "options": {
                "help": { "type": "bool" }
            }
        }"#;
        fs::write(&path, body).unwrap();
        assert!(read_cache(&path).is_none(),
            "cache must not return a schema that fails validate_schema");
    }

    #[test]
    fn is_stale_missing_cache_is_stale() {
        let tmp = TestDir::new("sc");
        let path = tmp.path().join("missing.json");
        assert!(is_stale(&path, &[]));
    }

    #[test]
    fn is_stale_compares_mtimes() {
        let tmp = TestDir::new("sc");
        let cache = tmp.path().join("cache.json");
        let source = tmp.path().join("fdl.yml");
        fs::write(&cache, "{}").unwrap();
        let cache_mtime = mtime(&cache).expect("cache mtime");

        // Rewrite the source until the filesystem reports it strictly
        // newer than the cache. A single fixed sleep is flaky where
        // mtime granularity is coarse, so retry with a bounded deadline.
        let deadline = Instant::now() + Duration::from_secs(3);
        loop {
            fs::write(&source, "newer\n").unwrap();
            let newer = mtime(&source).map_or(false, |m| m > cache_mtime);
            if newer || Instant::now() >= deadline {
                break;
            }
            std::thread::sleep(Duration::from_millis(20));
        }

        assert!(
            is_stale(&cache, std::slice::from_ref(&source)),
            "source newer than cache ⇒ stale"
        );
    }

    #[test]
    fn is_cargo_entry_detects_common_shapes() {
        assert!(is_cargo_entry("cargo run --release --features cuda --"));
        assert!(is_cargo_entry("  cargo run -- "));
        assert!(!is_cargo_entry("./target/release/ddp-bench"));
        assert!(!is_cargo_entry("python ./train.py"));
        assert!(!is_cargo_entry(""));
    }

    #[cfg(unix)]
    #[test]
    fn probe_round_trips_with_mock_binary() {
        // Build a tiny shell script that emits the schema JSON and use it
        // as the "entry". This tests the full probe path end-to-end
        // without pulling in cargo.
        let tmp = TestDir::new("sc");
        let body = r#"#!/bin/sh
cat <<'JSON'
{
  "options": {
    "model": {
      "type": "string",
      "short": "m",
      "description": "pick a model",
      "default": "mlp",
      "choices": ["mlp", "resnet"]
    }
  }
}
JSON
"#;
        let script = write_script(tmp.path(), "mock-bin.sh", body);

        let entry = script.to_string_lossy();
        let schema = probe(&entry, tmp.path(), None).expect("probe should succeed");
        let model = schema.options.get("model").expect("model opt");
        assert_eq!(model.ty, "string");
        assert_eq!(model.short.as_deref(), Some("m"));
    }

    #[cfg(unix)]
    #[test]
    fn probe_rejects_non_json_output() {
        let tmp = TestDir::new("sc");
        let script = write_script(tmp.path(), "junk.sh", "#!/bin/sh\necho not json\n");
        let err = probe(&script.to_string_lossy(), tmp.path(), None)
            .expect_err("non-json must fail");
        assert!(err.contains("no JSON") || err.contains("valid JSON"),
            "err was: {err}");
    }

    #[cfg(unix)]
    #[test]
    fn probe_rejects_semantically_invalid_schema() {
        let tmp = TestDir::new("sc");
        // Emits JSON that parses but declares a reserved flag.
        let body = r#"#!/bin/sh
cat <<'JSON'
{ "options": { "help": { "type": "bool" } } }
JSON
"#;
        let script = write_script(tmp.path(), "bad.sh", body);
        let err = probe(&script.to_string_lossy(), tmp.path(), None)
            .expect_err("semantic fail must propagate");
        assert!(err.contains("validation") || err.contains("reserved"),
            "err was: {err}");
    }
}
412}