Skip to main content

flodl_cli/
schema_cache.rs

1//! `--fdl-schema` binary contract: probe, validate, and cache.
2//!
3//! A sub-command binary that opts into the contract exposes a single
4//! `--fdl-schema` flag printing a JSON schema describing its CLI surface.
5//! `flodl-cli` caches the output under `<cmd_dir>/.fdl/schema-cache/<cmd>.json`
6//! and prefers it over any inline YAML schema declared in `fdl.yaml`.
7//!
8//! **Cargo entries** (`entry: cargo run ...`) are *not* auto-probed: invoking
9//! them forces a full compile, which is unacceptable latency for `fdl --help`.
10//! For those, users run `fdl <cmd> --refresh-schema` explicitly after a build.
11//!
12//! Cache invalidation is mtime-based: the cache file's mtime is compared
13//! against `fdl.yml` in the command dir. A cache older than its fdl.yml is
14//! considered stale. Users can also force-refresh.
15//!
16//! See `docs/design/run-config.md` — "2. Option schemas and the `--fdl-schema`
17//! contract" — for the JSON shape.
18
19use std::fs;
20use std::path::{Path, PathBuf};
21use std::process::{Command, Stdio};
22use std::time::SystemTime;
23
24use crate::config::{self, Schema};
25
/// Location of the schema caches, expressed relative to a command dir.
const CACHE_DIR: &str = ".fdl/schema-cache";

/// Build the cache file location for a command:
/// `<cmd_dir>/.fdl/schema-cache/<cmd_name>.json`.
pub fn cache_path(cmd_dir: &Path, cmd_name: &str) -> PathBuf {
    let mut target = cmd_dir.join(CACHE_DIR);
    target.push(format!("{cmd_name}.json"));
    target
}
33
34/// Read a schema cache file, returning `Some` only if it parses cleanly
35/// and survives validation. Parse or validation errors are treated as
36/// "no cache" (the caller falls through to the inline/YAML schema).
37pub fn read_cache(path: &Path) -> Option<Schema> {
38    let content = fs::read_to_string(path).ok()?;
39    let schema: Schema = serde_json::from_str(&content).ok()?;
40    config::validate_schema(&schema).ok()?;
41    Some(schema)
42}
43
44/// Consider a cache "stale" if it is older than the command's fdl.yml
45/// (config changes), or older than a sentinel binary path when supplied.
46///
47/// Missing cache ⇒ stale (return true). Missing reference mtime ⇒ treat
48/// the cache as fresh (conservative: don't refresh what we can't justify).
49pub fn is_stale(cache: &Path, reference_mtimes: &[PathBuf]) -> bool {
50    let Some(cache_mtime) = mtime(cache) else {
51        return true;
52    };
53    reference_mtimes
54        .iter()
55        .filter_map(|p| mtime(p))
56        .any(|ref_m| ref_m > cache_mtime)
57}
58
/// Modification time of `path`, or `None` when the metadata or the
/// timestamp cannot be obtained.
fn mtime(path: &Path) -> Option<SystemTime> {
    fs::metadata(path).and_then(|meta| meta.modified()).ok()
}
62
63/// Serialize a schema to the cache file, creating parent dirs as needed.
64pub fn write_cache(path: &Path, schema: &Schema) -> Result<(), String> {
65    if let Some(parent) = path.parent() {
66        fs::create_dir_all(parent)
67            .map_err(|e| format!("cannot create {}: {}", parent.display(), e))?;
68    }
69    let json = serde_json::to_string_pretty(schema)
70        .map_err(|e| format!("schema serialize: {e}"))?;
71    fs::write(path, json).map_err(|e| format!("cannot write {}: {}", path.display(), e))
72}
73
74/// Probe a binary for its schema by running `<entry> --fdl-schema` via the
75/// shell and parsing stdout as JSON.
76///
77/// The entry is run with `cwd = cmd_dir` so relative paths (e.g. in
78/// `cargo run` contexts) resolve correctly. On failure returns a string
79/// error rather than panicking — callers almost always want to fall back.
80pub fn probe(entry: &str, cmd_dir: &Path) -> Result<Schema, String> {
81    if entry.trim().is_empty() {
82        return Err("entry is empty".into());
83    }
84    let invocation = format!("{entry} --fdl-schema");
85    let (shell, flag) = if cfg!(target_os = "windows") {
86        ("cmd", "/C")
87    } else {
88        ("sh", "-c")
89    };
90    let output = Command::new(shell)
91        .args([flag, &invocation])
92        .current_dir(cmd_dir)
93        .stdout(Stdio::piped())
94        .stderr(Stdio::piped())
95        .output()
96        .map_err(|e| format!("spawn `{invocation}`: {e}"))?;
97
98    if !output.status.success() {
99        let stderr = String::from_utf8_lossy(&output.stderr);
100        return Err(format!(
101            "`{invocation}` exited with {}: {}",
102            output.status,
103            stderr.trim()
104        ));
105    }
106
107    // Tolerate leading lines of cargo chatter by locating the first `{`.
108    let stdout = String::from_utf8_lossy(&output.stdout);
109    let start = stdout
110        .find('{')
111        .ok_or_else(|| "no JSON object in --fdl-schema output".to_string())?;
112    let schema: Schema = serde_json::from_str(&stdout[start..])
113        .map_err(|e| format!("--fdl-schema did not emit valid JSON: {e}"))?;
114    config::validate_schema(&schema)
115        .map_err(|e| format!("--fdl-schema output failed validation: {e}"))?;
116    Ok(schema)
117}
118
/// Heuristic: cargo entries compile-on-run, so they are never auto-probed.
/// Probing must be explicit (`fdl <cmd> --refresh-schema`) for those.
///
/// An entry counts as a cargo entry when its program word — the first
/// whitespace-separated token after any leading `NAME=value` environment
/// assignments — is exactly `cargo`. This covers `cargo run ...`, a bare
/// `cargo`, tab-separated forms, and `RUSTFLAGS=... cargo run ...`.
///
/// Tokenization is purely whitespace-based: an assignment whose quoted
/// value contains spaces (`FOO="a b" cargo run`) is not recognised and
/// yields `false`.
pub fn is_cargo_entry(entry: &str) -> bool {
    let program = entry.split_whitespace().find(|token| !is_env_assignment(token));
    program == Some("cargo")
}

/// True when `token` has the shape `NAME=value`, where `NAME` is a
/// shell-style identifier (ASCII letter or `_`, then letters, digits, `_`).
fn is_env_assignment(token: &str) -> bool {
    let Some((name, _value)) = token.split_once('=') else {
        return false;
    };
    let mut chars = name.chars();
    matches!(chars.next(), Some(c) if c.is_ascii_alphabetic() || c == '_')
        && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
}
124
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::io::Write;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Scoped test directory under `std::env::temp_dir()` that cleans up on drop.
    /// Zero-external-dep replacement for `tempfile::tempdir()`.
    struct TestDir {
        path: PathBuf,
    }

    impl TestDir {
        /// Create a fresh directory named from `tag`, the process id, the
        /// current time in nanoseconds, and a per-process sequence number.
        ///
        /// The sequence number keeps directories distinct even when two
        /// tests running in parallel observe the same clock reading;
        /// without it they would share a directory and the first `Drop`
        /// would delete files the other test still needs.
        fn new(tag: &str) -> Self {
            static NEXT_SEQ: AtomicUsize = AtomicUsize::new(0);

            let nanos = std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .map(|d| d.as_nanos())
                .unwrap_or(0);
            let pid = std::process::id();
            let seq = NEXT_SEQ.fetch_add(1, Ordering::Relaxed);
            let path =
                std::env::temp_dir().join(format!("fdl-test-{tag}-{pid}-{nanos}-{seq}"));
            fs::create_dir_all(&path).expect("create test dir");
            Self { path }
        }

        fn path(&self) -> &Path {
            &self.path
        }
    }

    impl Drop for TestDir {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover temp dir must not fail a test.
            let _ = fs::remove_dir_all(&self.path);
        }
    }

    /// Write `body` to `dir/name`, mark it executable (0o755), and return
    /// its path. Used to fake a binary that implements `--fdl-schema`.
    #[cfg(unix)]
    fn write_script(dir: &Path, name: &str, body: &str) -> PathBuf {
        use std::os::unix::fs::PermissionsExt;

        let script = dir.join(name);
        fs::write(&script, body).unwrap();
        fs::set_permissions(&script, fs::Permissions::from_mode(0o755)).unwrap();
        script
    }

    /// Smallest schema that exercises the interesting option fields.
    fn minimal_schema() -> Schema {
        let mut options = BTreeMap::new();
        options.insert(
            "model".into(),
            config::OptionSpec {
                ty: "string".into(),
                description: Some("pick a model".into()),
                default: Some(serde_json::json!("mlp")),
                choices: Some(vec![
                    serde_json::json!("mlp"),
                    serde_json::json!("resnet"),
                ]),
                short: Some("m".into()),
                env: None,
                completer: None,
            },
        );
        Schema {
            args: Vec::new(),
            options,
            strict: false,
        }
    }

    #[test]
    fn cache_roundtrip_preserves_schema() {
        let tmp = TestDir::new("sc");
        let path = cache_path(tmp.path(), "ddp-bench");
        let schema = minimal_schema();
        write_cache(&path, &schema).expect("write cache");

        let read = read_cache(&path).expect("round-trip parses");
        let orig_model = schema.options.get("model").unwrap();
        let round_model = read.options.get("model").unwrap();
        assert_eq!(orig_model.ty, round_model.ty);
        assert_eq!(orig_model.short, round_model.short);
        assert_eq!(orig_model.choices, round_model.choices);
    }

    #[test]
    fn read_cache_rejects_invalid_json() {
        let tmp = TestDir::new("sc");
        let path = tmp.path().join("bad.json");
        fs::write(&path, "not json at all").unwrap();
        assert!(read_cache(&path).is_none());
    }

    #[test]
    fn read_cache_rejects_validation_failure() {
        // A schema that clears validation at struct level but fails
        // semantic validation: shadowed fdl-level flag `--help`.
        let tmp = TestDir::new("sc");
        let path = tmp.path().join("bad_sem.json");
        let body = r#"{
            "options": {
                "help": { "type": "bool" }
            }
        }"#;
        fs::write(&path, body).unwrap();
        assert!(
            read_cache(&path).is_none(),
            "cache must not return a schema that fails validate_schema"
        );
    }

    #[test]
    fn is_stale_missing_cache_is_stale() {
        let tmp = TestDir::new("sc");
        let path = tmp.path().join("missing.json");
        assert!(is_stale(&path, &[]));
    }

    #[test]
    fn is_stale_compares_mtimes() {
        let tmp = TestDir::new("sc");
        let cache = tmp.path().join("cache.json");
        let source = tmp.path().join("fdl.yml");
        fs::write(&cache, "{}").unwrap();
        // Sleep a moment then touch source so its mtime is newer.
        std::thread::sleep(std::time::Duration::from_millis(20));
        let mut f = fs::File::create(&source).unwrap();
        writeln!(f, "newer").unwrap();
        assert!(
            is_stale(&cache, std::slice::from_ref(&source)),
            "source newer than cache ⇒ stale"
        );
    }

    #[test]
    fn is_cargo_entry_detects_common_shapes() {
        assert!(is_cargo_entry("cargo run --release --features cuda --"));
        assert!(is_cargo_entry("  cargo run -- "));
        assert!(!is_cargo_entry("./target/release/ddp-bench"));
        assert!(!is_cargo_entry("python ./train.py"));
        assert!(!is_cargo_entry(""));
    }

    // The probe tests below fake a binary with a `/bin/sh` script, so they
    // only make sense on unix targets.

    #[cfg(unix)]
    #[test]
    fn probe_round_trips_with_mock_binary() {
        // Build a tiny shell script that emits the schema JSON and use it
        // as the "entry". This tests the full probe path end-to-end
        // without pulling in cargo.
        let tmp = TestDir::new("sc");
        let body = r#"#!/bin/sh
cat <<'JSON'
{
  "options": {
    "model": {
      "type": "string",
      "short": "m",
      "description": "pick a model",
      "default": "mlp",
      "choices": ["mlp", "resnet"]
    }
  }
}
JSON
"#;
        let script = write_script(tmp.path(), "mock-bin.sh", body);

        let entry = script.to_string_lossy();
        let schema = probe(&entry, tmp.path()).expect("probe should succeed");
        let model = schema.options.get("model").expect("model opt");
        assert_eq!(model.ty, "string");
        assert_eq!(model.short.as_deref(), Some("m"));
    }

    #[cfg(unix)]
    #[test]
    fn probe_rejects_non_json_output() {
        let tmp = TestDir::new("sc");
        let script = write_script(tmp.path(), "junk.sh", "#!/bin/sh\necho not json\n");
        let err = probe(&script.to_string_lossy(), tmp.path())
            .expect_err("non-json must fail");
        assert!(
            err.contains("no JSON") || err.contains("valid JSON"),
            "err was: {err}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn probe_rejects_semantically_invalid_schema() {
        let tmp = TestDir::new("sc");
        // Emits JSON that parses but declares a reserved flag.
        let body = r#"#!/bin/sh
cat <<'JSON'
{ "options": { "help": { "type": "bool" } } }
JSON
"#;
        let script = write_script(tmp.path(), "bad.sh", body);
        let err = probe(&script.to_string_lossy(), tmp.path())
            .expect_err("semantic fail must propagate");
        assert!(
            err.contains("validation") || err.contains("reserved"),
            "err was: {err}"
        );
    }
}