Skip to main content

weaveffi_core/
cache.rs

1//! Content-hashing and per-generator caching for skip-if-unchanged builds.
2
3use anyhow::{Context, Result};
4use camino::Utf8Path;
5use sha2::{Digest, Sha256};
6use weaveffi_ir::ir::Api;
7
/// Name of the cache entry created directly under the output directory.
///
/// The current layout is a directory holding one `<generator>.hash` file per
/// generator; older CLI versions wrote a single regular file at this path.
const CACHE_DIR: &str = ".weaveffi-cache";
9
/// Version string mixed into every cache key computed by
/// [`hash_generator_inputs`]. Bumping the WeaveFFI CLI version therefore
/// invalidates every cache file, so users never see stale generator output
/// after an upgrade.
///
/// NOTE(review): `env!("CARGO_PKG_VERSION")` expands to the version of the
/// crate that compiles this file, so it matches the CLI version only if the
/// two crates are versioned in lockstep — confirm.
pub const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
14
15/// Serialize the API to canonical JSON and return its SHA-256 hex digest.
16///
17/// The IR is first serialized to a `serde_json::Value`, whose `Object`
18/// representation is backed by a `BTreeMap` (when the `preserve_order`
19/// feature is not enabled). Re-serializing that `Value` therefore emits
20/// keys in deterministic, lexicographic order regardless of the iteration
21/// order of any source maps. This guarantees that two runs over the same
22/// IR always produce the same hash.
23pub fn hash_api(api: &Api) -> String {
24    let value = serde_json::to_value(api).expect("Api serialization should not fail");
25    let json = serde_json::to_string(&value).expect("Value serialization should not fail");
26    let hash = Sha256::digest(json.as_bytes());
27    format!("{hash:x}")
28}
29
30/// Return the SHA-256 hex digest of the API content keyed by `generator_name`.
31///
32/// Kept for tests and direct callers that only need an IR-keyed digest;
33/// the orchestrator goes through [`hash_generator_inputs`] so that config
34/// and CLI version changes invalidate the cache too.
35pub fn hash_api_for_generator(api: &Api, generator_name: &str) -> String {
36    let value = serde_json::to_value(api).expect("Api serialization should not fail");
37    let json = serde_json::to_string(&value).expect("Value serialization should not fail");
38    let mut hasher = Sha256::new();
39    hasher.update(generator_name.as_bytes());
40    hasher.update(b":");
41    hasher.update(json.as_bytes());
42    let hash = hasher.finalize();
43    format!("{hash:x}")
44}
45
46/// Return the SHA-256 hex digest of every input that affects a single
47/// generator's output: the canonical IR, the generator's name, the
48/// generator's typed config (already serialized to canonical JSON bytes
49/// by the caller via [`crate::codegen::DynGenerator::config_hash_input`]),
50/// and the CLI version.
51///
52/// This is the cache key the orchestrator stores under
53/// `{out_dir}/.weaveffi-cache/{generator_name}.hash`, so any change to
54/// the IR, generator config, or CLI version invalidates that entry and
55/// triggers a re-run.
56pub fn hash_generator_inputs(api: &Api, generator_name: &str, config_bytes: &[u8]) -> String {
57    let api_value = serde_json::to_value(api).expect("Api serialization should not fail");
58    let api_json = serde_json::to_string(&api_value).expect("Value serialization should not fail");
59
60    let mut hasher = Sha256::new();
61    hasher.update(b"v1\0");
62    hasher.update(CLI_VERSION.as_bytes());
63    hasher.update(b"\0");
64    hasher.update(generator_name.as_bytes());
65    hasher.update(b"\0");
66    hasher.update(api_json.as_bytes());
67    hasher.update(b"\0");
68    hasher.update(config_bytes);
69    let hash = hasher.finalize();
70    format!("{hash:x}")
71}
72
73/// Read the persisted hash for `generator_name` from `out_dir/.weaveffi-cache/`.
74///
75/// Returns `None` when no cache entry exists yet (or it is empty).
76pub fn read_generator_cache(out_dir: &Utf8Path, generator_name: &str) -> Option<String> {
77    let path = out_dir
78        .join(CACHE_DIR)
79        .join(format!("{generator_name}.hash"));
80    std::fs::read_to_string(path)
81        .ok()
82        .map(|s| s.trim().to_string())
83        .filter(|s| !s.is_empty())
84}
85
86/// Persist `hash` as the cache entry for `generator_name`.
87///
88/// Removes a stale legacy `.weaveffi-cache` regular file (written by older
89/// CLI versions that used a single global cache) before creating the new
90/// per-generator directory layout.
91pub fn write_generator_cache(out_dir: &Utf8Path, generator_name: &str, hash: &str) -> Result<()> {
92    let cache_dir = out_dir.join(CACHE_DIR);
93    migrate_legacy_cache(out_dir)?;
94    std::fs::create_dir_all(cache_dir.as_std_path())
95        .with_context(|| format!("failed to create cache directory: {cache_dir}"))?;
96    let path = cache_dir.join(format!("{generator_name}.hash"));
97    std::fs::write(path.as_std_path(), hash)
98        .with_context(|| format!("failed to write cache file: {path}"))?;
99    Ok(())
100}
101
102/// Delete every persisted cache entry under `out_dir/.weaveffi-cache/`.
103///
104/// Called when `--force` is used so subsequent runs always regenerate.
105pub fn invalidate_all(out_dir: &Utf8Path) -> Result<()> {
106    let cache_dir = out_dir.join(CACHE_DIR);
107    if cache_dir.is_dir() {
108        std::fs::remove_dir_all(cache_dir.as_std_path())
109            .with_context(|| format!("failed to remove cache directory: {cache_dir}"))?;
110    } else if cache_dir.exists() {
111        std::fs::remove_file(cache_dir.as_std_path())
112            .with_context(|| format!("failed to remove legacy cache file: {cache_dir}"))?;
113    }
114    Ok(())
115}
116
117/// Remove a stale legacy single-file cache so we can create the new
118/// per-generator directory in its place.
119fn migrate_legacy_cache(out_dir: &Utf8Path) -> Result<()> {
120    let cache_path = out_dir.join(CACHE_DIR);
121    if cache_path.is_file() {
122        std::fs::remove_file(cache_path.as_std_path())
123            .with_context(|| format!("failed to remove legacy cache file: {cache_path}"))?;
124    }
125    Ok(())
126}
127
#[cfg(test)]
mod tests {
    use super::*;
    use crate::codegen::{ConfiguredGenerator, Generator, Orchestrator, OrchestratorHooks};
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;
    use weaveffi_ir::ir::{Function, Module, Param, TypeRef};

    /// Minimal serde-able config so the cache tests can exercise the
    /// orchestrator without depending on any real per-language config.
    #[derive(Default, Clone, serde::Serialize, serde::Deserialize)]
    struct TestConfig {
        knob: Option<String>,
    }

    /// Serialize `c` to JSON bytes by way of a `serde_json::Value`.
    fn config_bytes(c: &TestConfig) -> Vec<u8> {
        let v = serde_json::to_value(c).unwrap();
        serde_json::to_vec(&v).unwrap()
    }

    /// Build a synchronous, undocumented `(a: i32, b: i32) -> i32` function
    /// called `name`. Shared by the fixtures below so the literal is not
    /// repeated in every test that needs a function.
    fn binary_i32_fn(name: &str) -> Function {
        let param = |param_name: &str| Param {
            name: param_name.to_string(),
            ty: TypeRef::I32,
            mutable: false,
            doc: None,
        };
        Function {
            name: name.to_string(),
            params: vec![param("a"), param("b")],
            returns: Some(TypeRef::I32),
            doc: None,
            r#async: false,
            cancellable: false,
            deprecated: None,
            since: None,
        }
    }

    /// Smallest useful IR: one `math` module exposing a single `add` function.
    fn minimal_api() -> Api {
        Api {
            version: "0.1.0".to_string(),
            modules: vec![Module {
                name: "math".to_string(),
                functions: vec![binary_i32_fn("add")],
                structs: vec![],
                enums: vec![],
                callbacks: vec![],
                listeners: vec![],
                errors: None,
                modules: vec![],
            }],
            generators: None,
            package: None,
        }
    }

    /// Generator stub that counts how often `generate` runs and writes one
    /// marker file per invocation.
    struct CountingGenerator {
        name: &'static str,
        calls: Arc<AtomicUsize>,
    }

    impl Generator for CountingGenerator {
        type Config = TestConfig;

        fn name(&self) -> &'static str {
            self.name
        }

        fn generate(
            &self,
            _api: &Api,
            out_dir: &Utf8Path,
            _config: &Self::Config,
        ) -> anyhow::Result<()> {
            self.calls.fetch_add(1, Ordering::SeqCst);
            let dir = out_dir.join(self.name);
            std::fs::create_dir_all(dir.as_std_path())?;
            std::fs::write(dir.join("output.txt").as_std_path(), "generated")?;
            Ok(())
        }
    }

    /// Pair a [`CountingGenerator`] with the config it should run under.
    fn configured(
        name: &'static str,
        calls: Arc<AtomicUsize>,
        cfg: TestConfig,
    ) -> ConfiguredGenerator<CountingGenerator> {
        ConfiguredGenerator::new(CountingGenerator { name, calls }, cfg)
    }

    #[test]
    fn hash_deterministic() {
        let api = minimal_api();
        let h1 = hash_api(&api);
        let h2 = hash_api(&api);
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 64);
    }

    #[test]
    fn hash_is_deterministic_across_runs() {
        let mut api = minimal_api();
        let mut generators = std::collections::BTreeMap::new();
        let mut swift = toml::value::Table::new();
        swift.insert(
            "module_name".into(),
            toml::Value::String("MySwiftModule".into()),
        );
        generators.insert("swift".into(), toml::Value::Table(swift));
        let mut android = toml::value::Table::new();
        android.insert(
            "package".into(),
            toml::Value::String("com.example.app".into()),
        );
        generators.insert("android".into(), toml::Value::Table(android));
        api.generators = Some(generators);

        let baseline = hash_api(&api);
        for _ in 0..100 {
            assert_eq!(
                hash_api(&api),
                baseline,
                "hash_api must produce identical output on every call"
            );
        }
    }

    #[test]
    fn hash_changes_on_modification() {
        let mut api = minimal_api();
        let h1 = hash_api(&api);

        api.modules[0].functions.push(binary_i32_fn("subtract"));
        let h2 = hash_api(&api);

        assert_ne!(h1, h2);
    }

    #[test]
    fn per_generator_hash_includes_name() {
        let api = minimal_api();
        let h_c = hash_api_for_generator(&api, "c");
        let h_swift = hash_api_for_generator(&api, "swift");
        assert_ne!(h_c, h_swift);
        assert_eq!(h_c.len(), 64);
    }

    #[test]
    fn per_generator_hash_deterministic() {
        let api = minimal_api();
        assert_eq!(
            hash_api_for_generator(&api, "c"),
            hash_api_for_generator(&api, "c"),
        );
    }

    #[test]
    fn per_generator_cache_round_trip() {
        let dir = tempfile::tempdir().unwrap();
        let dir_path = Utf8Path::from_path(dir.path()).unwrap();

        let hash = hash_api_for_generator(&minimal_api(), "c");
        write_generator_cache(dir_path, "c", &hash).unwrap();

        let read_back = read_generator_cache(dir_path, "c");
        assert_eq!(read_back, Some(hash));
        assert_eq!(read_generator_cache(dir_path, "swift"), None);
    }

    #[test]
    fn read_generator_cache_returns_none_when_missing() {
        let dir = tempfile::tempdir().unwrap();
        let dir_path = Utf8Path::from_path(dir.path()).unwrap();
        assert_eq!(read_generator_cache(dir_path, "c"), None);
    }

    #[test]
    fn invalidate_all_clears_cache() {
        let dir = tempfile::tempdir().unwrap();
        let dir_path = Utf8Path::from_path(dir.path()).unwrap();
        write_generator_cache(dir_path, "c", "abc").unwrap();
        write_generator_cache(dir_path, "swift", "def").unwrap();

        invalidate_all(dir_path).unwrap();
        assert_eq!(read_generator_cache(dir_path, "c"), None);
        assert_eq!(read_generator_cache(dir_path, "swift"), None);
    }

    #[test]
    fn legacy_cache_file_is_replaced_by_directory() {
        let dir = tempfile::tempdir().unwrap();
        let dir_path = Utf8Path::from_path(dir.path()).unwrap();
        std::fs::write(dir_path.join(CACHE_DIR), "stale-global-hash").unwrap();
        assert!(dir_path.join(CACHE_DIR).is_file());

        write_generator_cache(dir_path, "c", "fresh-hash").unwrap();

        assert!(dir_path.join(CACHE_DIR).is_dir());
        assert_eq!(
            read_generator_cache(dir_path, "c"),
            Some("fresh-hash".to_string())
        );
    }

    #[test]
    fn cache_file_written_after_generate() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = Utf8Path::from_path(dir.path()).unwrap();
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, false).unwrap();

        assert!(out_dir.join(CACHE_DIR).join("counting.hash").exists());
        assert_eq!(calls.load(Ordering::SeqCst), 1);
    }

    #[test]
    fn cache_prevents_regeneration() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = Utf8Path::from_path(dir.path()).unwrap();
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(calls.load(Ordering::SeqCst), 1);

        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            calls.load(Ordering::SeqCst),
            1,
            "second run should skip generation"
        );
    }

    #[test]
    fn cache_invalidated_on_api_change() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = Utf8Path::from_path(dir.path()).unwrap();
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(calls.load(Ordering::SeqCst), 1);

        let mut modified_api = api;
        modified_api.modules[0]
            .functions
            .push(binary_i32_fn("subtract"));

        orch.run(&modified_api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            calls.load(Ordering::SeqCst),
            2,
            "changed API should trigger regeneration"
        );
    }

    #[test]
    fn force_flag_bypasses_cache() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = Utf8Path::from_path(dir.path()).unwrap();
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, true).unwrap();
        assert_eq!(calls.load(Ordering::SeqCst), 1);

        orch.run(&api, out_dir, &hooks, true).unwrap();
        assert_eq!(
            calls.load(Ordering::SeqCst),
            2,
            "force=true should bypass cache"
        );
    }

    #[test]
    fn legacy_cache_file_ignored_on_first_run() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = Utf8Path::from_path(dir.path()).unwrap();
        std::fs::write(out_dir.join(CACHE_DIR), "stale-legacy").unwrap();

        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            calls.load(Ordering::SeqCst),
            1,
            "legacy single-file cache must not skip first run"
        );
        assert!(out_dir.join(CACHE_DIR).is_dir());
    }

    #[test]
    fn single_generator_cache_invalidates_independently() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = Utf8Path::from_path(dir.path()).unwrap();
        let hooks = OrchestratorHooks::default();
        let c_calls = Arc::new(AtomicUsize::new(0));
        let s_calls = Arc::new(AtomicUsize::new(0));
        let c_gen = configured("c", Arc::clone(&c_calls), TestConfig::default());
        let s_gen = configured("swift", Arc::clone(&s_calls), TestConfig::default());
        let orch = Orchestrator::new()
            .with_generator(&c_gen)
            .with_generator(&s_gen);

        let api = minimal_api();
        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(c_calls.load(Ordering::SeqCst), 1);
        assert_eq!(s_calls.load(Ordering::SeqCst), 1);

        // Invalidate only the C generator's cache; the API itself is unchanged.
        std::fs::remove_file(out_dir.join(CACHE_DIR).join("c.hash")).unwrap();

        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            c_calls.load(Ordering::SeqCst),
            2,
            "C generator should re-run after its cache entry was removed"
        );
        assert_eq!(
            s_calls.load(Ordering::SeqCst),
            1,
            "Swift generator's cache is intact and must be skipped"
        );
    }

    #[test]
    fn hash_generator_inputs_changes_when_config_bytes_change() {
        let api = minimal_api();
        let base = config_bytes(&TestConfig::default());

        let changed = config_bytes(&TestConfig {
            knob: Some("flipped".into()),
        });

        assert_ne!(
            hash_generator_inputs(&api, "c", &base),
            hash_generator_inputs(&api, "c", &changed),
            "changing config bytes must change the per-generator hash"
        );
    }

    #[test]
    fn hash_generator_inputs_includes_cli_version() {
        let api = minimal_api();
        let cfg = config_bytes(&TestConfig::default());

        let real = hash_generator_inputs(&api, "c", &cfg);

        let api_value = serde_json::to_value(&api).unwrap();
        let api_json = serde_json::to_string(&api_value).unwrap();

        // Hand-rolled copy of the key layout with the version swappable, so
        // the version is the only input that varies between the two digests.
        let digest_with_version = |version: &str| {
            let mut h = Sha256::new();
            h.update(b"v1\0");
            h.update(version.as_bytes());
            h.update(b"\0");
            h.update(b"c\0");
            h.update(api_json.as_bytes());
            h.update(b"\0");
            h.update(&cfg);
            format!("{:x}", h.finalize())
        };

        // Control: with the real version the hand-rolled layout must
        // reproduce the real key. Without this, the inequality below would
        // also hold if the layouts merely drifted apart, proving nothing
        // about the version.
        assert_eq!(
            real,
            digest_with_version(CLI_VERSION),
            "hand-rolled layout must match hash_generator_inputs for the real CLI version"
        );

        // Pretend a different CLI version produced the digest. The two must
        // differ — otherwise upgrades silently leave stale output.
        assert_ne!(
            real,
            digest_with_version("0.0.0-pretend-old"),
            "CLI_VERSION must be part of the cache key so an upgrade invalidates it"
        );
        assert_eq!(CLI_VERSION, env!("CARGO_PKG_VERSION"));
    }

    #[test]
    fn cache_invalidated_on_config_only_change() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = Utf8Path::from_path(dir.path()).unwrap();
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();

        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("c", Arc::clone(&calls), TestConfig::default());
        Orchestrator::new()
            .with_generator(&gen)
            .run(&api, out_dir, &hooks, false)
            .unwrap();
        assert_eq!(calls.load(Ordering::SeqCst), 1);

        // Re-run with the *same* IR but a changed generator config.
        let gen2 = configured(
            "c",
            Arc::clone(&calls),
            TestConfig {
                knob: Some("changed".into()),
            },
        );
        Orchestrator::new()
            .with_generator(&gen2)
            .run(&api, out_dir, &hooks, false)
            .unwrap();
        assert_eq!(
            calls.load(Ordering::SeqCst),
            2,
            "changing generator config must invalidate the cache and re-run the generator"
        );

        // A third run with the same `changed` config should hit the cache again.
        Orchestrator::new()
            .with_generator(&gen2)
            .run(&api, out_dir, &hooks, false)
            .unwrap();
        assert_eq!(
            calls.load(Ordering::SeqCst),
            2,
            "running with the same config twice should not regenerate"
        );
    }

    #[test]
    fn cache_invalidated_when_pre_generated_hash_has_wrong_version() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = Utf8Path::from_path(dir.path()).unwrap();
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("c", Arc::clone(&calls), TestConfig::default());
        let orch = Orchestrator::new().with_generator(&gen);

        // Pre-seed the cache with a hash that was computed with the
        // legacy IR-only function. The orchestrator now keys on
        // `hash_generator_inputs`, so the stale entry must not match
        // and the generator must re-run.
        let stale = hash_api_for_generator(&api, "c");
        write_generator_cache(out_dir, "c", &stale).unwrap();

        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            calls.load(Ordering::SeqCst),
            1,
            "legacy IR-only hash must not satisfy the new cache key shape"
        );
    }
}