Skip to main content

weaveffi_core/
cache.rs

//! Content-hashing and per-generator caching for skip-if-unchanged builds.
2
3use anyhow::{Context, Result};
4use camino::Utf8Path;
5use sha2::{Digest, Sha256};
6use weaveffi_ir::ir::Api;
7
8const CACHE_DIR: &str = ".weaveffi-cache";
9
10/// Version string baked into every cache entry. Bumping the WeaveFFI CLI
11/// version automatically invalidates every cache file so users never see
12/// stale generator output after an upgrade.
13pub const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
14
15/// Serialize the API to canonical JSON and return its SHA-256 hex digest.
16///
17/// The IR is first serialized to a `serde_json::Value`, whose `Object`
18/// representation is backed by a `BTreeMap` (when the `preserve_order`
19/// feature is not enabled). Re-serializing that `Value` therefore emits
20/// keys in deterministic, lexicographic order regardless of the iteration
21/// order of any source maps. This guarantees that two runs over the same
22/// IR always produce the same hash.
23pub fn hash_api(api: &Api) -> String {
24    let value = serde_json::to_value(api).expect("Api serialization should not fail");
25    let json = serde_json::to_string(&value).expect("Value serialization should not fail");
26    let hash = Sha256::digest(json.as_bytes());
27    format!("{hash:x}")
28}
29
30/// Return the SHA-256 hex digest of the API content keyed by `generator_name`.
31///
32/// Kept for tests and direct callers that only need an IR-keyed digest;
33/// the orchestrator goes through [`hash_generator_inputs`] so that config
34/// and CLI version changes invalidate the cache too.
35pub fn hash_api_for_generator(api: &Api, generator_name: &str) -> String {
36    let value = serde_json::to_value(api).expect("Api serialization should not fail");
37    let json = serde_json::to_string(&value).expect("Value serialization should not fail");
38    let mut hasher = Sha256::new();
39    hasher.update(generator_name.as_bytes());
40    hasher.update(b":");
41    hasher.update(json.as_bytes());
42    let hash = hasher.finalize();
43    format!("{hash:x}")
44}
45
46/// Return the SHA-256 hex digest of every input that affects a single
47/// generator's output: the canonical IR, the generator's name, the
48/// generator's typed config (already serialized to canonical JSON bytes
49/// by the caller via [`crate::codegen::DynGenerator::config_hash_input`]),
50/// and the CLI version.
51///
52/// This is the cache key the orchestrator stores under
53/// `{out_dir}/.weaveffi-cache/{generator_name}.hash`, so any change to
54/// the IR, generator config, or CLI version invalidates that entry and
55/// triggers a re-run.
56pub fn hash_generator_inputs(api: &Api, generator_name: &str, config_bytes: &[u8]) -> String {
57    let api_value = serde_json::to_value(api).expect("Api serialization should not fail");
58    let api_json = serde_json::to_string(&api_value).expect("Value serialization should not fail");
59
60    let mut hasher = Sha256::new();
61    hasher.update(b"v1\0");
62    hasher.update(CLI_VERSION.as_bytes());
63    hasher.update(b"\0");
64    hasher.update(generator_name.as_bytes());
65    hasher.update(b"\0");
66    hasher.update(api_json.as_bytes());
67    hasher.update(b"\0");
68    hasher.update(config_bytes);
69    let hash = hasher.finalize();
70    format!("{hash:x}")
71}
72
73/// Read the persisted hash for `generator_name` from `out_dir/.weaveffi-cache/`.
74///
75/// Returns `None` when no cache entry exists yet (or it is empty).
76pub fn read_generator_cache(out_dir: &Utf8Path, generator_name: &str) -> Option<String> {
77    let path = out_dir
78        .join(CACHE_DIR)
79        .join(format!("{generator_name}.hash"));
80    std::fs::read_to_string(path)
81        .ok()
82        .map(|s| s.trim().to_string())
83        .filter(|s| !s.is_empty())
84}
85
86/// Persist `hash` as the cache entry for `generator_name`.
87///
88/// Removes a stale legacy `.weaveffi-cache` regular file (written by older
89/// CLI versions that used a single global cache) before creating the new
90/// per-generator directory layout.
91pub fn write_generator_cache(out_dir: &Utf8Path, generator_name: &str, hash: &str) -> Result<()> {
92    let cache_dir = out_dir.join(CACHE_DIR);
93    migrate_legacy_cache(out_dir)?;
94    std::fs::create_dir_all(cache_dir.as_std_path())
95        .with_context(|| format!("failed to create cache directory: {cache_dir}"))?;
96    let path = cache_dir.join(format!("{generator_name}.hash"));
97    std::fs::write(path.as_std_path(), hash)
98        .with_context(|| format!("failed to write cache file: {path}"))?;
99    Ok(())
100}
101
102/// Delete every persisted cache entry under `out_dir/.weaveffi-cache/`.
103///
104/// Called when `--force` is used so subsequent runs always regenerate.
105pub fn invalidate_all(out_dir: &Utf8Path) -> Result<()> {
106    let cache_dir = out_dir.join(CACHE_DIR);
107    if cache_dir.is_dir() {
108        std::fs::remove_dir_all(cache_dir.as_std_path())
109            .with_context(|| format!("failed to remove cache directory: {cache_dir}"))?;
110    } else if cache_dir.exists() {
111        std::fs::remove_file(cache_dir.as_std_path())
112            .with_context(|| format!("failed to remove legacy cache file: {cache_dir}"))?;
113    }
114    Ok(())
115}
116
117/// Remove a stale legacy single-file cache so we can create the new
118/// per-generator directory in its place.
119fn migrate_legacy_cache(out_dir: &Utf8Path) -> Result<()> {
120    let cache_path = out_dir.join(CACHE_DIR);
121    if cache_path.is_file() {
122        std::fs::remove_file(cache_path.as_std_path())
123            .with_context(|| format!("failed to remove legacy cache file: {cache_path}"))?;
124    }
125    Ok(())
126}
127
#[cfg(test)]
mod tests {
    use super::*;
    use crate::codegen::{ConfiguredGenerator, Generator, Orchestrator, OrchestratorHooks};
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;
    use weaveffi_ir::ir::{Function, Module, Param, TypeRef};

    /// Minimal serde-able config so the cache tests can exercise the
    /// orchestrator without depending on any real per-language config.
    #[derive(Default, Clone, serde::Serialize, serde::Deserialize)]
    struct TestConfig {
        knob: Option<String>,
    }

    /// Serialize a test config to JSON bytes by way of `serde_json::Value`.
    fn config_bytes(c: &TestConfig) -> Vec<u8> {
        let v = serde_json::to_value(c).unwrap();
        serde_json::to_vec(&v).unwrap()
    }

    /// Build an immutable, undocumented `i32` parameter called `name`.
    fn i32_param(name: &str) -> Param {
        Param {
            name: name.to_string(),
            ty: TypeRef::I32,
            mutable: false,
            doc: None,
        }
    }

    /// Build a synchronous `fn name(a: i32, b: i32) -> i32` IR function.
    fn binary_i32_function(name: &str) -> Function {
        Function {
            name: name.to_string(),
            params: vec![i32_param("a"), i32_param("b")],
            returns: Some(TypeRef::I32),
            doc: None,
            r#async: false,
            cancellable: false,
            deprecated: None,
            since: None,
        }
    }

    /// Smallest API used by the tests: one `math` module with a single `add`
    /// function and no generator configuration.
    fn minimal_api() -> Api {
        Api {
            version: "0.1.0".to_string(),
            modules: vec![Module {
                name: "math".to_string(),
                functions: vec![binary_i32_function("add")],
                structs: vec![],
                enums: vec![],
                callbacks: vec![],
                listeners: vec![],
                errors: None,
                modules: vec![],
            }],
            generators: None,
        }
    }

    /// View a temporary directory as a UTF-8 path; panics if it is not UTF-8.
    fn utf8_dir(dir: &tempfile::TempDir) -> &Utf8Path {
        Utf8Path::from_path(dir.path()).unwrap()
    }

    /// Read how many times a counting generator has run so far.
    fn count(calls: &AtomicUsize) -> usize {
        calls.load(Ordering::SeqCst)
    }

    /// Generator that records each `generate` call and writes one marker file.
    struct CountingGenerator {
        name: &'static str,
        calls: Arc<AtomicUsize>,
    }

    impl Generator for CountingGenerator {
        type Config = TestConfig;

        fn name(&self) -> &'static str {
            self.name
        }

        fn generate(
            &self,
            _api: &Api,
            out_dir: &Utf8Path,
            _config: &Self::Config,
        ) -> anyhow::Result<()> {
            self.calls.fetch_add(1, Ordering::SeqCst);
            let dir = out_dir.join(self.name);
            std::fs::create_dir_all(dir.as_std_path())?;
            std::fs::write(dir.join("output.txt").as_std_path(), "generated")?;
            Ok(())
        }
    }

    /// Pair a counting generator with the config it should run under.
    fn configured(
        name: &'static str,
        calls: Arc<AtomicUsize>,
        cfg: TestConfig,
    ) -> ConfiguredGenerator<CountingGenerator> {
        ConfiguredGenerator::new(CountingGenerator { name, calls }, cfg)
    }

    #[test]
    fn hash_deterministic() {
        let api = minimal_api();
        let h1 = hash_api(&api);
        let h2 = hash_api(&api);
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 64);
    }

    #[test]
    fn hash_is_deterministic_across_runs() {
        let mut api = minimal_api();
        let mut generators = std::collections::BTreeMap::new();
        let mut swift = toml::value::Table::new();
        swift.insert(
            "module_name".into(),
            toml::Value::String("MySwiftModule".into()),
        );
        generators.insert("swift".into(), toml::Value::Table(swift));
        let mut android = toml::value::Table::new();
        android.insert(
            "package".into(),
            toml::Value::String("com.example.app".into()),
        );
        generators.insert("android".into(), toml::Value::Table(android));
        api.generators = Some(generators);

        let baseline = hash_api(&api);
        for _ in 0..100 {
            assert_eq!(
                hash_api(&api),
                baseline,
                "hash_api must produce identical output on every call"
            );
        }
    }

    #[test]
    fn hash_changes_on_modification() {
        let mut api = minimal_api();
        let h1 = hash_api(&api);

        api.modules[0]
            .functions
            .push(binary_i32_function("subtract"));
        let h2 = hash_api(&api);

        assert_ne!(h1, h2);
    }

    #[test]
    fn per_generator_hash_includes_name() {
        let api = minimal_api();
        let h_c = hash_api_for_generator(&api, "c");
        let h_swift = hash_api_for_generator(&api, "swift");
        assert_ne!(h_c, h_swift);
        assert_eq!(h_c.len(), 64);
    }

    #[test]
    fn per_generator_hash_deterministic() {
        let api = minimal_api();
        assert_eq!(
            hash_api_for_generator(&api, "c"),
            hash_api_for_generator(&api, "c"),
        );
    }

    #[test]
    fn per_generator_cache_round_trip() {
        let dir = tempfile::tempdir().unwrap();
        let dir_path = utf8_dir(&dir);

        let hash = hash_api_for_generator(&minimal_api(), "c");
        write_generator_cache(dir_path, "c", &hash).unwrap();

        let read_back = read_generator_cache(dir_path, "c");
        assert_eq!(read_back, Some(hash));
        assert_eq!(read_generator_cache(dir_path, "swift"), None);
    }

    #[test]
    fn read_generator_cache_returns_none_when_missing() {
        let dir = tempfile::tempdir().unwrap();
        let dir_path = utf8_dir(&dir);
        assert_eq!(read_generator_cache(dir_path, "c"), None);
    }

    #[test]
    fn invalidate_all_clears_cache() {
        let dir = tempfile::tempdir().unwrap();
        let dir_path = utf8_dir(&dir);
        write_generator_cache(dir_path, "c", "abc").unwrap();
        write_generator_cache(dir_path, "swift", "def").unwrap();

        invalidate_all(dir_path).unwrap();
        assert_eq!(read_generator_cache(dir_path, "c"), None);
        assert_eq!(read_generator_cache(dir_path, "swift"), None);
    }

    #[test]
    fn legacy_cache_file_is_replaced_by_directory() {
        let dir = tempfile::tempdir().unwrap();
        let dir_path = utf8_dir(&dir);
        std::fs::write(dir_path.join(CACHE_DIR), "stale-global-hash").unwrap();
        assert!(dir_path.join(CACHE_DIR).is_file());

        write_generator_cache(dir_path, "c", "fresh-hash").unwrap();

        assert!(dir_path.join(CACHE_DIR).is_dir());
        assert_eq!(
            read_generator_cache(dir_path, "c"),
            Some("fresh-hash".to_string())
        );
    }

    #[test]
    fn cache_file_written_after_generate() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = utf8_dir(&dir);
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, false).unwrap();

        assert!(out_dir.join(CACHE_DIR).join("counting.hash").exists());
        assert_eq!(count(&calls), 1);
    }

    #[test]
    fn cache_prevents_regeneration() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = utf8_dir(&dir);
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(count(&calls), 1);

        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(count(&calls), 1, "second run should skip generation");
    }

    #[test]
    fn cache_invalidated_on_api_change() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = utf8_dir(&dir);
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(count(&calls), 1);

        let mut modified_api = api;
        modified_api.modules[0]
            .functions
            .push(binary_i32_function("subtract"));

        orch.run(&modified_api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            count(&calls),
            2,
            "changed API should trigger regeneration"
        );
    }

    #[test]
    fn force_flag_bypasses_cache() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = utf8_dir(&dir);
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, true).unwrap();
        assert_eq!(count(&calls), 1);

        orch.run(&api, out_dir, &hooks, true).unwrap();
        assert_eq!(count(&calls), 2, "force=true should bypass cache");
    }

    #[test]
    fn legacy_cache_file_ignored_on_first_run() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = utf8_dir(&dir);
        std::fs::write(out_dir.join(CACHE_DIR), "stale-legacy").unwrap();

        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("counting", Arc::clone(&calls), TestConfig::default());

        let orch = Orchestrator::new().with_generator(&gen);
        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            count(&calls),
            1,
            "legacy single-file cache must not skip first run"
        );
        assert!(out_dir.join(CACHE_DIR).is_dir());
    }

    #[test]
    fn single_generator_cache_invalidates_independently() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = utf8_dir(&dir);
        let hooks = OrchestratorHooks::default();
        let c_calls = Arc::new(AtomicUsize::new(0));
        let s_calls = Arc::new(AtomicUsize::new(0));
        let c_gen = configured("c", Arc::clone(&c_calls), TestConfig::default());
        let s_gen = configured("swift", Arc::clone(&s_calls), TestConfig::default());
        let orch = Orchestrator::new()
            .with_generator(&c_gen)
            .with_generator(&s_gen);

        let api = minimal_api();
        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(count(&c_calls), 1);
        assert_eq!(count(&s_calls), 1);

        // Invalidate only the C generator's cache; the API itself is unchanged.
        std::fs::remove_file(out_dir.join(CACHE_DIR).join("c.hash")).unwrap();

        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            count(&c_calls),
            2,
            "C generator should re-run after its cache entry was removed"
        );
        assert_eq!(
            count(&s_calls),
            1,
            "Swift generator's cache is intact and must be skipped"
        );
    }

    #[test]
    fn hash_generator_inputs_changes_when_config_bytes_change() {
        let api = minimal_api();
        let base = config_bytes(&TestConfig::default());

        let changed = config_bytes(&TestConfig {
            knob: Some("flipped".into()),
        });

        assert_ne!(
            hash_generator_inputs(&api, "c", &base),
            hash_generator_inputs(&api, "c", &changed),
            "changing config bytes must change the per-generator hash"
        );
    }

    #[test]
    fn hash_generator_inputs_includes_cli_version() {
        let api = minimal_api();
        let cfg = config_bytes(&TestConfig::default());

        let real = hash_generator_inputs(&api, "c", &cfg);

        let api_value = serde_json::to_value(&api).unwrap();
        let api_json = serde_json::to_string(&api_value).unwrap();

        // Recompute the key by hand for an arbitrary version string, using the
        // same segment layout as `hash_generator_inputs`.
        let digest_for_version = |version: &str| {
            let mut h = Sha256::new();
            h.update(b"v1\0");
            h.update(version.as_bytes());
            h.update(b"\0");
            h.update(b"c\0");
            h.update(api_json.as_bytes());
            h.update(b"\0");
            h.update(&cfg);
            format!("{:x}", h.finalize())
        };

        // Inequality alone would also hold if the version were missing from
        // the key entirely, so first pin that the real digest is exactly the
        // one built with `CLI_VERSION`.
        assert_eq!(
            real,
            digest_for_version(CLI_VERSION),
            "the cache key must be built from CLI_VERSION"
        );

        // A different CLI version must produce a different key — otherwise
        // upgrades silently leave stale output.
        assert_ne!(
            real,
            digest_for_version("0.0.0-pretend-old"),
            "CLI_VERSION must be part of the cache key so an upgrade invalidates it"
        );
        assert_eq!(CLI_VERSION, env!("CARGO_PKG_VERSION"));
    }

    #[test]
    fn cache_invalidated_on_config_only_change() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = utf8_dir(&dir);
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();

        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("c", Arc::clone(&calls), TestConfig::default());
        Orchestrator::new()
            .with_generator(&gen)
            .run(&api, out_dir, &hooks, false)
            .unwrap();
        assert_eq!(count(&calls), 1);

        // Re-run with the *same* IR but a changed generator config.
        let gen2 = configured(
            "c",
            Arc::clone(&calls),
            TestConfig {
                knob: Some("changed".into()),
            },
        );
        Orchestrator::new()
            .with_generator(&gen2)
            .run(&api, out_dir, &hooks, false)
            .unwrap();
        assert_eq!(
            count(&calls),
            2,
            "changing generator config must invalidate the cache and re-run the generator"
        );

        // A third run with the same `changed` config should hit the cache again.
        Orchestrator::new()
            .with_generator(&gen2)
            .run(&api, out_dir, &hooks, false)
            .unwrap();
        assert_eq!(
            count(&calls),
            2,
            "running with the same config twice should not regenerate"
        );
    }

    #[test]
    fn cache_invalidated_when_pre_generated_hash_has_wrong_version() {
        let dir = tempfile::tempdir().unwrap();
        let out_dir = utf8_dir(&dir);
        let api = minimal_api();
        let hooks = OrchestratorHooks::default();
        let calls = Arc::new(AtomicUsize::new(0));
        let gen = configured("c", Arc::clone(&calls), TestConfig::default());
        let orch = Orchestrator::new().with_generator(&gen);

        // Pre-seed the cache with a hash that was computed with the
        // legacy IR-only function. The orchestrator now keys on
        // `hash_generator_inputs`, so the stale entry must not match
        // and the generator must re-run.
        let stale = hash_api_for_generator(&api, "c");
        write_generator_cache(out_dir, "c", &stale).unwrap();

        orch.run(&api, out_dir, &hooks, false).unwrap();
        assert_eq!(
            count(&calls),
            1,
            "legacy IR-only hash must not satisfy the new cache key shape"
        );
    }
}