// lisette_semantics/cache/mod.rs

1pub mod go_stdlib;
2pub mod prelude;
3pub mod types;
4
5use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
6use std::fs;
7use std::hash::{Hash, Hasher};
8use std::io;
9use std::path::{Path, PathBuf};
10
11use serde::{Deserialize, Serialize};
12use syntax::program::File;
13
14use crate::store::{ENTRY_MODULE_ID, Store};
15use types::CachedDefinition;
16
17/// Current cache format version. Bump this when making breaking changes to the cache format.
18pub const CACHE_FORMAT_VERSION: u32 = 1;
19
20/// Compiler version hash. Caches from different compiler versions are invalid.
21pub const COMPILER_VERSION_HASH: u64 = const_fnv1a_hash(env!("CARGO_PKG_VERSION").as_bytes());
22
23/// Combined stdlib content hash. Changes to any stdlib file (prelude.d.lis
24/// or any typedefs/*.d.lis) will change this hash, invalidating all user module caches.
25pub const STDLIB_HASH: u64 = stdlib::STDLIB_CONTENT_HASH;
26
27/// Prelude-only content hash (prelude.d.lis).
28pub const PRELUDE_HASH: u64 = stdlib::PRELUDE_CONTENT_HASH;
29
30/// Go stdlib-only content hash (typedefs/*.d.lis).
31pub const GO_STDLIB_HASH: u64 = stdlib::GO_STD_CONTENT_HASH;
32
/// 64-bit FNV-1a offset basis (the initial hash state).
const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
/// 64-bit FNV-1a prime (the per-byte multiplier).
const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

/// FNV-1a hash of `bytes`, evaluable at compile time.
///
/// Used to derive version hashes in `const` items. Each byte is XOR-ed into
/// the state, which is then multiplied (wrapping) by the FNV prime.
const fn const_fnv1a_hash(bytes: &[u8]) -> u64 {
    let mut state = FNV_OFFSET;
    let mut remaining = bytes;
    // `for` loops are not available in `const fn`, so peel one byte at a time.
    while let [byte, tail @ ..] = remaining {
        state = (state ^ *byte as u64).wrapping_mul(FNV_PRIME);
        remaining = tail;
    }
    state
}
47
48/// FNV-1a hasher implementing `std::hash::Hasher`.
49/// Unlike `DefaultHasher`, this produces deterministic hashes across Rust versions.
50struct FnvHasher(u64);
51
52impl FnvHasher {
53    fn new() -> Self {
54        Self(FNV_OFFSET)
55    }
56}
57
58impl Hasher for FnvHasher {
59    fn write(&mut self, bytes: &[u8]) {
60        for &byte in bytes {
61            self.0 ^= byte as u64;
62            self.0 = self.0.wrapping_mul(FNV_PRIME);
63        }
64    }
65
66    fn finish(&self) -> u64 {
67        self.0
68    }
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct ModuleInterface {
73    pub version: u32,
74
75    pub compiler_version: u64,
76
77    pub stdlib_hash: u64,
78
79    /// This module's content hash: hash(source_hash + dependency module_hashes)
80    /// Used by downstream modules to detect transitive changes
81    pub module_hash: u64,
82
83    pub source_hash: u64,
84
85    /// Module hash of each direct dependency.
86    pub dependency_hashes: HashMap<String, u64>,
87
88    pub files: Vec<CachedFile>,
89
90    pub definitions: HashMap<String, CachedDefinition>,
91
92    /// UFCS method pairs for this module, computed during registration.
93    pub ufcs_methods: Vec<(String, String)>,
94}
95
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct CachedFile {
98    pub name: String,
99    pub source: String,
100}
101
/// Identity and hash inputs of a freshly compiled module, handed to
/// `save_module_cache` when its cache entry is written.
#[derive(Debug)]
pub struct CompiledModule {
    /// Id of the module; used to look it up in the store and to name its cache file.
    pub module_id: String,
    /// Hash of the module's own sources.
    pub source_hash: u64,
    /// Module hash of each direct dependency, keyed by dependency module id.
    pub dep_hashes: HashMap<String, u64>,
}
108
109pub fn hash_module_sources(files: &[File]) -> u64 {
110    let mut hasher = FnvHasher::new();
111
112    let mut sorted: Vec<_> = files.iter().collect();
113    sorted.sort_by_key(|f| &f.name);
114
115    for file in sorted {
116        file.name.hash(&mut hasher);
117        file.source.hash(&mut hasher);
118    }
119
120    hasher.finish()
121}
122
123/// Compute a module's hash from its source hash and dependency hashes.
124/// This ensures transitive invalidation: if C changes, B's module_hash changes
125/// (even though B's source didn't), which invalidates A's cache.
126pub fn compute_module_hash(source_hash: u64, dep_hashes: &HashMap<String, u64>) -> u64 {
127    let mut hasher = FnvHasher::new();
128    source_hash.hash(&mut hasher);
129
130    let mut deps: Vec<_> = dep_hashes.iter().collect();
131    deps.sort_by_key(|(k, _)| *k);
132    for (name, hash) in deps {
133        name.hash(&mut hasher);
134        hash.hash(&mut hasher);
135    }
136
137    hasher.finish()
138}
139
140pub fn get_dependency_module_hashes(
141    module_id: &str,
142    edges: &HashMap<String, HashSet<String>>,
143    module_hashes: &HashMap<String, u64>,
144) -> HashMap<String, u64> {
145    let Some(deps) = edges.get(module_id) else {
146        return HashMap::default();
147    };
148
149    deps.iter()
150        .map(|dep_id| {
151            let hash = if dep_id.starts_with("go:") || dep_id == "prelude" {
152                STDLIB_HASH
153            } else {
154                *module_hashes.get(dep_id).unwrap_or(&0)
155            };
156            (dep_id.clone(), hash)
157        })
158        .collect()
159}
160
161pub fn is_cache_valid(
162    cache: &ModuleInterface,
163    current_source_hash: u64,
164    current_dep_hashes: &HashMap<String, u64>,
165) -> bool {
166    cache.version == CACHE_FORMAT_VERSION
167        && cache.compiler_version == COMPILER_VERSION_HASH
168        && cache.stdlib_hash == STDLIB_HASH
169        && cache.source_hash == current_source_hash
170        && cache.dependency_hashes == *current_dep_hashes
171}
172
/// Location of the cache file for `module_id`:
/// `<project_root>/target/cache/<module_id>.cache`.
///
/// Every `/` in the module id is flattened to `_` so nested modules share one
/// directory. NOTE: as a consequence, ids such as `a/b` and `a_b` map to the
/// same file.
pub fn cache_path(project_root: &Path, module_id: &str) -> PathBuf {
    let file_name = format!("{}.cache", module_id.replace('/', "_"));

    let mut path = project_root.to_path_buf();
    path.push("target");
    path.push("cache");
    path.push(file_name);
    path
}
179
180pub fn try_load_cache(
181    module_id: &str,
182    expected_source_hash: u64,
183    expected_dep_hashes: &HashMap<String, u64>,
184    project_root: &Path,
185    check_go_files: bool,
186) -> Option<ModuleInterface> {
187    let path = cache_path(project_root, module_id);
188    let bytes = fs::read(&path).ok()?;
189    let interface: ModuleInterface = bincode::deserialize(&bytes).ok()?;
190
191    if !is_cache_valid(&interface, expected_source_hash, expected_dep_hashes) {
192        let _ = fs::remove_file(&path);
193        return None;
194    }
195
196    if check_go_files && !all_go_outputs_exist(module_id, &interface.files, project_root) {
197        let _ = fs::remove_file(&path);
198        return None;
199    }
200
201    Some(interface)
202}
203
204fn all_go_outputs_exist(module_id: &str, cached_files: &[CachedFile], project_root: &Path) -> bool {
205    let target_dir = if module_id == ENTRY_MODULE_ID {
206        project_root.join("target")
207    } else {
208        project_root.join("target").join(module_id)
209    };
210
211    for cached_file in cached_files {
212        if cached_file.name.ends_with(".lis") && !cached_file.name.ends_with(".d.lis") {
213            let go_name = cached_file.name.replace(".lis", ".go");
214            if !target_dir.join(&go_name).exists() {
215                return false;
216            }
217        }
218    }
219
220    true
221}
222
223pub fn save_module_cache(
224    compiled: &CompiledModule,
225    store: &Store,
226    project_root: &Path,
227    ufcs_methods: &HashSet<(String, String)>,
228) -> io::Result<()> {
229    let module_hash = compute_module_hash(compiled.source_hash, &compiled.dep_hashes);
230
231    let Some(module) = store.get_module(&compiled.module_id) else {
232        return Err(io::Error::other("module not found in store"));
233    };
234
235    let mut all_files: Vec<_> = module
236        .files
237        .values()
238        .chain(module.typedefs.values())
239        .collect();
240    all_files.sort_by_key(|f| &f.name);
241
242    let file_id_to_index: HashMap<u32, u32> = all_files
243        .iter()
244        .enumerate()
245        .map(|(idx, f)| (f.id, idx as u32))
246        .collect();
247
248    let interface = ModuleInterface {
249        version: CACHE_FORMAT_VERSION,
250        compiler_version: COMPILER_VERSION_HASH,
251        stdlib_hash: STDLIB_HASH,
252        module_hash,
253        source_hash: compiled.source_hash,
254        dependency_hashes: compiled.dep_hashes.clone(),
255        files: all_files
256            .iter()
257            .map(|f| CachedFile {
258                name: f.name.clone(),
259                source: f.source.clone(),
260            })
261            .collect(),
262        definitions: extract_public_definitions(store, &compiled.module_id, &file_id_to_index),
263        ufcs_methods: {
264            let prefix = format!("{}.", compiled.module_id);
265            ufcs_methods
266                .iter()
267                .filter(|(type_id, _)| type_id.starts_with(&prefix))
268                .cloned()
269                .collect()
270        },
271    };
272
273    let path = cache_path(project_root, &compiled.module_id);
274    if let Some(parent) = path.parent() {
275        fs::create_dir_all(parent)?;
276    }
277
278    // Write to temp file, then rename (atomic)
279    let temp_path = path.with_extension("cache.tmp");
280    let bytes = bincode::serialize(&interface).map_err(io::Error::other)?;
281    fs::write(&temp_path, bytes)?;
282    fs::rename(&temp_path, &path)?;
283
284    Ok(())
285}
286
287fn extract_public_definitions(
288    store: &Store,
289    module_id: &str,
290    file_id_to_index: &HashMap<u32, u32>,
291) -> HashMap<String, CachedDefinition> {
292    let Some(module) = store.get_module(module_id) else {
293        return HashMap::default();
294    };
295
296    module
297        .definitions
298        .iter()
299        .filter(|(_, definition)| definition.visibility().is_public())
300        .map(|(name, definition)| {
301            (
302                name.to_string(),
303                CachedDefinition::from_definition(definition, file_id_to_index),
304            )
305        })
306        .collect()
307}
308
309/// Register a cached module in the store.
310/// This loads the cached definitions and source files without running inference.
311pub fn register_cached_module(store: &mut Store, module_id: &str, cached: ModuleInterface) {
312    store.add_module(module_id);
313
314    // Clear files stored during module graph construction (parse_module_files stores files
315    // eagerly for diagnostic rendering). These have full ASTs but un-inferred typed_patterns,
316    // which would cause pattern analysis to panic.
317    if let Some(module) = store.get_module_mut(module_id) {
318        module.files.clear();
319    }
320
321    let mut file_ids: Vec<u32> = vec![];
322    for cached_file in &cached.files {
323        let file_id = store.new_file_id();
324        file_ids.push(file_id);
325
326        let file = File::new_cached(module_id, &cached_file.name, &cached_file.source, file_id);
327
328        store.store_file(module_id, file);
329    }
330
331    let module = store.get_module_mut(module_id).unwrap();
332    for (qualified_name, cached_definition) in cached.definitions {
333        let definition = cached_definition.to_definition(&file_ids);
334        module.definitions.insert(qualified_name.into(), definition);
335    }
336
337    store.mark_visited(module_id);
338}
339
/// Reports whether caching was switched off through the environment.
///
/// Caching is disabled when `LISETTE_NO_CACHE` is set to `1` or to `true`
/// (compared ASCII case-insensitively). An unset or unreadable variable, or
/// any other value, leaves caching enabled.
pub fn is_cache_disabled() -> bool {
    match std::env::var("LISETTE_NO_CACHE") {
        Ok(value) => value == "1" || value.eq_ignore_ascii_case("true"),
        Err(_) => false,
    }
}
345
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cache::types::CachedType;
    use syntax::types::Type;

    /// Builds an interface that passes every check in `is_cache_valid` for the
    /// given source hash and dependency hashes. Tests override single fields
    /// with struct-update syntax to exercise one check at a time.
    fn valid_interface(
        source_hash: u64,
        dependency_hashes: HashMap<String, u64>,
    ) -> ModuleInterface {
        ModuleInterface {
            version: CACHE_FORMAT_VERSION,
            compiler_version: COMPILER_VERSION_HASH,
            stdlib_hash: STDLIB_HASH,
            module_hash: 0,
            source_hash,
            dependency_hashes,
            files: vec![],
            definitions: HashMap::default(),
            ufcs_methods: vec![],
        }
    }

    /// A `Type::Constructor` with the given id, no parameters and no underlying type.
    fn simple_type(id: &'static str) -> Type {
        Type::Constructor {
            id: id.into(),
            params: vec![],
            underlying_ty: None,
        }
    }

    #[test]
    fn test_hash_module_sources_deterministic() {
        let file1 = File::new_cached("mod", "a.lis", "fn foo() {}", 1);
        let file2 = File::new_cached("mod", "b.lis", "fn bar() {}", 2);

        let forward = [file1.clone(), file2.clone()];
        let reversed = [file2, file1];

        assert_eq!(
            hash_module_sources(&forward),
            hash_module_sources(&reversed)
        );
    }

    #[test]
    fn test_hash_module_sources_content_sensitive() {
        let file1 = File::new_cached("mod", "a.lis", "fn foo() {}", 1);
        let file2 = File::new_cached("mod", "a.lis", "fn bar() {}", 1);

        assert_ne!(hash_module_sources(&[file1]), hash_module_sources(&[file2]));
    }

    #[test]
    fn test_compute_module_hash_includes_deps() {
        let source_hash = 12345u64;

        let mut deps1 = HashMap::default();
        deps1.insert("dep_a".to_string(), 111u64);

        let mut deps2 = HashMap::default();
        deps2.insert("dep_a".to_string(), 222u64);

        assert_ne!(
            compute_module_hash(source_hash, &deps1),
            compute_module_hash(source_hash, &deps2)
        );
    }

    #[test]
    fn test_compute_module_hash_deterministic() {
        let source_hash = 12345u64;

        // Same entries, opposite insertion order: the hash must not depend on it.
        let mut forward = HashMap::default();
        forward.insert("dep_a".to_string(), 111u64);
        forward.insert("dep_b".to_string(), 222u64);

        let mut reversed = HashMap::default();
        reversed.insert("dep_b".to_string(), 222u64);
        reversed.insert("dep_a".to_string(), 111u64);

        let expected = compute_module_hash(source_hash, &forward);
        assert_eq!(expected, compute_module_hash(source_hash, &forward));
        assert_eq!(expected, compute_module_hash(source_hash, &reversed));
    }

    #[test]
    fn test_cache_validity_checks_version() {
        let cache = ModuleInterface {
            version: CACHE_FORMAT_VERSION.wrapping_add(1), // Wrong version
            ..valid_interface(100, HashMap::default())
        };

        assert!(!is_cache_valid(&cache, 100, &HashMap::default()));
    }

    #[test]
    fn test_cache_validity_checks_compiler_version() {
        let cache = ModuleInterface {
            // Wrong compiler; wrapping so an all-ones hash cannot overflow.
            compiler_version: COMPILER_VERSION_HASH.wrapping_add(1),
            ..valid_interface(100, HashMap::default())
        };

        assert!(!is_cache_valid(&cache, 100, &HashMap::default()));
    }

    #[test]
    fn test_cache_validity_checks_stdlib_hash() {
        let cache = ModuleInterface {
            stdlib_hash: STDLIB_HASH.wrapping_add(1), // Wrong stdlib
            ..valid_interface(100, HashMap::default())
        };

        assert!(!is_cache_valid(&cache, 100, &HashMap::default()));
    }

    #[test]
    fn test_cache_validity_checks_source_hash() {
        let cache = valid_interface(100, HashMap::default());

        assert!(!is_cache_valid(&cache, 200, &HashMap::default()));
        assert!(is_cache_valid(&cache, 100, &HashMap::default()));
    }

    #[test]
    fn test_cache_validity_checks_dep_hashes() {
        let mut cached_deps = HashMap::default();
        cached_deps.insert("dep".to_string(), 111u64);

        let cache = valid_interface(100, cached_deps.clone());

        let mut different_deps = HashMap::default();
        different_deps.insert("dep".to_string(), 222u64);

        assert!(!is_cache_valid(&cache, 100, &different_deps));
        assert!(is_cache_valid(&cache, 100, &cached_deps));
    }

    #[test]
    fn test_cached_type_roundtrip_constructor() {
        let ty = Type::Constructor {
            id: "MyType".into(),
            params: vec![simple_type("int")],
            underlying_ty: None,
        };

        let cached = CachedType::from_type(&ty);
        let restored = cached.to_type();

        match (&ty, &restored) {
            (
                Type::Constructor {
                    id: id1,
                    params: p1,
                    ..
                },
                Type::Constructor {
                    id: id2,
                    params: p2,
                    ..
                },
            ) => {
                assert_eq!(id1, id2);
                assert_eq!(p1.len(), p2.len());
            }
            _ => panic!("Type mismatch"),
        }
    }

    #[test]
    fn test_cached_type_roundtrip_function() {
        let ty = Type::Function {
            params: vec![simple_type("int")],
            param_mutability: vec![false],
            bounds: vec![],
            return_type: Box::new(simple_type("string")),
        };

        let cached = CachedType::from_type(&ty);
        let restored = cached.to_type();

        match (&ty, &restored) {
            (
                Type::Function {
                    params: p1,
                    return_type: r1,
                    ..
                },
                Type::Function {
                    params: p2,
                    return_type: r2,
                    ..
                },
            ) => {
                assert_eq!(p1.len(), p2.len());
                match (r1.as_ref(), r2.as_ref()) {
                    (Type::Constructor { id: id1, .. }, Type::Constructor { id: id2, .. }) => {
                        assert_eq!(id1, id2);
                    }
                    _ => panic!("Return type mismatch"),
                }
            }
            _ => panic!("Type mismatch"),
        }
    }

    #[test]
    fn test_cached_type_roundtrip_tuple() {
        let ty = Type::Tuple(vec![simple_type("int"), simple_type("string")]);

        let cached = CachedType::from_type(&ty);
        let restored = cached.to_type();

        match (&ty, &restored) {
            (Type::Tuple(t1), Type::Tuple(t2)) => {
                assert_eq!(t1.len(), t2.len());
            }
            _ => panic!("Type mismatch"),
        }
    }

    #[test]
    fn test_cached_type_roundtrip_never() {
        let ty = Type::Never;
        let cached = CachedType::from_type(&ty);
        let restored = cached.to_type();

        assert!(matches!(restored, Type::Never));
    }

    #[test]
    fn test_cache_path_format() {
        let path = cache_path(Path::new("/project"), "utils");
        assert_eq!(path, PathBuf::from("/project/target/cache/utils.cache"));

        let path = cache_path(Path::new("/project"), "deep/nested/mod");
        assert_eq!(
            path,
            PathBuf::from("/project/target/cache/deep_nested_mod.cache")
        );
    }

    #[test]
    fn test_get_dependency_module_hashes_uses_stdlib_hash() {
        let mut edges = HashMap::default();
        let mut deps = HashSet::default();
        deps.insert("go:fmt".to_string());
        deps.insert("prelude".to_string());
        deps.insert("user_mod".to_string());
        edges.insert("my_mod".to_string(), deps);

        let mut module_hashes = HashMap::default();
        module_hashes.insert("user_mod".to_string(), 12345u64);

        let result = get_dependency_module_hashes("my_mod", &edges, &module_hashes);

        assert_eq!(result.len(), 3);
        assert_eq!(result.get("go:fmt"), Some(&STDLIB_HASH));
        assert_eq!(result.get("prelude"), Some(&STDLIB_HASH));
        assert_eq!(result.get("user_mod"), Some(&12345u64));
    }

    #[test]
    fn test_get_dependency_module_hashes_unknown_module_is_empty() {
        let edges = HashMap::default();
        let module_hashes = HashMap::default();

        let result = get_dependency_module_hashes("missing", &edges, &module_hashes);

        assert!(result.is_empty());
    }

    #[test]
    fn test_get_dependency_module_hashes_missing_hash_defaults_to_zero() {
        let mut edges = HashMap::default();
        let mut deps = HashSet::default();
        deps.insert("unhashed_mod".to_string());
        edges.insert("my_mod".to_string(), deps);

        let module_hashes = HashMap::default();

        let result = get_dependency_module_hashes("my_mod", &edges, &module_hashes);

        assert_eq!(result.get("unhashed_mod"), Some(&0u64));
    }
}