Skip to main content

sqry_classpath/stub/
cache.rs

1//! Per-JAR stub cache keyed by SHA-256 hash of the JAR file.
2//!
3//! Cache location: `.sqry/classpath/jars/{hash}.stub`
4//! Format: postcard binary serialization of `Vec<ClassStub>`
5//!
//! The cache is content-addressed: the cache key is the hex-encoded first
//! 16 bytes of the SHA-256 hash computed over the full JAR file contents.
//! When a JAR's contents change, its hash changes and the old cache entry
//! becomes orphaned (eventually cleaned up by a cache sweep, or left
//! harmlessly on disk).
10//!
11//! ## Atomic writes
12//!
13//! Cache writes use a temporary file with rename to prevent reads of
14//! partially-written files. This makes the cache safe for concurrent
15//! readers (though not for concurrent writers to the same key — which
16//! is fine because each JAR is processed once).
17
18use std::fs;
19use std::io::Read;
20use std::path::{Path, PathBuf};
21
22use log::warn;
23use sha2::{Digest, Sha256};
24
25use crate::stub::model::ClassStub;
26use crate::{ClasspathError, ClasspathResult};
27
28// ---------------------------------------------------------------------------
29// Constants
30// ---------------------------------------------------------------------------
31
/// Number of leading bytes of the SHA-256 digest kept as the cache key.
///
/// 16 bytes hex-encode to 32 characters. Truncating the digest to 128 bits
/// leaves roughly 64 bits of collision resistance (birthday bound), not 128.
const HASH_PREFIX_BYTES: usize = 16;

/// Directory, relative to the project root, that holds the JAR stub cache files.
const CACHE_SUBDIR: &str = ".sqry/classpath/jars";

/// File extension (without the leading dot) of a cached stub file.
const CACHE_EXTENSION: &str = "stub";

/// Suffix carried by the temporary file that a cache entry is written to
/// before it is renamed into place.
const TEMP_SUFFIX: &str = ".tmp";
44
45// ---------------------------------------------------------------------------
46// StubCache
47// ---------------------------------------------------------------------------
48
/// Per-JAR stub cache keyed by a SHA-256 hash of the JAR file.
///
/// Cache location: `.sqry/classpath/jars/{hash}.stub`
/// Format: postcard binary serialization of `Vec<ClassStub>`
///
/// # Thread safety
///
/// `StubCache` is `Send + Sync`: its only field is an owned `PathBuf` and
/// there is no interior mutability. Multiple threads may call
/// [`get`](StubCache::get) concurrently. Concurrent
/// [`put`](StubCache::put) calls for different JAR contents write to
/// different cache files. Concurrent puts for the same JAR contents target
/// the same cache file; the entry is moved into place with a rename, so the
/// last completed rename wins.
#[derive(Debug, Clone)]
pub struct StubCache {
    /// Directory where cache files are stored
    /// (`<project_root>/.sqry/classpath/jars`).
    cache_dir: PathBuf,
}
66
67impl StubCache {
68    /// Create a new stub cache rooted at the given project directory.
69    ///
70    /// The cache directory (`.sqry/classpath/jars/`) is created lazily on
71    /// first write.
72    #[must_use]
73    pub fn new(project_root: &Path) -> Self {
74        Self {
75            cache_dir: project_root.join(CACHE_SUBDIR),
76        }
77    }
78
79    /// Try to load cached stubs for a JAR file.
80    ///
81    /// Returns `None` on cache miss, corrupt cache, hash computation failure,
82    /// or I/O error. Errors are logged as warnings but never propagated.
83    #[must_use]
84    pub fn get(&self, jar_path: &Path) -> Option<Vec<ClassStub>> {
85        let key = match Self::cache_key(jar_path) {
86            Ok(k) => k,
87            Err(e) => {
88                warn!(
89                    "stub cache: cannot compute key for {}: {e}",
90                    jar_path.display()
91                );
92                return None;
93            }
94        };
95
96        let cache_path = self.cache_file_path(&key);
97        let bytes = match fs::read(&cache_path) {
98            Ok(b) => b,
99            Err(_) => return None, // Cache miss — not worth logging.
100        };
101
102        match postcard::from_bytes::<Vec<ClassStub>>(&bytes) {
103            Ok(stubs) => Some(stubs),
104            Err(e) => {
105                warn!(
106                    "stub cache: corrupt cache file {}: {e}",
107                    cache_path.display()
108                );
109                // Attempt to remove the corrupt file.
110                let _ = fs::remove_file(&cache_path);
111                None
112            }
113        }
114    }
115
116    /// Cache parsed stubs for a JAR file.
117    ///
118    /// Uses atomic write (temp file + rename) to prevent corrupt reads.
119    ///
120    /// # Errors
121    ///
122    /// Returns [`ClasspathError::CacheError`] if the cache key cannot be
123    /// computed or the cache directory cannot be created. Serialization
124    /// and I/O failures during write are returned as `CacheError`.
125    pub fn put(&self, jar_path: &Path, stubs: &[ClassStub]) -> ClasspathResult<()> {
126        let key = Self::cache_key(jar_path)?;
127        let cache_path = self.cache_file_path(&key);
128
129        // Ensure cache directory exists.
130        fs::create_dir_all(&self.cache_dir).map_err(|e| {
131            ClasspathError::CacheError(format!(
132                "cannot create cache directory {}: {e}",
133                self.cache_dir.display()
134            ))
135        })?;
136
137        // Serialize.
138        let bytes = postcard::to_allocvec(stubs).map_err(|e| {
139            ClasspathError::CacheError(format!(
140                "cannot serialize stubs for {}: {e}",
141                jar_path.display()
142            ))
143        })?;
144
145        // Atomic write: write to temp file, then rename.
146        let temp_path = cache_path.with_extension(format!("{CACHE_EXTENSION}{TEMP_SUFFIX}"));
147
148        if let Err(e) = fs::write(&temp_path, &bytes) {
149            warn!(
150                "stub cache: cannot write temp file {}: {e}",
151                temp_path.display()
152            );
153            // Non-fatal: we can continue without caching.
154            return Err(ClasspathError::CacheError(format!(
155                "cannot write temp cache file: {e}"
156            )));
157        }
158
159        if let Err(e) = fs::rename(&temp_path, &cache_path) {
160            // Clean up the temp file if rename fails.
161            let _ = fs::remove_file(&temp_path);
162            warn!(
163                "stub cache: cannot rename temp file to {}: {e}",
164                cache_path.display()
165            );
166            return Err(ClasspathError::CacheError(format!(
167                "cannot rename cache file: {e}"
168            )));
169        }
170
171        Ok(())
172    }
173
174    /// Compute cache key: first 16 bytes of SHA-256 of the JAR file, hex-encoded.
175    ///
176    /// # Errors
177    ///
178    /// Returns [`ClasspathError::CacheError`] if the JAR file cannot be read.
179    fn cache_key(jar_path: &Path) -> ClasspathResult<String> {
180        let mut file = fs::File::open(jar_path).map_err(|e| {
181            ClasspathError::CacheError(format!(
182                "cannot open JAR for hashing {}: {e}",
183                jar_path.display()
184            ))
185        })?;
186
187        let mut hasher = Sha256::new();
188        let mut buffer = [0u8; 8192];
189        loop {
190            let n = file.read(&mut buffer).map_err(|e| {
191                ClasspathError::CacheError(format!(
192                    "cannot read JAR for hashing {}: {e}",
193                    jar_path.display()
194                ))
195            })?;
196            if n == 0 {
197                break;
198            }
199            hasher.update(&buffer[..n]);
200        }
201
202        let hash = hasher.finalize();
203        let key = hex::encode(&hash[..HASH_PREFIX_BYTES]);
204        Ok(key)
205    }
206
207    /// Compute the filesystem path for a given cache key.
208    fn cache_file_path(&self, key: &str) -> PathBuf {
209        self.cache_dir.join(format!("{key}.{CACHE_EXTENSION}"))
210    }
211}
212
213// ---------------------------------------------------------------------------
214// Tests
215// ---------------------------------------------------------------------------
216
#[cfg(test)]
mod tests {
    use super::*;
    use crate::stub::model::{AccessFlags, ClassKind};
    use tempfile::TempDir;

    /// Build a bare-bones `ClassStub` whose only distinguishing data is its
    /// fully-qualified name (and the simple name derived from it).
    fn make_stub(fqn: &str) -> ClassStub {
        let simple_name = fqn.rsplit('.').next().unwrap_or(fqn);
        ClassStub {
            fqn: fqn.to_owned(),
            name: simple_name.to_owned(),
            kind: ClassKind::Class,
            access: AccessFlags::new(0x0021),
            superclass: Some("java.lang.Object".to_owned()),
            interfaces: Vec::new(),
            methods: Vec::new(),
            fields: Vec::new(),
            annotations: Vec::new(),
            generic_signature: None,
            inner_classes: Vec::new(),
            lambda_targets: Vec::new(),
            module: None,
            record_components: Vec::new(),
            enum_constants: Vec::new(),
            source_file: None,
            source_jar: None,
            kotlin_metadata: None,
            scala_signature: None,
        }
    }

    /// Write `content` to `dir/name` and return the resulting path; the file
    /// stands in for a JAR when computing cache keys.
    fn create_dummy_jar(dir: &Path, name: &str, content: &[u8]) -> PathBuf {
        let jar = dir.join(name);
        fs::write(&jar, content).unwrap();
        jar
    }

    /// Create a scratch project directory together with a cache rooted in it.
    /// The `TempDir` must be kept alive for as long as the cache is used.
    fn fresh_cache() -> (TempDir, StubCache) {
        let root = TempDir::new().unwrap();
        let cache = StubCache::new(root.path());
        (root, cache)
    }

    #[test]
    fn test_cache_miss_returns_none() {
        let (root, cache) = fresh_cache();

        let jar = create_dummy_jar(root.path(), "test.jar", b"some jar content");
        assert!(cache.get(&jar).is_none());
    }

    #[test]
    fn test_cache_hit_returns_stubs() {
        let (root, cache) = fresh_cache();

        let jar = create_dummy_jar(root.path(), "test.jar", b"some jar content");
        let written = vec![make_stub("com.example.Foo"), make_stub("com.example.Bar")];

        cache.put(&jar, &written).unwrap();
        let loaded = cache.get(&jar).unwrap();

        let fqns: Vec<&str> = loaded.iter().map(|stub| stub.fqn.as_str()).collect();
        assert_eq!(fqns, ["com.example.Foo", "com.example.Bar"]);
    }

    #[test]
    fn test_hash_change_triggers_miss() {
        let (root, cache) = fresh_cache();

        let jar = root.path().join("test.jar");
        fs::write(&jar, b"version 1").unwrap();

        cache.put(&jar, &[make_stub("com.example.Foo")]).unwrap();

        // New contents produce a new hash, so the entry stored under the old
        // hash no longer matches this JAR.
        fs::write(&jar, b"version 2").unwrap();

        assert!(cache.get(&jar).is_none());
    }

    #[test]
    fn test_corrupt_cache_returns_none() {
        let (root, cache) = fresh_cache();

        let jar = create_dummy_jar(root.path(), "test.jar", b"some jar content");
        let key = StubCache::cache_key(&jar).unwrap();

        // Plant garbage where the cache entry for this JAR would live.
        let entry_dir = root.path().join(CACHE_SUBDIR);
        fs::create_dir_all(&entry_dir).unwrap();
        let entry = entry_dir.join(format!("{key}.{CACHE_EXTENSION}"));
        fs::write(&entry, b"corrupt data").unwrap();

        // The lookup must report a miss and delete the unreadable entry.
        assert!(cache.get(&jar).is_none());
        assert!(!entry.exists(), "corrupt cache file should be removed");
    }

    #[test]
    fn test_postcard_roundtrip() {
        let names = ["com.example.Foo", "com.example.Bar", "com.example.Baz"];
        let original: Vec<ClassStub> = names.iter().map(|fqn| make_stub(fqn)).collect();

        let encoded = postcard::to_allocvec(&original).unwrap();
        let decoded: Vec<ClassStub> = postcard::from_bytes(&encoded).unwrap();

        let decoded_names: Vec<&str> = decoded.iter().map(|stub| stub.fqn.as_str()).collect();
        assert_eq!(decoded_names, names);
    }

    #[test]
    fn test_cache_empty_stubs() {
        let (root, cache) = fresh_cache();

        let jar = create_dummy_jar(root.path(), "empty.jar", b"empty jar content");
        let nothing: Vec<ClassStub> = Vec::new();

        cache.put(&jar, &nothing).unwrap();
        assert!(cache.get(&jar).unwrap().is_empty());
    }

    #[test]
    fn test_cache_key_is_deterministic() {
        let root = TempDir::new().unwrap();
        let jar = create_dummy_jar(root.path(), "test.jar", b"deterministic content");

        let first = StubCache::cache_key(&jar).unwrap();
        let second = StubCache::cache_key(&jar).unwrap();

        assert_eq!(first, second);
        // 16 bytes hex-encoded = 32 hex chars.
        assert_eq!(first.len(), 32);
    }

    #[test]
    fn test_cache_nonexistent_jar() {
        let (_root, cache) = fresh_cache();

        let outcome = cache.put(Path::new("/nonexistent/foo.jar"), &[]);
        assert!(outcome.is_err());
    }
}