Skip to main content

sqry_classpath/stub/
cache.rs

//! Per-JAR stub cache keyed by SHA-256 hash of the JAR file.
//!
//! Cache location: `.sqry/classpath/jars/{hash}.stub`
//! Format: postcard binary serialization of `Vec<ClassStub>`
//!
//! The cache is content-addressed: the cache key is the hex-encoded first
//! 16 bytes of the SHA-256 digest computed over the JAR file's full
//! contents. When a JAR's contents change, its hash changes and the old
//! cache entry becomes orphaned (eventually cleaned up by a cache sweep,
//! or left harmlessly on disk).
//!
//! ## Atomic writes
//!
//! Cache writes use a temporary file with rename to prevent reads of
//! partially-written files. This makes the cache safe for concurrent
//! readers (though not for concurrent writers to the same key — which
//! is fine because each JAR is processed once).
17
18use std::fs;
19use std::io::Read;
20use std::path::{Path, PathBuf};
21
22use log::warn;
23use sha2::{Digest, Sha256};
24
25use crate::stub::model::ClassStub;
26use crate::{ClasspathError, ClasspathResult};
27
28// ---------------------------------------------------------------------------
29// Constants
30// ---------------------------------------------------------------------------
31
/// Number of leading SHA-256 digest bytes used as the cache key.
///
/// 16 bytes hex-encode to 32 characters. A digest truncated to 128 bits
/// gives roughly 64-bit collision resistance (birthday bound) and 128-bit
/// preimage resistance.
const HASH_PREFIX_BYTES: usize = 16;

/// Location of the JAR stub cache directory, relative to the project root.
const CACHE_SUBDIR: &str = ".sqry/classpath/jars";

/// File extension (without the leading dot) of cached stub files.
const CACHE_EXTENSION: &str = "stub";

/// Suffix (including the leading dot) of temporary files used during
/// atomic writes.
const TEMP_SUFFIX: &str = ".tmp";
44
45// ---------------------------------------------------------------------------
46// StubCache
47// ---------------------------------------------------------------------------
48
/// Content-addressed on-disk cache of parsed class stubs, one entry per JAR.
///
/// Each entry lives at `.sqry/classpath/jars/{hash}.stub` and holds a
/// postcard-serialized `Vec<ClassStub>`; `{hash}` is derived from the
/// SHA-256 digest of the JAR file.
///
/// # Thread safety
///
/// The only field is an owned `PathBuf`, so `StubCache` is `Send + Sync`
/// and has no interior mutability. [`get`](StubCache::get) may be called
/// from several threads at once. [`put`](StubCache::put) calls for
/// different JAR files write to different files and therefore do not
/// interfere with each other; puts for the same JAR file are meant to be
/// idempotent, with the last atomic rename winning.
#[derive(Clone, Debug)]
pub struct StubCache {
    /// Directory that holds the cache files.
    cache_dir: PathBuf,
}
66
67impl StubCache {
68    /// Create a new stub cache rooted at the given project directory.
69    ///
70    /// The cache directory (`.sqry/classpath/jars/`) is created lazily on
71    /// first write.
72    #[must_use]
73    pub fn new(project_root: &Path) -> Self {
74        Self {
75            cache_dir: project_root.join(CACHE_SUBDIR),
76        }
77    }
78
79    /// Try to load cached stubs for a JAR file.
80    ///
81    /// Returns `None` on cache miss, corrupt cache, hash computation failure,
82    /// or I/O error. Errors are logged as warnings but never propagated.
83    #[must_use]
84    #[allow(clippy::manual_let_else)] // Match for error handling clarity
85    pub fn get(&self, jar_path: &Path) -> Option<Vec<ClassStub>> {
86        let key = match Self::cache_key(jar_path) {
87            Ok(k) => k,
88            Err(e) => {
89                warn!(
90                    "stub cache: cannot compute key for {}: {e}",
91                    jar_path.display()
92                );
93                return None;
94            }
95        };
96
97        let cache_path = self.cache_file_path(&key);
98        let bytes = match fs::read(&cache_path) {
99            Ok(b) => b,
100            Err(_) => return None, // Cache miss — not worth logging.
101        };
102
103        match postcard::from_bytes::<Vec<ClassStub>>(&bytes) {
104            Ok(stubs) => Some(stubs),
105            Err(e) => {
106                warn!(
107                    "stub cache: corrupt cache file {}: {e}",
108                    cache_path.display()
109                );
110                // Attempt to remove the corrupt file.
111                let _ = fs::remove_file(&cache_path);
112                None
113            }
114        }
115    }
116
117    /// Cache parsed stubs for a JAR file.
118    ///
119    /// Uses atomic write (temp file + rename) to prevent corrupt reads.
120    ///
121    /// # Errors
122    ///
123    /// Returns [`ClasspathError::CacheError`] if the cache key cannot be
124    /// computed or the cache directory cannot be created. Serialization
125    /// and I/O failures during write are returned as `CacheError`.
126    pub fn put(&self, jar_path: &Path, stubs: &[ClassStub]) -> ClasspathResult<()> {
127        let key = Self::cache_key(jar_path)?;
128        let cache_path = self.cache_file_path(&key);
129
130        // Ensure cache directory exists.
131        fs::create_dir_all(&self.cache_dir).map_err(|e| {
132            ClasspathError::CacheError(format!(
133                "cannot create cache directory {}: {e}",
134                self.cache_dir.display()
135            ))
136        })?;
137
138        // Serialize.
139        let bytes = postcard::to_allocvec(stubs).map_err(|e| {
140            ClasspathError::CacheError(format!(
141                "cannot serialize stubs for {}: {e}",
142                jar_path.display()
143            ))
144        })?;
145
146        // Atomic write: write to temp file, then rename.
147        let temp_path = cache_path.with_extension(format!("{CACHE_EXTENSION}{TEMP_SUFFIX}"));
148
149        if let Err(e) = fs::write(&temp_path, &bytes) {
150            warn!(
151                "stub cache: cannot write temp file {}: {e}",
152                temp_path.display()
153            );
154            // Non-fatal: we can continue without caching.
155            return Err(ClasspathError::CacheError(format!(
156                "cannot write temp cache file: {e}"
157            )));
158        }
159
160        if let Err(e) = fs::rename(&temp_path, &cache_path) {
161            // Clean up the temp file if rename fails.
162            let _ = fs::remove_file(&temp_path);
163            warn!(
164                "stub cache: cannot rename temp file to {}: {e}",
165                cache_path.display()
166            );
167            return Err(ClasspathError::CacheError(format!(
168                "cannot rename cache file: {e}"
169            )));
170        }
171
172        Ok(())
173    }
174
175    /// Compute cache key: first 16 bytes of SHA-256 of the JAR file, hex-encoded.
176    ///
177    /// # Errors
178    ///
179    /// Returns [`ClasspathError::CacheError`] if the JAR file cannot be read.
180    fn cache_key(jar_path: &Path) -> ClasspathResult<String> {
181        let mut file = fs::File::open(jar_path).map_err(|e| {
182            ClasspathError::CacheError(format!(
183                "cannot open JAR for hashing {}: {e}",
184                jar_path.display()
185            ))
186        })?;
187
188        let mut hasher = Sha256::new();
189        let mut buffer = [0u8; 8192];
190        loop {
191            let n = file.read(&mut buffer).map_err(|e| {
192                ClasspathError::CacheError(format!(
193                    "cannot read JAR for hashing {}: {e}",
194                    jar_path.display()
195                ))
196            })?;
197            if n == 0 {
198                break;
199            }
200            hasher.update(&buffer[..n]);
201        }
202
203        let hash = hasher.finalize();
204        let key = hex::encode(&hash[..HASH_PREFIX_BYTES]);
205        Ok(key)
206    }
207
208    /// Compute the filesystem path for a given cache key.
209    fn cache_file_path(&self, key: &str) -> PathBuf {
210        self.cache_dir.join(format!("{key}.{CACHE_EXTENSION}"))
211    }
212}
213
214// ---------------------------------------------------------------------------
215// Tests
216// ---------------------------------------------------------------------------
217
#[cfg(test)]
mod tests {
    use super::*;
    use crate::stub::model::{AccessFlags, ClassKind};
    use tempfile::TempDir;

    /// Build a bare-bones `ClassStub` whose simple name is the last
    /// dot-separated segment of `fqn`; every optional or list field is empty.
    fn make_stub(fqn: &str) -> ClassStub {
        let simple_name = fqn.rsplit_once('.').map_or(fqn, |(_, tail)| tail);
        ClassStub {
            fqn: fqn.to_owned(),
            name: simple_name.to_owned(),
            kind: ClassKind::Class,
            // presumably ACC_PUBLIC | ACC_SUPER — confirm against `AccessFlags`.
            access: AccessFlags::new(0x0021),
            superclass: Some("java.lang.Object".to_owned()),
            interfaces: Vec::new(),
            methods: Vec::new(),
            fields: Vec::new(),
            annotations: Vec::new(),
            generic_signature: None,
            inner_classes: Vec::new(),
            lambda_targets: Vec::new(),
            module: None,
            record_components: Vec::new(),
            enum_constants: Vec::new(),
            source_file: None,
            source_jar: None,
            kotlin_metadata: None,
            scala_signature: None,
        }
    }

    /// Write `content` to `dir/name` and return the resulting path; the file
    /// only needs to be hashable, not a valid JAR.
    fn write_jar(dir: &Path, name: &str, content: &[u8]) -> PathBuf {
        let jar = dir.join(name);
        fs::write(&jar, content).unwrap();
        jar
    }

    /// Create a temporary project root plus a cache rooted in it. The
    /// `TempDir` must be kept alive for as long as the cache is used.
    fn fresh_cache() -> (TempDir, StubCache) {
        let root = TempDir::new().unwrap();
        let cache = StubCache::new(root.path());
        (root, cache)
    }

    #[test]
    fn test_cache_miss_returns_none() {
        let (root, cache) = fresh_cache();
        let jar = write_jar(root.path(), "test.jar", b"some jar content");

        assert!(cache.get(&jar).is_none());
    }

    #[test]
    fn test_cache_hit_returns_stubs() {
        let (root, cache) = fresh_cache();
        let jar = write_jar(root.path(), "test.jar", b"some jar content");
        let names = ["com.example.Foo", "com.example.Bar"];
        let stubs: Vec<ClassStub> = names.iter().copied().map(make_stub).collect();

        cache.put(&jar, &stubs).unwrap();
        let cached = cache.get(&jar).unwrap();

        assert_eq!(cached.len(), 2);
        let fqns: Vec<&str> = cached.iter().map(|s| s.fqn.as_str()).collect();
        assert_eq!(fqns, names);
    }

    #[test]
    fn test_hash_change_triggers_miss() {
        let (root, cache) = fresh_cache();

        let jar = write_jar(root.path(), "test.jar", b"version 1");
        cache.put(&jar, &[make_stub("com.example.Foo")]).unwrap();

        // Same path, different bytes: the key is derived from the content,
        // so the entry stored above no longer matches.
        write_jar(root.path(), "test.jar", b"version 2");

        assert!(cache.get(&jar).is_none());
    }

    #[test]
    fn test_corrupt_cache_returns_none() {
        let (root, cache) = fresh_cache();
        let jar = write_jar(root.path(), "test.jar", b"some jar content");
        let key = StubCache::cache_key(&jar).unwrap();

        // Plant garbage where the entry for this JAR would live.
        let entry = root
            .path()
            .join(CACHE_SUBDIR)
            .join(format!("{key}.{CACHE_EXTENSION}"));
        fs::create_dir_all(entry.parent().unwrap()).unwrap();
        fs::write(&entry, b"corrupt data").unwrap();

        // The lookup must report a miss and delete the unreadable entry.
        assert!(cache.get(&jar).is_none());
        assert!(!entry.exists(), "corrupt cache file should be removed");
    }

    #[test]
    fn test_postcard_roundtrip() {
        let names = ["com.example.Foo", "com.example.Bar", "com.example.Baz"];
        let stubs: Vec<ClassStub> = names.iter().copied().map(make_stub).collect();

        let encoded = postcard::to_allocvec(&stubs).unwrap();
        let decoded: Vec<ClassStub> = postcard::from_bytes(&encoded).unwrap();

        assert_eq!(decoded.len(), 3);
        let fqns: Vec<&str> = decoded.iter().map(|s| s.fqn.as_str()).collect();
        assert_eq!(fqns, names);
    }

    #[test]
    fn test_cache_empty_stubs() {
        let (root, cache) = fresh_cache();
        let jar = write_jar(root.path(), "empty.jar", b"empty jar content");
        let no_stubs: Vec<ClassStub> = Vec::new();

        cache.put(&jar, &no_stubs).unwrap();

        assert!(cache.get(&jar).unwrap().is_empty());
    }

    #[test]
    fn test_cache_key_is_deterministic() {
        let root = TempDir::new().unwrap();
        let jar = write_jar(root.path(), "test.jar", b"deterministic content");

        let first = StubCache::cache_key(&jar).unwrap();
        let second = StubCache::cache_key(&jar).unwrap();

        assert_eq!(first, second);
        // 16 bytes hex-encoded = 32 hex chars.
        assert_eq!(first.len(), 32);
    }

    #[test]
    fn test_cache_nonexistent_jar() {
        let (_root, cache) = fresh_cache();

        let outcome = cache.put(Path::new("/nonexistent/foo.jar"), &[]);

        assert!(outcome.is_err());
    }
}