Skip to main content

php_lsp/
cache.rs

1//! Persistent on-disk cache for Phase K.
2//!
3//! The cache stores a serialized `FileIndex` per PHP file, keyed on
4//! `(uri, content)`.  On a warm start `scan_workspace` reads the cached index
5//! instead of parsing the file, shrinking cold-start I/O from O(parse) to
6//! O(read + bincode-decode) — roughly 10–50× faster per file.
7//!
8//! ## Layout
9//!
10//! ```text
11//! ~/.cache/php-lsp/<schema-version>/<workspace-hash>/<entry-hash>.bin
12//! ```
13//!
//! - `<schema-version>` — `php-lsp` crate version combined with the pinned
//!   `mir-codebase` version and the `FileIndex` schema tag; bumping any of
//!   them rotates the entire cache so old entries are never decoded against
//!   a newer schema.
16//! - `<workspace-hash>` — blake3 of the canonicalized absolute path of the
17//!   first workspace root, truncated to 16 hex chars. Two separate projects
18//!   get isolated caches; two checkouts of the same project at the same
19//!   absolute path share one.
20//! - `<entry-hash>` — blake3 of the bytes `uri || 0x00 || content`, truncated
21//!   to 32 hex chars. Editing a file changes the content → new key → cache
22//!   miss; a different file at the same URI also gets a different key.
23//!
24//! ## Format
25//!
26//! `bincode` v2 (binary, fast, schema-stable via serde derives on
27//! `FileIndex` et al). Files are written atomically via a temp-file rename
28//! to avoid half-written entries on an interrupted shutdown.
29//!
30//! ## Invalidation
31//!
//! Rotating the schema version invalidates everything; rotating the content
//! invalidates one file. There's no LRU or orphan sweep yet — the only
//! cleanup is a coarse size cap: a workspace cache that exceeds
//! `CACHE_SIZE_CAP` at startup is wiped and rebuilt by the next scan.
35
36use std::io::{self, Write};
37use std::path::{Path, PathBuf};
38
39use serde::{Serialize, de::DeserializeOwned};
40
/// Opaque identifier for one cache entry.
///
/// Callers obtain a key from [`WorkspaceCache::key_for`] and hand it back
/// unchanged to the cache's read/write methods; the wrapped string doubles
/// as the entry's file stem on disk.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CacheKey(String);

impl CacheKey {
    /// Borrow the key as the file stem of its on-disk entry.
    fn as_filename(&self) -> &str {
        self.0.as_str()
    }
}
51
/// Handle to one workspace's cache directory.
///
/// The handle is nothing more than the directory path: it carries no
/// mutable state, is cheap to build and to clone, and can be shared across
/// threads (for example behind an `Arc`).
#[derive(Debug, Clone)]
pub struct WorkspaceCache {
    /// Directory holding this workspace's cache entries.
    dir: PathBuf,
}
59
/// Upper bound, in bytes, on the size of one workspace's cache directory.
///
/// A directory found above this limit at startup is reset outright rather
/// than trimmed entry by entry: that is simpler than LRU eviction, and the
/// rebuild cost is bounded — the next workspace scan simply runs as if
/// cold. 512 MiB fits a mega-workspace (50 k files × ~10 KB average
/// `FileIndex`) with headroom, yet is small enough that no reasonable disk
/// will choke on it.
pub const CACHE_SIZE_CAP: u64 = 512 << 20;
67
68impl WorkspaceCache {
69    /// Create (or re-open) the cache directory for a workspace rooted at
70    /// `root`. Returns `None` when the system has no usable home/cache
71    /// directory — callers should treat that as "cache disabled" and
72    /// proceed without persistence.
73    ///
74    /// If the existing cache directory exceeds [`CACHE_SIZE_CAP`], it is
75    /// cleared before the handle is returned. That's a coarse knob —
76    /// K3 could refine to LRU-by-mtime — but crossing 512 MiB at
77    /// startup indicates the workspace has churned through many
78    /// content hashes and the rebuild cost is bounded to one full
79    /// re-scan.
80    pub fn new(root: &Path) -> Option<Self> {
81        let base = cache_base_dir()?;
82        let schema = schema_version();
83        let workspace = workspace_hash(root);
84        let dir = base.join("php-lsp").join(schema).join(workspace);
85        std::fs::create_dir_all(&dir).ok()?;
86        let cache = Self { dir };
87        if cache.size_bytes().unwrap_or(0) > CACHE_SIZE_CAP {
88            let _ = cache.clear();
89        }
90        Some(cache)
91    }
92
93    /// Total bytes consumed by `.bin` entries in this workspace's cache
94    /// directory. Cheap (one `read_dir` pass, no recursion into
95    /// subdirectories because the layout is flat).
96    pub fn size_bytes(&self) -> io::Result<u64> {
97        let mut total = 0u64;
98        let entries = match std::fs::read_dir(&self.dir) {
99            Ok(e) => e,
100            Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(0),
101            Err(e) => return Err(e),
102        };
103        for entry in entries.flatten() {
104            let meta = match entry.metadata() {
105                Ok(m) => m,
106                Err(_) => continue,
107            };
108            if meta.is_file() {
109                total = total.saturating_add(meta.len());
110            }
111        }
112        Ok(total)
113    }
114
115    /// Override the root directory directly. Intended for tests; the
116    /// directory is used verbatim (no schema / workspace subdirectories
117    /// are appended).
118    #[cfg(test)]
119    pub fn with_dir(dir: PathBuf) -> Self {
120        Self { dir }
121    }
122
123    /// Build a cache key for a single file. Combines `uri` and `content`
124    /// so that two files with identical content but different URIs get
125    /// different keys (StubSlice bakes `file` into its payload).
126    pub fn key_for(uri: &str, content: &str) -> CacheKey {
127        let mut hasher = blake3::Hasher::new();
128        hasher.update(uri.as_bytes());
129        hasher.update(&[0u8]);
130        hasher.update(content.as_bytes());
131        let full = hasher.finalize().to_hex();
132        // 32 hex chars = 128 bits, ample collision resistance for
133        // workspaces with millions of files (birthday bound ≫ 10^18).
134        CacheKey(full.as_str()[..32].to_string())
135    }
136
137    /// Deserialize a previously-cached value. Returns `None` on any I/O
138    /// or decode failure — a corrupted entry should look identical to a
139    /// missing one so callers fall through to the recompute path.
140    pub fn read<T: DeserializeOwned>(&self, key: &CacheKey) -> Option<T> {
141        let path = self.path_for(key);
142        let bytes = std::fs::read(&path).ok()?;
143        let config = bincode::config::standard();
144        bincode::serde::decode_from_slice(&bytes, config)
145            .ok()
146            .map(|(v, _len)| v)
147    }
148
149    /// Atomically publish an entry to the cache. Writes to a sibling
150    /// temp file then renames, so readers never see a half-written
151    /// payload even if the process dies mid-write.
152    pub fn write<T: Serialize>(&self, key: &CacheKey, value: &T) -> io::Result<()> {
153        let path = self.path_for(key);
154        let tmp = path.with_extension("tmp");
155        let config = bincode::config::standard();
156        let bytes = bincode::serde::encode_to_vec(value, config)
157            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
158        {
159            let mut f = std::fs::File::create(&tmp)?;
160            f.write_all(&bytes)?;
161            f.sync_all()?;
162        }
163        std::fs::rename(&tmp, &path)?;
164        Ok(())
165    }
166
167    /// Drop every entry in this workspace's cache. Safe to call while
168    /// other threads are reading — individual `read` calls that race
169    /// with a `clear` will see `None` rather than garbage, and the next
170    /// `write` recreates the entry.
171    pub fn clear(&self) -> io::Result<()> {
172        if self.dir.exists() {
173            std::fs::remove_dir_all(&self.dir)?;
174            std::fs::create_dir_all(&self.dir)?;
175        }
176        Ok(())
177    }
178
179    fn path_for(&self, key: &CacheKey) -> PathBuf {
180        self.dir.join(format!("{}.bin", key.as_filename()))
181    }
182}
183
/// Platform cache directory, or `None` when none can be determined.
///
/// Resolution order:
///
/// 1. `$XDG_CACHE_HOME`, when set to a non-empty **absolute** path
///    (honoured on every platform).
/// 2. `%LOCALAPPDATA%` on Windows, `$HOME/.cache` elsewhere, when the
///    variable is set and non-empty.
///
/// A relative `XDG_CACHE_HOME` is ignored, as the XDG Base Directory
/// specification requires; otherwise the cache location would depend on
/// whatever working directory the server happened to be started from.
///
/// Deliberately doesn't depend on the `dirs` crate — keeps the footprint
/// small and the behaviour predictable.
fn cache_base_dir() -> Option<PathBuf> {
    // Treat "set but empty" the same as "unset".
    let non_empty = |name: &str| std::env::var_os(name).filter(|value| !value.is_empty());

    non_empty("XDG_CACHE_HOME")
        .map(PathBuf::from)
        .filter(|path| path.is_absolute())
        .or_else(|| {
            if cfg!(windows) {
                non_empty("LOCALAPPDATA").map(PathBuf::from)
            } else {
                non_empty("HOME").map(|home| PathBuf::from(home).join(".cache"))
            }
        })
}
206
207/// Bump this constant (and the matching literal in [`schema_version`]) when
208/// `FileIndex` or any type it contains gains, loses, or renames a field.
209/// Rotating it causes every cached entry to be treated as a miss on the next
210/// cold start, regardless of whether the crate version changed.
211pub const FILE_INDEX_SCHEMA: &str = "fi-v1";
212
213/// Schema marker: bumping `php-lsp` crate version, `mir-codebase` version,
214/// or [`FILE_INDEX_SCHEMA`] invalidates every cached entry. The hardcoded mir
215/// version is a trade-off: keeping it in source means we don't depend on
216/// `build.rs` introspection, at the cost of needing to remember to update it
217/// alongside `Cargo.toml`.
218///
219/// **Important**: the `"fi-v1"` literal here must stay in sync with
220/// `FILE_INDEX_SCHEMA`. `concat!` requires a literal — bump both together.
221fn schema_version() -> &'static str {
222    concat!(env!("CARGO_PKG_VERSION"), "-mir-0.7-fi-v1")
223}
224
225fn workspace_hash(root: &Path) -> String {
226    let canonical = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
227    let hex = blake3::hash(canonical.as_os_str().as_encoded_bytes()).to_hex();
228    hex.as_str()[..16].to_string()
229}
230
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Minimal serde payload used to exercise the cache without pulling in
    /// the real `FileIndex`.
    #[derive(Serialize, serde::Deserialize, PartialEq, Debug)]
    struct SamplePayload {
        name: String,
        values: Vec<u32>,
    }

    /// A cache rooted in its own temp directory. The `TempDir` guard is
    /// returned so the directory outlives the test body.
    fn temp_cache() -> (TempDir, WorkspaceCache) {
        let dir = TempDir::new().unwrap();
        let cache = WorkspaceCache::with_dir(dir.path().to_path_buf());
        (dir, cache)
    }

    /// Shorthand for building a `SamplePayload`.
    fn sample(name: &str, values: Vec<u32>) -> SamplePayload {
        SamplePayload {
            name: name.to_string(),
            values,
        }
    }

    #[test]
    fn key_for_is_deterministic_per_uri_and_content() {
        let k1 = WorkspaceCache::key_for("file:///a.php", "<?php echo 1;");
        let k2 = WorkspaceCache::key_for("file:///a.php", "<?php echo 1;");
        assert_eq!(k1, k2);
    }

    #[test]
    fn key_for_differs_when_content_differs() {
        let k1 = WorkspaceCache::key_for("file:///a.php", "<?php echo 1;");
        let k2 = WorkspaceCache::key_for("file:///a.php", "<?php echo 2;");
        assert_ne!(k1, k2);
    }

    #[test]
    fn key_for_differs_when_uri_differs() {
        // Same content, different URI — the URI is part of the hashed input.
        let k1 = WorkspaceCache::key_for("file:///a.php", "<?php");
        let k2 = WorkspaceCache::key_for("file:///b.php", "<?php");
        assert_ne!(k1, k2);
    }

    #[test]
    fn key_for_separator_prevents_boundary_collisions() {
        // Without the 0x00 separator both inputs would hash the identical
        // byte stream "file:///a.phpx<?php".
        let k1 = WorkspaceCache::key_for("file:///a.php", "x<?php");
        let k2 = WorkspaceCache::key_for("file:///a.phpx", "<?php");
        assert_ne!(k1, k2);
    }

    #[test]
    fn write_then_read_round_trips() {
        let (_dir, cache) = temp_cache();
        let key = WorkspaceCache::key_for("file:///x.php", "<?php");
        let payload = sample("x", vec![1, 2, 3]);
        cache.write(&key, &payload).unwrap();
        let decoded: SamplePayload = cache.read(&key).unwrap();
        assert_eq!(decoded, payload);
    }

    #[test]
    fn read_returns_none_for_missing_key() {
        let (_dir, cache) = temp_cache();
        let missing = WorkspaceCache::key_for("file:///nope.php", "");
        let decoded: Option<SamplePayload> = cache.read(&missing);
        assert!(decoded.is_none());
    }

    #[test]
    fn read_returns_none_for_corrupted_entry() {
        let (_dir, cache) = temp_cache();
        let key = WorkspaceCache::key_for("file:///c.php", "<?php");
        // Write garbage bytes directly into the slot the cache would use.
        std::fs::write(cache.path_for(&key), b"not valid bincode").unwrap();
        let decoded: Option<SamplePayload> = cache.read(&key);
        assert!(
            decoded.is_none(),
            "corrupted entry must look missing, not panic"
        );
    }

    #[test]
    fn write_is_atomic_via_rename() {
        // If the write path didn't go through a temp file, a crash
        // mid-`write_all` could leave a half-written `.bin`. We can't
        // easily simulate a crash, but we can at least assert that a
        // successful write leaves nothing except the published entry —
        // without depending on how the temp file is named.
        let (dir, cache) = temp_cache();
        let key = WorkspaceCache::key_for("file:///atomic.php", "<?php");
        cache.write(&key, &sample("a", vec![])).unwrap();

        let published = cache.path_for(&key);
        assert!(published.exists(), "entry should be published");
        let leftovers: Vec<PathBuf> = std::fs::read_dir(dir.path())
            .unwrap()
            .map(|entry| entry.unwrap().path())
            .filter(|path| *path != published)
            .collect();
        assert!(
            leftovers.is_empty(),
            "tmp file should be removed by rename, found: {leftovers:?}"
        );
    }

    #[test]
    fn clear_drops_all_entries() {
        let (_dir, cache) = temp_cache();
        let keys: Vec<CacheKey> = (0..3)
            .map(|i| WorkspaceCache::key_for(&format!("file:///c{i}.php"), ""))
            .collect();
        for (i, key) in keys.iter().enumerate() {
            cache.write(key, &sample(&i.to_string(), vec![])).unwrap();
        }

        cache.clear().unwrap();

        for key in &keys {
            let decoded: Option<SamplePayload> = cache.read(key);
            assert!(decoded.is_none());
        }
        assert_eq!(cache.size_bytes().unwrap(), 0);

        // The cache must remain usable after a clear.
        cache.write(&keys[0], &sample("again", vec![7])).unwrap();
        let decoded: SamplePayload = cache.read(&keys[0]).unwrap();
        assert_eq!(decoded, sample("again", vec![7]));
    }

    #[test]
    fn size_bytes_sums_flat_bin_files() {
        let (_dir, cache) = temp_cache();
        assert_eq!(cache.size_bytes().unwrap(), 0);

        let key1 = WorkspaceCache::key_for("file:///s1.php", "<?php");
        cache.write(&key1, &sample("s1", vec![0u32; 16])).unwrap();
        let key2 = WorkspaceCache::key_for("file:///s2.php", "<?php");
        cache.write(&key2, &sample("s2", vec![0u32; 16])).unwrap();

        let total = cache.size_bytes().unwrap();
        let expected1 = cache.path_for(&key1).metadata().unwrap().len();
        let expected2 = cache.path_for(&key2).metadata().unwrap().len();
        assert_eq!(total, expected1 + expected2);
    }

    #[test]
    fn file_index_round_trips() {
        use crate::ast::ParsedDoc;
        use crate::file_index::FileIndex;

        let (_dir, cache) = temp_cache();
        let src = "<?php\nnamespace App;\nclass Foo { public function bar(): string {} }";
        let key = WorkspaceCache::key_for("file:///Foo.php", src);

        let doc = ParsedDoc::parse(src.to_string());
        let index = FileIndex::extract(&doc);
        cache.write(&key, &index).unwrap();

        let decoded: FileIndex = cache.read(&key).unwrap();
        assert_eq!(decoded.namespace.as_deref(), Some("App"));
        assert_eq!(decoded.classes.len(), 1);
        assert_eq!(decoded.classes[0].name.as_ref(), "Foo");
        assert_eq!(decoded.classes[0].methods.len(), 1);
        assert_eq!(decoded.classes[0].methods[0].name.as_ref(), "bar");
    }
}