php-lsp 0.11.0

A PHP Language Server Protocol implementation
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
//! Persistent on-disk cache for Phase K.
//!
//! The cache stores a serialized `FileIndex` per PHP file, keyed on
//! `(uri, content)`.  On a warm start `scan_workspace` reads the cached index
//! instead of parsing the file, shrinking cold-start I/O from O(parse) to
//! O(read + bincode-decode) — roughly 10–50× faster per file.
//!
//! ## Layout
//!
//! ```text
//! ~/.cache/php-lsp/<schema-version>/<workspace-hash>/<entry-hash>.bin
//! ```
//!
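//! - `<schema-version>` — the `php-lsp` crate version combined with the
//!   `mir` version and the `FileIndex` schema marker (see `schema_version`);
//!   bumping any of them rotates the entire cache so old entries are never
//!   decoded against a newer schema.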
//! - `<workspace-hash>` — blake3 of the canonicalized absolute path of the
//!   first workspace root, truncated to 16 hex chars. Two separate projects
//!   get isolated caches; two checkouts of the same project at the same
//!   absolute path share one.
//! - `<entry-hash>` — blake3 of the bytes `uri || 0x00 || content`, truncated
//!   to 32 hex chars. Editing a file changes the content → new key → cache
//!   miss; a different file at the same URI also gets a different key.
//!
//! ## Format
//!
//! `bincode` v2 (binary, fast, schema-stable via serde derives on
//! `FileIndex` et al). Files are written atomically via a temp-file rename
//! to avoid half-written entries on an interrupted shutdown.
//!
//! ## Invalidation
//!
//! Rotating the schema version invalidates everything; rotating the content
//! invalidates one file. There's no LRU or orphan sweep yet — instead,
//! `WorkspaceCache::new` clears the whole workspace directory once it grows
//! past `CACHE_SIZE_CAP`.

use std::io::{self, Write};
use std::path::{Path, PathBuf};

use serde::{Serialize, de::DeserializeOwned};

/// Opaque identifier for one cache entry.
///
/// Callers obtain a key from [`WorkspaceCache::key_for`] or
/// [`WorkspaceCache::key_for_stat`] and hand it back unchanged to
/// `read`/`write`. The wrapped string is used as the stem of the entry's
/// file name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CacheKey(String);

impl CacheKey {
    /// Borrow the key text that names the entry on disk.
    fn as_filename(&self) -> &str {
        self.0.as_str()
    }
}

/// Handle to the cache directory for a single workspace.
///
/// The handle stores only the directory path and holds no mutable state, so
/// it can be cloned freely or shared across threads via `Arc`.
/// [`WorkspaceCache::new`] creates the directory eagerly when the handle is
/// constructed.
#[derive(Debug, Clone)]
pub struct WorkspaceCache {
    // Directory holding this workspace's flat set of entry files.
    dir: PathBuf,
}

/// Upper bound, in bytes, on the on-disk size of one workspace's cache
/// directory (512 MiB).
///
/// [`WorkspaceCache::new`] compares the directory's size against this value
/// at startup and wipes the directory when it is exceeded instead of
/// evicting individual entries; the next workspace scan then repopulates
/// the cache as if it were cold, so the rebuild cost is bounded. The figure
/// is sized for a very large workspace — on the order of 50 k files at
/// ~10 KB of serialized `FileIndex` each.
pub const CACHE_SIZE_CAP: u64 = 512 << 20;

impl WorkspaceCache {
    /// Open the cache directory for the workspace rooted at `root`,
    /// creating it if it does not exist yet.
    ///
    /// The directory is
    /// `<cache base>/php-lsp/<schema version>/<workspace hash>`. Returns
    /// `None` when no cache base directory can be determined or the
    /// directory cannot be created — callers should treat that as "cache
    /// disabled" and carry on without persistence.
    ///
    /// If the directory already holds more than [`CACHE_SIZE_CAP`] bytes it
    /// is wiped before the handle is returned. This is deliberately coarse
    /// (no per-entry eviction): exceeding the cap at startup means the
    /// workspace has accumulated many stale entries, and the cost of the
    /// reset is one full re-scan.
    pub fn new(root: &Path) -> Option<Self> {
        let dir = cache_base_dir()?
            .join("php-lsp")
            .join(schema_version())
            .join(workspace_hash(root));
        std::fs::create_dir_all(&dir).ok()?;

        let cache = Self { dir };
        // A failed size probe is treated as "under the cap".
        let over_cap = matches!(cache.size_bytes(), Ok(bytes) if bytes > CACHE_SIZE_CAP);
        if over_cap {
            // Best effort: a failed reset still yields a usable handle.
            let _ = cache.clear();
        }
        Some(cache)
    }

    /// Borrow the path of the directory this handle reads from and writes to.
    pub fn cache_dir(&self) -> &std::path::Path {
        self.dir.as_path()
    }

    /// Total size, in bytes, of the regular files directly inside this
    /// workspace's cache directory.
    ///
    /// A single `read_dir` pass with no recursion — the layout is flat, so
    /// subdirectories are not descended into. A missing directory counts as
    /// `0`; entries that cannot be listed or whose metadata cannot be read
    /// are skipped. The sum saturates instead of overflowing.
    ///
    /// # Errors
    ///
    /// Returns the `read_dir` error for any failure other than `NotFound`.
    pub fn size_bytes(&self) -> io::Result<u64> {
        let listing = match std::fs::read_dir(&self.dir) {
            Ok(listing) => listing,
            Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(0),
            Err(err) => return Err(err),
        };
        let total = listing
            .flatten()
            .filter_map(|entry| entry.metadata().ok())
            .filter(|meta| meta.is_file())
            .fold(0u64, |sum, meta| sum.saturating_add(meta.len()));
        Ok(total)
    }

    /// Test-only constructor that wraps `dir` as-is: no schema or workspace
    /// subdirectories are appended and nothing is created on disk.
    #[cfg(test)]
    pub fn with_dir(dir: PathBuf) -> Self {
        WorkspaceCache { dir }
    }

    /// Build a cache key from a file's URI and full content.
    ///
    /// The key is the first 32 hex characters of
    /// `blake3(uri || 0x00 || content)`, so identical content under two
    /// different URIs yields two different keys. Used in tests and legacy
    /// code paths.
    pub fn key_for(uri: &str, content: &str) -> CacheKey {
        let digest = blake3::Hasher::new()
            .update(uri.as_bytes())
            .update(&[0u8]) // separator between uri and content
            .update(content.as_bytes())
            .finalize();
        let hex = digest.to_hex();
        CacheKey(hex.as_str()[..32].to_owned())
    }

    /// Build a cache key from file metadata rather than file content.
    ///
    /// Hashing whole files costs roughly 1 ms of CPU per file on a warm
    /// start, which eats most of what skipping the parse saves. Keying on
    /// `(uri, mtime, size)` is constant work per file and changes whenever
    /// either stat field changes.
    ///
    /// Trade-offs against the content hash: a bare `touch` invalidates the
    /// entry (a harmless miss followed by one re-parse), and clock skew on
    /// network mounts can in theory yield a stale hit. Both are acceptable
    /// for a developer tool where a spurious miss is safe.
    ///
    /// The hashed stream is `uri || 0x01 || mtime_secs (LE) || size (LE)`;
    /// the `0x01` tag keeps these keys in a separate domain from
    /// [`WorkspaceCache::key_for`], which uses `0x00`.
    pub fn key_for_stat(uri: &str, mtime_secs: u64, size: u64) -> CacheKey {
        let mtime_le = mtime_secs.to_le_bytes();
        let size_le = size.to_le_bytes();

        let mut hasher = blake3::Hasher::new();
        for part in [uri.as_bytes(), &[1u8][..], &mtime_le[..], &size_le[..]] {
            hasher.update(part);
        }

        let hex = hasher.finalize().to_hex();
        CacheKey(String::from(&hex.as_str()[..32]))
    }

    /// Load and decode the entry stored under `key`.
    ///
    /// Any failure — missing file, unreadable file, or bytes that do not
    /// decode as `T` — yields `None`, so a corrupted entry is
    /// indistinguishable from a missing one and callers simply fall through
    /// to recomputing the value.
    pub fn read<T: DeserializeOwned>(&self, key: &CacheKey) -> Option<T> {
        let raw = std::fs::read(self.path_for(key)).ok()?;
        let decoded: Result<(T, usize), _> =
            bincode::serde::decode_from_slice(&raw, bincode::config::standard());
        match decoded {
            // The second tuple field is the number of bytes consumed; unused.
            Ok((value, _consumed)) => Some(value),
            Err(_) => None,
        }
    }

    /// Atomically publish an entry to the cache.
    ///
    /// The encoded payload is written to a sibling temp file and then
    /// renamed over the final `.bin` path, so readers never see a
    /// half-written payload even if the process dies mid-write.
    ///
    /// The temp file name embeds the process id and a per-process sequence
    /// number. Concurrent writers of the same key — other threads, or a
    /// second server instance sharing this cache directory — therefore each
    /// write their own temp file instead of truncating or renaming one
    /// another's. If creating, writing, or renaming fails, the temp file is
    /// removed (best effort) rather than left behind.
    ///
    /// No fsync: the cache is advisory-only — a crash that loses a write
    /// just produces a cache miss on the next startup, which safely falls
    /// back to re-parsing. Skipping sync_all() avoids 5–15 ms per file on
    /// macOS, which on a 1,500-file project accounts for most of the cold
    /// indexing time.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error when `value` cannot be encoded, or the
    /// underlying I/O error when the temp file cannot be created, written,
    /// or renamed into place.
    pub fn write<T: Serialize>(&self, key: &CacheKey, value: &T) -> io::Result<()> {
        // Distinguishes temp files created by different calls in this process.
        static TMP_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

        let path = self.path_for(key);
        let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        let tmp = self.dir.join(format!(
            "{}.{}.{}.tmp",
            key.as_filename(),
            std::process::id(),
            seq
        ));

        let config = bincode::config::standard();
        let bytes = bincode::serde::encode_to_vec(value, config)
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        // The file handle is dropped (closed) when the first closure returns,
        // i.e. before the rename runs.
        let published = std::fs::File::create(&tmp)
            .and_then(|mut f| f.write_all(&bytes))
            .and_then(|()| std::fs::rename(&tmp, &path));

        if published.is_err() {
            // Don't leave an orphaned temp file behind; ignore cleanup errors
            // so the original failure is what the caller sees.
            let _ = std::fs::remove_file(&tmp);
        }
        published
    }

    /// Drop every entry in this workspace's cache by removing the directory
    /// tree and recreating it empty.
    ///
    /// A directory that is already gone counts as already clear and is not
    /// recreated. Removal is attempted directly rather than probing with
    /// `exists()` first, so a directory that disappears concurrently (for
    /// example another `clear` running at the same time) is not reported as
    /// an error, and failures to access the directory are surfaced instead
    /// of being mistaken for "nothing to do".
    ///
    /// Safe to call while other threads are reading: a `read` that races
    /// with a `clear` returns `None` rather than garbage. A `write` that
    /// lands while the directory is momentarily absent fails with an I/O
    /// error; later writes succeed once the directory has been recreated.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error when the directory cannot be removed
    /// (for any reason other than `NotFound`) or cannot be recreated.
    pub fn clear(&self) -> io::Result<()> {
        match std::fs::remove_dir_all(&self.dir) {
            Ok(()) => std::fs::create_dir_all(&self.dir),
            Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
            Err(err) => Err(err),
        }
    }

    /// Path of the entry file for `key`: `<cache dir>/<key>.bin`.
    fn path_for(&self, key: &CacheKey) -> PathBuf {
        let mut file_name = String::from(key.as_filename());
        file_name.push_str(".bin");
        self.dir.join(file_name)
    }
}

/// Platform cache directory.
///
/// Resolution order:
///
/// 1. `$XDG_CACHE_HOME`, when it is set, non-empty and an absolute path.
///    The XDG Base Directory specification requires relative values to be
///    ignored; honouring one would place the cache relative to whatever the
///    current working directory happens to be.
/// 2. `%LOCALAPPDATA%` on Windows, or `$HOME/.cache` everywhere else, when
///    the variable is set and non-empty.
///
/// Returns `None` when none of these yields a directory. Deliberately
/// doesn't depend on the `dirs` crate — keeps the footprint small and the
/// behaviour predictable.
fn cache_base_dir() -> Option<PathBuf> {
    // An environment variable counts only when it is set and non-empty.
    let env_path = |name: &str| -> Option<PathBuf> {
        std::env::var_os(name)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
    };

    if let Some(xdg) = env_path("XDG_CACHE_HOME").filter(|path| path.is_absolute()) {
        return Some(xdg);
    }

    if cfg!(windows) {
        env_path("LOCALAPPDATA")
    } else {
        env_path("HOME").map(|home| home.join(".cache"))
    }
}

/// Schema marker for the serialized `FileIndex` layout.
///
/// Bump this constant (and the matching suffix of the literal in
/// [`schema_version`]) when `FileIndex` or any type it contains gains,
/// loses, or renames a field. Rotating it causes every cached entry to be
/// treated as a miss on the next cold start, regardless of whether the
/// crate version changed.
pub const FILE_INDEX_SCHEMA: &str = "fi-v4";

/// Name of the per-schema cache subdirectory.
///
/// Combines the `php-lsp` crate version, the `mir-codebase` version and
/// [`FILE_INDEX_SCHEMA`]; bumping any of them invalidates every cached
/// entry. The hardcoded mir version is a trade-off: keeping it in source
/// means we don't depend on `build.rs` introspection, at the cost of needing
/// to remember to update it alongside `Cargo.toml`.
///
/// **Important**: `concat!` requires literals, so the value of
/// `FILE_INDEX_SCHEMA` is repeated by hand at the end of the literal below —
/// bump both together. The compile-time check in the body fails the build
/// if the two ever drift apart.
fn schema_version() -> &'static str {
    const VERSION: &str = concat!(env!("CARGO_PKG_VERSION"), "-mir-0.7-fi-v4");

    // Compile-time guard: VERSION must end with FILE_INDEX_SCHEMA.
    const _: () = {
        let version = VERSION.as_bytes();
        let schema = FILE_INDEX_SCHEMA.as_bytes();
        assert!(
            version.len() >= schema.len(),
            "schema_version literal is shorter than FILE_INDEX_SCHEMA"
        );
        let offset = version.len() - schema.len();
        let mut i = 0;
        while i < schema.len() {
            assert!(
                version[offset + i] == schema[i],
                "schema_version literal must end with FILE_INDEX_SCHEMA"
            );
            i += 1;
        }
    };

    VERSION
}

/// Derive the per-workspace directory name from `root`.
///
/// The path is canonicalized when possible (falling back to the path as
/// given when that fails), hashed with blake3, and the first 16 hex
/// characters of the digest are returned.
fn workspace_hash(root: &Path) -> String {
    let resolved = match root.canonicalize() {
        Ok(path) => path,
        Err(_) => root.to_path_buf(),
    };
    let digest = blake3::hash(resolved.as_os_str().as_encoded_bytes());
    let mut name = digest.to_hex().to_string();
    name.truncate(16);
    name
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Minimal serde payload used to exercise the read/write path without
    /// depending on the real `FileIndex` type.
    #[derive(Serialize, serde::Deserialize, PartialEq, Debug)]
    struct SamplePayload {
        name: String,
        values: Vec<u32>,
    }

    #[test]
    fn key_for_is_deterministic_per_uri_and_content() {
        let k1 = WorkspaceCache::key_for("file:///a.php", "<?php echo 1;");
        let k2 = WorkspaceCache::key_for("file:///a.php", "<?php echo 1;");
        assert_eq!(k1, k2);
    }

    #[test]
    fn key_for_differs_when_content_differs() {
        let k1 = WorkspaceCache::key_for("file:///a.php", "<?php echo 1;");
        let k2 = WorkspaceCache::key_for("file:///a.php", "<?php echo 2;");
        assert_ne!(k1, k2);
    }

    #[test]
    fn key_for_differs_when_uri_differs() {
        // Same content, different URI — the URI is part of the hashed input.
        let k1 = WorkspaceCache::key_for("file:///a.php", "<?php");
        let k2 = WorkspaceCache::key_for("file:///b.php", "<?php");
        assert_ne!(k1, k2);
    }

    #[test]
    fn key_for_separator_prevents_boundary_collisions() {
        // Both pairs concatenate to the same bytes ("file:///a.php<?php");
        // only the separator byte between uri and content tells them apart.
        let k1 = WorkspaceCache::key_for("file:///a.php", "<?php");
        let k2 = WorkspaceCache::key_for("file:///a", ".php<?php");
        assert_ne!(k1, k2);
    }

    #[test]
    fn key_for_stat_is_deterministic_and_sensitive_to_inputs() {
        let base = WorkspaceCache::key_for_stat("file:///a.php", 1_700_000_000, 42);
        assert_eq!(
            base,
            WorkspaceCache::key_for_stat("file:///a.php", 1_700_000_000, 42)
        );
        // Any one of uri / mtime / size changing must change the key.
        assert_ne!(
            base,
            WorkspaceCache::key_for_stat("file:///b.php", 1_700_000_000, 42)
        );
        assert_ne!(
            base,
            WorkspaceCache::key_for_stat("file:///a.php", 1_700_000_001, 42)
        );
        assert_ne!(
            base,
            WorkspaceCache::key_for_stat("file:///a.php", 1_700_000_000, 43)
        );
    }

    #[test]
    fn key_for_stat_does_not_collide_with_key_for() {
        // After the domain byte both inputs hash 16 zero bytes; the keys
        // must still differ because the domain bytes (0x01 vs 0x00) differ.
        let zeros = "\0".repeat(16);
        let stat_key = WorkspaceCache::key_for_stat("file:///a.php", 0, 0);
        let content_key = WorkspaceCache::key_for("file:///a.php", &zeros);
        assert_ne!(stat_key, content_key);
    }

    #[test]
    fn write_then_read_round_trips() {
        let dir = TempDir::new().unwrap();
        let cache = WorkspaceCache::with_dir(dir.path().to_path_buf());
        let key = WorkspaceCache::key_for("file:///x.php", "<?php");
        let payload = SamplePayload {
            name: "x".into(),
            values: vec![1, 2, 3],
        };
        cache.write(&key, &payload).unwrap();
        let decoded: SamplePayload = cache.read(&key).unwrap();
        assert_eq!(decoded, payload);
    }

    #[test]
    fn read_returns_none_for_missing_key() {
        let dir = TempDir::new().unwrap();
        let cache = WorkspaceCache::with_dir(dir.path().to_path_buf());
        let missing = WorkspaceCache::key_for("file:///nope.php", "");
        let decoded: Option<SamplePayload> = cache.read(&missing);
        assert!(decoded.is_none());
    }

    #[test]
    fn read_returns_none_for_corrupted_entry() {
        let dir = TempDir::new().unwrap();
        let cache = WorkspaceCache::with_dir(dir.path().to_path_buf());
        let key = WorkspaceCache::key_for("file:///c.php", "<?php");
        // Write garbage bytes directly into the slot the cache would use.
        std::fs::write(cache.path_for(&key), b"not valid bincode").unwrap();
        let decoded: Option<SamplePayload> = cache.read(&key);
        assert!(
            decoded.is_none(),
            "corrupted entry must look missing, not panic"
        );
    }

    #[test]
    fn write_is_atomic_via_rename() {
        // If the write path didn't go through a temp file, a crash
        // mid-`write_all` could leave a half-written `.bin`. We can't
        // easily simulate a crash, but we can at least assert the
        // temp-file doesn't linger on success.
        let dir = TempDir::new().unwrap();
        let cache = WorkspaceCache::with_dir(dir.path().to_path_buf());
        let key = WorkspaceCache::key_for("file:///atomic.php", "<?php");
        let payload = SamplePayload {
            name: "a".into(),
            values: vec![],
        };
        cache.write(&key, &payload).unwrap();
        let tmp = cache.path_for(&key).with_extension("tmp");
        assert!(!tmp.exists(), "tmp file should be removed by rename");
    }

    #[test]
    fn clear_drops_all_entries() {
        let dir = TempDir::new().unwrap();
        let cache = WorkspaceCache::with_dir(dir.path().to_path_buf());
        for i in 0..3 {
            let k = WorkspaceCache::key_for(&format!("file:///c{i}.php"), "");
            cache
                .write(
                    &k,
                    &SamplePayload {
                        name: i.to_string(),
                        values: vec![],
                    },
                )
                .unwrap();
        }
        cache.clear().unwrap();
        for i in 0..3 {
            let k = WorkspaceCache::key_for(&format!("file:///c{i}.php"), "");
            let decoded: Option<SamplePayload> = cache.read(&k);
            assert!(decoded.is_none());
        }
    }

    #[test]
    fn size_bytes_sums_flat_bin_files() {
        let dir = TempDir::new().unwrap();
        let cache = WorkspaceCache::with_dir(dir.path().to_path_buf());
        assert_eq!(cache.size_bytes().unwrap(), 0);

        let key1 = WorkspaceCache::key_for("file:///s1.php", "<?php");
        cache
            .write(
                &key1,
                &SamplePayload {
                    name: "s1".into(),
                    values: vec![0u32; 16],
                },
            )
            .unwrap();
        let key2 = WorkspaceCache::key_for("file:///s2.php", "<?php");
        cache
            .write(
                &key2,
                &SamplePayload {
                    name: "s2".into(),
                    values: vec![0u32; 16],
                },
            )
            .unwrap();

        let total = cache.size_bytes().unwrap();
        let expected1 = cache.path_for(&key1).metadata().unwrap().len();
        let expected2 = cache.path_for(&key2).metadata().unwrap().len();
        assert_eq!(total, expected1 + expected2);
    }

    #[test]
    fn file_index_round_trips() {
        use crate::document::ast::ParsedDoc;
        use crate::index::file_index::FileIndex;

        let dir = TempDir::new().unwrap();
        let cache = WorkspaceCache::with_dir(dir.path().to_path_buf());
        let src = "<?php\nnamespace App;\nclass Foo { public function bar(): string {} }";
        let key = WorkspaceCache::key_for("file:///Foo.php", src);

        let doc = ParsedDoc::parse(src.to_string());
        let index = FileIndex::extract(&doc);
        cache.write(&key, &index).unwrap();

        let decoded: FileIndex = cache.read(&key).unwrap();
        assert_eq!(decoded.namespace.as_deref(), Some("App"));
        assert_eq!(decoded.classes.len(), 1);
        assert_eq!(decoded.classes[0].name.as_ref(), "Foo");
        assert_eq!(decoded.classes[0].methods.len(), 1);
        assert_eq!(decoded.classes[0].methods[0].name.as_ref(), "bar");
    }
}