Skip to main content

php_lsp/
document_store.rs

1use std::collections::VecDeque;
2use std::sync::{Arc, Mutex};
3
4use dashmap::DashMap;
5use tower_lsp::lsp_types::{Diagnostic, SemanticToken, Url};
6
7use crate::ast::ParsedDoc;
8use crate::diagnostics::parse_document;
9
/// Maximum number of indexed-only (not open in editor) files kept in memory.
///
/// Regular builds use 10 000. Test builds use 3 so that eviction can be
/// exercised without creating 10 k files.
const MAX_INDEXED: usize = if cfg!(test) { 3 } else { 10_000 };
16
17struct Document {
18    /// `Some` when the file is open in the editor; `None` for workspace-indexed files.
19    text: Option<String>,
20    doc: Arc<ParsedDoc>,
21    diagnostics: Vec<Diagnostic>,
22    /// Incremented on every `set_text` call; used to discard stale async parse results.
23    text_version: u64,
24}
25
26pub struct DocumentStore {
27    map: DashMap<Url, Document>,
28    /// Insertion-order queue of indexed-only URIs for LRU eviction.
29    indexed_order: Mutex<VecDeque<Url>>,
30    /// Cached semantic tokens per document: (result_id, tokens).
31    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
32    token_cache: DashMap<Url, (String, Vec<SemanticToken>)>,
33}
34
35impl Default for DocumentStore {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl DocumentStore {
42    pub fn new() -> Self {
43        DocumentStore {
44            map: DashMap::new(),
45            indexed_order: Mutex::new(VecDeque::new()),
46            token_cache: DashMap::new(),
47        }
48    }
49
50    /// Store new text immediately and return a version token for deferred parsing.
51    pub fn set_text(&self, uri: Url, text: String) -> u64 {
52        let mut entry = self.map.entry(uri).or_insert_with(|| Document {
53            text: None,
54            doc: Arc::new(ParsedDoc::default()),
55            diagnostics: vec![],
56            text_version: 0,
57        });
58        entry.text_version += 1;
59        entry.text = Some(text);
60        entry.text_version
61    }
62
63    /// Apply a completed async parse result.
64    /// Returns `true` if the update was applied.
65    pub fn apply_parse(
66        &self,
67        uri: &Url,
68        doc: ParsedDoc,
69        diagnostics: Vec<Diagnostic>,
70        version: u64,
71    ) -> bool {
72        if let Some(mut entry) = self.map.get_mut(uri)
73            && entry.text_version == version
74        {
75            entry.doc = Arc::new(doc);
76            entry.diagnostics = diagnostics;
77            return true;
78        }
79        false
80    }
81
82    pub fn close(&self, uri: &Url) {
83        if let Some(mut entry) = self.map.get_mut(uri) {
84            entry.text = None;
85            entry.text_version += 1;
86            let mut q = self.indexed_order.lock().unwrap();
87            if !q.contains(uri) {
88                q.push_back(uri.clone());
89            }
90        }
91    }
92
93    pub fn index(&self, uri: Url, text: &str) {
94        if self
95            .map
96            .get(&uri)
97            .map(|d| d.text.is_some())
98            .unwrap_or(false)
99        {
100            return;
101        }
102        let (doc, diagnostics) = parse_document(text);
103        self.map.insert(
104            uri.clone(),
105            Document {
106                text: None,
107                doc: Arc::new(doc),
108                diagnostics,
109                text_version: 0,
110            },
111        );
112
113        let mut order = self.indexed_order.lock().unwrap();
114        order.push_back(uri);
115        // Evict enough indexed-only entries to bring the queue back to MAX_INDEXED.
116        // A file that became open after being indexed must be skipped — it will be
117        // re-queued when it is eventually closed.  We must not stop early just
118        // because popping an open file decremented order.len() to MAX_INDEXED;
119        // that would leave the map with too many entries.
120        let need_to_evict = order.len().saturating_sub(MAX_INDEXED);
121        let mut evicted = 0;
122        while evicted < need_to_evict {
123            let Some(oldest) = order.pop_front() else {
124                break;
125            };
126            if self
127                .map
128                .get(&oldest)
129                .map(|d| d.text.is_none())
130                .unwrap_or(false)
131            {
132                self.map.remove(&oldest);
133                evicted += 1;
134            }
135            // If the file is open, discard it from the queue and keep looking.
136        }
137    }
138
139    pub fn remove(&self, uri: &Url) {
140        self.map.remove(uri);
141        self.token_cache.remove(uri);
142    }
143
144    /// Cache the semantic tokens computed for a delta response.
145    /// `result_id` is an opaque string (a hash of the token data) returned to the client.
146    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Vec<SemanticToken>) {
147        self.token_cache.insert(uri.clone(), (result_id, tokens));
148    }
149
150    /// Return the cached tokens if `result_id` matches the stored one.
151    pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Vec<SemanticToken>> {
152        self.token_cache
153            .get(uri)
154            .filter(|e| e.0.as_str() == result_id)
155            .map(|e| e.1.clone())
156    }
157
158    /// Returns the live source text (only for open files).
159    pub fn get(&self, uri: &Url) -> Option<String> {
160        self.map.get(uri).and_then(|d| d.text.clone())
161    }
162
163    /// Returns the parsed document (cheap Arc clone). Always present once indexed.
164    pub fn get_doc(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
165        self.map.get(uri).map(|d| d.doc.clone())
166    }
167
168    pub fn get_diagnostics(&self, uri: &Url) -> Option<Vec<Diagnostic>> {
169        self.map.get(uri).map(|d| d.diagnostics.clone())
170    }
171
172    pub fn all_docs(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
173        self.map
174            .iter()
175            .map(|e| (e.key().clone(), e.value().doc.clone()))
176            .collect()
177    }
178
179    /// Returns `(uri, diagnostics, version)` for every indexed document.
180    /// `version` is `None` for non-open files.
181    pub fn all_diagnostics(&self) -> Vec<(Url, Vec<Diagnostic>, Option<i64>)> {
182        self.map
183            .iter()
184            .map(|e| {
185                let version = if e.value().text.is_some() {
186                    Some(e.value().text_version as i64)
187                } else {
188                    None
189                };
190                (e.key().clone(), e.value().diagnostics.clone(), version)
191            })
192            .collect()
193    }
194
195    pub fn other_docs(&self, uri: &Url) -> Vec<(Url, Arc<ParsedDoc>)> {
196        self.map
197            .iter()
198            .filter(|e| e.key() != uri)
199            .map(|e| (e.key().clone(), e.value().doc.clone()))
200            .collect()
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    // ── Helpers ──────────────────────────────────────────────────────────────

    /// Builds a `file://` URL from an absolute path such as `/a.php`.
    fn uri(path: &str) -> Url {
        let raw = format!("file://{path}");
        Url::parse(&raw).unwrap()
    }

    /// Simulates the editor opening (or editing) a file: stores the text, then
    /// applies the parse result for exactly that text version.
    fn open(store: &DocumentStore, u: Url, text: String) {
        let (doc, diags) = crate::diagnostics::parse_document(&text);
        let version = store.set_text(u.clone(), text);
        store.apply_parse(&u, doc, diags, version);
    }

    /// Indexes `/0.php` .. `/{MAX_INDEXED - 1}.php`, filling the store exactly
    /// to its limit. `/0.php` ends up oldest in the eviction queue.
    fn fill_to_limit(store: &DocumentStore) {
        (0..MAX_INDEXED).for_each(|i| store.index(uri(&format!("/{i}.php")), "<?php"));
    }

    // ── Basic open / close / index behaviour ─────────────────────────────────

    #[test]
    fn get_returns_none_for_unknown_uri() {
        let store = DocumentStore::new();
        let missing = uri("/unknown.php");
        assert_eq!(store.get(&missing), None);
    }

    #[test]
    fn open_then_get_returns_text() {
        let store = DocumentStore::new();
        let a = uri("/a.php");
        open(&store, a.clone(), String::from("<?php echo 1;"));
        assert_eq!(store.get(&a), Some(String::from("<?php echo 1;")));
    }

    #[test]
    fn update_replaces_text() {
        let store = DocumentStore::new();
        let a = uri("/a.php");
        for text in ["<?php echo 1;", "<?php echo 2;"] {
            open(&store, a.clone(), text.to_string());
        }
        assert_eq!(store.get(&a), Some(String::from("<?php echo 2;")));
    }

    #[test]
    fn close_clears_text_but_keeps_doc() {
        let store = DocumentStore::new();
        let a = uri("/a.php");
        open(&store, a.clone(), String::from("<?php\nfunction greet() {}"));
        store.close(&a);
        assert_eq!(store.get(&a), None);
        assert!(store.get_doc(&a).is_some());
    }

    #[test]
    fn close_nonexistent_uri_is_safe() {
        // Must simply return without panicking.
        DocumentStore::new().close(&uri("/nonexistent.php"));
    }

    #[test]
    fn index_stores_doc_without_text() {
        let store = DocumentStore::new();
        let lib = uri("/lib.php");
        store.index(lib.clone(), "<?php\nfunction lib_fn() {}");
        assert_eq!(store.get(&lib), None);
        assert!(store.get_doc(&lib).is_some());
    }

    #[test]
    fn index_does_not_overwrite_open_file() {
        let store = DocumentStore::new();
        let a = uri("/a.php");
        open(&store, a.clone(), String::from("<?php\n$x = 1;"));
        store.index(a.clone(), "<?php\n$x = 99;");
        assert_eq!(store.get(&a), Some(String::from("<?php\n$x = 1;")));
    }

    #[test]
    fn remove_deletes_entry() {
        let store = DocumentStore::new();
        let lib = uri("/lib.php");
        store.index(lib.clone(), "<?php");
        store.remove(&lib);
        assert!(store.get_doc(&lib).is_none());
    }

    #[test]
    fn all_docs_includes_indexed_files() {
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), String::from("<?php\nfunction a() {}"));
        store.index(uri("/b.php"), "<?php\nfunction b() {}");
        let docs = store.all_docs();
        assert_eq!(docs.len(), 2);
    }

    #[test]
    fn other_docs_excludes_current_uri() {
        let store = DocumentStore::new();
        let a = uri("/a.php");
        open(&store, a.clone(), String::from("<?php\nfunction a() {}"));
        open(&store, uri("/b.php"), String::from("<?php\nfunction b() {}"));
        let others = store.other_docs(&a);
        assert_eq!(others.len(), 1);
    }

    #[test]
    fn open_caches_diagnostics_for_invalid_file() {
        let store = DocumentStore::new();
        let a = uri("/a.php");
        open(&store, a.clone(), String::from("<?php\nclass {"));
        let diags = store.get_diagnostics(&a).unwrap();
        assert!(!diags.is_empty());
    }

    // ── LRU eviction regression tests ────────────────────────────────────────

    #[test]
    fn eviction_removes_oldest_indexed_file() {
        // Fill the store to exactly MAX_INDEXED, then add one more.
        // The oldest entry must be evicted so the map stays at MAX_INDEXED.
        let store = DocumentStore::new();
        fill_to_limit(&store);
        let overflow = uri("/overflow.php");
        store.index(overflow.clone(), "<?php");

        assert_eq!(
            store.all_docs().len(),
            MAX_INDEXED,
            "map must not exceed MAX_INDEXED after overflow"
        );
        assert!(
            store.get_doc(&overflow).is_some(),
            "newly indexed file must be present"
        );
        assert!(
            store.get_doc(&uri("/0.php")).is_none(),
            "oldest file must have been evicted"
        );
    }

    #[test]
    fn eviction_skips_open_files_and_evicts_next_indexed() {
        // Regression test: an open file at the front of the eviction queue used
        // to make the loop exit without evicting anything.
        //
        //   order.len() was MAX_INDEXED+1 → pop open file → order.len() drops
        //   to MAX_INDEXED → while condition false → loop exits → no eviction.
        //
        // The loop now tracks `need_to_evict` independently of order.len(), so
        // it keeps looking until it finds an indexed-only file.
        let store = DocumentStore::new();
        let first = uri("/0.php");
        let second = uri("/1.php");
        let overflow = uri("/overflow.php");

        // /0.php becomes the oldest queue entry ...
        fill_to_limit(&store);
        // ... and is then opened, so it has text and must not be evicted.
        open(&store, first.clone(), String::from("<?php $x = 1;"));

        // One file over the limit: eviction must skip /0.php (open) and take
        // /1.php, the next oldest indexed-only file.
        store.index(overflow.clone(), "<?php");

        assert!(
            store.get_doc(&first).is_some(),
            "/0.php is open and must not be evicted"
        );
        assert!(
            store.get_doc(&overflow).is_some(),
            "overflow file must be present"
        );
        // Total entries are back at MAX_INDEXED:
        // /0.php (open) + the remaining indexed files + /overflow.php.
        assert_eq!(
            store.all_docs().len(),
            MAX_INDEXED,
            "total docs must equal MAX_INDEXED after eviction"
        );
        assert!(
            store.get_doc(&second).is_none(),
            "/1.php must have been evicted as the oldest indexed-only file"
        );
    }

    #[test]
    fn close_twice_does_not_duplicate_lru_entry() {
        let store = DocumentStore::new();
        let u = uri("/a.php");
        open(&store, u.clone(), String::from("<?php"));
        let queue_len = || store.indexed_order.lock().unwrap().len();

        store.close(&u);
        let len_after_first = queue_len();
        // The second close must not push a duplicate.
        store.close(&u);
        let len_after_second = queue_len();

        assert_eq!(
            len_after_first, len_after_second,
            "second close must not add a duplicate entry to indexed_order"
        );
    }
}
398}