ane/commands/syntax_engine/
mod.rs

1pub mod merge;
2pub mod tree_sitter_parse;
3
4use std::collections::HashMap;
5use std::collections::hash_map::DefaultHasher;
6use std::hash::{Hash, Hasher};
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Condvar, Mutex};
9use std::time::{Duration, Instant};
10
11use crate::commands::lsp_engine::LspEngine;
12use crate::data::lsp::types::{Language, SemanticToken};
13
14/// Callback trait for delivering computed syntax tokens to the frontend.
15/// Layer 1 defines this; Layer 2 implements it.
16pub trait SyntaxFrontend: Send + Sync {
17    fn set_semantic_tokens(&self, path: &Path, tokens: Vec<SemanticToken>);
18}
19
20struct LspRequest {
21    path: PathBuf,
22    content: String,
23    content_hash: u64,
24    ts_tokens: Vec<SemanticToken>,
25}
26
27/// Single-slot mailbox with latest-wins semantics. `compute()` overwrites
28/// any previously-queued request; the worker reads whichever request was
29/// last submitted. This avoids the wasted LSP roundtrip a bounded channel
30/// would cause when a stale request sits in the queue while newer ones
31/// are dropped on the floor.
32struct LspRequestSlot {
33    inner: Mutex<SlotState>,
34    cv: Condvar,
35}
36
37struct SlotState {
38    request: Option<LspRequest>,
39    shutdown: bool,
40}
41
42impl LspRequestSlot {
43    fn new() -> Self {
44        Self {
45            inner: Mutex::new(SlotState {
46                request: None,
47                shutdown: false,
48            }),
49            cv: Condvar::new(),
50        }
51    }
52
53    fn submit(&self, req: LspRequest) {
54        let mut s = self.inner.lock().unwrap();
55        s.request = Some(req);
56        self.cv.notify_all();
57    }
58
59    /// Block until a request is available, or return None on shutdown.
60    fn take(&self) -> Option<LspRequest> {
61        let mut s = self.inner.lock().unwrap();
62        loop {
63            if s.shutdown {
64                return None;
65            }
66            if let Some(req) = s.request.take() {
67                return Some(req);
68            }
69            s = self.cv.wait(s).unwrap();
70        }
71    }
72
73    /// Wait up to `dur` for a newer request to arrive. Returns the new
74    /// request if one shows up (consuming it from the slot), or None if
75    /// the window elapsed without any arrival (or on shutdown).
76    fn wait_for_newer(&self, dur: Duration) -> Option<LspRequest> {
77        let deadline = Instant::now() + dur;
78        let mut s = self.inner.lock().unwrap();
79        loop {
80            if s.shutdown {
81                return None;
82            }
83            if let Some(req) = s.request.take() {
84                return Some(req);
85            }
86            let now = Instant::now();
87            if now >= deadline {
88                return None;
89            }
90            let (next, _) = self.cv.wait_timeout(s, deadline - now).unwrap();
91            s = next;
92        }
93    }
94
95    fn is_shutdown(&self) -> bool {
96        self.inner.lock().unwrap().shutdown
97    }
98
99    fn signal_shutdown(&self) {
100        let mut s = self.inner.lock().unwrap();
101        s.shutdown = true;
102        self.cv.notify_all();
103    }
104}
105
106pub struct SyntaxEngine {
107    ts_cache: HashMap<PathBuf, (u64, Vec<SemanticToken>)>,
108    lsp_cache: Arc<Mutex<HashMap<PathBuf, Vec<SemanticToken>>>>,
109    content_hashes: Arc<Mutex<HashMap<PathBuf, u64>>>,
110    frontend: Arc<dyn SyntaxFrontend>,
111    request_slot: Arc<LspRequestSlot>,
112}
113
/// Fingerprints `content` with the standard library's `DefaultHasher`.
///
/// The result is used as a cache key and for staleness comparison; equal
/// content always yields the same value within one build of the program.
fn hash_content(content: &str) -> u64 {
    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    state.finish()
}
119
120impl SyntaxEngine {
121    pub fn new(lsp_engine: Arc<Mutex<LspEngine>>, frontend: Arc<dyn SyntaxFrontend>) -> Self {
122        let request_slot = Arc::new(LspRequestSlot::new());
123        let lsp_cache: Arc<Mutex<HashMap<PathBuf, Vec<SemanticToken>>>> =
124            Arc::new(Mutex::new(HashMap::new()));
125        let content_hashes: Arc<Mutex<HashMap<PathBuf, u64>>> =
126            Arc::new(Mutex::new(HashMap::new()));
127
128        let w_frontend = Arc::clone(&frontend);
129        let w_lsp_cache = Arc::clone(&lsp_cache);
130        let w_hashes = Arc::clone(&content_hashes);
131        let w_slot = Arc::clone(&request_slot);
132
133        std::thread::spawn(move || {
134            Self::lsp_worker(lsp_engine, w_slot, w_frontend, w_lsp_cache, w_hashes);
135        });
136
137        Self {
138            ts_cache: HashMap::new(),
139            lsp_cache,
140            content_hashes,
141            frontend,
142            request_slot,
143        }
144    }
145
146    /// Returns immediately. Runs tree-sitter synchronously (<2ms), then
147    /// queues a debounced LSP token request on the background worker.
148    pub fn compute(&mut self, path: &Path, content: &str) {
149        let lang = match Language::from_path(path) {
150            Some(l) => l,
151            None => {
152                // Unknown extension: clear any previous tokens so the
153                // frontend renders plain text. Matches spec edge case
154                // "Language with no tree-sitter and no LSP".
155                self.frontend.set_semantic_tokens(path, Vec::new());
156                return;
157            }
158        };
159        let caps = lang.capabilities();
160        let content_hash = hash_content(content);
161
162        // Phase 1: tree-sitter (synchronous, cached by content hash)
163        let ts_tokens = if caps.has_tree_sitter {
164            if self.ts_cache.get(path).map(|(h, _)| *h) != Some(content_hash) {
165                let tokens = tree_sitter_parse::parse(lang, content);
166                self.ts_cache
167                    .insert(path.to_path_buf(), (content_hash, tokens));
168            }
169            self.ts_cache.get(path).unwrap().1.clone()
170        } else {
171            vec![]
172        };
173
174        // Merge with any previously cached LSP tokens for this path
175        let cached_lsp = self
176            .lsp_cache
177            .lock()
178            .unwrap()
179            .get(path)
180            .cloned()
181            .unwrap_or_default();
182        let merged = if caps.has_lsp && !cached_lsp.is_empty() {
183            merge::merge(&ts_tokens, &cached_lsp)
184        } else {
185            ts_tokens.clone()
186        };
187
188        // Deliver best-effort tokens to frontend immediately
189        self.frontend.set_semantic_tokens(path, merged);
190
191        // Update content hash for staleness detection by the worker
192        self.content_hashes
193            .lock()
194            .unwrap()
195            .insert(path.to_path_buf(), content_hash);
196
197        // Phase 2: submit LSP request to the latest-wins slot. Any prior
198        // unprocessed request is silently overwritten — the worker reads
199        // whichever request was most recently submitted.
200        if caps.has_lsp {
201            self.request_slot.submit(LspRequest {
202                path: path.to_path_buf(),
203                content: content.to_string(),
204                content_hash,
205                ts_tokens,
206            });
207        }
208    }
209
210    fn lsp_worker(
211        engine: Arc<Mutex<LspEngine>>,
212        slot: Arc<LspRequestSlot>,
213        frontend: Arc<dyn SyntaxFrontend>,
214        lsp_cache: Arc<Mutex<HashMap<PathBuf, Vec<SemanticToken>>>>,
215        content_hashes: Arc<Mutex<HashMap<PathBuf, u64>>>,
216    ) {
217        let debounce = Duration::from_millis(300);
218
219        while let Some(mut req) = slot.take() {
220            // Debounce: keep taking newer requests for `debounce` since the
221            // last arrival. Each newer request resets the window.
222            while let Some(newer) = slot.wait_for_newer(debounce) {
223                req = newer;
224            }
225            if slot.is_shutdown() {
226                return;
227            }
228
229            // Fetch LSP semantic tokens
230            let lsp_tokens = engine
231                .lock()
232                .unwrap()
233                .semantic_tokens(&req.path, &req.content)
234                .unwrap_or_default();
235
236            // Staleness check: discard if content changed since request was queued
237            let current = content_hashes.lock().unwrap().get(&req.path).copied();
238            if current != Some(req.content_hash) {
239                continue;
240            }
241
242            // Cache LSP tokens for use by future compute() calls
243            lsp_cache
244                .lock()
245                .unwrap()
246                .insert(req.path.clone(), lsp_tokens.clone());
247
248            // Merge with tree-sitter tokens and deliver
249            let merged = if !lsp_tokens.is_empty() {
250                merge::merge(&req.ts_tokens, &lsp_tokens)
251            } else {
252                req.ts_tokens
253            };
254            frontend.set_semantic_tokens(&req.path, merged);
255        }
256    }
257}
258
259impl Drop for SyntaxEngine {
260    fn drop(&mut self) {
261        // Wake the worker thread so it can exit instead of blocking forever
262        // on the slot's condvar.
263        self.request_slot.signal_shutdown();
264    }
265}
266
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::{Arc, Mutex};
    use std::time::{Duration, Instant};

    use crate::commands::lsp_engine::{LspEngine, LspEngineConfig};
    use crate::data::lsp::types::SemanticToken;

    use super::tree_sitter_parse::PARSE_COUNT;
    use super::{SyntaxEngine, SyntaxFrontend};

    type RecordedCalls = Arc<Mutex<Vec<(PathBuf, Vec<SemanticToken>)>>>;

    /// Frontend that stores every delivery (path + tokens) for inspection.
    struct RecordingFrontend {
        calls: RecordedCalls,
    }

    impl SyntaxFrontend for RecordingFrontend {
        fn set_semantic_tokens(&self, path: &Path, tokens: Vec<SemanticToken>) {
            self.calls
                .lock()
                .unwrap()
                .push((path.to_path_buf(), tokens));
        }
    }

    /// Frontend that only counts deliveries; used by the timing tests that
    /// do not care about token contents.
    struct CountingFrontend {
        deliveries: Arc<AtomicUsize>,
    }

    impl SyntaxFrontend for CountingFrontend {
        fn set_semantic_tokens(&self, _: &Path, _: Vec<SemanticToken>) {
            self.deliveries.fetch_add(1, Ordering::SeqCst);
        }
    }

    /// Engine with a default LSP engine and a recording frontend.
    fn make_engine() -> (SyntaxEngine, RecordedCalls) {
        let calls: RecordedCalls = Arc::new(Mutex::new(Vec::new()));
        let frontend = Arc::new(RecordingFrontend {
            calls: Arc::clone(&calls),
        });
        let lsp = Arc::new(Mutex::new(LspEngine::new(LspEngineConfig::default())));
        let engine = SyntaxEngine::new(lsp, frontend as Arc<dyn SyntaxFrontend>);
        (engine, calls)
    }

    /// Engine backed by `lsp` whose frontend only counts deliveries.
    fn make_counting_engine(lsp: LspEngine) -> (SyntaxEngine, Arc<AtomicUsize>) {
        let deliveries = Arc::new(AtomicUsize::new(0));
        let frontend = Arc::new(CountingFrontend {
            deliveries: Arc::clone(&deliveries),
        });
        let engine = SyntaxEngine::new(
            Arc::new(Mutex::new(lsp)),
            frontend as Arc<dyn SyntaxFrontend>,
        );
        (engine, deliveries)
    }

    /// LSP engine with one injected keyword token for `path` whose
    /// `semantic_tokens` call is delayed by `delay`.
    fn slow_lsp(path: &Path, delay: Duration) -> LspEngine {
        let mut lsp = LspEngine::new(LspEngineConfig::default());
        lsp.inject_test_semantic_tokens(
            path.to_path_buf(),
            vec![SemanticToken {
                line: 0,
                start_col: 0,
                length: 2,
                token_type: "keyword".to_string(),
            }],
        );
        lsp.test_semantic_tokens_delay = Some(delay);
        lsp
    }

    fn call_count(calls: &RecordedCalls) -> usize {
        calls.lock().unwrap().len()
    }

    #[test]
    fn compute_no_lsp_language_no_lsp_queued() {
        let (mut engine, calls) = make_engine();
        let path = Path::new("README.md");
        engine.compute(path, "# Hello\n\nSome text.");
        assert_eq!(call_count(&calls), 1, "one synchronous delivery");
        // Wait beyond the debounce window — no LSP request was queued for Markdown
        std::thread::sleep(Duration::from_millis(400));
        assert_eq!(
            call_count(&calls),
            1,
            "no worker delivery for has_lsp: false language"
        );
    }

    #[test]
    fn compute_ts_cache_hit() {
        let (mut engine, calls) = make_engine();
        let path = Path::new("main.rs");
        let content = "fn main() {}";

        let before = PARSE_COUNT.with(|c| c.get());
        engine.compute(path, content);
        let after_first = PARSE_COUNT.with(|c| c.get());
        engine.compute(path, content);
        let after_second = PARSE_COUNT.with(|c| c.get());

        assert_eq!(
            after_first - before,
            1,
            "first compute parses via tree-sitter"
        );
        assert_eq!(
            after_second - after_first,
            0,
            "second compute with same content hits cache"
        );
        assert_eq!(call_count(&calls), 2, "both computes deliver tokens");
    }

    #[test]
    fn compute_cache_miss_on_content_change() {
        let (mut engine, calls) = make_engine();
        let path = Path::new("main.rs");

        let before = PARSE_COUNT.with(|c| c.get());
        engine.compute(path, "fn main() {}");
        engine.compute(path, "fn other() {}");
        let after = PARSE_COUNT.with(|c| c.get());

        assert_eq!(
            after - before,
            2,
            "content change triggers new tree-sitter parse"
        );
        assert_eq!(call_count(&calls), 2);

        let guard = calls.lock().unwrap();
        // The first synchronous delivery must carry tree-sitter tokens.
        assert_ne!(
            guard[0].1.len(),
            0,
            "first content should produce ts tokens"
        );
    }

    #[test]
    fn compute_returns_immediately() {
        let (mut engine, calls) = make_engine();
        let path = Path::new("main.rs");

        let start = Instant::now();
        engine.compute(path, "fn main() {}");
        let elapsed = start.elapsed();

        // set_semantic_tokens called synchronously within compute()
        assert_eq!(call_count(&calls), 1);
        assert!(
            elapsed < Duration::from_millis(10),
            "compute took {:?}, expected < 10ms",
            elapsed
        );
    }

    #[test]
    fn debounce_coalesces_rapid_calls() {
        let (mut engine, deliveries) =
            make_counting_engine(LspEngine::new(LspEngineConfig::default()));

        let path = Path::new("main.rs");
        let content = "fn main() {}";
        for _ in 0..10 {
            engine.compute(path, content);
        }

        let sync_deliveries = deliveries.load(Ordering::SeqCst);
        assert_eq!(sync_deliveries, 10, "each compute fires one sync delivery");

        // Wait for debounce + LSP (fails gracefully) → worker fires once
        std::thread::sleep(Duration::from_millis(600));

        let total = deliveries.load(Ordering::SeqCst);
        assert_eq!(total, 11, "worker should fire exactly once after debounce");
    }

    #[test]
    fn staleness_check_discards_outdated_lsp_tokens() {
        // Strategy: inject slow LSP tokens (400ms delay). After the debounce
        // window (300ms), the worker calls semantic_tokens and SLEEPS for 400ms.
        // During that sleep the test thread calls compute(B), updating
        // content_hashes to hash(B). When the worker wakes and checks staleness,
        // hash(A) ≠ hash(B) → SKIP. The worker then processes req(B) normally,
        // so the total is two sync deliveries plus one worker delivery.
        let path = Path::new("staleness_test.rs");
        let (mut engine, deliveries) =
            make_counting_engine(slow_lsp(path, Duration::from_millis(400)));

        // compute(A): queued, worker starts 300ms debounce
        engine.compute(path, "fn foo() {}");
        assert_eq!(
            deliveries.load(Ordering::SeqCst),
            1,
            "sync delivery for content A"
        );

        // Wait for debounce to expire so the worker starts the slow LSP call
        std::thread::sleep(Duration::from_millis(350));

        // compute(B): worker is now sleeping inside semantic_tokens (400ms delay).
        // This updates content_hashes to hash(B), and req(B) is queued.
        engine.compute(path, "fn bar() {}");
        assert_eq!(
            deliveries.load(Ordering::SeqCst),
            2,
            "sync delivery for content B"
        );

        // Timeline after compute(B) at t=350ms:
        //   t≈700ms  – slow LSP for req(A) returns; staleness check → SKIP
        //   t≈700ms  – worker picks up req(B), 300ms debounce fires at t≈1000ms
        //   t≈1400ms – slow LSP for req(B) returns; staleness OK → delivery #3
        // Sleep 2100ms after compute(B) (total ~2450ms) to ensure delivery #3 has fired.
        std::thread::sleep(Duration::from_millis(2100));

        // delivery #3 comes from req(B) processed correctly; req(A) was discarded
        let final_count = deliveries.load(Ordering::SeqCst);
        assert_eq!(
            final_count, 3,
            "req(A) stale → skip; req(B) not stale → deliver; total = 3"
        );
    }

    #[test]
    fn latest_wins_during_slow_lsp_call() {
        // True latest-wins: while the worker is inside a slow semantic_tokens
        // call for content A, two more computes happen (B then C). Both B and C
        // are submitted to the slot; with latest-wins semantics, C overwrites B
        // before the worker takes them out. After A's call returns (stale →
        // skip), the worker picks up C directly — B is never processed.
        let path = Path::new("latest_wins.rs");
        let (mut engine, deliveries) =
            make_counting_engine(slow_lsp(path, Duration::from_millis(500)));

        // compute(A): sync delivery #1, worker debounces 300ms, then starts
        // a 500ms LSP call at t≈300ms (finishes at t≈800ms).
        engine.compute(path, "fn a() {}");
        assert_eq!(deliveries.load(Ordering::SeqCst), 1);

        // Wait past the debounce so the worker is mid-LSP-call.
        std::thread::sleep(Duration::from_millis(400));

        // compute(B) then compute(C) — both arrive while worker is in LSP call.
        // With latest-wins, C overwrites B in the slot before the worker
        // takes them. The worker should pick up C (not B) next.
        engine.compute(path, "fn b() {}");
        engine.compute(path, "fn c() {}");
        assert_eq!(
            deliveries.load(Ordering::SeqCst),
            3,
            "three sync deliveries"
        );

        // Timeline:
        //   t≈800ms  – LSP(A) returns; content_hash is now hash(C) → SKIP
        //   t≈800ms  – worker takes C, debounces 300ms (t≈1100ms)
        //   t≈1600ms – LSP(C) returns; staleness OK → delivery #4
        // Sleep 1500ms after compute(C) (total ~1900ms after start).
        std::thread::sleep(Duration::from_millis(1500));

        let total = deliveries.load(Ordering::SeqCst);
        assert_eq!(
            total, 4,
            "exactly one worker delivery (for C); B was overwritten before worker took it"
        );
    }

    #[test]
    fn compute_no_lsp_for_config_languages() {
        let (mut engine, calls) = make_engine();

        let cases: &[(&str, &str)] = &[
            ("config.json", r#"{"x": 1}"#),
            ("config.yaml", "x: 1\n"),
            ("config.toml", "x = 1\n"),
            ("Dockerfile", "FROM ubuntu:22.04\n"),
            ("schema.xml", "<r/>"),
        ];

        for (filename, content) in cases {
            engine.compute(Path::new(filename), content);
        }

        assert_eq!(
            call_count(&calls),
            5,
            "one synchronous delivery per config file"
        );

        {
            let guard = calls.lock().unwrap();
            for (i, (_path, tokens)) in guard.iter().enumerate() {
                assert!(
                    !tokens.is_empty(),
                    "case {i}: expected non-empty tree-sitter tokens"
                );
            }
        }

        // Wait past the debounce window — no LSP worker delivery for has_lsp: false languages
        std::thread::sleep(Duration::from_millis(400));
        assert_eq!(
            call_count(&calls),
            5,
            "no additional worker delivery for config languages (has_lsp: false)"
        );
    }
}
ane/commands/syntax_engine/mod.rs

ane/commands/syntax_engine/
mod.rs