1pub mod merge;
2pub mod tree_sitter_parse;
3
4use std::collections::HashMap;
5use std::collections::hash_map::DefaultHasher;
6use std::hash::{Hash, Hasher};
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Condvar, Mutex};
9use std::time::{Duration, Instant};
10
11use crate::commands::lsp_engine::LspEngine;
12use crate::data::lsp::types::{Language, SemanticToken};
13
14pub trait SyntaxFrontend: Send + Sync {
17 fn set_semantic_tokens(&self, path: &Path, tokens: Vec<SemanticToken>);
18}
19
20struct LspRequest {
21 path: PathBuf,
22 content: String,
23 content_hash: u64,
24 ts_tokens: Vec<SemanticToken>,
25}
26
27struct LspRequestSlot {
33 inner: Mutex<SlotState>,
34 cv: Condvar,
35}
36
37struct SlotState {
38 request: Option<LspRequest>,
39 shutdown: bool,
40}
41
42impl LspRequestSlot {
43 fn new() -> Self {
44 Self {
45 inner: Mutex::new(SlotState {
46 request: None,
47 shutdown: false,
48 }),
49 cv: Condvar::new(),
50 }
51 }
52
53 fn submit(&self, req: LspRequest) {
54 let mut s = self.inner.lock().unwrap();
55 s.request = Some(req);
56 self.cv.notify_all();
57 }
58
59 fn take(&self) -> Option<LspRequest> {
61 let mut s = self.inner.lock().unwrap();
62 loop {
63 if s.shutdown {
64 return None;
65 }
66 if let Some(req) = s.request.take() {
67 return Some(req);
68 }
69 s = self.cv.wait(s).unwrap();
70 }
71 }
72
73 fn wait_for_newer(&self, dur: Duration) -> Option<LspRequest> {
77 let deadline = Instant::now() + dur;
78 let mut s = self.inner.lock().unwrap();
79 loop {
80 if s.shutdown {
81 return None;
82 }
83 if let Some(req) = s.request.take() {
84 return Some(req);
85 }
86 let now = Instant::now();
87 if now >= deadline {
88 return None;
89 }
90 let (next, _) = self.cv.wait_timeout(s, deadline - now).unwrap();
91 s = next;
92 }
93 }
94
95 fn is_shutdown(&self) -> bool {
96 self.inner.lock().unwrap().shutdown
97 }
98
99 fn signal_shutdown(&self) {
100 let mut s = self.inner.lock().unwrap();
101 s.shutdown = true;
102 self.cv.notify_all();
103 }
104}
105
106pub struct SyntaxEngine {
107 ts_cache: HashMap<PathBuf, (u64, Vec<SemanticToken>)>,
108 lsp_cache: Arc<Mutex<HashMap<PathBuf, Vec<SemanticToken>>>>,
109 content_hashes: Arc<Mutex<HashMap<PathBuf, u64>>>,
110 frontend: Arc<dyn SyntaxFrontend>,
111 request_slot: Arc<LspRequestSlot>,
112}
113
/// Returns a 64-bit fingerprint of `content` computed with the standard
/// library's `DefaultHasher`.
///
/// The value is only compared against other values produced by this function
/// within the same process.
fn hash_content(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    state.finish()
}
119
120impl SyntaxEngine {
121 pub fn new(lsp_engine: Arc<Mutex<LspEngine>>, frontend: Arc<dyn SyntaxFrontend>) -> Self {
122 let request_slot = Arc::new(LspRequestSlot::new());
123 let lsp_cache: Arc<Mutex<HashMap<PathBuf, Vec<SemanticToken>>>> =
124 Arc::new(Mutex::new(HashMap::new()));
125 let content_hashes: Arc<Mutex<HashMap<PathBuf, u64>>> =
126 Arc::new(Mutex::new(HashMap::new()));
127
128 let w_frontend = Arc::clone(&frontend);
129 let w_lsp_cache = Arc::clone(&lsp_cache);
130 let w_hashes = Arc::clone(&content_hashes);
131 let w_slot = Arc::clone(&request_slot);
132
133 std::thread::spawn(move || {
134 Self::lsp_worker(lsp_engine, w_slot, w_frontend, w_lsp_cache, w_hashes);
135 });
136
137 Self {
138 ts_cache: HashMap::new(),
139 lsp_cache,
140 content_hashes,
141 frontend,
142 request_slot,
143 }
144 }
145
146 pub fn compute(&mut self, path: &Path, content: &str) {
149 let lang = match Language::from_path(path) {
150 Some(l) => l,
151 None => {
152 self.frontend.set_semantic_tokens(path, Vec::new());
156 return;
157 }
158 };
159 let caps = lang.capabilities();
160 let content_hash = hash_content(content);
161
162 let ts_tokens = if caps.has_tree_sitter {
164 if self.ts_cache.get(path).map(|(h, _)| *h) != Some(content_hash) {
165 let tokens = tree_sitter_parse::parse(lang, content);
166 self.ts_cache
167 .insert(path.to_path_buf(), (content_hash, tokens));
168 }
169 self.ts_cache.get(path).unwrap().1.clone()
170 } else {
171 vec![]
172 };
173
174 let cached_lsp = self
176 .lsp_cache
177 .lock()
178 .unwrap()
179 .get(path)
180 .cloned()
181 .unwrap_or_default();
182 let merged = if caps.has_lsp && !cached_lsp.is_empty() {
183 merge::merge(&ts_tokens, &cached_lsp)
184 } else {
185 ts_tokens.clone()
186 };
187
188 self.frontend.set_semantic_tokens(path, merged);
190
191 self.content_hashes
193 .lock()
194 .unwrap()
195 .insert(path.to_path_buf(), content_hash);
196
197 if caps.has_lsp {
201 self.request_slot.submit(LspRequest {
202 path: path.to_path_buf(),
203 content: content.to_string(),
204 content_hash,
205 ts_tokens,
206 });
207 }
208 }
209
210 fn lsp_worker(
211 engine: Arc<Mutex<LspEngine>>,
212 slot: Arc<LspRequestSlot>,
213 frontend: Arc<dyn SyntaxFrontend>,
214 lsp_cache: Arc<Mutex<HashMap<PathBuf, Vec<SemanticToken>>>>,
215 content_hashes: Arc<Mutex<HashMap<PathBuf, u64>>>,
216 ) {
217 let debounce = Duration::from_millis(300);
218
219 while let Some(mut req) = slot.take() {
220 while let Some(newer) = slot.wait_for_newer(debounce) {
223 req = newer;
224 }
225 if slot.is_shutdown() {
226 return;
227 }
228
229 let lsp_tokens = engine
231 .lock()
232 .unwrap()
233 .semantic_tokens(&req.path, &req.content)
234 .unwrap_or_default();
235
236 let current = content_hashes.lock().unwrap().get(&req.path).copied();
238 if current != Some(req.content_hash) {
239 continue;
240 }
241
242 lsp_cache
244 .lock()
245 .unwrap()
246 .insert(req.path.clone(), lsp_tokens.clone());
247
248 let merged = if !lsp_tokens.is_empty() {
250 merge::merge(&req.ts_tokens, &lsp_tokens)
251 } else {
252 req.ts_tokens
253 };
254 frontend.set_semantic_tokens(&req.path, merged);
255 }
256 }
257}
258
259impl Drop for SyntaxEngine {
260 fn drop(&mut self) {
261 self.request_slot.signal_shutdown();
264 }
265}
266
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};
    use std::sync::{Arc, Mutex};
    use std::time::{Duration, Instant};

    use crate::commands::lsp_engine::{LspEngine, LspEngineConfig};
    use crate::data::lsp::types::SemanticToken;

    use super::tree_sitter_parse::PARSE_COUNT;
    use super::{SyntaxEngine, SyntaxFrontend};

    /// Every `(path, tokens)` pair delivered to a [`RecordingFrontend`].
    type RecordedCalls = Arc<Mutex<Vec<(PathBuf, Vec<SemanticToken>)>>>;

    /// Number of deliveries seen by a [`CountingFrontend`].
    type DeliveryCounter = Arc<Mutex<usize>>;

    /// Frontend that stores every delivery for later inspection.
    struct RecordingFrontend {
        calls: RecordedCalls,
    }

    impl SyntaxFrontend for RecordingFrontend {
        fn set_semantic_tokens(&self, path: &Path, tokens: Vec<SemanticToken>) {
            self.calls
                .lock()
                .unwrap()
                .push((path.to_path_buf(), tokens));
        }
    }

    /// Frontend that only counts deliveries.
    struct CountingFrontend(DeliveryCounter);

    impl SyntaxFrontend for CountingFrontend {
        fn set_semantic_tokens(&self, _: &Path, _: Vec<SemanticToken>) {
            *self.0.lock().unwrap() += 1;
        }
    }

    /// Engine with a default LSP engine and a recording frontend.
    fn make_engine() -> (SyntaxEngine, RecordedCalls) {
        let calls: RecordedCalls = Arc::new(Mutex::new(Vec::new()));
        let frontend = Arc::new(RecordingFrontend {
            calls: Arc::clone(&calls),
        });
        let lsp = Arc::new(Mutex::new(LspEngine::new(LspEngineConfig::default())));
        let engine = SyntaxEngine::new(lsp, frontend as Arc<dyn SyntaxFrontend>);
        (engine, calls)
    }

    /// Engine driven by `lsp` whose frontend only counts deliveries.
    fn make_counting_engine(lsp: LspEngine) -> (SyntaxEngine, DeliveryCounter) {
        let counter: DeliveryCounter = Arc::new(Mutex::new(0));
        let frontend: Arc<dyn SyntaxFrontend> =
            Arc::new(CountingFrontend(Arc::clone(&counter)));
        let engine = SyntaxEngine::new(Arc::new(Mutex::new(lsp)), frontend);
        (engine, counter)
    }

    /// LSP engine with one injected keyword token for `path` and an
    /// artificial `delay` configured for its semantic-token requests.
    fn slow_lsp(path: &Path, delay: Duration) -> LspEngine {
        let mut lsp = LspEngine::new(LspEngineConfig::default());
        lsp.inject_test_semantic_tokens(
            path.to_path_buf(),
            vec![SemanticToken {
                line: 0,
                start_col: 0,
                length: 2,
                token_type: "keyword".to_string(),
            }],
        );
        lsp.test_semantic_tokens_delay = Some(delay);
        lsp
    }

    fn call_count(calls: &RecordedCalls) -> usize {
        calls.lock().unwrap().len()
    }

    fn delivered(counter: &DeliveryCounter) -> usize {
        *counter.lock().unwrap()
    }

    #[test]
    fn compute_no_lsp_language_no_lsp_queued() {
        let (mut engine, calls) = make_engine();
        let path = Path::new("README.md");
        engine.compute(path, "# Hello\n\nSome text.");
        assert_eq!(call_count(&calls), 1, "one synchronous delivery");
        // Longer than the worker's 300 ms debounce window.
        std::thread::sleep(Duration::from_millis(400));
        assert_eq!(
            call_count(&calls),
            1,
            "no worker delivery for has_lsp: false language"
        );
    }

    #[test]
    fn compute_ts_cache_hit() {
        let (mut engine, calls) = make_engine();
        let path = Path::new("main.rs");
        let content = "fn main() {}";

        let before = PARSE_COUNT.with(|c| c.get());
        engine.compute(path, content);
        let after_first = PARSE_COUNT.with(|c| c.get());
        engine.compute(path, content);
        let after_second = PARSE_COUNT.with(|c| c.get());

        assert_eq!(
            after_first - before,
            1,
            "first compute parses via tree-sitter"
        );
        assert_eq!(
            after_second - after_first,
            0,
            "second compute with same content hits cache"
        );
        assert_eq!(call_count(&calls), 2, "both computes deliver tokens");
    }

    #[test]
    fn compute_cache_miss_on_content_change() {
        let (mut engine, calls) = make_engine();
        let path = Path::new("main.rs");

        let before = PARSE_COUNT.with(|c| c.get());
        engine.compute(path, "fn main() {}");
        engine.compute(path, "fn other() {}");
        let after = PARSE_COUNT.with(|c| c.get());

        assert_eq!(
            after - before,
            2,
            "content change triggers new tree-sitter parse"
        );
        assert_eq!(call_count(&calls), 2);

        let guard = calls.lock().unwrap();
        assert_ne!(
            guard[0].1.len(),
            0,
            "first content should produce ts tokens"
        );
    }

    #[test]
    fn compute_returns_immediately() {
        let (mut engine, calls) = make_engine();
        let path = Path::new("main.rs");

        let start = Instant::now();
        engine.compute(path, "fn main() {}");
        let elapsed = start.elapsed();

        assert_eq!(call_count(&calls), 1);
        // NOTE(review): a 10 ms wall-clock bound may be flaky on a loaded
        // machine — confirm the threshold is appropriate for CI.
        assert!(
            elapsed < Duration::from_millis(10),
            "compute took {:?}, expected < 10ms",
            elapsed
        );
    }

    #[test]
    fn debounce_coalesces_rapid_calls() {
        let (mut engine, deliveries) =
            make_counting_engine(LspEngine::new(LspEngineConfig::default()));

        let path = Path::new("main.rs");
        let content = "fn main() {}";
        for _ in 0..10 {
            engine.compute(path, content);
        }

        let sync_deliveries = delivered(&deliveries);
        assert_eq!(sync_deliveries, 10, "each compute fires one sync delivery");

        // Twice the 300 ms debounce window, so the worker has run once.
        std::thread::sleep(Duration::from_millis(600));

        let total = delivered(&deliveries);
        assert_eq!(total, 11, "worker should fire exactly once after debounce");
    }

    #[test]
    fn staleness_check_discards_outdated_lsp_tokens() {
        let path = PathBuf::from("staleness_test.rs");
        let lsp = slow_lsp(&path, Duration::from_millis(400));
        let (mut engine, deliveries) = make_counting_engine(lsp);

        engine.compute(path.as_path(), "fn foo() {}");
        assert_eq!(delivered(&deliveries), 1, "sync delivery for content A");

        // Past the 300 ms debounce: the worker is now inside the slow LSP
        // call for content A.
        std::thread::sleep(Duration::from_millis(350));

        engine.compute(path.as_path(), "fn bar() {}");
        assert_eq!(delivered(&deliveries), 2, "sync delivery for content B");

        // Enough for A's call to finish and for B's debounce plus LSP call.
        std::thread::sleep(Duration::from_millis(2100));

        let final_count = delivered(&deliveries);
        assert_eq!(
            final_count, 3,
            "req(A) stale → skip; req(B) not stale → deliver; total = 3"
        );
    }

    #[test]
    fn latest_wins_during_slow_lsp_call() {
        let path = PathBuf::from("latest_wins.rs");
        let lsp = slow_lsp(&path, Duration::from_millis(500));
        let (mut engine, deliveries) = make_counting_engine(lsp);

        engine.compute(path.as_path(), "fn a() {}");
        assert_eq!(delivered(&deliveries), 1);

        // Past the 300 ms debounce: the worker is busy with the slow LSP call
        // for content A.
        std::thread::sleep(Duration::from_millis(400));

        // Both land in the single request slot while the worker is busy; the
        // second overwrites the first.
        engine.compute(path.as_path(), "fn b() {}");
        engine.compute(path.as_path(), "fn c() {}");
        assert_eq!(delivered(&deliveries), 3, "three sync deliveries");

        std::thread::sleep(Duration::from_millis(1500));

        let total = delivered(&deliveries);
        assert_eq!(
            total, 4,
            "exactly one worker delivery (for C); B was overwritten before worker took it"
        );
    }

    #[test]
    fn compute_no_lsp_for_config_languages() {
        let (mut engine, calls) = make_engine();

        let cases: &[(&str, &str)] = &[
            ("config.json", r#"{"x": 1}"#),
            ("config.yaml", "x: 1\n"),
            ("config.toml", "x = 1\n"),
            ("Dockerfile", "FROM ubuntu:22.04\n"),
            ("schema.xml", "<r/>"),
        ];

        for (filename, content) in cases {
            engine.compute(Path::new(filename), content);
        }

        assert_eq!(
            call_count(&calls),
            5,
            "one synchronous delivery per config file"
        );

        {
            let guard = calls.lock().unwrap();
            for (i, (_path, tokens)) in guard.iter().enumerate() {
                assert!(
                    !tokens.is_empty(),
                    "case {i}: expected non-empty tree-sitter tokens"
                );
            }
        }

        // Longer than the worker's 300 ms debounce window.
        std::thread::sleep(Duration::from_millis(400));
        assert_eq!(
            call_count(&calls),
            5,
            "no additional worker delivery for config languages (has_lsp: false)"
        );
    }
}