1use crate::model::buffer::Buffer;
13use crate::model::marker::{MarkerId, MarkerList};
14use crate::primitives::grammar::GrammarRegistry;
15use crate::primitives::highlight_types::{highlight_color, HighlightCategory, HighlightSpan};
16use crate::view::theme::Theme;
17use std::collections::HashMap;
18use std::ops::Range;
19use std::path::Path;
20use std::sync::Arc;
21use syntect::parsing::SyntaxSet;
22
/// Byte budget used when no usable checkpoint is at hand.
///
/// It is both the look-back distance when searching for a checkpoint before a
/// position and the limit under which a parse may start from offset 0
/// (the parse/walk end must not exceed it).
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Number of parsed bytes after which `highlight_viewport` tries to record a
/// new checkpoint. A checkpoint is skipped when another marker already lies
/// within half this distance on either side of the candidate offset.
const CHECKPOINT_INTERVAL: usize = 256;
28
/// Viewport-oriented syntax highlighter built on syntect parse states.
///
/// Parse state is checkpointed at marker positions so that a later request can
/// resume from a nearby checkpoint, and the most recent result is cached.
pub struct TextMateEngine {
    /// Syntax set handed to `ParseState::parse_line`.
    syntax_set: Arc<SyntaxSet>,
    /// Index into `syntax_set.syntaxes()` of the syntax used for parsing.
    syntax_index: usize,
    /// Positions of the recorded checkpoints; adjusted on insert/delete.
    checkpoint_markers: MarkerList,
    /// Parse state and scope stack saved for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Smallest edited position reported since checkpoints were last
    /// reconciled, or `None` when there is nothing to reconcile.
    dirty_from: Option<usize>,
    /// Spans produced by the most recent parse, with the range they cover.
    cache: Option<TextMateCache>,
    /// Buffer length at the time `cache` was stored; a cache hit requires the
    /// current buffer length to be equal.
    last_buffer_len: usize,
}
43
/// Result of the most recent parse, kept to answer repeated viewport requests.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer that `spans` was computed for.
    range: Range<usize>,
    /// Categorized spans found inside `range`.
    spans: Vec<CachedSpan>,
}
49
/// A theme-independent highlight span; the color is resolved from `category`
/// when the span is converted to a `HighlightSpan`.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte range of the span in buffer coordinates.
    range: Range<usize>,
    /// Highlight category assigned to the range.
    category: HighlightCategory,
}
55
56impl TextMateEngine {
57 pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
59 Self {
60 syntax_set,
61 syntax_index,
62 checkpoint_markers: MarkerList::new(),
63 checkpoint_states: HashMap::new(),
64 dirty_from: None,
65 cache: None,
66 last_buffer_len: 0,
67 }
68 }
69
70 pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Option<Self> {
77 let syntax_set = registry.syntax_set_arc();
78 let syntax = registry.find_syntax_for_file(path)?;
79
80 let index = syntax_set
82 .syntaxes()
83 .iter()
84 .position(|s| s.name == syntax.name)?;
85
86 Some(Self::new(syntax_set, index))
87 }
88
89 pub fn notify_insert(&mut self, position: usize, length: usize) {
90 self.checkpoint_markers.adjust_for_insert(position, length);
91 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
92 }
93
94 pub fn notify_delete(&mut self, position: usize, length: usize) {
95 self.checkpoint_markers.adjust_for_delete(position, length);
96 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
97 }
98
99 pub fn highlight_viewport(
101 &mut self,
102 buffer: &Buffer,
103 viewport_start: usize,
104 viewport_end: usize,
105 theme: &Theme,
106 context_bytes: usize,
107 ) -> Vec<HighlightSpan> {
108 if let Some(cache) = &self.cache {
109 if cache.range.start <= viewport_start
110 && cache.range.end >= viewport_end
111 && self.last_buffer_len == buffer.len()
112 {
113 return cache
114 .spans
115 .iter()
116 .filter(|span| {
117 span.range.start < viewport_end && span.range.end > viewport_start
118 })
119 .map(|span| HighlightSpan {
120 range: span.range.clone(),
121 color: highlight_color(span.category, theme),
122 bg: None,
123 category: Some(span.category),
124 })
125 .collect();
126 }
127 }
128
129 let desired_parse_start = viewport_start.saturating_sub(context_bytes);
130 let parse_end = (viewport_end + context_bytes).min(buffer.len());
131 if parse_end <= desired_parse_start {
132 return Vec::new();
133 }
134
135 if let Some(dirty) = self.dirty_from {
136 if dirty < parse_end {
137 self.run_convergence_walk(buffer, parse_end);
138 }
139 }
140
141 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
142 let (actual_start, mut state, mut current_scopes, create_checkpoints) =
143 self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
144
145 let content = buffer.slice_bytes(actual_start..parse_end);
146 let content_str = match std::str::from_utf8(&content) {
147 Ok(s) => s,
148 Err(_) => return Vec::new(),
149 };
150
151 let mut spans = Vec::new();
152 let content_bytes = content_str.as_bytes();
153 let mut pos = 0;
154 let mut current_offset = actual_start;
155 let mut bytes_since_checkpoint: usize = 0;
156
157 while pos < content_bytes.len() {
158 if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
159 let nearby = self.checkpoint_markers.query_range(
160 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
161 current_offset + CHECKPOINT_INTERVAL / 2,
162 );
163 if nearby.is_empty() {
164 let marker_id = self.checkpoint_markers.create(current_offset, true);
165 self.checkpoint_states
166 .insert(marker_id, (state.clone(), current_scopes.clone()));
167 }
168 bytes_since_checkpoint = 0;
169 }
170
171 let mut line_end = pos;
172 while line_end < content_bytes.len() {
173 if content_bytes[line_end] == b'\n' {
174 line_end += 1;
175 break;
176 } else if content_bytes[line_end] == b'\r' {
177 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
178 line_end += 2;
179 } else {
180 line_end += 1;
181 }
182 break;
183 }
184 line_end += 1;
185 }
186
187 let line_bytes = &content_bytes[pos..line_end];
188 let actual_line_byte_len = line_bytes.len();
189
190 let line_str = match std::str::from_utf8(line_bytes) {
191 Ok(s) => s,
192 Err(_) => {
193 pos = line_end;
194 current_offset += actual_line_byte_len;
195 bytes_since_checkpoint += actual_line_byte_len;
196 continue;
197 }
198 };
199
200 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
201 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
202 format!("{}\n", line_content)
203 } else {
204 line_content.to_string()
205 };
206
207 let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
208 Ok(ops) => ops,
209 Err(_) => {
210 pos = line_end;
211 current_offset += actual_line_byte_len;
212 bytes_since_checkpoint += actual_line_byte_len;
213 continue;
214 }
215 };
216
217 let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
218 let mut syntect_offset = 0;
219 let line_content_len = line_content.len();
220
221 for (op_offset, op) in ops {
222 let clamped_op_offset = op_offset.min(line_content_len);
223 if collect_spans && clamped_op_offset > syntect_offset {
224 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
225 let byte_start = current_offset + syntect_offset;
226 let byte_end = current_offset + clamped_op_offset;
227 let clamped_start = byte_start.max(desired_parse_start);
228 if clamped_start < byte_end {
229 spans.push(CachedSpan {
230 range: clamped_start..byte_end,
231 category,
232 });
233 }
234 }
235 }
236 syntect_offset = clamped_op_offset;
237 #[allow(clippy::let_underscore_must_use)]
238 let _ = current_scopes.apply(&op);
239 }
240
241 if collect_spans && syntect_offset < line_content_len {
242 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
243 let byte_start = current_offset + syntect_offset;
244 let byte_end = current_offset + line_content_len;
245 let clamped_start = byte_start.max(desired_parse_start);
246 if clamped_start < byte_end {
247 spans.push(CachedSpan {
248 range: clamped_start..byte_end,
249 category,
250 });
251 }
252 }
253 }
254
255 pos = line_end;
256 current_offset += actual_line_byte_len;
257 bytes_since_checkpoint += actual_line_byte_len;
258 }
259
260 Self::merge_adjacent_spans(&mut spans);
261
262 self.cache = Some(TextMateCache {
263 range: desired_parse_start..parse_end,
264 spans: spans.clone(),
265 });
266 self.last_buffer_len = buffer.len();
267
268 spans
269 .into_iter()
270 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
271 .map(|span| {
272 let cat = span.category;
273 HighlightSpan {
274 range: span.range,
275 color: highlight_color(cat, theme),
276 bg: None,
277 category: Some(cat),
278 }
279 })
280 .collect()
281 }
282
283 fn run_convergence_walk(&mut self, buffer: &Buffer, walk_end: usize) {
284 let dirty = match self.dirty_from.take() {
285 Some(d) => d,
286 None => return,
287 };
288
289 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
290
291 let (resume_pos, mut state, mut current_scopes) = {
292 let search_start = dirty.saturating_sub(MAX_PARSE_BYTES);
293 let markers = self.checkpoint_markers.query_range(search_start, dirty);
294 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
295 if let Some((id, cp_pos, _)) = nearest {
296 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
297 (cp_pos, s.clone(), sc.clone())
298 } else {
299 self.checkpoint_markers.delete(id);
300 (
301 0,
302 syntect::parsing::ParseState::new(syntax),
303 syntect::parsing::ScopeStack::new(),
304 )
305 }
306 } else if walk_end <= MAX_PARSE_BYTES {
307 (
308 0,
309 syntect::parsing::ParseState::new(syntax),
310 syntect::parsing::ScopeStack::new(),
311 )
312 } else {
313 self.dirty_from = Some(dirty);
314 return;
315 }
316 };
317
318 let mut markers_ahead: Vec<(MarkerId, usize)> = self
319 .checkpoint_markers
320 .query_range(dirty, walk_end)
321 .into_iter()
322 .map(|(id, start, _)| (id, start))
323 .collect();
324 markers_ahead.sort_by_key(|(_, pos)| *pos);
325
326 if markers_ahead.is_empty() {
327 return;
328 }
329
330 let content_end = walk_end.min(buffer.len());
331 if resume_pos >= content_end {
332 return;
333 }
334 let content = buffer.slice_bytes(resume_pos..content_end);
335 let content_str = match std::str::from_utf8(&content) {
336 Ok(s) => s,
337 Err(_) => return,
338 };
339
340 let content_bytes = content_str.as_bytes();
341 let mut pos = 0;
342 let mut current_offset = resume_pos;
343 let mut marker_idx = 0;
344
345 while pos < content_bytes.len() && marker_idx < markers_ahead.len() {
346 let mut line_end = pos;
347 while line_end < content_bytes.len() {
348 if content_bytes[line_end] == b'\n' {
349 line_end += 1;
350 break;
351 } else if content_bytes[line_end] == b'\r' {
352 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
353 line_end += 2;
354 } else {
355 line_end += 1;
356 }
357 break;
358 }
359 line_end += 1;
360 }
361
362 let line_bytes = &content_bytes[pos..line_end];
363 let actual_line_byte_len = line_bytes.len();
364
365 let line_str = match std::str::from_utf8(line_bytes) {
366 Ok(s) => s,
367 Err(_) => {
368 pos = line_end;
369 current_offset += actual_line_byte_len;
370 continue;
371 }
372 };
373
374 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
375 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
376 format!("{}\n", line_content)
377 } else {
378 line_content.to_string()
379 };
380
381 if let Ok(ops) = state.parse_line(&line_for_syntect, &self.syntax_set) {
382 for (_op_offset, op) in ops {
383 #[allow(clippy::let_underscore_must_use)]
384 let _ = current_scopes.apply(&op);
385 }
386 }
387
388 pos = line_end;
389 current_offset += actual_line_byte_len;
390
391 while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
392 {
393 let (marker_id, _) = markers_ahead[marker_idx];
394 marker_idx += 1;
395
396 if let Some(stored) = self.checkpoint_states.get(&marker_id) {
397 if state == stored.0 && current_scopes == stored.1 {
398 return;
399 }
400 }
401 self.checkpoint_states
402 .insert(marker_id, (state.clone(), current_scopes.clone()));
403 }
404 }
405
406 if marker_idx < markers_ahead.len() {
407 self.dirty_from = Some(markers_ahead[marker_idx].1);
408 }
409 }
410
411 fn find_parse_resume_point(
412 &self,
413 desired_start: usize,
414 parse_end: usize,
415 syntax: &syntect::parsing::SyntaxReference,
416 ) -> (
417 usize,
418 syntect::parsing::ParseState,
419 syntect::parsing::ScopeStack,
420 bool,
421 ) {
422 use syntect::parsing::{ParseState, ScopeStack};
423
424 let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
425 let markers = self
426 .checkpoint_markers
427 .query_range(search_start, desired_start + 1);
428 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
429
430 if let Some((id, cp_pos, _)) = nearest {
431 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
432 return (cp_pos, s.clone(), sc.clone(), true);
433 }
434 }
435 if parse_end <= MAX_PARSE_BYTES {
436 (0, ParseState::new(syntax), ScopeStack::new(), true)
437 } else {
438 (
439 desired_start,
440 ParseState::new(syntax),
441 ScopeStack::new(),
442 true,
443 )
444 }
445 }
446
447 fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
448 for scope in scopes.as_slice().iter().rev() {
449 let scope_str = scope.build_string();
450 if let Some(cat) = scope_to_category(&scope_str) {
451 return Some(cat);
452 }
453 }
454 None
455 }
456
457 fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
458 if spans.len() < 2 {
459 return;
460 }
461 let mut write_idx = 0;
462 for read_idx in 1..spans.len() {
463 if spans[write_idx].category == spans[read_idx].category
464 && spans[write_idx].range.end == spans[read_idx].range.start
465 {
466 spans[write_idx].range.end = spans[read_idx].range.end;
467 } else {
468 write_idx += 1;
469 if write_idx != read_idx {
470 spans[write_idx] = spans[read_idx].clone();
471 }
472 }
473 }
474 spans.truncate(write_idx + 1);
475 }
476
477 pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
478 if let Some(cache) = &self.cache {
479 if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
480 self.cache = None;
481 }
482 }
483 }
484
485 pub fn invalidate_all(&mut self) {
486 self.cache = None;
487 let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
488 for id in ids {
489 self.checkpoint_markers.delete(id);
490 }
491 self.checkpoint_states.clear();
492 self.dirty_from = None;
493 }
494
495 pub fn syntax_name(&self) -> &str {
496 &self.syntax_set.syntaxes()[self.syntax_index].name
497 }
498}
499
500fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
502 let scope_lower = scope.to_lowercase();
503
504 if scope_lower.starts_with("comment") {
506 return Some(HighlightCategory::Comment);
507 }
508
509 if scope_lower.starts_with("string") {
511 return Some(HighlightCategory::String);
512 }
513
514 if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
516 return Some(HighlightCategory::Keyword);
517 }
518 if scope_lower.starts_with("markup.bold") {
519 return Some(HighlightCategory::Constant);
520 }
521 if scope_lower.starts_with("markup.italic") {
522 return Some(HighlightCategory::Variable);
523 }
524 if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
525 return Some(HighlightCategory::String);
526 }
527 if scope_lower.starts_with("markup.underline.link")
528 || scope_lower.starts_with("markup.underline")
529 {
530 return Some(HighlightCategory::Function);
531 }
532 if scope_lower.starts_with("markup.quote") || scope_lower.starts_with("markup.strikethrough") {
533 return Some(HighlightCategory::Comment);
534 }
535 if scope_lower.starts_with("markup.list") {
536 return Some(HighlightCategory::Operator);
537 }
538 if scope_lower.starts_with("markup.inserted") {
540 return Some(HighlightCategory::String); }
542 if scope_lower.starts_with("markup.deleted") {
543 return Some(HighlightCategory::Keyword); }
545 if scope_lower.starts_with("meta.diff.range")
547 || scope_lower.starts_with("meta.diff.header")
548 || scope_lower.starts_with("meta.diff.index")
549 {
550 return Some(HighlightCategory::Function); }
552 if scope_lower.starts_with("punctuation.definition.from-file")
554 || scope_lower.starts_with("punctuation.definition.to-file")
555 {
556 return Some(HighlightCategory::Type); }
558
559 if scope_lower.starts_with("keyword") && !scope_lower.starts_with("keyword.operator") {
561 return Some(HighlightCategory::Keyword);
562 }
563
564 if scope_lower.starts_with("punctuation.definition.comment") {
568 return Some(HighlightCategory::Comment);
569 }
570 if scope_lower.starts_with("punctuation.definition.string") {
571 return Some(HighlightCategory::String);
572 }
573
574 if scope_lower.starts_with("keyword.operator") {
576 return Some(HighlightCategory::Operator);
577 }
578
579 if scope_lower.starts_with("punctuation.section")
583 || scope_lower.starts_with("punctuation.bracket")
584 || scope_lower.starts_with("punctuation.definition.array")
585 || scope_lower.starts_with("punctuation.definition.block")
586 || scope_lower.starts_with("punctuation.definition.brackets")
587 || scope_lower.starts_with("punctuation.definition.group")
588 || scope_lower.starts_with("punctuation.definition.inline-table")
589 || scope_lower.starts_with("punctuation.definition.section")
590 || scope_lower.starts_with("punctuation.definition.table")
591 || scope_lower.starts_with("punctuation.definition.tag")
592 {
593 return Some(HighlightCategory::PunctuationBracket);
594 }
595
596 if scope_lower.starts_with("punctuation.separator")
598 || scope_lower.starts_with("punctuation.terminator")
599 || scope_lower.starts_with("punctuation.accessor")
600 {
601 return Some(HighlightCategory::PunctuationDelimiter);
602 }
603
604 if scope_lower.starts_with("entity.name.function")
606 || scope_lower.starts_with("meta.function-call")
607 || scope_lower.starts_with("support.function")
608 {
609 return Some(HighlightCategory::Function);
610 }
611
612 if scope_lower.starts_with("entity.name.type")
614 || scope_lower.starts_with("storage.type")
615 || scope_lower.starts_with("support.type")
616 || scope_lower.starts_with("entity.name.class")
617 {
618 return Some(HighlightCategory::Type);
619 }
620
621 if scope_lower.starts_with("constant.numeric")
623 || scope_lower.starts_with("constant.language")
624 || scope_lower.starts_with("constant.character")
625 {
626 return Some(HighlightCategory::Constant);
627 }
628 if scope_lower.starts_with("constant") {
629 return Some(HighlightCategory::Constant);
630 }
631
632 if scope_lower.starts_with("variable.parameter") {
634 return Some(HighlightCategory::Variable);
635 }
636 if scope_lower.starts_with("variable") {
637 return Some(HighlightCategory::Variable);
638 }
639
640 if scope_lower.starts_with("storage.modifier") {
642 return Some(HighlightCategory::Keyword);
643 }
644
645 if scope_lower.starts_with("entity.name") {
647 return Some(HighlightCategory::Function);
648 }
649
650 None
651}
652
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks each `(scope, expected category)` pair; the scope is included in
    /// the compared tuples so a failure shows which entry broke.
    fn assert_categories(cases: &[(&str, HighlightCategory)]) {
        for (scope, expected) in cases {
            assert_eq!(
                (*scope, scope_to_category(scope)),
                (*scope, Some(*expected))
            );
        }
    }

    #[test]
    fn test_scope_to_category() {
        assert_categories(&[
            ("comment.line", HighlightCategory::Comment),
            ("string.quoted", HighlightCategory::String),
            ("keyword.control", HighlightCategory::Keyword),
            ("keyword.operator", HighlightCategory::Operator),
            ("entity.name.function", HighlightCategory::Function),
            ("constant.numeric", HighlightCategory::Constant),
            ("variable.parameter", HighlightCategory::Variable),
        ]);
    }

    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        assert_categories(&[
            ("punctuation.definition.comment", HighlightCategory::Comment),
            (
                "punctuation.definition.comment.python",
                HighlightCategory::Comment,
            ),
            (
                "punctuation.definition.comment.begin",
                HighlightCategory::Comment,
            ),
        ]);
    }

    #[test]
    fn test_string_delimiter_uses_string_color() {
        assert_categories(&[
            (
                "punctuation.definition.string.begin",
                HighlightCategory::String,
            ),
            (
                "punctuation.definition.string.end",
                HighlightCategory::String,
            ),
        ]);
    }

    #[test]
    fn test_diff_scopes_produce_categories() {
        assert_categories(&[
            ("markup.inserted", HighlightCategory::String),
            ("markup.inserted.diff", HighlightCategory::String),
            ("markup.deleted", HighlightCategory::Keyword),
            ("markup.deleted.diff", HighlightCategory::Keyword),
            ("meta.diff.range", HighlightCategory::Function),
            ("meta.diff.header", HighlightCategory::Function),
        ]);
    }

    /// Feeds a small unified diff through syntect's bundled diff grammar and
    /// checks that at least one of the diff-related scopes shows up.
    #[test]
    fn test_diff_parsing_produces_scopes() {
        use syntect::parsing::{ParseState, ScopeStack, SyntaxSet};

        let ss = SyntaxSet::load_defaults_newlines();
        let diff_syntax = ss
            .find_syntax_by_extension("diff")
            .expect("Diff syntax should exist");
        let mut parser = ParseState::new(diff_syntax);
        let mut stack = ScopeStack::new();

        let sample = [
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,3 +1,4 @@\n",
            " unchanged\n",
            "-removed line\n",
            "+added line\n",
        ];

        let (mut found_inserted, mut found_deleted, mut found_range) = (false, false, false);

        for line in &sample {
            let ops = parser.parse_line(line, &ss).unwrap();
            for (_offset, op) in &ops {
                stack.apply(op).unwrap();

                // Inspect the whole stack after every operation.
                let joined = stack
                    .as_slice()
                    .iter()
                    .map(|s| s.build_string())
                    .collect::<Vec<_>>()
                    .join(" ");

                found_inserted |= joined.contains("markup.inserted");
                found_deleted |= joined.contains("markup.deleted");
                found_range |= joined.contains("meta.diff");
            }
        }

        eprintln!(
            "found_inserted={}, found_deleted={}, found_range={}",
            found_inserted, found_deleted, found_range
        );
        assert!(
            found_inserted || found_deleted || found_range,
            "Diff grammar should produce markup.inserted, markup.deleted, or meta.diff scopes"
        );
    }
}