1use crate::model::buffer::Buffer;
13use crate::model::marker::{MarkerId, MarkerList};
14use crate::primitives::grammar::GrammarRegistry;
15use crate::primitives::highlight_types::{highlight_color, HighlightCategory, HighlightSpan};
16use crate::view::theme::Theme;
17use std::collections::HashMap;
18use std::ops::Range;
19use std::path::Path;
20use std::sync::Arc;
21use syntect::parsing::SyntaxSet;
22
/// Size of the look-back window, in bytes, used when searching for a saved
/// checkpoint before a target offset. Ranges that end at or below this offset
/// are parsed from the start of the buffer when no checkpoint is available.
const MAX_PARSE_BYTES: usize = 1 << 20;

/// Number of parsed bytes after which the highlighter tries to record a new
/// checkpoint; half of this value is the radius checked for an existing
/// checkpoint before creating another one.
const CHECKPOINT_INTERVAL: usize = 1 << 8;
28
29pub struct TextMateEngine {
34 syntax_set: Arc<SyntaxSet>,
35 syntax_index: usize,
36 checkpoint_markers: MarkerList,
37 checkpoint_states:
38 HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
39 dirty_from: Option<usize>,
40 cache: Option<TextMateCache>,
41 last_buffer_len: usize,
42}
43
44#[derive(Debug, Clone)]
45struct TextMateCache {
46 range: Range<usize>,
47 spans: Vec<CachedSpan>,
48}
49
50#[derive(Debug, Clone)]
51struct CachedSpan {
52 range: Range<usize>,
53 category: HighlightCategory,
54}
55
56impl TextMateEngine {
57 pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
59 Self {
60 syntax_set,
61 syntax_index,
62 checkpoint_markers: MarkerList::new(),
63 checkpoint_states: HashMap::new(),
64 dirty_from: None,
65 cache: None,
66 last_buffer_len: 0,
67 }
68 }
69
70 pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Option<Self> {
77 let syntax_set = registry.syntax_set_arc();
78 let syntax = registry.find_syntax_for_file(path)?;
79
80 let index = syntax_set
82 .syntaxes()
83 .iter()
84 .position(|s| s.name == syntax.name)?;
85
86 Some(Self::new(syntax_set, index))
87 }
88
89 pub fn notify_insert(&mut self, position: usize, length: usize) {
90 self.checkpoint_markers.adjust_for_insert(position, length);
91 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
92 }
93
94 pub fn notify_delete(&mut self, position: usize, length: usize) {
95 self.checkpoint_markers.adjust_for_delete(position, length);
96 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
97 }
98
99 pub fn highlight_viewport(
101 &mut self,
102 buffer: &Buffer,
103 viewport_start: usize,
104 viewport_end: usize,
105 theme: &Theme,
106 context_bytes: usize,
107 ) -> Vec<HighlightSpan> {
108 if let Some(cache) = &self.cache {
109 if cache.range.start <= viewport_start
110 && cache.range.end >= viewport_end
111 && self.last_buffer_len == buffer.len()
112 {
113 return cache
114 .spans
115 .iter()
116 .filter(|span| {
117 span.range.start < viewport_end && span.range.end > viewport_start
118 })
119 .map(|span| HighlightSpan {
120 range: span.range.clone(),
121 color: highlight_color(span.category, theme),
122 category: Some(span.category),
123 })
124 .collect();
125 }
126 }
127
128 let desired_parse_start = viewport_start.saturating_sub(context_bytes);
129 let parse_end = (viewport_end + context_bytes).min(buffer.len());
130 if parse_end <= desired_parse_start {
131 return Vec::new();
132 }
133
134 if let Some(dirty) = self.dirty_from {
135 if dirty < parse_end {
136 self.run_convergence_walk(buffer, parse_end);
137 }
138 }
139
140 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
141 let (actual_start, mut state, mut current_scopes, create_checkpoints) =
142 self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
143
144 let content = buffer.slice_bytes(actual_start..parse_end);
145 let content_str = match std::str::from_utf8(&content) {
146 Ok(s) => s,
147 Err(_) => return Vec::new(),
148 };
149
150 let mut spans = Vec::new();
151 let content_bytes = content_str.as_bytes();
152 let mut pos = 0;
153 let mut current_offset = actual_start;
154 let mut bytes_since_checkpoint: usize = 0;
155
156 while pos < content_bytes.len() {
157 if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
158 let nearby = self.checkpoint_markers.query_range(
159 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
160 current_offset + CHECKPOINT_INTERVAL / 2,
161 );
162 if nearby.is_empty() {
163 let marker_id = self.checkpoint_markers.create(current_offset, true);
164 self.checkpoint_states
165 .insert(marker_id, (state.clone(), current_scopes.clone()));
166 }
167 bytes_since_checkpoint = 0;
168 }
169
170 let mut line_end = pos;
171 while line_end < content_bytes.len() {
172 if content_bytes[line_end] == b'\n' {
173 line_end += 1;
174 break;
175 } else if content_bytes[line_end] == b'\r' {
176 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
177 line_end += 2;
178 } else {
179 line_end += 1;
180 }
181 break;
182 }
183 line_end += 1;
184 }
185
186 let line_bytes = &content_bytes[pos..line_end];
187 let actual_line_byte_len = line_bytes.len();
188
189 let line_str = match std::str::from_utf8(line_bytes) {
190 Ok(s) => s,
191 Err(_) => {
192 pos = line_end;
193 current_offset += actual_line_byte_len;
194 bytes_since_checkpoint += actual_line_byte_len;
195 continue;
196 }
197 };
198
199 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
200 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
201 format!("{}\n", line_content)
202 } else {
203 line_content.to_string()
204 };
205
206 let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
207 Ok(ops) => ops,
208 Err(_) => {
209 pos = line_end;
210 current_offset += actual_line_byte_len;
211 bytes_since_checkpoint += actual_line_byte_len;
212 continue;
213 }
214 };
215
216 let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
217 let mut syntect_offset = 0;
218 let line_content_len = line_content.len();
219
220 for (op_offset, op) in ops {
221 let clamped_op_offset = op_offset.min(line_content_len);
222 if collect_spans && clamped_op_offset > syntect_offset {
223 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
224 let byte_start = current_offset + syntect_offset;
225 let byte_end = current_offset + clamped_op_offset;
226 let clamped_start = byte_start.max(desired_parse_start);
227 if clamped_start < byte_end {
228 spans.push(CachedSpan {
229 range: clamped_start..byte_end,
230 category,
231 });
232 }
233 }
234 }
235 syntect_offset = clamped_op_offset;
236 #[allow(clippy::let_underscore_must_use)]
237 let _ = current_scopes.apply(&op);
238 }
239
240 if collect_spans && syntect_offset < line_content_len {
241 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
242 let byte_start = current_offset + syntect_offset;
243 let byte_end = current_offset + line_content_len;
244 let clamped_start = byte_start.max(desired_parse_start);
245 if clamped_start < byte_end {
246 spans.push(CachedSpan {
247 range: clamped_start..byte_end,
248 category,
249 });
250 }
251 }
252 }
253
254 pos = line_end;
255 current_offset += actual_line_byte_len;
256 bytes_since_checkpoint += actual_line_byte_len;
257 }
258
259 Self::merge_adjacent_spans(&mut spans);
260
261 self.cache = Some(TextMateCache {
262 range: desired_parse_start..parse_end,
263 spans: spans.clone(),
264 });
265 self.last_buffer_len = buffer.len();
266
267 spans
268 .into_iter()
269 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
270 .map(|span| {
271 let cat = span.category;
272 HighlightSpan {
273 range: span.range,
274 color: highlight_color(cat, theme),
275 category: Some(cat),
276 }
277 })
278 .collect()
279 }
280
281 fn run_convergence_walk(&mut self, buffer: &Buffer, walk_end: usize) {
282 let dirty = match self.dirty_from.take() {
283 Some(d) => d,
284 None => return,
285 };
286
287 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
288
289 let (resume_pos, mut state, mut current_scopes) = {
290 let search_start = dirty.saturating_sub(MAX_PARSE_BYTES);
291 let markers = self.checkpoint_markers.query_range(search_start, dirty);
292 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
293 if let Some((id, cp_pos, _)) = nearest {
294 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
295 (cp_pos, s.clone(), sc.clone())
296 } else {
297 self.checkpoint_markers.delete(id);
298 (
299 0,
300 syntect::parsing::ParseState::new(syntax),
301 syntect::parsing::ScopeStack::new(),
302 )
303 }
304 } else if walk_end <= MAX_PARSE_BYTES {
305 (
306 0,
307 syntect::parsing::ParseState::new(syntax),
308 syntect::parsing::ScopeStack::new(),
309 )
310 } else {
311 self.dirty_from = Some(dirty);
312 return;
313 }
314 };
315
316 let mut markers_ahead: Vec<(MarkerId, usize)> = self
317 .checkpoint_markers
318 .query_range(dirty, walk_end)
319 .into_iter()
320 .map(|(id, start, _)| (id, start))
321 .collect();
322 markers_ahead.sort_by_key(|(_, pos)| *pos);
323
324 if markers_ahead.is_empty() {
325 return;
326 }
327
328 let content_end = walk_end.min(buffer.len());
329 if resume_pos >= content_end {
330 return;
331 }
332 let content = buffer.slice_bytes(resume_pos..content_end);
333 let content_str = match std::str::from_utf8(&content) {
334 Ok(s) => s,
335 Err(_) => return,
336 };
337
338 let content_bytes = content_str.as_bytes();
339 let mut pos = 0;
340 let mut current_offset = resume_pos;
341 let mut marker_idx = 0;
342
343 while pos < content_bytes.len() && marker_idx < markers_ahead.len() {
344 let mut line_end = pos;
345 while line_end < content_bytes.len() {
346 if content_bytes[line_end] == b'\n' {
347 line_end += 1;
348 break;
349 } else if content_bytes[line_end] == b'\r' {
350 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
351 line_end += 2;
352 } else {
353 line_end += 1;
354 }
355 break;
356 }
357 line_end += 1;
358 }
359
360 let line_bytes = &content_bytes[pos..line_end];
361 let actual_line_byte_len = line_bytes.len();
362
363 let line_str = match std::str::from_utf8(line_bytes) {
364 Ok(s) => s,
365 Err(_) => {
366 pos = line_end;
367 current_offset += actual_line_byte_len;
368 continue;
369 }
370 };
371
372 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
373 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
374 format!("{}\n", line_content)
375 } else {
376 line_content.to_string()
377 };
378
379 if let Ok(ops) = state.parse_line(&line_for_syntect, &self.syntax_set) {
380 for (_op_offset, op) in ops {
381 #[allow(clippy::let_underscore_must_use)]
382 let _ = current_scopes.apply(&op);
383 }
384 }
385
386 pos = line_end;
387 current_offset += actual_line_byte_len;
388
389 while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
390 {
391 let (marker_id, _) = markers_ahead[marker_idx];
392 marker_idx += 1;
393
394 if let Some(stored) = self.checkpoint_states.get(&marker_id) {
395 if state == stored.0 && current_scopes == stored.1 {
396 return;
397 }
398 }
399 self.checkpoint_states
400 .insert(marker_id, (state.clone(), current_scopes.clone()));
401 }
402 }
403
404 if marker_idx < markers_ahead.len() {
405 self.dirty_from = Some(markers_ahead[marker_idx].1);
406 }
407 }
408
409 fn find_parse_resume_point(
410 &self,
411 desired_start: usize,
412 parse_end: usize,
413 syntax: &syntect::parsing::SyntaxReference,
414 ) -> (
415 usize,
416 syntect::parsing::ParseState,
417 syntect::parsing::ScopeStack,
418 bool,
419 ) {
420 use syntect::parsing::{ParseState, ScopeStack};
421
422 let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
423 let markers = self
424 .checkpoint_markers
425 .query_range(search_start, desired_start + 1);
426 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
427
428 if let Some((id, cp_pos, _)) = nearest {
429 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
430 return (cp_pos, s.clone(), sc.clone(), true);
431 }
432 }
433 if parse_end <= MAX_PARSE_BYTES {
434 (0, ParseState::new(syntax), ScopeStack::new(), true)
435 } else {
436 (
437 desired_start,
438 ParseState::new(syntax),
439 ScopeStack::new(),
440 true,
441 )
442 }
443 }
444
445 fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
446 for scope in scopes.as_slice().iter().rev() {
447 let scope_str = scope.build_string();
448 if let Some(cat) = scope_to_category(&scope_str) {
449 return Some(cat);
450 }
451 }
452 None
453 }
454
455 fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
456 if spans.len() < 2 {
457 return;
458 }
459 let mut write_idx = 0;
460 for read_idx in 1..spans.len() {
461 if spans[write_idx].category == spans[read_idx].category
462 && spans[write_idx].range.end == spans[read_idx].range.start
463 {
464 spans[write_idx].range.end = spans[read_idx].range.end;
465 } else {
466 write_idx += 1;
467 if write_idx != read_idx {
468 spans[write_idx] = spans[read_idx].clone();
469 }
470 }
471 }
472 spans.truncate(write_idx + 1);
473 }
474
475 pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
476 if let Some(cache) = &self.cache {
477 if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
478 self.cache = None;
479 }
480 }
481 }
482
483 pub fn invalidate_all(&mut self) {
484 self.cache = None;
485 let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
486 for id in ids {
487 self.checkpoint_markers.delete(id);
488 }
489 self.checkpoint_states.clear();
490 self.dirty_from = None;
491 }
492
493 pub fn syntax_name(&self) -> &str {
494 &self.syntax_set.syntaxes()[self.syntax_index].name
495 }
496}
497
498fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
500 let scope_lower = scope.to_lowercase();
501
502 if scope_lower.starts_with("comment") {
504 return Some(HighlightCategory::Comment);
505 }
506
507 if scope_lower.starts_with("string") {
509 return Some(HighlightCategory::String);
510 }
511
512 if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
514 return Some(HighlightCategory::Keyword);
515 }
516 if scope_lower.starts_with("markup.bold") {
517 return Some(HighlightCategory::Constant);
518 }
519 if scope_lower.starts_with("markup.italic") {
520 return Some(HighlightCategory::Variable);
521 }
522 if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
523 return Some(HighlightCategory::String);
524 }
525 if scope_lower.starts_with("markup.underline.link")
526 || scope_lower.starts_with("markup.underline")
527 {
528 return Some(HighlightCategory::Function);
529 }
530 if scope_lower.starts_with("markup.quote") || scope_lower.starts_with("markup.strikethrough") {
531 return Some(HighlightCategory::Comment);
532 }
533 if scope_lower.starts_with("markup.list") {
534 return Some(HighlightCategory::Operator);
535 }
536 if scope_lower.starts_with("markup.inserted") {
538 return Some(HighlightCategory::String); }
540 if scope_lower.starts_with("markup.deleted") {
541 return Some(HighlightCategory::Keyword); }
543 if scope_lower.starts_with("meta.diff.range")
545 || scope_lower.starts_with("meta.diff.header")
546 || scope_lower.starts_with("meta.diff.index")
547 {
548 return Some(HighlightCategory::Function); }
550 if scope_lower.starts_with("punctuation.definition.from-file")
552 || scope_lower.starts_with("punctuation.definition.to-file")
553 {
554 return Some(HighlightCategory::Type); }
556
557 if scope_lower.starts_with("keyword") && !scope_lower.starts_with("keyword.operator") {
559 return Some(HighlightCategory::Keyword);
560 }
561
562 if scope_lower.starts_with("punctuation.definition.comment") {
566 return Some(HighlightCategory::Comment);
567 }
568 if scope_lower.starts_with("punctuation.definition.string") {
569 return Some(HighlightCategory::String);
570 }
571
572 if scope_lower.starts_with("keyword.operator") {
574 return Some(HighlightCategory::Operator);
575 }
576
577 if scope_lower.starts_with("punctuation.section")
581 || scope_lower.starts_with("punctuation.bracket")
582 || scope_lower.starts_with("punctuation.definition.array")
583 || scope_lower.starts_with("punctuation.definition.block")
584 || scope_lower.starts_with("punctuation.definition.brackets")
585 || scope_lower.starts_with("punctuation.definition.group")
586 || scope_lower.starts_with("punctuation.definition.inline-table")
587 || scope_lower.starts_with("punctuation.definition.section")
588 || scope_lower.starts_with("punctuation.definition.table")
589 || scope_lower.starts_with("punctuation.definition.tag")
590 {
591 return Some(HighlightCategory::PunctuationBracket);
592 }
593
594 if scope_lower.starts_with("punctuation.separator")
596 || scope_lower.starts_with("punctuation.terminator")
597 || scope_lower.starts_with("punctuation.accessor")
598 {
599 return Some(HighlightCategory::PunctuationDelimiter);
600 }
601
602 if scope_lower.starts_with("entity.name.function")
604 || scope_lower.starts_with("meta.function-call")
605 || scope_lower.starts_with("support.function")
606 {
607 return Some(HighlightCategory::Function);
608 }
609
610 if scope_lower.starts_with("entity.name.type")
612 || scope_lower.starts_with("storage.type")
613 || scope_lower.starts_with("support.type")
614 || scope_lower.starts_with("entity.name.class")
615 {
616 return Some(HighlightCategory::Type);
617 }
618
619 if scope_lower.starts_with("constant.numeric")
621 || scope_lower.starts_with("constant.language")
622 || scope_lower.starts_with("constant.character")
623 {
624 return Some(HighlightCategory::Constant);
625 }
626 if scope_lower.starts_with("constant") {
627 return Some(HighlightCategory::Constant);
628 }
629
630 if scope_lower.starts_with("variable.parameter") {
632 return Some(HighlightCategory::Variable);
633 }
634 if scope_lower.starts_with("variable") {
635 return Some(HighlightCategory::Variable);
636 }
637
638 if scope_lower.starts_with("storage.modifier") {
640 return Some(HighlightCategory::Keyword);
641 }
642
643 if scope_lower.starts_with("entity.name") {
645 return Some(HighlightCategory::Function);
646 }
647
648 None
649}
650
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every scope in `cases` maps to the paired category.
    fn assert_categories(cases: &[(&str, HighlightCategory)]) {
        for &(scope, expected) in cases {
            assert_eq!(scope_to_category(scope), Some(expected), "scope: {}", scope);
        }
    }

    /// One representative scope per basic category.
    #[test]
    fn test_scope_to_category() {
        assert_categories(&[
            ("comment.line", HighlightCategory::Comment),
            ("string.quoted", HighlightCategory::String),
            ("keyword.control", HighlightCategory::Keyword),
            ("keyword.operator", HighlightCategory::Operator),
            ("entity.name.function", HighlightCategory::Function),
            ("constant.numeric", HighlightCategory::Constant),
            ("variable.parameter", HighlightCategory::Variable),
        ]);
    }

    /// Comment delimiters are coloured like the comment they open or close.
    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        assert_categories(&[
            ("punctuation.definition.comment", HighlightCategory::Comment),
            ("punctuation.definition.comment.python", HighlightCategory::Comment),
            ("punctuation.definition.comment.begin", HighlightCategory::Comment),
        ]);
    }

    /// String delimiters are coloured like the string they open or close.
    #[test]
    fn test_string_delimiter_uses_string_color() {
        assert_categories(&[
            ("punctuation.definition.string.begin", HighlightCategory::String),
            ("punctuation.definition.string.end", HighlightCategory::String),
        ]);
    }

    /// Scopes used by diff grammars all resolve to a category.
    #[test]
    fn test_diff_scopes_produce_categories() {
        assert_categories(&[
            ("markup.inserted", HighlightCategory::String),
            ("markup.inserted.diff", HighlightCategory::String),
            ("markup.deleted", HighlightCategory::Keyword),
            ("markup.deleted.diff", HighlightCategory::Keyword),
            ("meta.diff.range", HighlightCategory::Function),
            ("meta.diff.header", HighlightCategory::Function),
        ]);
    }

    /// Parses a small diff with syntect's bundled grammar and checks that at
    /// least one of the diff scopes handled by `scope_to_category` shows up.
    #[test]
    fn test_diff_parsing_produces_scopes() {
        use syntect::parsing::{ParseState, ScopeStack, SyntaxSet};

        let ss = SyntaxSet::load_defaults_newlines();
        let syntax = ss
            .find_syntax_by_extension("diff")
            .expect("Diff syntax should exist");
        let mut state = ParseState::new(syntax);
        let mut scopes = ScopeStack::new();

        let lines = [
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,3 +1,4 @@\n",
            " unchanged\n",
            "-removed line\n",
            "+added line\n",
        ];

        let mut found_inserted = false;
        let mut found_deleted = false;
        let mut found_range = false;

        for line in &lines {
            let ops = state.parse_line(line, &ss).unwrap();
            for (_offset, op) in &ops {
                scopes.apply(op).unwrap();
                // Inspect the whole stack after every operation.
                let scope_str = scopes
                    .as_slice()
                    .iter()
                    .map(|s| s.build_string())
                    .collect::<Vec<_>>()
                    .join(" ");
                found_inserted |= scope_str.contains("markup.inserted");
                found_deleted |= scope_str.contains("markup.deleted");
                found_range |= scope_str.contains("meta.diff");
            }
        }

        eprintln!(
            "found_inserted={}, found_deleted={}, found_range={}",
            found_inserted, found_deleted, found_range
        );
        assert!(
            found_inserted || found_deleted || found_range,
            "Diff grammar should produce markup.inserted, markup.deleted, or meta.diff scopes"
        );
    }
}
801}