1use crate::model::buffer::Buffer;
13use crate::model::marker::{MarkerId, MarkerList};
14use crate::primitives::grammar::GrammarRegistry;
15use crate::primitives::highlight_types::{highlight_color, HighlightCategory, HighlightSpan};
16use crate::view::theme::Theme;
17use std::collections::HashMap;
18use std::ops::Range;
19use std::path::Path;
20use std::sync::Arc;
21use syntect::parsing::SyntaxSet;
22
/// Upper bound, in bytes (1 MiB), on parsing without a checkpoint.
///
/// It limits how far back from a position a checkpoint is searched for, and
/// parsing restarts from offset 0 only when the end of the parsed region lies
/// within this many bytes of the start of the buffer.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Minimum number of bytes parsed between two attempts to record a parse-state
/// checkpoint. Half of this value is also the radius searched for an existing
/// checkpoint before a new one is created.
const CHECKPOINT_INTERVAL: usize = 256;
28
/// Syntax-highlighting engine driven by a syntect grammar.
///
/// Holds the selected syntax, parse-state checkpoints keyed by buffer markers,
/// the earliest offset whose checkpoints still need revalidation, and a cache
/// of the most recently computed spans.
pub struct TextMateEngine {
    /// Shared set of syntect syntax definitions.
    syntax_set: Arc<SyntaxSet>,
    /// Index of the active syntax within `syntax_set.syntaxes()`.
    syntax_index: usize,
    /// Markers recording the byte positions of checkpoints; adjusted on every
    /// reported insert and delete.
    checkpoint_markers: MarkerList,
    /// Parser state and scope stack saved for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Earliest offset from which stored checkpoint states may be stale, or
    /// `None` when no revalidation is pending.
    dirty_from: Option<usize>,
    /// Spans from the most recent parse together with the byte range they cover.
    cache: Option<TextMateCache>,
    /// Buffer length observed when `cache` was filled; a differing length makes
    /// the cache unusable.
    last_buffer_len: usize,
}
43
/// Result of the most recent viewport parse, kept so that repeated requests
/// for a covered viewport do not have to parse again.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer the cached spans were computed for.
    range: Range<usize>,
    /// Spans produced for `range`, stored after adjacent spans were merged.
    spans: Vec<CachedSpan>,
}
49
/// A theme-independent highlight span: the color is looked up from the
/// category only when spans are handed out to the caller.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte range of the span within the buffer.
    range: Range<usize>,
    /// Highlight category derived from the scope stack for this range.
    category: HighlightCategory,
}
55
56impl TextMateEngine {
57 pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
59 Self {
60 syntax_set,
61 syntax_index,
62 checkpoint_markers: MarkerList::new(),
63 checkpoint_states: HashMap::new(),
64 dirty_from: None,
65 cache: None,
66 last_buffer_len: 0,
67 }
68 }
69
70 pub fn for_file(path: &Path, registry: &GrammarRegistry) -> Option<Self> {
72 let syntax_set = registry.syntax_set_arc();
73
74 let syntax = registry.find_syntax_for_file(path)?;
76
77 let index = syntax_set
79 .syntaxes()
80 .iter()
81 .position(|s| s.name == syntax.name)?;
82
83 Some(Self::new(syntax_set, index))
84 }
85
86 pub fn notify_insert(&mut self, position: usize, length: usize) {
87 self.checkpoint_markers.adjust_for_insert(position, length);
88 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
89 }
90
91 pub fn notify_delete(&mut self, position: usize, length: usize) {
92 self.checkpoint_markers.adjust_for_delete(position, length);
93 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
94 }
95
96 pub fn highlight_viewport(
98 &mut self,
99 buffer: &Buffer,
100 viewport_start: usize,
101 viewport_end: usize,
102 theme: &Theme,
103 context_bytes: usize,
104 ) -> Vec<HighlightSpan> {
105 if let Some(cache) = &self.cache {
106 if cache.range.start <= viewport_start
107 && cache.range.end >= viewport_end
108 && self.last_buffer_len == buffer.len()
109 {
110 return cache
111 .spans
112 .iter()
113 .filter(|span| {
114 span.range.start < viewport_end && span.range.end > viewport_start
115 })
116 .map(|span| HighlightSpan {
117 range: span.range.clone(),
118 color: highlight_color(span.category, theme),
119 category: Some(span.category),
120 })
121 .collect();
122 }
123 }
124
125 let desired_parse_start = viewport_start.saturating_sub(context_bytes);
126 let parse_end = (viewport_end + context_bytes).min(buffer.len());
127 if parse_end <= desired_parse_start {
128 return Vec::new();
129 }
130
131 if let Some(dirty) = self.dirty_from {
132 if dirty < parse_end {
133 self.run_convergence_walk(buffer, parse_end);
134 }
135 }
136
137 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
138 let (actual_start, mut state, mut current_scopes, create_checkpoints) =
139 self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
140
141 let content = buffer.slice_bytes(actual_start..parse_end);
142 let content_str = match std::str::from_utf8(&content) {
143 Ok(s) => s,
144 Err(_) => return Vec::new(),
145 };
146
147 let mut spans = Vec::new();
148 let content_bytes = content_str.as_bytes();
149 let mut pos = 0;
150 let mut current_offset = actual_start;
151 let mut bytes_since_checkpoint: usize = 0;
152
153 while pos < content_bytes.len() {
154 if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
155 let nearby = self.checkpoint_markers.query_range(
156 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
157 current_offset + CHECKPOINT_INTERVAL / 2,
158 );
159 if nearby.is_empty() {
160 let marker_id = self.checkpoint_markers.create(current_offset, true);
161 self.checkpoint_states
162 .insert(marker_id, (state.clone(), current_scopes.clone()));
163 }
164 bytes_since_checkpoint = 0;
165 }
166
167 let mut line_end = pos;
168 while line_end < content_bytes.len() {
169 if content_bytes[line_end] == b'\n' {
170 line_end += 1;
171 break;
172 } else if content_bytes[line_end] == b'\r' {
173 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
174 line_end += 2;
175 } else {
176 line_end += 1;
177 }
178 break;
179 }
180 line_end += 1;
181 }
182
183 let line_bytes = &content_bytes[pos..line_end];
184 let actual_line_byte_len = line_bytes.len();
185
186 let line_str = match std::str::from_utf8(line_bytes) {
187 Ok(s) => s,
188 Err(_) => {
189 pos = line_end;
190 current_offset += actual_line_byte_len;
191 bytes_since_checkpoint += actual_line_byte_len;
192 continue;
193 }
194 };
195
196 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
197 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
198 format!("{}\n", line_content)
199 } else {
200 line_content.to_string()
201 };
202
203 let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
204 Ok(ops) => ops,
205 Err(_) => {
206 pos = line_end;
207 current_offset += actual_line_byte_len;
208 bytes_since_checkpoint += actual_line_byte_len;
209 continue;
210 }
211 };
212
213 let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
214 let mut syntect_offset = 0;
215 let line_content_len = line_content.len();
216
217 for (op_offset, op) in ops {
218 let clamped_op_offset = op_offset.min(line_content_len);
219 if collect_spans && clamped_op_offset > syntect_offset {
220 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
221 let byte_start = current_offset + syntect_offset;
222 let byte_end = current_offset + clamped_op_offset;
223 let clamped_start = byte_start.max(desired_parse_start);
224 if clamped_start < byte_end {
225 spans.push(CachedSpan {
226 range: clamped_start..byte_end,
227 category,
228 });
229 }
230 }
231 }
232 syntect_offset = clamped_op_offset;
233 #[allow(clippy::let_underscore_must_use)]
234 let _ = current_scopes.apply(&op);
235 }
236
237 if collect_spans && syntect_offset < line_content_len {
238 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
239 let byte_start = current_offset + syntect_offset;
240 let byte_end = current_offset + line_content_len;
241 let clamped_start = byte_start.max(desired_parse_start);
242 if clamped_start < byte_end {
243 spans.push(CachedSpan {
244 range: clamped_start..byte_end,
245 category,
246 });
247 }
248 }
249 }
250
251 pos = line_end;
252 current_offset += actual_line_byte_len;
253 bytes_since_checkpoint += actual_line_byte_len;
254 }
255
256 Self::merge_adjacent_spans(&mut spans);
257
258 self.cache = Some(TextMateCache {
259 range: desired_parse_start..parse_end,
260 spans: spans.clone(),
261 });
262 self.last_buffer_len = buffer.len();
263
264 spans
265 .into_iter()
266 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
267 .map(|span| {
268 let cat = span.category;
269 HighlightSpan {
270 range: span.range,
271 color: highlight_color(cat, theme),
272 category: Some(cat),
273 }
274 })
275 .collect()
276 }
277
278 fn run_convergence_walk(&mut self, buffer: &Buffer, walk_end: usize) {
279 let dirty = match self.dirty_from.take() {
280 Some(d) => d,
281 None => return,
282 };
283
284 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
285
286 let (resume_pos, mut state, mut current_scopes) = {
287 let search_start = dirty.saturating_sub(MAX_PARSE_BYTES);
288 let markers = self.checkpoint_markers.query_range(search_start, dirty);
289 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
290 if let Some((id, cp_pos, _)) = nearest {
291 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
292 (cp_pos, s.clone(), sc.clone())
293 } else {
294 self.checkpoint_markers.delete(id);
295 (
296 0,
297 syntect::parsing::ParseState::new(syntax),
298 syntect::parsing::ScopeStack::new(),
299 )
300 }
301 } else if walk_end <= MAX_PARSE_BYTES {
302 (
303 0,
304 syntect::parsing::ParseState::new(syntax),
305 syntect::parsing::ScopeStack::new(),
306 )
307 } else {
308 self.dirty_from = Some(dirty);
309 return;
310 }
311 };
312
313 let mut markers_ahead: Vec<(MarkerId, usize)> = self
314 .checkpoint_markers
315 .query_range(dirty, walk_end)
316 .into_iter()
317 .map(|(id, start, _)| (id, start))
318 .collect();
319 markers_ahead.sort_by_key(|(_, pos)| *pos);
320
321 if markers_ahead.is_empty() {
322 return;
323 }
324
325 let content_end = walk_end.min(buffer.len());
326 if resume_pos >= content_end {
327 return;
328 }
329 let content = buffer.slice_bytes(resume_pos..content_end);
330 let content_str = match std::str::from_utf8(&content) {
331 Ok(s) => s,
332 Err(_) => return,
333 };
334
335 let content_bytes = content_str.as_bytes();
336 let mut pos = 0;
337 let mut current_offset = resume_pos;
338 let mut marker_idx = 0;
339
340 while pos < content_bytes.len() && marker_idx < markers_ahead.len() {
341 let mut line_end = pos;
342 while line_end < content_bytes.len() {
343 if content_bytes[line_end] == b'\n' {
344 line_end += 1;
345 break;
346 } else if content_bytes[line_end] == b'\r' {
347 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
348 line_end += 2;
349 } else {
350 line_end += 1;
351 }
352 break;
353 }
354 line_end += 1;
355 }
356
357 let line_bytes = &content_bytes[pos..line_end];
358 let actual_line_byte_len = line_bytes.len();
359
360 let line_str = match std::str::from_utf8(line_bytes) {
361 Ok(s) => s,
362 Err(_) => {
363 pos = line_end;
364 current_offset += actual_line_byte_len;
365 continue;
366 }
367 };
368
369 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
370 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
371 format!("{}\n", line_content)
372 } else {
373 line_content.to_string()
374 };
375
376 if let Ok(ops) = state.parse_line(&line_for_syntect, &self.syntax_set) {
377 for (_op_offset, op) in ops {
378 #[allow(clippy::let_underscore_must_use)]
379 let _ = current_scopes.apply(&op);
380 }
381 }
382
383 pos = line_end;
384 current_offset += actual_line_byte_len;
385
386 while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
387 {
388 let (marker_id, _) = markers_ahead[marker_idx];
389 marker_idx += 1;
390
391 if let Some(stored) = self.checkpoint_states.get(&marker_id) {
392 if state == stored.0 && current_scopes == stored.1 {
393 return;
394 }
395 }
396 self.checkpoint_states
397 .insert(marker_id, (state.clone(), current_scopes.clone()));
398 }
399 }
400
401 if marker_idx < markers_ahead.len() {
402 self.dirty_from = Some(markers_ahead[marker_idx].1);
403 }
404 }
405
406 fn find_parse_resume_point(
407 &self,
408 desired_start: usize,
409 parse_end: usize,
410 syntax: &syntect::parsing::SyntaxReference,
411 ) -> (
412 usize,
413 syntect::parsing::ParseState,
414 syntect::parsing::ScopeStack,
415 bool,
416 ) {
417 use syntect::parsing::{ParseState, ScopeStack};
418
419 let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
420 let markers = self
421 .checkpoint_markers
422 .query_range(search_start, desired_start + 1);
423 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
424
425 if let Some((id, cp_pos, _)) = nearest {
426 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
427 return (cp_pos, s.clone(), sc.clone(), true);
428 }
429 }
430 if parse_end <= MAX_PARSE_BYTES {
431 (0, ParseState::new(syntax), ScopeStack::new(), true)
432 } else {
433 (
434 desired_start,
435 ParseState::new(syntax),
436 ScopeStack::new(),
437 true,
438 )
439 }
440 }
441
442 fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
443 for scope in scopes.as_slice().iter().rev() {
444 let scope_str = scope.build_string();
445 if let Some(cat) = scope_to_category(&scope_str) {
446 return Some(cat);
447 }
448 }
449 None
450 }
451
452 fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
453 if spans.len() < 2 {
454 return;
455 }
456 let mut write_idx = 0;
457 for read_idx in 1..spans.len() {
458 if spans[write_idx].category == spans[read_idx].category
459 && spans[write_idx].range.end == spans[read_idx].range.start
460 {
461 spans[write_idx].range.end = spans[read_idx].range.end;
462 } else {
463 write_idx += 1;
464 if write_idx != read_idx {
465 spans[write_idx] = spans[read_idx].clone();
466 }
467 }
468 }
469 spans.truncate(write_idx + 1);
470 }
471
472 pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
473 if let Some(cache) = &self.cache {
474 if edit_range.start < cache.range.end && edit_range.end > cache.range.start {
475 self.cache = None;
476 }
477 }
478 }
479
480 pub fn invalidate_all(&mut self) {
481 self.cache = None;
482 let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
483 for id in ids {
484 self.checkpoint_markers.delete(id);
485 }
486 self.checkpoint_states.clear();
487 self.dirty_from = None;
488 }
489
490 pub fn syntax_name(&self) -> &str {
491 &self.syntax_set.syntaxes()[self.syntax_index].name
492 }
493}
494
495fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
497 let scope_lower = scope.to_lowercase();
498
499 if scope_lower.starts_with("comment") {
501 return Some(HighlightCategory::Comment);
502 }
503
504 if scope_lower.starts_with("string") {
506 return Some(HighlightCategory::String);
507 }
508
509 if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
511 return Some(HighlightCategory::Keyword);
512 }
513 if scope_lower.starts_with("markup.bold") {
514 return Some(HighlightCategory::Constant);
515 }
516 if scope_lower.starts_with("markup.italic") {
517 return Some(HighlightCategory::Variable);
518 }
519 if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
520 return Some(HighlightCategory::String);
521 }
522 if scope_lower.starts_with("markup.underline.link")
523 || scope_lower.starts_with("markup.underline")
524 {
525 return Some(HighlightCategory::Function);
526 }
527 if scope_lower.starts_with("markup.quote") || scope_lower.starts_with("markup.strikethrough") {
528 return Some(HighlightCategory::Comment);
529 }
530 if scope_lower.starts_with("markup.list") {
531 return Some(HighlightCategory::Operator);
532 }
533 if scope_lower.starts_with("markup.inserted") {
535 return Some(HighlightCategory::String); }
537 if scope_lower.starts_with("markup.deleted") {
538 return Some(HighlightCategory::Keyword); }
540 if scope_lower.starts_with("meta.diff.range")
542 || scope_lower.starts_with("meta.diff.header")
543 || scope_lower.starts_with("meta.diff.index")
544 {
545 return Some(HighlightCategory::Function); }
547 if scope_lower.starts_with("punctuation.definition.from-file")
549 || scope_lower.starts_with("punctuation.definition.to-file")
550 {
551 return Some(HighlightCategory::Type); }
553
554 if scope_lower.starts_with("keyword") && !scope_lower.starts_with("keyword.operator") {
556 return Some(HighlightCategory::Keyword);
557 }
558
559 if scope_lower.starts_with("punctuation.definition.comment") {
563 return Some(HighlightCategory::Comment);
564 }
565 if scope_lower.starts_with("punctuation.definition.string") {
566 return Some(HighlightCategory::String);
567 }
568
569 if scope_lower.starts_with("keyword.operator") {
571 return Some(HighlightCategory::Operator);
572 }
573
574 if scope_lower.starts_with("punctuation.section")
578 || scope_lower.starts_with("punctuation.bracket")
579 || scope_lower.starts_with("punctuation.definition.array")
580 || scope_lower.starts_with("punctuation.definition.block")
581 || scope_lower.starts_with("punctuation.definition.brackets")
582 || scope_lower.starts_with("punctuation.definition.group")
583 || scope_lower.starts_with("punctuation.definition.inline-table")
584 || scope_lower.starts_with("punctuation.definition.section")
585 || scope_lower.starts_with("punctuation.definition.table")
586 || scope_lower.starts_with("punctuation.definition.tag")
587 {
588 return Some(HighlightCategory::PunctuationBracket);
589 }
590
591 if scope_lower.starts_with("punctuation.separator")
593 || scope_lower.starts_with("punctuation.terminator")
594 || scope_lower.starts_with("punctuation.accessor")
595 {
596 return Some(HighlightCategory::PunctuationDelimiter);
597 }
598
599 if scope_lower.starts_with("entity.name.function")
601 || scope_lower.starts_with("meta.function-call")
602 || scope_lower.starts_with("support.function")
603 {
604 return Some(HighlightCategory::Function);
605 }
606
607 if scope_lower.starts_with("entity.name.type")
609 || scope_lower.starts_with("storage.type")
610 || scope_lower.starts_with("support.type")
611 || scope_lower.starts_with("entity.name.class")
612 {
613 return Some(HighlightCategory::Type);
614 }
615
616 if scope_lower.starts_with("constant.numeric")
618 || scope_lower.starts_with("constant.language")
619 || scope_lower.starts_with("constant.character")
620 {
621 return Some(HighlightCategory::Constant);
622 }
623 if scope_lower.starts_with("constant") {
624 return Some(HighlightCategory::Constant);
625 }
626
627 if scope_lower.starts_with("variable.parameter") {
629 return Some(HighlightCategory::Variable);
630 }
631 if scope_lower.starts_with("variable") {
632 return Some(HighlightCategory::Variable);
633 }
634
635 if scope_lower.starts_with("storage.modifier") {
637 return Some(HighlightCategory::Keyword);
638 }
639
640 if scope_lower.starts_with("entity.name") {
642 return Some(HighlightCategory::Function);
643 }
644
645 None
646}
647
648#[cfg(test)]
649mod tests {
650 use super::*;
651
652 #[test]
653 fn test_scope_to_category() {
654 assert_eq!(
655 scope_to_category("comment.line"),
656 Some(HighlightCategory::Comment)
657 );
658 assert_eq!(
659 scope_to_category("string.quoted"),
660 Some(HighlightCategory::String)
661 );
662 assert_eq!(
663 scope_to_category("keyword.control"),
664 Some(HighlightCategory::Keyword)
665 );
666 assert_eq!(
667 scope_to_category("keyword.operator"),
668 Some(HighlightCategory::Operator)
669 );
670 assert_eq!(
671 scope_to_category("entity.name.function"),
672 Some(HighlightCategory::Function)
673 );
674 assert_eq!(
675 scope_to_category("constant.numeric"),
676 Some(HighlightCategory::Constant)
677 );
678 assert_eq!(
679 scope_to_category("variable.parameter"),
680 Some(HighlightCategory::Variable)
681 );
682 }
683
684 #[test]
685 fn test_comment_delimiter_uses_comment_color() {
686 assert_eq!(
688 scope_to_category("punctuation.definition.comment"),
689 Some(HighlightCategory::Comment)
690 );
691 assert_eq!(
692 scope_to_category("punctuation.definition.comment.python"),
693 Some(HighlightCategory::Comment)
694 );
695 assert_eq!(
696 scope_to_category("punctuation.definition.comment.begin"),
697 Some(HighlightCategory::Comment)
698 );
699 }
700
701 #[test]
702 fn test_string_delimiter_uses_string_color() {
703 assert_eq!(
705 scope_to_category("punctuation.definition.string.begin"),
706 Some(HighlightCategory::String)
707 );
708 assert_eq!(
709 scope_to_category("punctuation.definition.string.end"),
710 Some(HighlightCategory::String)
711 );
712 }
713
714 #[test]
715 fn test_diff_scopes_produce_categories() {
716 assert_eq!(
718 scope_to_category("markup.inserted"),
719 Some(HighlightCategory::String)
720 );
721 assert_eq!(
722 scope_to_category("markup.inserted.diff"),
723 Some(HighlightCategory::String)
724 );
725 assert_eq!(
726 scope_to_category("markup.deleted"),
727 Some(HighlightCategory::Keyword)
728 );
729 assert_eq!(
730 scope_to_category("markup.deleted.diff"),
731 Some(HighlightCategory::Keyword)
732 );
733 assert_eq!(
734 scope_to_category("meta.diff.range"),
735 Some(HighlightCategory::Function)
736 );
737 assert_eq!(
738 scope_to_category("meta.diff.header"),
739 Some(HighlightCategory::Function)
740 );
741 }
742
743 #[test]
744 fn test_diff_parsing_produces_scopes() {
745 use syntect::parsing::{ParseState, ScopeStack, SyntaxSet};
746
747 let ss = SyntaxSet::load_defaults_newlines();
748 let syntax = ss
749 .find_syntax_by_extension("diff")
750 .expect("Diff syntax should exist");
751 let mut state = ParseState::new(syntax);
752
753 let lines = [
754 "--- a/file.txt\n",
755 "+++ b/file.txt\n",
756 "@@ -1,3 +1,4 @@\n",
757 " unchanged\n",
758 "-removed line\n",
759 "+added line\n",
760 ];
761
762 let mut found_inserted = false;
763 let mut found_deleted = false;
764 let mut found_range = false;
765 let mut scopes = ScopeStack::new();
766
767 for line in &lines {
768 let ops = state.parse_line(line, &ss).unwrap();
769 for (_offset, op) in &ops {
770 scopes.apply(op).unwrap();
771 let scope_str = scopes
772 .as_slice()
773 .iter()
774 .map(|s| s.build_string())
775 .collect::<Vec<_>>()
776 .join(" ");
777 if scope_str.contains("markup.inserted") {
778 found_inserted = true;
779 }
780 if scope_str.contains("markup.deleted") {
781 found_deleted = true;
782 }
783 if scope_str.contains("meta.diff") {
784 found_range = true;
785 }
786 }
787 }
788
789 eprintln!(
790 "found_inserted={}, found_deleted={}, found_range={}",
791 found_inserted, found_deleted, found_range
792 );
793 assert!(
794 found_inserted || found_deleted || found_range,
795 "Diff grammar should produce markup.inserted, markup.deleted, or meta.diff scopes"
796 );
797 }
798}