1use crate::model::buffer::Buffer;
20use crate::model::marker::{MarkerId, MarkerList};
21use crate::primitives::grammar::GrammarRegistry;
22use crate::primitives::highlighter::{
23 highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
24};
25use crate::view::theme::Theme;
26use std::collections::HashMap;
27use std::ops::Range;
28use std::path::Path;
29use std::sync::Arc;
30use syntect::parsing::SyntaxSet;
31
32fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
34 let scope_lower = scope.to_lowercase();
35
36 if scope_lower.starts_with("comment") {
38 return Some(HighlightCategory::Comment);
39 }
40
41 if scope_lower.starts_with("string") {
43 return Some(HighlightCategory::String);
44 }
45
46 if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
50 return Some(HighlightCategory::Keyword); }
52 if scope_lower.starts_with("markup.bold") {
54 return Some(HighlightCategory::Constant); }
56 if scope_lower.starts_with("markup.italic") {
58 return Some(HighlightCategory::Variable); }
60 if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
62 return Some(HighlightCategory::String); }
64 if scope_lower.starts_with("markup.underline.link") {
66 return Some(HighlightCategory::Function); }
68 if scope_lower.starts_with("markup.underline") {
70 return Some(HighlightCategory::Function);
71 }
72 if scope_lower.starts_with("markup.quote") {
74 return Some(HighlightCategory::Comment); }
76 if scope_lower.starts_with("markup.list") {
78 return Some(HighlightCategory::Operator); }
80 if scope_lower.starts_with("markup.strikethrough") {
82 return Some(HighlightCategory::Comment); }
84
85 if scope_lower.starts_with("keyword.control")
87 || scope_lower.starts_with("keyword.other")
88 || scope_lower.starts_with("keyword.declaration")
89 || scope_lower.starts_with("keyword")
90 {
91 if !scope_lower.starts_with("keyword.operator") {
93 return Some(HighlightCategory::Keyword);
94 }
95 }
96
97 if scope_lower.starts_with("punctuation.definition.comment") {
101 return Some(HighlightCategory::Comment);
102 }
103 if scope_lower.starts_with("punctuation.definition.string") {
104 return Some(HighlightCategory::String);
105 }
106
107 if scope_lower.starts_with("keyword.operator") {
109 return Some(HighlightCategory::Operator);
110 }
111
112 if scope_lower.starts_with("punctuation.section")
116 || scope_lower.starts_with("punctuation.bracket")
117 || scope_lower.starts_with("punctuation.definition.array")
118 || scope_lower.starts_with("punctuation.definition.block")
119 || scope_lower.starts_with("punctuation.definition.brackets")
120 || scope_lower.starts_with("punctuation.definition.group")
121 || scope_lower.starts_with("punctuation.definition.inline-table")
122 || scope_lower.starts_with("punctuation.definition.section")
123 || scope_lower.starts_with("punctuation.definition.table")
124 || scope_lower.starts_with("punctuation.definition.tag")
125 {
126 return Some(HighlightCategory::PunctuationBracket);
127 }
128
129 if scope_lower.starts_with("punctuation.separator")
131 || scope_lower.starts_with("punctuation.terminator")
132 || scope_lower.starts_with("punctuation.accessor")
133 {
134 return Some(HighlightCategory::PunctuationDelimiter);
135 }
136
137 if scope_lower.starts_with("entity.name.function")
139 || scope_lower.starts_with("support.function")
140 || scope_lower.starts_with("meta.function-call")
141 || scope_lower.starts_with("variable.function")
142 {
143 return Some(HighlightCategory::Function);
144 }
145
146 if scope_lower.starts_with("entity.name.type")
148 || scope_lower.starts_with("entity.name.class")
149 || scope_lower.starts_with("entity.name.struct")
150 || scope_lower.starts_with("entity.name.enum")
151 || scope_lower.starts_with("entity.name.interface")
152 || scope_lower.starts_with("entity.name.trait")
153 || scope_lower.starts_with("support.type")
154 || scope_lower.starts_with("support.class")
155 || scope_lower.starts_with("storage.type")
156 {
157 return Some(HighlightCategory::Type);
158 }
159
160 if scope_lower.starts_with("storage.modifier") {
162 return Some(HighlightCategory::Keyword);
163 }
164
165 if scope_lower.starts_with("constant.numeric")
167 || scope_lower.starts_with("constant.language.boolean")
168 {
169 return Some(HighlightCategory::Number);
170 }
171 if scope_lower.starts_with("constant") {
172 return Some(HighlightCategory::Constant);
173 }
174
175 if scope_lower.starts_with("variable.parameter")
177 || scope_lower.starts_with("variable.other")
178 || scope_lower.starts_with("variable.language")
179 {
180 return Some(HighlightCategory::Variable);
181 }
182
183 if scope_lower.starts_with("entity.name.tag")
185 || scope_lower.starts_with("support.other.property")
186 || scope_lower.starts_with("meta.object-literal.key")
187 || scope_lower.starts_with("variable.other.property")
188 || scope_lower.starts_with("variable.other.object.property")
189 {
190 return Some(HighlightCategory::Property);
191 }
192
193 if scope_lower.starts_with("entity.other.attribute")
195 || scope_lower.starts_with("meta.attribute")
196 || scope_lower.starts_with("entity.name.decorator")
197 {
198 return Some(HighlightCategory::Attribute);
199 }
200
201 if scope_lower.starts_with("variable") {
203 return Some(HighlightCategory::Variable);
204 }
205
206 None
207}
208
/// Syntax-highlighting backend attached to a buffer.
///
/// The default value is [`HighlightEngine::None`].
#[derive(Default)]
pub enum HighlightEngine {
    /// Highlighting is delegated to the tree-sitter based [`Highlighter`].
    TreeSitter(Box<Highlighter>),
    /// Highlighting is delegated to the syntect-driven [`TextMateEngine`].
    TextMate(Box<TextMateEngine>),
    /// No backend is available; no spans are produced.
    #[default]
    None,
}
220
/// Incremental highlighter built on syntect's TextMate-grammar parser.
///
/// Parser state is saved at checkpoints along the buffer so that highlighting can resume
/// near the region of interest, and the spans of the last parse are cached.
pub struct TextMateEngine {
    /// Shared set of syntect syntax definitions.
    syntax_set: Arc<SyntaxSet>,
    /// Index into `syntax_set.syntaxes()` of the grammar used by this engine.
    syntax_index: usize,
    /// Markers for the buffer offsets at which parser state has been saved; they are
    /// adjusted on every reported insert and delete.
    checkpoint_markers: MarkerList,
    /// Parser state and scope stack saved for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Smallest offset edited since the edits were last consumed, if any.
    dirty_from: Option<usize>,
    /// Spans produced by the most recent parse, if any.
    cache: Option<TextMateCache>,
    /// Buffer length recorded when the cache was last refreshed.
    last_buffer_len: usize,
    /// Optional language, returned by `language()`; the code in this file only stores it.
    ts_language: Option<Language>,
    /// Counters describing how much work the engine has done.
    stats: HighlightStats,
}
258
/// Counters describing the work performed by a [`TextMateEngine`].
#[derive(Debug, Default, Clone)]
pub struct HighlightStats {
    /// Total number of bytes covered by parse passes.
    pub bytes_parsed: usize,
    /// Number of highlight requests answered from the cache without parsing.
    pub cache_hits: usize,
    /// Number of highlight requests that required parsing.
    pub cache_misses: usize,
    /// Number of stored checkpoint states overwritten during partial updates.
    pub checkpoints_updated: usize,
    /// Number of partial updates that stopped early because the parser state matched a
    /// stored checkpoint.
    pub convergences: usize,
}
273
/// Highlight spans remembered from the most recent parse.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer the cache was built for.
    range: Range<usize>,
    /// Categorised spans belonging to that range.
    spans: Vec<CachedSpan>,
}
279
/// A theme-independent highlight span: a byte range plus its category.
///
/// The colour is resolved against a theme only when spans are handed out.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte range of the span within the buffer.
    range: Range<usize>,
    /// Highlight category assigned to the range.
    category: crate::primitives::highlighter::HighlightCategory,
}
285
/// Size in bytes (1 MiB) of the window searched backwards for a stored checkpoint; also
/// the largest `parse_end` for which parsing may simply restart from offset 0.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Number of parsed bytes after which the parse loops try to record a new checkpoint.
const CHECKPOINT_INTERVAL: usize = 256;
294
295impl TextMateEngine {
296 pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
298 Self {
299 syntax_set,
300 syntax_index,
301 checkpoint_markers: MarkerList::new(),
302 checkpoint_states: HashMap::new(),
303 dirty_from: None,
304 cache: None,
305 last_buffer_len: 0,
306 ts_language: None,
307 stats: HighlightStats::default(),
308 }
309 }
310
311 pub fn with_language(
313 syntax_set: Arc<SyntaxSet>,
314 syntax_index: usize,
315 ts_language: Option<Language>,
316 ) -> Self {
317 Self {
318 syntax_set,
319 syntax_index,
320 checkpoint_markers: MarkerList::new(),
321 checkpoint_states: HashMap::new(),
322 dirty_from: None,
323 cache: None,
324 last_buffer_len: 0,
325 ts_language,
326 stats: HighlightStats::default(),
327 }
328 }
329
330 pub fn stats(&self) -> &HighlightStats {
332 &self.stats
333 }
334
335 pub fn reset_stats(&mut self) {
337 self.stats = HighlightStats::default();
338 }
339
340 pub fn language(&self) -> Option<&Language> {
342 self.ts_language.as_ref()
343 }
344
345 pub fn notify_insert(&mut self, position: usize, length: usize) {
349 self.checkpoint_markers.adjust_for_insert(position, length);
350 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
351 if let Some(cache) = &mut self.cache {
353 for span in &mut cache.spans {
354 if span.range.start >= position {
355 span.range.start += length;
356 span.range.end += length;
357 } else if span.range.end > position {
358 span.range.end += length;
360 }
361 }
362 if cache.range.end >= position {
363 cache.range.end += length;
364 }
365 }
366 }
367
368 pub fn notify_delete(&mut self, position: usize, length: usize) {
371 self.checkpoint_markers.adjust_for_delete(position, length);
372 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
373 if let Some(cache) = &mut self.cache {
375 let delete_end = position + length;
376 cache.spans.retain_mut(|span| {
377 if span.range.start >= delete_end {
378 span.range.start -= length;
380 span.range.end -= length;
381 true
382 } else if span.range.end <= position {
383 true
385 } else if span.range.start >= position && span.range.end <= delete_end {
386 false
388 } else {
389 if span.range.start < position {
391 span.range.end = position.min(span.range.end);
392 } else {
393 span.range.start = position;
394 span.range.end = position + span.range.end.saturating_sub(delete_end);
395 }
396 span.range.start < span.range.end
397 }
398 });
399 if cache.range.end > delete_end {
400 cache.range.end -= length;
401 } else if cache.range.end > position {
402 cache.range.end = position;
403 }
404 }
405 }
406
407 pub fn highlight_viewport(
414 &mut self,
415 buffer: &Buffer,
416 viewport_start: usize,
417 viewport_end: usize,
418 theme: &Theme,
419 context_bytes: usize,
420 ) -> Vec<HighlightSpan> {
421 let desired_parse_start = viewport_start.saturating_sub(context_bytes);
422 let parse_end = (viewport_end + context_bytes).min(buffer.len());
423
424 let dirty = self.dirty_from.take();
429 let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
430 c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
431 });
432 let exact_cache_hit = cache_covers_viewport
433 && dirty.is_none()
434 && self.last_buffer_len == buffer.len()
435 && self
436 .cache
437 .as_ref()
438 .is_some_and(|c| c.range.end >= parse_end);
439
440 if exact_cache_hit {
441 self.stats.cache_hits += 1;
443 return self.filter_cached_spans(viewport_start, viewport_end, theme);
444 }
445
446 if cache_covers_viewport && dirty.is_some() {
447 if let Some(dirty_pos) = dirty {
448 if dirty_pos < parse_end {
449 if let Some(result) = self.try_partial_update(
452 buffer,
453 dirty_pos,
454 desired_parse_start,
455 parse_end,
456 viewport_start,
457 viewport_end,
458 theme,
459 ) {
460 return result;
461 }
462 } else {
464 self.dirty_from = Some(dirty_pos);
466 self.stats.cache_hits += 1;
467 return self.filter_cached_spans(viewport_start, viewport_end, theme);
468 }
469 }
470 } else if let Some(d) = dirty {
471 self.dirty_from = Some(d);
473 }
474
475 self.full_parse(
477 buffer,
478 desired_parse_start,
479 parse_end,
480 viewport_start,
481 viewport_end,
482 theme,
483 context_bytes,
484 )
485 }
486
487 fn filter_cached_spans(
489 &self,
490 viewport_start: usize,
491 viewport_end: usize,
492 theme: &Theme,
493 ) -> Vec<HighlightSpan> {
494 let cache = self.cache.as_ref().unwrap();
495 cache
496 .spans
497 .iter()
498 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
499 .map(|span| HighlightSpan {
500 range: span.range.clone(),
501 color: highlight_color(span.category, theme),
502 category: Some(span.category),
503 })
504 .collect()
505 }
506
507 #[allow(clippy::too_many_arguments)]
511 fn try_partial_update(
512 &mut self,
513 buffer: &Buffer,
514 dirty_pos: usize,
515 desired_parse_start: usize,
516 parse_end: usize,
517 viewport_start: usize,
518 viewport_end: usize,
519 theme: &Theme,
520 ) -> Option<Vec<HighlightSpan>> {
521 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
522
523 let (actual_start, mut state, mut current_scopes) = {
525 let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
526 let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
527 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
528 if let Some((id, cp_pos, _)) = nearest {
529 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
530 (cp_pos, s.clone(), sc.clone())
531 } else {
532 return None; }
534 } else if parse_end <= MAX_PARSE_BYTES {
535 (
536 0,
537 syntect::parsing::ParseState::new(syntax),
538 syntect::parsing::ScopeStack::new(),
539 )
540 } else {
541 return None; }
543 };
544
545 let mut markers_ahead: Vec<(MarkerId, usize)> = self
547 .checkpoint_markers
548 .query_range(dirty_pos, parse_end)
549 .into_iter()
550 .map(|(id, start, _)| (id, start))
551 .collect();
552 markers_ahead.sort_by_key(|(_, pos)| *pos);
553 let mut marker_idx = 0;
554
555 let content_end = parse_end.min(buffer.len());
557 if actual_start >= content_end {
558 return None;
559 }
560 let content = buffer.slice_bytes(actual_start..content_end);
561 let content_str = match std::str::from_utf8(&content) {
562 Ok(s) => s,
563 Err(_) => return None,
564 };
565
566 let mut new_spans = Vec::new();
567 let content_bytes = content_str.as_bytes();
568 let mut pos = 0;
569 let mut current_offset = actual_start;
570 let mut converged_at: Option<usize> = None;
571 let mut bytes_since_checkpoint: usize = 0;
572
573 while pos < content_bytes.len() {
574 if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
576 let nearby = self.checkpoint_markers.query_range(
577 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
578 current_offset + CHECKPOINT_INTERVAL / 2,
579 );
580 if nearby.is_empty() {
581 let marker_id = self.checkpoint_markers.create(current_offset, true);
582 self.checkpoint_states
583 .insert(marker_id, (state.clone(), current_scopes.clone()));
584 }
585 bytes_since_checkpoint = 0;
586 }
587
588 let line_start = pos;
589 let mut line_end = pos;
590 while line_end < content_bytes.len() {
591 if content_bytes[line_end] == b'\n' {
592 line_end += 1;
593 break;
594 } else if content_bytes[line_end] == b'\r' {
595 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
596 line_end += 2;
597 } else {
598 line_end += 1;
599 }
600 break;
601 }
602 line_end += 1;
603 }
604
605 let line_bytes = &content_bytes[line_start..line_end];
606 let actual_line_byte_len = line_bytes.len();
607
608 let line_str = match std::str::from_utf8(line_bytes) {
609 Ok(s) => s,
610 Err(_) => {
611 pos = line_end;
612 current_offset += actual_line_byte_len;
613 bytes_since_checkpoint += actual_line_byte_len;
614 continue;
615 }
616 };
617
618 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
619 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
620 format!("{}\n", line_content)
621 } else {
622 line_content.to_string()
623 };
624
625 let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
626 Ok(ops) => ops,
627 Err(_) => {
628 pos = line_end;
629 current_offset += actual_line_byte_len;
630 bytes_since_checkpoint += actual_line_byte_len;
631 continue;
632 }
633 };
634
635 let collect_spans =
637 current_offset + actual_line_byte_len > desired_parse_start.max(actual_start);
638 let mut syntect_offset = 0;
639 let line_content_len = line_content.len();
640
641 for (op_offset, op) in ops {
642 let clamped_op_offset = op_offset.min(line_content_len);
643 if collect_spans && clamped_op_offset > syntect_offset {
644 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
645 let byte_start = current_offset + syntect_offset;
646 let byte_end = current_offset + clamped_op_offset;
647 let clamped_start = byte_start.max(actual_start);
648 if clamped_start < byte_end {
649 new_spans.push(CachedSpan {
650 range: clamped_start..byte_end,
651 category,
652 });
653 }
654 }
655 }
656 syntect_offset = clamped_op_offset;
657 #[allow(clippy::let_underscore_must_use)]
658 let _ = current_scopes.apply(&op);
659 }
660
661 if collect_spans && syntect_offset < line_content_len {
662 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
663 let byte_start = current_offset + syntect_offset;
664 let byte_end = current_offset + line_content_len;
665 let clamped_start = byte_start.max(actual_start);
666 if clamped_start < byte_end {
667 new_spans.push(CachedSpan {
668 range: clamped_start..byte_end,
669 category,
670 });
671 }
672 }
673 }
674
675 pos = line_end;
676 current_offset += actual_line_byte_len;
677 bytes_since_checkpoint += actual_line_byte_len;
678
679 while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
681 {
682 let (marker_id, _) = markers_ahead[marker_idx];
683 marker_idx += 1;
684 if let Some(stored) = self.checkpoint_states.get(&marker_id) {
685 if *stored == (state.clone(), current_scopes.clone()) {
686 self.stats.convergences += 1;
687 converged_at = Some(current_offset);
688 break;
689 }
690 }
691 self.stats.checkpoints_updated += 1;
692 self.checkpoint_states
693 .insert(marker_id, (state.clone(), current_scopes.clone()));
694 }
695
696 if converged_at.is_some() {
697 break;
698 }
699 }
700
701 self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
702
703 let convergence_point = converged_at?; self.stats.cache_misses += 1; Self::merge_adjacent_spans(&mut new_spans);
710
711 if let Some(cache) = &mut self.cache {
712 let splice_start = actual_start;
714 let splice_end = convergence_point;
715 cache
716 .spans
717 .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
718 cache.spans.extend(new_spans);
720 cache.spans.sort_by_key(|s| s.range.start);
721 Self::merge_adjacent_spans(&mut cache.spans);
722 }
723
724 self.last_buffer_len = buffer.len();
725
726 Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
727 }
728
729 #[allow(clippy::too_many_arguments)]
732 fn full_parse(
733 &mut self,
734 buffer: &Buffer,
735 desired_parse_start: usize,
736 parse_end: usize,
737 viewport_start: usize,
738 viewport_end: usize,
739 theme: &Theme,
740 _context_bytes: usize,
741 ) -> Vec<HighlightSpan> {
742 self.stats.cache_misses += 1;
743 self.dirty_from = None; if parse_end <= desired_parse_start {
746 return Vec::new();
747 }
748
749 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
750 let (actual_start, mut state, mut current_scopes, create_checkpoints) =
751 self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
752
753 let content = buffer.slice_bytes(actual_start..parse_end);
754 let content_str = match std::str::from_utf8(&content) {
755 Ok(s) => s,
756 Err(_) => return Vec::new(),
757 };
758
759 let mut spans = Vec::new();
760 let content_bytes = content_str.as_bytes();
761 let mut pos = 0;
762 let mut current_offset = actual_start;
763 let mut bytes_since_checkpoint: usize = 0;
764
765 while pos < content_bytes.len() {
766 if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
767 let nearby = self.checkpoint_markers.query_range(
768 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
769 current_offset + CHECKPOINT_INTERVAL / 2,
770 );
771 if nearby.is_empty() {
772 let marker_id = self.checkpoint_markers.create(current_offset, true);
773 self.checkpoint_states
774 .insert(marker_id, (state.clone(), current_scopes.clone()));
775 }
776 bytes_since_checkpoint = 0;
777 }
778
779 let line_start = pos;
780 let mut line_end = pos;
781
782 while line_end < content_bytes.len() {
783 if content_bytes[line_end] == b'\n' {
784 line_end += 1;
785 break;
786 } else if content_bytes[line_end] == b'\r' {
787 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
788 line_end += 2;
789 } else {
790 line_end += 1;
791 }
792 break;
793 }
794 line_end += 1;
795 }
796
797 let line_bytes = &content_bytes[line_start..line_end];
798 let actual_line_byte_len = line_bytes.len();
799
800 let line_str = match std::str::from_utf8(line_bytes) {
801 Ok(s) => s,
802 Err(_) => {
803 pos = line_end;
804 current_offset += actual_line_byte_len;
805 bytes_since_checkpoint += actual_line_byte_len;
806 continue;
807 }
808 };
809
810 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
811 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
812 format!("{}\n", line_content)
813 } else {
814 line_content.to_string()
815 };
816
817 let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
818 Ok(ops) => ops,
819 Err(_) => {
820 pos = line_end;
821 current_offset += actual_line_byte_len;
822 bytes_since_checkpoint += actual_line_byte_len;
823 continue;
824 }
825 };
826
827 let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
828 let mut syntect_offset = 0;
829 let line_content_len = line_content.len();
830
831 for (op_offset, op) in ops {
832 let clamped_op_offset = op_offset.min(line_content_len);
833 if collect_spans && clamped_op_offset > syntect_offset {
834 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
835 let byte_start = current_offset + syntect_offset;
836 let byte_end = current_offset + clamped_op_offset;
837 let clamped_start = byte_start.max(desired_parse_start);
838 if clamped_start < byte_end {
839 spans.push(CachedSpan {
840 range: clamped_start..byte_end,
841 category,
842 });
843 }
844 }
845 }
846 syntect_offset = clamped_op_offset;
847 #[allow(clippy::let_underscore_must_use)]
848 let _ = current_scopes.apply(&op);
849 }
850
851 if collect_spans && syntect_offset < line_content_len {
852 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
853 let byte_start = current_offset + syntect_offset;
854 let byte_end = current_offset + line_content_len;
855 let clamped_start = byte_start.max(desired_parse_start);
856 if clamped_start < byte_end {
857 spans.push(CachedSpan {
858 range: clamped_start..byte_end,
859 category,
860 });
861 }
862 }
863 }
864
865 pos = line_end;
866 current_offset += actual_line_byte_len;
867 bytes_since_checkpoint += actual_line_byte_len;
868
869 let markers_here: Vec<(MarkerId, usize)> = self
871 .checkpoint_markers
872 .query_range(
873 current_offset.saturating_sub(actual_line_byte_len),
874 current_offset,
875 )
876 .into_iter()
877 .map(|(id, start, _)| (id, start))
878 .collect();
879 for (marker_id, _) in markers_here {
880 self.checkpoint_states
881 .insert(marker_id, (state.clone(), current_scopes.clone()));
882 }
883 }
884
885 self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
886
887 Self::merge_adjacent_spans(&mut spans);
888
889 self.cache = Some(TextMateCache {
890 range: desired_parse_start..parse_end,
891 spans: spans.clone(),
892 });
893 self.last_buffer_len = buffer.len();
894
895 spans
896 .into_iter()
897 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
898 .map(|span| {
899 let cat = span.category;
900 HighlightSpan {
901 range: span.range,
902 color: highlight_color(cat, theme),
903 category: Some(cat),
904 }
905 })
906 .collect()
907 }
908
909 fn find_parse_resume_point(
911 &self,
912 desired_start: usize,
913 parse_end: usize,
914 syntax: &syntect::parsing::SyntaxReference,
915 ) -> (
916 usize,
917 syntect::parsing::ParseState,
918 syntect::parsing::ScopeStack,
919 bool,
920 ) {
921 use syntect::parsing::{ParseState, ScopeStack};
922
923 let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
927 let markers = self
928 .checkpoint_markers
929 .query_range(search_start, desired_start + 1);
930 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
931
932 if let Some((id, cp_pos, _)) = nearest {
933 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
934 return (cp_pos, s.clone(), sc.clone(), true);
935 }
936 }
937
938 if parse_end <= MAX_PARSE_BYTES {
939 (0, ParseState::new(syntax), ScopeStack::new(), true)
941 } else {
942 (
945 desired_start,
946 ParseState::new(syntax),
947 ScopeStack::new(),
948 true,
949 )
950 }
951 }
952
953 fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
955 for scope in scopes.as_slice().iter().rev() {
956 let scope_str = scope.build_string();
957 if let Some(cat) = scope_to_category(&scope_str) {
958 return Some(cat);
959 }
960 }
961 None
962 }
963
964 fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
966 if spans.len() < 2 {
967 return;
968 }
969
970 let mut write_idx = 0;
971 for read_idx in 1..spans.len() {
972 if spans[write_idx].category == spans[read_idx].category
973 && spans[write_idx].range.end == spans[read_idx].range.start
974 {
975 spans[write_idx].range.end = spans[read_idx].range.end;
976 } else {
977 write_idx += 1;
978 if write_idx != read_idx {
979 spans[write_idx] = spans[read_idx].clone();
980 }
981 }
982 }
983 spans.truncate(write_idx + 1);
984 }
985
    /// Does nothing; the edit range is ignored.
    ///
    /// NOTE(review): presumably kept so this engine offers the same method as the
    /// tree-sitter backend, with edits reported through `notify_insert` /
    /// `notify_delete` instead — confirm.
    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
    }
996
997 pub fn invalidate_all(&mut self) {
999 self.cache = None;
1000 let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1001 for id in ids {
1002 self.checkpoint_markers.delete(id);
1003 }
1004 self.checkpoint_states.clear();
1005 self.dirty_from = None;
1006 }
1007
1008 pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1013 let cache = self.cache.as_ref()?;
1014 cache
1015 .spans
1016 .iter()
1017 .find(|span| span.range.start <= position && position < span.range.end)
1018 .map(|span| span.category)
1019 }
1020
1021 pub fn syntax_name(&self) -> &str {
1023 &self.syntax_set.syntaxes()[self.syntax_index].name
1024 }
1025}
1026
1027fn syntax_index(syntax_set: &syntect::parsing::SyntaxSet, name: &str) -> Option<usize> {
1029 syntax_set.syntaxes().iter().position(|s| s.name == name)
1030}
1031
1032impl HighlightEngine {
1033 pub fn for_file(
1043 path: &Path,
1044 registry: &GrammarRegistry,
1045 languages: Option<&std::collections::HashMap<String, crate::config::LanguageConfig>>,
1046 ) -> Self {
1047 let syntax_set = registry.syntax_set_arc();
1048 let ts_language = Language::from_path(path);
1049
1050 let syntax = if let Some(langs) = languages {
1052 registry.find_syntax_for_file_with_languages(path, langs)
1053 } else {
1054 registry.find_syntax_for_file(path)
1055 };
1056
1057 if let Some(syntax) = syntax {
1058 if let Some(index) = syntax_index(&syntax_set, &syntax.name) {
1059 return Self::TextMate(Box::new(TextMateEngine::with_language(
1060 syntax_set,
1061 index,
1062 ts_language,
1063 )));
1064 }
1065 }
1066
1067 if let Some(lang) = ts_language {
1070 if let Ok(highlighter) = Highlighter::new(lang) {
1071 tracing::debug!(
1072 "No TextMate grammar for {:?}, falling back to tree-sitter",
1073 path.extension()
1074 );
1075 return Self::TreeSitter(Box::new(highlighter));
1076 }
1077 }
1078
1079 Self::None
1080 }
1081
1082 pub fn for_syntax_name(
1091 name: &str,
1092 registry: &GrammarRegistry,
1093 ts_language: Option<Language>,
1094 ) -> Self {
1095 let syntax_set = registry.syntax_set_arc();
1096
1097 if let Some(syntax) = registry.find_syntax_by_name(name) {
1098 if let Some(index) = syntax_index(&syntax_set, &syntax.name) {
1099 return Self::TextMate(Box::new(TextMateEngine::with_language(
1100 syntax_set,
1101 index,
1102 ts_language,
1103 )));
1104 }
1105 }
1106
1107 Self::None
1108 }
1109
1110 pub fn highlight_viewport(
1115 &mut self,
1116 buffer: &Buffer,
1117 viewport_start: usize,
1118 viewport_end: usize,
1119 theme: &Theme,
1120 context_bytes: usize,
1121 ) -> Vec<HighlightSpan> {
1122 match self {
1123 Self::TreeSitter(h) => {
1124 h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1125 }
1126 Self::TextMate(h) => {
1127 h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1128 }
1129 Self::None => Vec::new(),
1130 }
1131 }
1132
1133 pub fn notify_insert(&mut self, position: usize, length: usize) {
1135 if let Self::TextMate(h) = self {
1136 h.notify_insert(position, length);
1137 }
1138 }
1139
1140 pub fn notify_delete(&mut self, position: usize, length: usize) {
1142 if let Self::TextMate(h) = self {
1143 h.notify_delete(position, length);
1144 }
1145 }
1146
1147 pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1149 match self {
1150 Self::TreeSitter(h) => h.invalidate_range(edit_range),
1151 Self::TextMate(h) => h.invalidate_range(edit_range),
1152 Self::None => {}
1153 }
1154 }
1155
1156 pub fn invalidate_all(&mut self) {
1158 match self {
1159 Self::TreeSitter(h) => h.invalidate_all(),
1160 Self::TextMate(h) => h.invalidate_all(),
1161 Self::None => {}
1162 }
1163 }
1164
1165 pub fn has_highlighting(&self) -> bool {
1167 !matches!(self, Self::None)
1168 }
1169
1170 pub fn backend_name(&self) -> &str {
1172 match self {
1173 Self::TreeSitter(_) => "tree-sitter",
1174 Self::TextMate(_) => "textmate",
1175 Self::None => "none",
1176 }
1177 }
1178
1179 pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1181 if let Self::TextMate(h) = self {
1182 Some(h.stats())
1183 } else {
1184 None
1185 }
1186 }
1187
1188 pub fn reset_highlight_stats(&mut self) {
1190 if let Self::TextMate(h) = self {
1191 h.reset_stats();
1192 }
1193 }
1194
1195 pub fn syntax_name(&self) -> Option<&str> {
1197 match self {
1198 Self::TreeSitter(_) => None, Self::TextMate(h) => Some(h.syntax_name()),
1200 Self::None => None,
1201 }
1202 }
1203
1204 pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1209 match self {
1210 Self::TreeSitter(h) => h.category_at_position(position),
1211 Self::TextMate(h) => h.category_at_position(position),
1212 Self::None => None,
1213 }
1214 }
1215
1216 pub fn language(&self) -> Option<&Language> {
1219 match self {
1220 Self::TreeSitter(h) => Some(h.language()),
1221 Self::TextMate(h) => h.language(),
1222 Self::None => None,
1223 }
1224 }
1225}
1226
1227pub fn highlight_string(
1233 code: &str,
1234 lang_hint: &str,
1235 registry: &GrammarRegistry,
1236 theme: &Theme,
1237) -> Vec<HighlightSpan> {
1238 use syntect::parsing::{ParseState, ScopeStack};
1239
1240 let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1242 Some(s) => s,
1243 None => return Vec::new(),
1244 };
1245
1246 let syntax_set = registry.syntax_set();
1247 let mut state = ParseState::new(syntax);
1248 let mut spans = Vec::new();
1249 let mut current_scopes = ScopeStack::new();
1250 let mut current_offset = 0;
1251
1252 for line in code.split_inclusive('\n') {
1254 let line_start = current_offset;
1255 let line_len = line.len();
1256
1257 let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1259 let line_for_syntect = if line.ends_with('\n') {
1260 format!("{}\n", line_content)
1261 } else {
1262 line_content.to_string()
1263 };
1264
1265 let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1266 Ok(ops) => ops,
1267 Err(_) => {
1268 current_offset += line_len;
1269 continue;
1270 }
1271 };
1272
1273 let mut syntect_offset = 0;
1274 let line_content_len = line_content.len();
1275
1276 for (op_offset, op) in ops {
1277 let clamped_op_offset = op_offset.min(line_content_len);
1278 if clamped_op_offset > syntect_offset {
1279 if let Some(category) = scope_stack_to_category(¤t_scopes) {
1280 let byte_start = line_start + syntect_offset;
1281 let byte_end = line_start + clamped_op_offset;
1282 if byte_start < byte_end {
1283 spans.push(HighlightSpan {
1284 range: byte_start..byte_end,
1285 color: highlight_color(category, theme),
1286 category: Some(category),
1287 });
1288 }
1289 }
1290 }
1291 syntect_offset = clamped_op_offset;
1292 #[allow(clippy::let_underscore_must_use)]
1294 let _ = current_scopes.apply(&op);
1295 }
1296
1297 if syntect_offset < line_content_len {
1299 if let Some(category) = scope_stack_to_category(¤t_scopes) {
1300 let byte_start = line_start + syntect_offset;
1301 let byte_end = line_start + line_content_len;
1302 if byte_start < byte_end {
1303 spans.push(HighlightSpan {
1304 range: byte_start..byte_end,
1305 color: highlight_color(category, theme),
1306 category: Some(category),
1307 });
1308 }
1309 }
1310 }
1311
1312 current_offset += line_len;
1313 }
1314
1315 merge_adjacent_highlight_spans(&mut spans);
1317
1318 spans
1319}
1320
1321fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1323 for scope in scopes.as_slice().iter().rev() {
1324 let scope_str = scope.build_string();
1325 if let Some(cat) = scope_to_category(&scope_str) {
1326 return Some(cat);
1327 }
1328 }
1329 None
1330}
1331
1332fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1334 if spans.len() < 2 {
1335 return;
1336 }
1337
1338 let mut write_idx = 0;
1339 for read_idx in 1..spans.len() {
1340 if spans[write_idx].color == spans[read_idx].color
1341 && spans[write_idx].range.end == spans[read_idx].range.start
1342 {
1343 spans[write_idx].range.end = spans[read_idx].range.end;
1344 } else {
1345 write_idx += 1;
1346 if write_idx != read_idx {
1347 spans[write_idx] = spans[read_idx].clone();
1348 }
1349 }
1350 }
1351 spans.truncate(write_idx + 1);
1352}
1353
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::filesystem::{FileSystem, StdFileSystem};
    use crate::primitives::grammar::LocalGrammarLoader;
    use crate::view::theme;
    use std::sync::Arc;

    /// Filesystem handle passed to the `Buffer` constructors used in these tests.
    fn test_fs() -> Arc<dyn FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }

    /// Asserts that each file name resolves to the "textmate" backend and
    /// reports highlighting, using a registry built from the embedded-only
    /// grammar loader.
    fn assert_textmate_highlighting(file_names: &[&str]) {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());
        for &file_name in file_names {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry, None);
            assert_eq!(
                engine.backend_name(),
                "textmate",
                "unexpected backend for {}",
                file_name
            );
            assert!(
                engine.has_highlighting(),
                "no highlighting for {}",
                file_name
            );
        }
    }

    /// Asserts that every scope string in `scopes` maps to `expected`.
    fn assert_scopes_map_to(expected: HighlightCategory, scopes: &[&str]) {
        for &scope in scopes {
            assert_eq!(
                scope_to_category(scope),
                Some(expected),
                "unexpected category for scope {}",
                scope
            );
        }
    }

    #[test]
    fn test_highlight_engine_default() {
        let engine = HighlightEngine::default();
        assert!(!engine.has_highlighting());
        assert_eq!(engine.backend_name(), "none");
    }

    #[test]
    fn test_textmate_backend_selection() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        // (file name, backend name the engine is expected to report)
        let expectations = [
            ("test.rs", "textmate"),
            ("test.py", "textmate"),
            ("test.js", "textmate"),
            ("test.ts", "tree-sitter"),
            ("test.tsx", "tree-sitter"),
        ];

        for &(file_name, backend) in expectations.iter() {
            let engine = HighlightEngine::for_file(Path::new(file_name), &registry, None);
            assert_eq!(
                engine.backend_name(),
                backend,
                "unexpected backend for {}",
                file_name
            );
            assert!(
                engine.language().is_some(),
                "no language detected for {}",
                file_name
            );
        }
    }

    #[test]
    fn test_tree_sitter_direct() {
        assert!(Highlighter::new(Language::Rust).is_ok());
    }

    #[test]
    fn test_unknown_extension() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), &registry, None);
        // No particular backend is asserted; the test only requires that
        // constructing the engine and querying it completes without panicking.
        let _ = engine.backend_name();
    }

    #[test]
    fn test_highlight_viewport_empty_buffer_no_panic() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), &registry, None);

        let buffer = Buffer::from_str("", 0, test_fs());
        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

        if let HighlightEngine::TextMate(ref mut tm) = engine {
            // The requested viewport (100..200) lies entirely past the end of
            // the empty buffer.
            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
            assert!(spans.is_empty());
        } else {
            // Fail loudly rather than pass without exercising the viewport path.
            panic!("Expected TextMate engine for .rs file");
        }
    }

    #[test]
    fn test_textmate_engine_crlf_byte_offsets() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        let mut engine = HighlightEngine::for_file(Path::new("test.java"), &registry, None);

        let content = b"public\r\npublic\r\npublic\r\n";
        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

        if let HighlightEngine::TextMate(ref mut tm) = engine {
            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
            let ranges: Vec<_> = spans.iter().map(|s| &s.range).collect();

            // Each line is the 6-byte word "public" plus a 2-byte CRLF, so the
            // words occupy bytes 0..6, 8..14 and 16..22 of the buffer.
            let expected_words = [(0usize, 6usize), (8, 14), (16, 22)];

            for (index, &(start, end)) in expected_words.iter().enumerate() {
                let covered = ranges.iter().any(|r| r.start <= start && r.end >= end);
                assert!(
                    covered,
                    "Should have span covering bytes {}-{} (line {} 'public'). \
                     If this fails, CRLF offset drift is occurring. Spans: {:?}",
                    start,
                    end,
                    index + 1,
                    ranges
                );
            }
        } else {
            panic!("Expected TextMate engine for .java file");
        }
    }

    #[test]
    fn test_git_rebase_todo_highlighting() {
        assert_textmate_highlighting(&["git-rebase-todo"]);
    }

    #[test]
    fn test_git_commit_message_highlighting() {
        assert_textmate_highlighting(&["COMMIT_EDITMSG", "MERGE_MSG"]);
    }

    #[test]
    fn test_gitignore_highlighting() {
        assert_textmate_highlighting(&[".gitignore", ".dockerignore"]);
    }

    #[test]
    fn test_gitconfig_highlighting() {
        assert_textmate_highlighting(&[".gitconfig", ".gitmodules"]);
    }

    #[test]
    fn test_gitattributes_highlighting() {
        assert_textmate_highlighting(&[".gitattributes"]);
    }

    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        assert_scopes_map_to(
            HighlightCategory::Comment,
            &[
                "punctuation.definition.comment",
                "punctuation.definition.comment.python",
                "punctuation.definition.comment.begin",
            ],
        );
    }

    #[test]
    fn test_string_delimiter_uses_string_color() {
        assert_scopes_map_to(
            HighlightCategory::String,
            &[
                "punctuation.definition.string.begin",
                "punctuation.definition.string.end",
            ],
        );
    }

    #[test]
    fn test_punctuation_bracket() {
        assert_scopes_map_to(
            HighlightCategory::PunctuationBracket,
            &[
                "punctuation.section",
                "punctuation.section.block.begin.c",
                "punctuation.bracket",
                "punctuation.definition.array.begin.toml",
                "punctuation.definition.block.code.typst",
                "punctuation.definition.group.typst",
                "punctuation.definition.inline-table.begin.toml",
                "punctuation.definition.tag.end.svelte",
            ],
        );
    }

    #[test]
    fn test_punctuation_delimiter() {
        assert_scopes_map_to(
            HighlightCategory::PunctuationDelimiter,
            &[
                "punctuation.separator",
                "punctuation.terminator.statement.c",
                "punctuation.accessor",
            ],
        );
    }
}