1use crate::model::buffer::Buffer;
20use crate::model::marker::{MarkerId, MarkerList};
21use crate::primitives::grammar::GrammarRegistry;
22use crate::primitives::highlighter::{
23 highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
24};
25use crate::view::theme::Theme;
26use std::collections::HashMap;
27use std::ops::Range;
28use std::path::Path;
29use std::sync::Arc;
30use syntect::parsing::SyntaxSet;
31
32fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
34 let scope_lower = scope.to_lowercase();
35
36 if scope_lower.starts_with("comment") {
38 return Some(HighlightCategory::Comment);
39 }
40
41 if scope_lower.starts_with("string") {
43 return Some(HighlightCategory::String);
44 }
45
46 if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
50 return Some(HighlightCategory::Keyword); }
52 if scope_lower.starts_with("markup.bold") {
54 return Some(HighlightCategory::Constant); }
56 if scope_lower.starts_with("markup.italic") {
58 return Some(HighlightCategory::Variable); }
60 if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
62 return Some(HighlightCategory::String); }
64 if scope_lower.starts_with("markup.underline.link") {
66 return Some(HighlightCategory::Function); }
68 if scope_lower.starts_with("markup.underline") {
70 return Some(HighlightCategory::Function);
71 }
72 if scope_lower.starts_with("markup.quote") {
74 return Some(HighlightCategory::Comment); }
76 if scope_lower.starts_with("markup.list") {
78 return Some(HighlightCategory::Operator); }
80 if scope_lower.starts_with("markup.strikethrough") {
82 return Some(HighlightCategory::Comment); }
84
85 if scope_lower.starts_with("keyword.control")
87 || scope_lower.starts_with("keyword.other")
88 || scope_lower.starts_with("keyword.declaration")
89 || scope_lower.starts_with("keyword")
90 {
91 if !scope_lower.starts_with("keyword.operator") {
93 return Some(HighlightCategory::Keyword);
94 }
95 }
96
97 if scope_lower.starts_with("punctuation.definition.comment") {
101 return Some(HighlightCategory::Comment);
102 }
103 if scope_lower.starts_with("punctuation.definition.string") {
104 return Some(HighlightCategory::String);
105 }
106
107 if scope_lower.starts_with("keyword.operator") {
109 return Some(HighlightCategory::Operator);
110 }
111
112 if scope_lower.starts_with("punctuation.section")
116 || scope_lower.starts_with("punctuation.bracket")
117 || scope_lower.starts_with("punctuation.definition.array")
118 || scope_lower.starts_with("punctuation.definition.block")
119 || scope_lower.starts_with("punctuation.definition.brackets")
120 || scope_lower.starts_with("punctuation.definition.group")
121 || scope_lower.starts_with("punctuation.definition.inline-table")
122 || scope_lower.starts_with("punctuation.definition.section")
123 || scope_lower.starts_with("punctuation.definition.table")
124 || scope_lower.starts_with("punctuation.definition.tag")
125 {
126 return Some(HighlightCategory::PunctuationBracket);
127 }
128
129 if scope_lower.starts_with("punctuation.separator")
131 || scope_lower.starts_with("punctuation.terminator")
132 || scope_lower.starts_with("punctuation.accessor")
133 {
134 return Some(HighlightCategory::PunctuationDelimiter);
135 }
136
137 if scope_lower.starts_with("entity.name.function")
139 || scope_lower.starts_with("support.function")
140 || scope_lower.starts_with("meta.function-call")
141 || scope_lower.starts_with("variable.function")
142 {
143 return Some(HighlightCategory::Function);
144 }
145
146 if scope_lower.starts_with("entity.name.type")
148 || scope_lower.starts_with("entity.name.class")
149 || scope_lower.starts_with("entity.name.struct")
150 || scope_lower.starts_with("entity.name.enum")
151 || scope_lower.starts_with("entity.name.interface")
152 || scope_lower.starts_with("entity.name.trait")
153 || scope_lower.starts_with("support.type")
154 || scope_lower.starts_with("support.class")
155 || scope_lower.starts_with("storage.type")
156 {
157 return Some(HighlightCategory::Type);
158 }
159
160 if scope_lower.starts_with("storage.modifier") {
162 return Some(HighlightCategory::Keyword);
163 }
164
165 if scope_lower.starts_with("constant.numeric")
167 || scope_lower.starts_with("constant.language.boolean")
168 {
169 return Some(HighlightCategory::Number);
170 }
171 if scope_lower.starts_with("constant") {
172 return Some(HighlightCategory::Constant);
173 }
174
175 if scope_lower.starts_with("variable.parameter")
177 || scope_lower.starts_with("variable.other")
178 || scope_lower.starts_with("variable.language")
179 {
180 return Some(HighlightCategory::Variable);
181 }
182
183 if scope_lower.starts_with("entity.name.tag")
185 || scope_lower.starts_with("support.other.property")
186 || scope_lower.starts_with("meta.object-literal.key")
187 || scope_lower.starts_with("variable.other.property")
188 || scope_lower.starts_with("variable.other.object.property")
189 {
190 return Some(HighlightCategory::Property);
191 }
192
193 if scope_lower.starts_with("entity.other.attribute")
195 || scope_lower.starts_with("meta.attribute")
196 || scope_lower.starts_with("entity.name.decorator")
197 {
198 return Some(HighlightCategory::Attribute);
199 }
200
201 if scope_lower.starts_with("variable") {
203 return Some(HighlightCategory::Variable);
204 }
205
206 None
207}
208
/// Highlighting backend used for a buffer.
///
/// The default value is [`HighlightEngine::None`].
#[derive(Default)]
pub enum HighlightEngine {
    /// Highlighting delegated to a boxed [`Highlighter`]
    /// (reported as `"tree-sitter"` by `backend_name`).
    TreeSitter(Box<Highlighter>),
    /// Highlighting delegated to a boxed [`TextMateEngine`]
    /// (reported as `"textmate"` by `backend_name`).
    TextMate(Box<TextMateEngine>),
    /// No highlighting backend; every query yields no spans.
    #[default]
    None,
}
220
/// Highlighter built on syntect's line-oriented parser, with a span cache and
/// parser-state checkpoints that allow re-parsing only part of a buffer.
pub struct TextMateEngine {
    /// Shared syntect syntax set used for every `parse_line` call.
    syntax_set: Arc<SyntaxSet>,
    /// Index of the active syntax within `syntax_set.syntaxes()`.
    syntax_index: usize,
    /// Buffer positions at which parser state has been saved; shifted by
    /// `notify_insert` / `notify_delete`.
    checkpoint_markers: MarkerList,
    /// Parser state and scope stack saved for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Smallest edit position reported since the value was last consumed by
    /// `highlight_viewport`; `None` when no edit is pending.
    dirty_from: Option<usize>,
    /// Spans produced by the most recent parse, if any.
    cache: Option<TextMateCache>,
    /// Buffer length recorded by the last full parse or successful partial update.
    last_buffer_len: usize,
    /// Optional language supplied at construction; returned by `language()`.
    ts_language: Option<Language>,
    /// Counters describing cache and parser activity.
    stats: HighlightStats,
}
258
/// Counters collected by [`TextMateEngine`] while highlighting.
#[derive(Debug, Default, Clone)]
pub struct HighlightStats {
    /// Total number of bytes covered by full parses and partial updates.
    pub bytes_parsed: usize,
    /// Number of requests answered directly from the span cache.
    pub cache_hits: usize,
    /// Number of requests that needed a full parse or a converged partial update.
    pub cache_misses: usize,
    /// Number of existing checkpoint states overwritten during partial updates.
    pub checkpoints_updated: usize,
    /// Number of partial updates that stopped early because the parser state
    /// matched a stored checkpoint.
    pub convergences: usize,
}
273
/// Spans produced by a parse together with the byte range they were built for.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer the cached spans were computed for.
    range: Range<usize>,
    /// Cached spans; a full parse stores them in ascending position order.
    spans: Vec<CachedSpan>,
}
279
/// A theme-independent highlight span: a byte range and its category.
/// Colors are resolved against a theme only when spans are returned to callers.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte range in the buffer covered by this span.
    range: Range<usize>,
    /// Highlight category assigned to the range.
    category: crate::primitives::highlighter::HighlightCategory,
}
285
/// Byte limit (1 MiB) used when choosing where a parse resumes: checkpoints are
/// searched at most this far before the target position, and parsing from
/// offset 0 is chosen only when the parse window ends within this many bytes.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Number of parsed bytes after which the parse loops try to record a new
/// checkpoint. Half of this value is also the radius within which an existing
/// checkpoint suppresses the creation of another one.
const CHECKPOINT_INTERVAL: usize = 256;
294
295impl TextMateEngine {
296 pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
298 Self {
299 syntax_set,
300 syntax_index,
301 checkpoint_markers: MarkerList::new(),
302 checkpoint_states: HashMap::new(),
303 dirty_from: None,
304 cache: None,
305 last_buffer_len: 0,
306 ts_language: None,
307 stats: HighlightStats::default(),
308 }
309 }
310
311 pub fn with_language(
313 syntax_set: Arc<SyntaxSet>,
314 syntax_index: usize,
315 ts_language: Option<Language>,
316 ) -> Self {
317 Self {
318 syntax_set,
319 syntax_index,
320 checkpoint_markers: MarkerList::new(),
321 checkpoint_states: HashMap::new(),
322 dirty_from: None,
323 cache: None,
324 last_buffer_len: 0,
325 ts_language,
326 stats: HighlightStats::default(),
327 }
328 }
329
    /// Returns the counters accumulated since construction or the last `reset_stats`.
    pub fn stats(&self) -> &HighlightStats {
        &self.stats
    }
334
    /// Resets every counter in the statistics to its default (zero) value.
    pub fn reset_stats(&mut self) {
        self.stats = HighlightStats::default();
    }
339
    /// Returns the language supplied at construction, if any.
    pub fn language(&self) -> Option<&Language> {
        self.ts_language.as_ref()
    }
344
345 pub fn notify_insert(&mut self, position: usize, length: usize) {
349 self.checkpoint_markers.adjust_for_insert(position, length);
350 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
351 if let Some(cache) = &mut self.cache {
353 for span in &mut cache.spans {
354 if span.range.start >= position {
355 span.range.start += length;
356 span.range.end += length;
357 } else if span.range.end > position {
358 span.range.end += length;
360 }
361 }
362 if cache.range.end >= position {
363 cache.range.end += length;
364 }
365 }
366 }
367
368 pub fn notify_delete(&mut self, position: usize, length: usize) {
371 self.checkpoint_markers.adjust_for_delete(position, length);
372 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
373 if let Some(cache) = &mut self.cache {
375 let delete_end = position + length;
376 cache.spans.retain_mut(|span| {
377 if span.range.start >= delete_end {
378 span.range.start -= length;
380 span.range.end -= length;
381 true
382 } else if span.range.end <= position {
383 true
385 } else if span.range.start >= position && span.range.end <= delete_end {
386 false
388 } else {
389 if span.range.start < position {
391 span.range.end = position.min(span.range.end);
392 } else {
393 span.range.start = position;
394 span.range.end = position + span.range.end.saturating_sub(delete_end);
395 }
396 span.range.start < span.range.end
397 }
398 });
399 if cache.range.end > delete_end {
400 cache.range.end -= length;
401 } else if cache.range.end > position {
402 cache.range.end = position;
403 }
404 }
405 }
406
407 pub fn highlight_viewport(
414 &mut self,
415 buffer: &Buffer,
416 viewport_start: usize,
417 viewport_end: usize,
418 theme: &Theme,
419 context_bytes: usize,
420 ) -> Vec<HighlightSpan> {
421 let desired_parse_start = viewport_start.saturating_sub(context_bytes);
422 let parse_end = (viewport_end + context_bytes).min(buffer.len());
423
424 let dirty = self.dirty_from.take();
429 let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
430 c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
431 });
432 let exact_cache_hit = cache_covers_viewport
433 && dirty.is_none()
434 && self.last_buffer_len == buffer.len()
435 && self
436 .cache
437 .as_ref()
438 .is_some_and(|c| c.range.end >= parse_end);
439
440 if exact_cache_hit {
441 self.stats.cache_hits += 1;
443 return self.filter_cached_spans(viewport_start, viewport_end, theme);
444 }
445
446 if cache_covers_viewport && dirty.is_some() {
447 if let Some(dirty_pos) = dirty {
448 if dirty_pos < parse_end {
449 if let Some(result) = self.try_partial_update(
452 buffer,
453 dirty_pos,
454 desired_parse_start,
455 parse_end,
456 viewport_start,
457 viewport_end,
458 theme,
459 ) {
460 return result;
461 }
462 } else {
464 self.dirty_from = Some(dirty_pos);
466 self.stats.cache_hits += 1;
467 return self.filter_cached_spans(viewport_start, viewport_end, theme);
468 }
469 }
470 } else if let Some(d) = dirty {
471 self.dirty_from = Some(d);
473 }
474
475 self.full_parse(
477 buffer,
478 desired_parse_start,
479 parse_end,
480 viewport_start,
481 viewport_end,
482 theme,
483 context_bytes,
484 )
485 }
486
487 fn filter_cached_spans(
489 &self,
490 viewport_start: usize,
491 viewport_end: usize,
492 theme: &Theme,
493 ) -> Vec<HighlightSpan> {
494 let cache = self.cache.as_ref().unwrap();
495 cache
496 .spans
497 .iter()
498 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
499 .map(|span| HighlightSpan {
500 range: span.range.clone(),
501 color: highlight_color(span.category, theme),
502 category: Some(span.category),
503 })
504 .collect()
505 }
506
507 #[allow(clippy::too_many_arguments)]
511 fn try_partial_update(
512 &mut self,
513 buffer: &Buffer,
514 dirty_pos: usize,
515 desired_parse_start: usize,
516 parse_end: usize,
517 viewport_start: usize,
518 viewport_end: usize,
519 theme: &Theme,
520 ) -> Option<Vec<HighlightSpan>> {
521 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
522
523 let (actual_start, mut state, mut current_scopes) = {
525 let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
526 let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
527 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
528 if let Some((id, cp_pos, _)) = nearest {
529 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
530 (cp_pos, s.clone(), sc.clone())
531 } else {
532 return None; }
534 } else if parse_end <= MAX_PARSE_BYTES {
535 (
536 0,
537 syntect::parsing::ParseState::new(syntax),
538 syntect::parsing::ScopeStack::new(),
539 )
540 } else {
541 return None; }
543 };
544
545 let mut markers_ahead: Vec<(MarkerId, usize)> = self
547 .checkpoint_markers
548 .query_range(dirty_pos, parse_end)
549 .into_iter()
550 .map(|(id, start, _)| (id, start))
551 .collect();
552 markers_ahead.sort_by_key(|(_, pos)| *pos);
553 let mut marker_idx = 0;
554
555 let content_end = parse_end.min(buffer.len());
557 if actual_start >= content_end {
558 return None;
559 }
560 let content = buffer.slice_bytes(actual_start..content_end);
561 let content_str = match std::str::from_utf8(&content) {
562 Ok(s) => s,
563 Err(_) => return None,
564 };
565
566 let mut new_spans = Vec::new();
567 let content_bytes = content_str.as_bytes();
568 let mut pos = 0;
569 let mut current_offset = actual_start;
570 let mut converged_at: Option<usize> = None;
571 let mut bytes_since_checkpoint: usize = 0;
572
573 while pos < content_bytes.len() {
574 if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
576 let nearby = self.checkpoint_markers.query_range(
577 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
578 current_offset + CHECKPOINT_INTERVAL / 2,
579 );
580 if nearby.is_empty() {
581 let marker_id = self.checkpoint_markers.create(current_offset, true);
582 self.checkpoint_states
583 .insert(marker_id, (state.clone(), current_scopes.clone()));
584 }
585 bytes_since_checkpoint = 0;
586 }
587
588 let line_start = pos;
589 let mut line_end = pos;
590 while line_end < content_bytes.len() {
591 if content_bytes[line_end] == b'\n' {
592 line_end += 1;
593 break;
594 } else if content_bytes[line_end] == b'\r' {
595 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
596 line_end += 2;
597 } else {
598 line_end += 1;
599 }
600 break;
601 }
602 line_end += 1;
603 }
604
605 let line_bytes = &content_bytes[line_start..line_end];
606 let actual_line_byte_len = line_bytes.len();
607
608 let line_str = match std::str::from_utf8(line_bytes) {
609 Ok(s) => s,
610 Err(_) => {
611 pos = line_end;
612 current_offset += actual_line_byte_len;
613 bytes_since_checkpoint += actual_line_byte_len;
614 continue;
615 }
616 };
617
618 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
619 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
620 format!("{}\n", line_content)
621 } else {
622 line_content.to_string()
623 };
624
625 let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
626 Ok(ops) => ops,
627 Err(_) => {
628 pos = line_end;
629 current_offset += actual_line_byte_len;
630 bytes_since_checkpoint += actual_line_byte_len;
631 continue;
632 }
633 };
634
635 let collect_spans =
637 current_offset + actual_line_byte_len > desired_parse_start.max(actual_start);
638 let mut syntect_offset = 0;
639 let line_content_len = line_content.len();
640
641 for (op_offset, op) in ops {
642 let clamped_op_offset = op_offset.min(line_content_len);
643 if collect_spans && clamped_op_offset > syntect_offset {
644 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
645 let byte_start = current_offset + syntect_offset;
646 let byte_end = current_offset + clamped_op_offset;
647 let clamped_start = byte_start.max(actual_start);
648 if clamped_start < byte_end {
649 new_spans.push(CachedSpan {
650 range: clamped_start..byte_end,
651 category,
652 });
653 }
654 }
655 }
656 syntect_offset = clamped_op_offset;
657 #[allow(clippy::let_underscore_must_use)]
658 let _ = current_scopes.apply(&op);
659 }
660
661 if collect_spans && syntect_offset < line_content_len {
662 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
663 let byte_start = current_offset + syntect_offset;
664 let byte_end = current_offset + line_content_len;
665 let clamped_start = byte_start.max(actual_start);
666 if clamped_start < byte_end {
667 new_spans.push(CachedSpan {
668 range: clamped_start..byte_end,
669 category,
670 });
671 }
672 }
673 }
674
675 pos = line_end;
676 current_offset += actual_line_byte_len;
677 bytes_since_checkpoint += actual_line_byte_len;
678
679 while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
681 {
682 let (marker_id, _) = markers_ahead[marker_idx];
683 marker_idx += 1;
684 if let Some(stored) = self.checkpoint_states.get(&marker_id) {
685 if *stored == (state.clone(), current_scopes.clone()) {
686 self.stats.convergences += 1;
687 converged_at = Some(current_offset);
688 break;
689 }
690 }
691 self.stats.checkpoints_updated += 1;
692 self.checkpoint_states
693 .insert(marker_id, (state.clone(), current_scopes.clone()));
694 }
695
696 if converged_at.is_some() {
697 break;
698 }
699 }
700
701 self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
702
703 let convergence_point = converged_at?; self.stats.cache_misses += 1; Self::merge_adjacent_spans(&mut new_spans);
710
711 if let Some(cache) = &mut self.cache {
712 let splice_start = actual_start;
714 let splice_end = convergence_point;
715 cache
716 .spans
717 .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
718 cache.spans.extend(new_spans);
720 cache.spans.sort_by_key(|s| s.range.start);
721 Self::merge_adjacent_spans(&mut cache.spans);
722 }
723
724 self.last_buffer_len = buffer.len();
725
726 Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
727 }
728
729 #[allow(clippy::too_many_arguments)]
732 fn full_parse(
733 &mut self,
734 buffer: &Buffer,
735 desired_parse_start: usize,
736 parse_end: usize,
737 viewport_start: usize,
738 viewport_end: usize,
739 theme: &Theme,
740 _context_bytes: usize,
741 ) -> Vec<HighlightSpan> {
742 self.stats.cache_misses += 1;
743 self.dirty_from = None; if parse_end <= desired_parse_start {
746 return Vec::new();
747 }
748
749 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
750 let (actual_start, mut state, mut current_scopes, create_checkpoints) =
751 self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
752
753 let content = buffer.slice_bytes(actual_start..parse_end);
754 let content_str = match std::str::from_utf8(&content) {
755 Ok(s) => s,
756 Err(_) => return Vec::new(),
757 };
758
759 let mut spans = Vec::new();
760 let content_bytes = content_str.as_bytes();
761 let mut pos = 0;
762 let mut current_offset = actual_start;
763 let mut bytes_since_checkpoint: usize = 0;
764
765 while pos < content_bytes.len() {
766 if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
767 let nearby = self.checkpoint_markers.query_range(
768 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
769 current_offset + CHECKPOINT_INTERVAL / 2,
770 );
771 if nearby.is_empty() {
772 let marker_id = self.checkpoint_markers.create(current_offset, true);
773 self.checkpoint_states
774 .insert(marker_id, (state.clone(), current_scopes.clone()));
775 }
776 bytes_since_checkpoint = 0;
777 }
778
779 let line_start = pos;
780 let mut line_end = pos;
781
782 while line_end < content_bytes.len() {
783 if content_bytes[line_end] == b'\n' {
784 line_end += 1;
785 break;
786 } else if content_bytes[line_end] == b'\r' {
787 if line_end + 1 < content_bytes.len() && content_bytes[line_end + 1] == b'\n' {
788 line_end += 2;
789 } else {
790 line_end += 1;
791 }
792 break;
793 }
794 line_end += 1;
795 }
796
797 let line_bytes = &content_bytes[line_start..line_end];
798 let actual_line_byte_len = line_bytes.len();
799
800 let line_str = match std::str::from_utf8(line_bytes) {
801 Ok(s) => s,
802 Err(_) => {
803 pos = line_end;
804 current_offset += actual_line_byte_len;
805 bytes_since_checkpoint += actual_line_byte_len;
806 continue;
807 }
808 };
809
810 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
811 let line_for_syntect = if line_end < content_bytes.len() || line_str.ends_with('\n') {
812 format!("{}\n", line_content)
813 } else {
814 line_content.to_string()
815 };
816
817 let ops = match state.parse_line(&line_for_syntect, &self.syntax_set) {
818 Ok(ops) => ops,
819 Err(_) => {
820 pos = line_end;
821 current_offset += actual_line_byte_len;
822 bytes_since_checkpoint += actual_line_byte_len;
823 continue;
824 }
825 };
826
827 let collect_spans = current_offset + actual_line_byte_len > desired_parse_start;
828 let mut syntect_offset = 0;
829 let line_content_len = line_content.len();
830
831 for (op_offset, op) in ops {
832 let clamped_op_offset = op_offset.min(line_content_len);
833 if collect_spans && clamped_op_offset > syntect_offset {
834 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
835 let byte_start = current_offset + syntect_offset;
836 let byte_end = current_offset + clamped_op_offset;
837 let clamped_start = byte_start.max(desired_parse_start);
838 if clamped_start < byte_end {
839 spans.push(CachedSpan {
840 range: clamped_start..byte_end,
841 category,
842 });
843 }
844 }
845 }
846 syntect_offset = clamped_op_offset;
847 #[allow(clippy::let_underscore_must_use)]
848 let _ = current_scopes.apply(&op);
849 }
850
851 if collect_spans && syntect_offset < line_content_len {
852 if let Some(category) = Self::scope_stack_to_category(¤t_scopes) {
853 let byte_start = current_offset + syntect_offset;
854 let byte_end = current_offset + line_content_len;
855 let clamped_start = byte_start.max(desired_parse_start);
856 if clamped_start < byte_end {
857 spans.push(CachedSpan {
858 range: clamped_start..byte_end,
859 category,
860 });
861 }
862 }
863 }
864
865 pos = line_end;
866 current_offset += actual_line_byte_len;
867 bytes_since_checkpoint += actual_line_byte_len;
868
869 let markers_here: Vec<(MarkerId, usize)> = self
871 .checkpoint_markers
872 .query_range(
873 current_offset.saturating_sub(actual_line_byte_len),
874 current_offset,
875 )
876 .into_iter()
877 .map(|(id, start, _)| (id, start))
878 .collect();
879 for (marker_id, _) in markers_here {
880 self.checkpoint_states
881 .insert(marker_id, (state.clone(), current_scopes.clone()));
882 }
883 }
884
885 self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
886
887 Self::merge_adjacent_spans(&mut spans);
888
889 self.cache = Some(TextMateCache {
890 range: desired_parse_start..parse_end,
891 spans: spans.clone(),
892 });
893 self.last_buffer_len = buffer.len();
894
895 spans
896 .into_iter()
897 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
898 .map(|span| {
899 let cat = span.category;
900 HighlightSpan {
901 range: span.range,
902 color: highlight_color(cat, theme),
903 category: Some(cat),
904 }
905 })
906 .collect()
907 }
908
909 fn find_parse_resume_point(
911 &self,
912 desired_start: usize,
913 parse_end: usize,
914 syntax: &syntect::parsing::SyntaxReference,
915 ) -> (
916 usize,
917 syntect::parsing::ParseState,
918 syntect::parsing::ScopeStack,
919 bool,
920 ) {
921 use syntect::parsing::{ParseState, ScopeStack};
922
923 let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
927 let markers = self
928 .checkpoint_markers
929 .query_range(search_start, desired_start + 1);
930 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
931
932 if let Some((id, cp_pos, _)) = nearest {
933 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
934 return (cp_pos, s.clone(), sc.clone(), true);
935 }
936 }
937
938 if parse_end <= MAX_PARSE_BYTES {
939 (0, ParseState::new(syntax), ScopeStack::new(), true)
941 } else {
942 (
945 desired_start,
946 ParseState::new(syntax),
947 ScopeStack::new(),
948 true,
949 )
950 }
951 }
952
953 fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
955 for scope in scopes.as_slice().iter().rev() {
956 let scope_str = scope.build_string();
957 if let Some(cat) = scope_to_category(&scope_str) {
958 return Some(cat);
959 }
960 }
961 None
962 }
963
964 fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
966 if spans.len() < 2 {
967 return;
968 }
969
970 let mut write_idx = 0;
971 for read_idx in 1..spans.len() {
972 if spans[write_idx].category == spans[read_idx].category
973 && spans[write_idx].range.end == spans[read_idx].range.start
974 {
975 spans[write_idx].range.end = spans[read_idx].range.end;
976 } else {
977 write_idx += 1;
978 if write_idx != read_idx {
979 spans[write_idx] = spans[read_idx].clone();
980 }
981 }
982 }
983 spans.truncate(write_idx + 1);
984 }
985
    /// Currently a no-op: `_edit_range` is ignored and no state is modified.
    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
    }
996
997 pub fn invalidate_all(&mut self) {
999 self.cache = None;
1000 let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1001 for id in ids {
1002 self.checkpoint_markers.delete(id);
1003 }
1004 self.checkpoint_states.clear();
1005 self.dirty_from = None;
1006 }
1007
1008 pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1013 let cache = self.cache.as_ref()?;
1014 cache
1015 .spans
1016 .iter()
1017 .find(|span| span.range.start <= position && position < span.range.end)
1018 .map(|span| span.category)
1019 }
1020
    /// Returns the name of the syntax this engine parses with.
    pub fn syntax_name(&self) -> &str {
        &self.syntax_set.syntaxes()[self.syntax_index].name
    }
1025}
1026
1027impl HighlightEngine {
1028 pub fn from_entry(
1035 entry: &crate::primitives::grammar::GrammarEntry,
1036 registry: &GrammarRegistry,
1037 ) -> Self {
1038 let syntax_set = registry.syntax_set_arc();
1039 if let Some(index) = entry.engines.syntect {
1040 return Self::TextMate(Box::new(TextMateEngine::with_language(
1041 syntax_set,
1042 index,
1043 entry.engines.tree_sitter,
1044 )));
1045 }
1046 if let Some(lang) = entry.engines.tree_sitter {
1047 if let Ok(highlighter) = Highlighter::new(lang) {
1048 return Self::TreeSitter(Box::new(highlighter));
1049 }
1050 }
1051 Self::None
1052 }
1053
1054 pub fn for_file(path: &Path, first_line: Option<&str>, registry: &GrammarRegistry) -> Self {
1062 if let Some(entry) = registry.find_by_path(path, first_line) {
1063 return Self::from_entry(entry, registry);
1064 }
1065 Self::None
1066 }
1067
1068 pub fn for_syntax_name(name: &str, registry: &GrammarRegistry) -> Self {
1074 if let Some(entry) = registry.find_by_name(name) {
1075 return Self::from_entry(entry, registry);
1076 }
1077 Self::None
1078 }
1079
    /// Returns highlight spans for `viewport_start..viewport_end` by forwarding
    /// the call, with all arguments unchanged, to the active backend.
    /// The `None` engine returns no spans.
    pub fn highlight_viewport(
        &mut self,
        buffer: &Buffer,
        viewport_start: usize,
        viewport_end: usize,
        theme: &Theme,
        context_bytes: usize,
    ) -> Vec<HighlightSpan> {
        match self {
            Self::TreeSitter(h) => {
                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
            }
            Self::TextMate(h) => {
                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
            }
            Self::None => Vec::new(),
        }
    }
1102
1103 pub fn notify_insert(&mut self, position: usize, length: usize) {
1105 if let Self::TextMate(h) = self {
1106 h.notify_insert(position, length);
1107 }
1108 }
1109
1110 pub fn notify_delete(&mut self, position: usize, length: usize) {
1112 if let Self::TextMate(h) = self {
1113 h.notify_delete(position, length);
1114 }
1115 }
1116
    /// Forwards `edit_range` to the active backend's `invalidate_range`;
    /// does nothing for the `None` engine.
    pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
        match self {
            Self::TreeSitter(h) => h.invalidate_range(edit_range),
            Self::TextMate(h) => h.invalidate_range(edit_range),
            Self::None => {}
        }
    }
1125
    /// Calls the active backend's `invalidate_all`; does nothing for the
    /// `None` engine.
    pub fn invalidate_all(&mut self) {
        match self {
            Self::TreeSitter(h) => h.invalidate_all(),
            Self::TextMate(h) => h.invalidate_all(),
            Self::None => {}
        }
    }
1134
    /// Returns `true` for every variant except [`HighlightEngine::None`].
    pub fn has_highlighting(&self) -> bool {
        !matches!(self, Self::None)
    }
1139
    /// Returns a fixed name for the active backend:
    /// `"tree-sitter"`, `"textmate"` or `"none"`.
    pub fn backend_name(&self) -> &str {
        match self {
            Self::TreeSitter(_) => "tree-sitter",
            Self::TextMate(_) => "textmate",
            Self::None => "none",
        }
    }
1148
1149 pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1151 if let Self::TextMate(h) = self {
1152 Some(h.stats())
1153 } else {
1154 None
1155 }
1156 }
1157
1158 pub fn reset_highlight_stats(&mut self) {
1160 if let Self::TextMate(h) = self {
1161 h.reset_stats();
1162 }
1163 }
1164
1165 pub fn syntax_name(&self) -> Option<&str> {
1167 match self {
1168 Self::TreeSitter(_) => None, Self::TextMate(h) => Some(h.syntax_name()),
1170 Self::None => None,
1171 }
1172 }
1173
    /// Asks the active backend for the highlight category at byte offset
    /// `position`; the `None` engine always answers `None`.
    pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
        match self {
            Self::TreeSitter(h) => h.category_at_position(position),
            Self::TextMate(h) => h.category_at_position(position),
            Self::None => None,
        }
    }
1185
    /// Returns the language known to the active backend: always present for
    /// tree-sitter, optional for TextMate, absent for the `None` engine.
    pub fn language(&self) -> Option<&Language> {
        match self {
            Self::TreeSitter(h) => Some(h.language()),
            Self::TextMate(h) => h.language(),
            Self::None => None,
        }
    }
1195}
1196
1197pub fn highlight_string(
1203 code: &str,
1204 lang_hint: &str,
1205 registry: &GrammarRegistry,
1206 theme: &Theme,
1207) -> Vec<HighlightSpan> {
1208 use syntect::parsing::{ParseState, ScopeStack};
1209
1210 let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1212 Some(s) => s,
1213 None => return Vec::new(),
1214 };
1215
1216 let syntax_set = registry.syntax_set();
1217 let mut state = ParseState::new(syntax);
1218 let mut spans = Vec::new();
1219 let mut current_scopes = ScopeStack::new();
1220 let mut current_offset = 0;
1221
1222 for line in code.split_inclusive('\n') {
1224 let line_start = current_offset;
1225 let line_len = line.len();
1226
1227 let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1229 let line_for_syntect = if line.ends_with('\n') {
1230 format!("{}\n", line_content)
1231 } else {
1232 line_content.to_string()
1233 };
1234
1235 let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1236 Ok(ops) => ops,
1237 Err(_) => {
1238 current_offset += line_len;
1239 continue;
1240 }
1241 };
1242
1243 let mut syntect_offset = 0;
1244 let line_content_len = line_content.len();
1245
1246 for (op_offset, op) in ops {
1247 let clamped_op_offset = op_offset.min(line_content_len);
1248 if clamped_op_offset > syntect_offset {
1249 if let Some(category) = scope_stack_to_category(¤t_scopes) {
1250 let byte_start = line_start + syntect_offset;
1251 let byte_end = line_start + clamped_op_offset;
1252 if byte_start < byte_end {
1253 spans.push(HighlightSpan {
1254 range: byte_start..byte_end,
1255 color: highlight_color(category, theme),
1256 category: Some(category),
1257 });
1258 }
1259 }
1260 }
1261 syntect_offset = clamped_op_offset;
1262 #[allow(clippy::let_underscore_must_use)]
1264 let _ = current_scopes.apply(&op);
1265 }
1266
1267 if syntect_offset < line_content_len {
1269 if let Some(category) = scope_stack_to_category(¤t_scopes) {
1270 let byte_start = line_start + syntect_offset;
1271 let byte_end = line_start + line_content_len;
1272 if byte_start < byte_end {
1273 spans.push(HighlightSpan {
1274 range: byte_start..byte_end,
1275 color: highlight_color(category, theme),
1276 category: Some(category),
1277 });
1278 }
1279 }
1280 }
1281
1282 current_offset += line_len;
1283 }
1284
1285 merge_adjacent_highlight_spans(&mut spans);
1287
1288 spans
1289}
1290
1291fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1293 for scope in scopes.as_slice().iter().rev() {
1294 let scope_str = scope.build_string();
1295 if let Some(cat) = scope_to_category(&scope_str) {
1296 return Some(cat);
1297 }
1298 }
1299 None
1300}
1301
1302fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1304 if spans.len() < 2 {
1305 return;
1306 }
1307
1308 let mut write_idx = 0;
1309 for read_idx in 1..spans.len() {
1310 if spans[write_idx].color == spans[read_idx].color
1311 && spans[write_idx].range.end == spans[read_idx].range.start
1312 {
1313 spans[write_idx].range.end = spans[read_idx].range.end;
1314 } else {
1315 write_idx += 1;
1316 if write_idx != read_idx {
1317 spans[write_idx] = spans[read_idx].clone();
1318 }
1319 }
1320 }
1321 spans.truncate(write_idx + 1);
1322}
1323
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::filesystem::StdFileSystem;
    use crate::primitives::grammar::LocalGrammarLoader;
    use crate::view::theme;
    use std::sync::Arc;

    /// Returns the filesystem handle passed to the `Buffer` constructors in these tests.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }

    /// Asserts that every file name in `file_names` is served by the
    /// "textmate" backend and reports highlighting as available.
    ///
    /// The embedded-only grammar registry is loaded once per call.
    fn assert_textmate_highlighting(file_names: &[&str]) {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());
        for &file_name in file_names {
            let engine = HighlightEngine::for_file(Path::new(file_name), None, &registry);
            assert_eq!(
                engine.backend_name(),
                "textmate",
                "unexpected backend for {}",
                file_name
            );
            assert!(
                engine.has_highlighting(),
                "expected highlighting for {}",
                file_name
            );
        }
    }

    /// Asserts that `scope_to_category` maps every scope in `scopes` to `expected`.
    fn assert_scopes_map_to(expected: HighlightCategory, scopes: &[&str]) {
        for &scope in scopes {
            assert_eq!(
                scope_to_category(scope),
                Some(expected),
                "unexpected category for scope {}",
                scope
            );
        }
    }

    /// A default engine has no highlighting and reports the "none" backend.
    #[test]
    fn test_highlight_engine_default() {
        let engine = HighlightEngine::default();
        assert!(!engine.has_highlighting());
        assert_eq!(engine.backend_name(), "none");
    }

    /// Checks which backend is chosen per file extension and that a language
    /// is reported for each of them.
    #[test]
    fn test_textmate_backend_selection() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        let expectations = [
            ("test.rs", "textmate"),
            ("test.py", "textmate"),
            ("test.js", "textmate"),
            ("test.ts", "tree-sitter"),
            ("test.tsx", "tree-sitter"),
        ];

        for &(file_name, backend) in expectations.iter() {
            let engine = HighlightEngine::for_file(Path::new(file_name), None, &registry);
            assert_eq!(
                engine.backend_name(),
                backend,
                "unexpected backend for {}",
                file_name
            );
            assert!(
                engine.language().is_some(),
                "expected a language for {}",
                file_name
            );
        }
    }

    /// The tree-sitter highlighter can be constructed directly for Rust.
    #[test]
    fn test_tree_sitter_direct() {
        let highlighter = Highlighter::new(Language::Rust);
        assert!(highlighter.is_ok());
    }

    /// Smoke test: building an engine for an unrecognized extension and
    /// querying its backend name must complete; no specific backend is asserted.
    #[test]
    fn test_unknown_extension() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), None, &registry);
        let _ = engine.backend_name();
    }

    /// Requesting a viewport (100..200) that lies past the end of an empty
    /// buffer must return no spans instead of panicking.
    #[test]
    fn test_highlight_viewport_empty_buffer_no_panic() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);

        let buffer = Buffer::from_str("", 0, test_fs());
        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

        if let HighlightEngine::TextMate(ref mut tm) = engine {
            let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
            assert!(spans.is_empty());
        } else {
            // Without this branch the test would pass without exercising
            // anything if backend selection ever changed for `.rs` files.
            panic!("Expected TextMate engine for .rs file");
        }
    }

    /// With CRLF line endings, each `public` keyword must be covered by a span
    /// at its true byte offset (lines start at bytes 0, 8 and 16).
    #[test]
    fn test_textmate_engine_crlf_byte_offsets() {
        let registry = GrammarRegistry::load(&LocalGrammarLoader::embedded_only());

        let mut engine = HighlightEngine::for_file(Path::new("test.java"), None, &registry);

        let content = b"public\r\npublic\r\npublic\r\n";
        let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
        let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

        if let HighlightEngine::TextMate(ref mut tm) = engine {
            let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);

            // Collected once; reused for the debug dump and every failure message.
            let ranges: Vec<_> = spans.iter().map(|s| &s.range).collect();

            eprintln!("Spans: {:?}", ranges);

            // True when some span fully covers the byte range `start..end`.
            let has_span_at = |start: usize, end: usize| -> bool {
                ranges.iter().any(|r| r.start <= start && r.end >= end)
            };

            assert!(
                has_span_at(0, 6),
                "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
                ranges
            );

            assert!(
                has_span_at(8, 14),
                "Should have span covering bytes 8-14 (line 2 'public'). \
                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
                ranges
            );

            assert!(
                has_span_at(16, 22),
                "Should have span covering bytes 16-22 (line 3 'public'). \
                 If this fails, CRLF offset drift is occurring. Spans: {:?}",
                ranges
            );
        } else {
            panic!("Expected TextMate engine for .java file");
        }
    }

    /// `git-rebase-todo` is highlighted by the textmate backend.
    #[test]
    fn test_git_rebase_todo_highlighting() {
        assert_textmate_highlighting(&["git-rebase-todo"]);
    }

    /// Git commit and merge message files are highlighted by the textmate backend.
    #[test]
    fn test_git_commit_message_highlighting() {
        assert_textmate_highlighting(&["COMMIT_EDITMSG", "MERGE_MSG"]);
    }

    /// Ignore files are highlighted by the textmate backend.
    #[test]
    fn test_gitignore_highlighting() {
        assert_textmate_highlighting(&[".gitignore", ".dockerignore"]);
    }

    /// Git config-style files are highlighted by the textmate backend.
    #[test]
    fn test_gitconfig_highlighting() {
        assert_textmate_highlighting(&[".gitconfig", ".gitmodules"]);
    }

    /// `.gitattributes` is highlighted by the textmate backend.
    #[test]
    fn test_gitattributes_highlighting() {
        assert_textmate_highlighting(&[".gitattributes"]);
    }

    /// Comment delimiter punctuation is categorized as a comment.
    #[test]
    fn test_comment_delimiter_uses_comment_color() {
        assert_scopes_map_to(
            HighlightCategory::Comment,
            &[
                "punctuation.definition.comment",
                "punctuation.definition.comment.python",
                "punctuation.definition.comment.begin",
            ],
        );
    }

    /// String delimiter punctuation is categorized as a string.
    #[test]
    fn test_string_delimiter_uses_string_color() {
        assert_scopes_map_to(
            HighlightCategory::String,
            &[
                "punctuation.definition.string.begin",
                "punctuation.definition.string.end",
            ],
        );
    }

    /// Section, bracket and structural "definition" punctuation scopes are
    /// categorized as bracket punctuation.
    #[test]
    fn test_punctuation_bracket() {
        assert_scopes_map_to(
            HighlightCategory::PunctuationBracket,
            &[
                "punctuation.section",
                "punctuation.section.block.begin.c",
                "punctuation.bracket",
                "punctuation.definition.array.begin.toml",
                "punctuation.definition.block.code.typst",
                "punctuation.definition.group.typst",
                "punctuation.definition.inline-table.begin.toml",
                "punctuation.definition.tag.end.svelte",
            ],
        );
    }

    /// Separator, terminator and accessor scopes are categorized as delimiter punctuation.
    #[test]
    fn test_punctuation_delimiter() {
        assert_scopes_map_to(
            HighlightCategory::PunctuationDelimiter,
            &[
                "punctuation.separator",
                "punctuation.terminator.statement.c",
                "punctuation.accessor",
            ],
        );
    }
}