1use crate::model::buffer::Buffer;
39use crate::model::marker::{MarkerId, MarkerList};
40use crate::primitives::grammar::GrammarRegistry;
41use crate::primitives::highlighter::{
42 highlight_bg, highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
43};
44use crate::view::theme::Theme;
45use std::collections::HashMap;
46use std::ops::Range;
47use std::path::Path;
48use std::sync::Arc;
49use syntect::parsing::SyntaxSet;
50
51fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
53 let scope_lower = scope.to_lowercase();
54
55 if scope_lower.starts_with("comment") {
57 return Some(HighlightCategory::Comment);
58 }
59
60 if scope_lower.starts_with("string") {
62 return Some(HighlightCategory::String);
63 }
64
65 if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
69 return Some(HighlightCategory::Keyword); }
71 if scope_lower.starts_with("markup.bold") {
73 return Some(HighlightCategory::Constant); }
75 if scope_lower.starts_with("markup.italic") {
77 return Some(HighlightCategory::Variable); }
79 if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
81 return Some(HighlightCategory::String); }
83 if scope_lower.starts_with("markup.underline.link") {
85 return Some(HighlightCategory::Function); }
87 if scope_lower.starts_with("markup.underline") {
89 return Some(HighlightCategory::Function);
90 }
91 if scope_lower.starts_with("markup.quote") {
93 return Some(HighlightCategory::Comment); }
95 if scope_lower.starts_with("markup.list") {
97 return Some(HighlightCategory::Operator); }
99 if scope_lower.starts_with("markup.strikethrough") {
101 return Some(HighlightCategory::Comment); }
103
104 if scope_lower.starts_with("markup.inserted") {
116 return Some(HighlightCategory::Inserted);
117 }
118 if scope_lower.starts_with("markup.deleted") {
119 return Some(HighlightCategory::Deleted);
120 }
121 if scope_lower.starts_with("markup.changed") || scope_lower.starts_with("meta.diff.range") {
122 return Some(HighlightCategory::Changed);
123 }
124 if scope_lower.starts_with("meta.diff.header") {
125 return Some(HighlightCategory::Type);
126 }
127
128 if scope_lower.starts_with("keyword.control")
130 || scope_lower.starts_with("keyword.other")
131 || scope_lower.starts_with("keyword.declaration")
132 || scope_lower.starts_with("keyword")
133 {
134 if !scope_lower.starts_with("keyword.operator") {
136 return Some(HighlightCategory::Keyword);
137 }
138 }
139
140 if scope_lower.starts_with("punctuation.definition.comment") {
144 return Some(HighlightCategory::Comment);
145 }
146 if scope_lower.starts_with("punctuation.definition.string") {
147 return Some(HighlightCategory::String);
148 }
149
150 if scope_lower.starts_with("keyword.operator") {
152 return Some(HighlightCategory::Operator);
153 }
154
155 if scope_lower.starts_with("punctuation.section")
159 || scope_lower.starts_with("punctuation.bracket")
160 || scope_lower.starts_with("punctuation.definition.array")
161 || scope_lower.starts_with("punctuation.definition.block")
162 || scope_lower.starts_with("punctuation.definition.brackets")
163 || scope_lower.starts_with("punctuation.definition.group")
164 || scope_lower.starts_with("punctuation.definition.inline-table")
165 || scope_lower.starts_with("punctuation.definition.section")
166 || scope_lower.starts_with("punctuation.definition.table")
167 || scope_lower.starts_with("punctuation.definition.tag")
168 {
169 return Some(HighlightCategory::PunctuationBracket);
170 }
171
172 if scope_lower.starts_with("punctuation.separator")
174 || scope_lower.starts_with("punctuation.terminator")
175 || scope_lower.starts_with("punctuation.accessor")
176 {
177 return Some(HighlightCategory::PunctuationDelimiter);
178 }
179
180 if scope_lower.starts_with("entity.name.function")
182 || scope_lower.starts_with("support.function")
183 || scope_lower.starts_with("meta.function-call")
184 || scope_lower.starts_with("variable.function")
185 {
186 return Some(HighlightCategory::Function);
187 }
188
189 if scope_lower.starts_with("entity.name.type")
191 || scope_lower.starts_with("entity.name.class")
192 || scope_lower.starts_with("entity.name.struct")
193 || scope_lower.starts_with("entity.name.enum")
194 || scope_lower.starts_with("entity.name.interface")
195 || scope_lower.starts_with("entity.name.trait")
196 || scope_lower.starts_with("support.type")
197 || scope_lower.starts_with("support.class")
198 || scope_lower.starts_with("storage.type")
199 {
200 return Some(HighlightCategory::Type);
201 }
202
203 if scope_lower.starts_with("storage.modifier") {
205 return Some(HighlightCategory::Keyword);
206 }
207
208 if scope_lower.starts_with("constant.numeric")
210 || scope_lower.starts_with("constant.language.boolean")
211 {
212 return Some(HighlightCategory::Number);
213 }
214 if scope_lower.starts_with("constant") {
215 return Some(HighlightCategory::Constant);
216 }
217
218 if scope_lower.starts_with("variable.parameter")
220 || scope_lower.starts_with("variable.other")
221 || scope_lower.starts_with("variable.language")
222 {
223 return Some(HighlightCategory::Variable);
224 }
225
226 if scope_lower.starts_with("entity.name.tag")
228 || scope_lower.starts_with("support.other.property")
229 || scope_lower.starts_with("meta.object-literal.key")
230 || scope_lower.starts_with("variable.other.property")
231 || scope_lower.starts_with("variable.other.object.property")
232 {
233 return Some(HighlightCategory::Property);
234 }
235
236 if scope_lower.starts_with("entity.other.attribute")
238 || scope_lower.starts_with("meta.attribute")
239 || scope_lower.starts_with("entity.name.decorator")
240 {
241 return Some(HighlightCategory::Attribute);
242 }
243
244 if scope_lower.starts_with("variable") {
246 return Some(HighlightCategory::Variable);
247 }
248
249 None
250}
251
/// Highlighting backend selected for a buffer.
///
/// The default value is [`HighlightEngine::None`].
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighter.
    TreeSitter(Box<Highlighter>),
    /// syntect (TextMate grammar) based highlighter.
    TextMate(Box<TextMateEngine>),
    /// No highlighting backend; `highlight_viewport` returns no spans.
    #[default]
    None,
}
263
/// Incremental syntax highlighter driven by syntect (TextMate grammars).
///
/// Keeps a cache of highlight spans together with parser checkpoints so that
/// edits and viewport changes only require re-parsing part of the buffer.
pub struct TextMateEngine {
    /// Shared set of syntect syntax definitions.
    syntax_set: Arc<SyntaxSet>,
    /// Index of the active syntax within `syntax_set.syntaxes()`.
    syntax_index: usize,
    /// Markers tracking the byte positions of saved parser checkpoints;
    /// shifted by `notify_insert` / `notify_delete`.
    checkpoint_markers: MarkerList,
    /// Parser state and scope stack saved for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Lowest byte offset touched by an edit that has not been re-highlighted yet.
    dirty_from: Option<usize>,
    /// Cached spans from the most recent parse, if any.
    cache: Option<TextMateCache>,
    /// Buffer length observed when the cache was last refreshed.
    last_buffer_len: usize,
    /// Tree-sitter language supplied through `with_language`, exposed via `language()`.
    ts_language: Option<Language>,
    /// Running counters describing cache and parser activity.
    stats: HighlightStats,
    /// Memoised results of `scope_to_category`, keyed by syntect scope.
    scope_category_cache: HashMap<syntect::parsing::Scope, Option<HighlightCategory>>,
}
280
/// Counters describing how much work the TextMate engine has performed.
#[derive(Debug, Default, Clone)]
pub struct HighlightStats {
    /// Total number of buffer bytes fed through the parser.
    pub bytes_parsed: usize,
    /// Number of highlight requests answered from the span cache.
    pub cache_hits: usize,
    /// Number of highlight requests that required parsing.
    pub cache_misses: usize,
    /// Number of stored checkpoint states overwritten during partial updates.
    pub checkpoints_updated: usize,
    /// Number of partial updates that stopped early because the parser state
    /// matched a stored checkpoint.
    pub convergences: usize,
}
295
/// Cached highlight spans for a contiguous byte range of the buffer.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range the cache claims to cover.
    range: Range<usize>,
    /// Highlight spans inside `range`.
    spans: Vec<CachedSpan>,
    /// Parser state and scope stack at `range.end`, when still valid. It lets
    /// the cache be extended forward without re-parsing; edits clear it.
    tail_state: Option<(syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
}
304
/// A theme-independent highlight span stored in the cache.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Byte range of the span in buffer coordinates.
    range: Range<usize>,
    /// Highlight category; resolved to colours when spans are returned.
    category: crate::primitives::highlighter::HighlightCategory,
}
310
/// Size threshold in bytes (1 MiB). Buffers no larger than this are parsed
/// from offset 0 to the end; it is also how far back from a target offset the
/// engine searches for a saved checkpoint.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Number of parsed bytes after which the parse loops try to record a new
/// checkpoint. Half of this value is the radius within which an existing
/// checkpoint suppresses the creation of another one.
const CHECKPOINT_INTERVAL: usize = 256;

/// Maximum distance in bytes (64 KiB) that a partial update parses past the
/// edit position before it stops waiting for convergence.
const CONVERGENCE_BUDGET: usize = 64 * 1024;
320
/// Returns the offset just past the line that starts at `pos`.
///
/// A line is terminated by `\n`, `\r\n`, or a lone `\r`; the terminator is
/// included in the returned offset. When no terminator follows, the result is
/// `content_bytes.len()`. If `pos` lies beyond the slice, `pos` is returned
/// unchanged.
fn find_line_end(content_bytes: &[u8], pos: usize) -> usize {
    let Some(rest) = content_bytes.get(pos..) else {
        return pos;
    };

    match rest.iter().position(|&b| b == b'\n' || b == b'\r') {
        // Unterminated final line: it runs to the end of the slice.
        None => content_bytes.len(),
        Some(rel) => {
            let terminator = pos + rel;
            let is_crlf = content_bytes[terminator] == b'\r'
                && content_bytes.get(terminator + 1) == Some(&b'\n');
            terminator + if is_crlf { 2 } else { 1 }
        }
    }
}
342
/// A single buffer line normalised for syntect.
struct PreparedLine {
    /// Text handed to syntect: the line content without `\r`/`\n`, followed by
    /// a single `\n` unless this is an unterminated line at the end of the
    /// supplied content.
    line_for_syntect: String,
    /// Byte length of the line content, excluding any line terminator.
    line_content_len: usize,
    /// Whether the original line ended with `\n`.
    ends_with_newline: bool,
}
356
357fn prepare_line_at(content_bytes: &[u8], pos: usize) -> (usize, usize, Option<PreparedLine>) {
362 let line_end = find_line_end(content_bytes, pos);
363 let line_bytes = &content_bytes[pos..line_end];
364 let line_byte_len = line_bytes.len();
365 let prepared = std::str::from_utf8(line_bytes).ok().map(|line_str| {
366 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
367 let ends_with_newline = line_str.ends_with('\n');
368 let is_streaming_tail = line_end == content_bytes.len() && !ends_with_newline;
369 let line_for_syntect = if is_streaming_tail {
370 line_content.to_string()
371 } else {
372 format!("{}\n", line_content)
373 };
374 PreparedLine {
375 line_for_syntect,
376 line_content_len: line_content.len(),
377 ends_with_newline,
378 }
379 });
380 (line_end, line_byte_len, prepared)
381}
382
383impl TextMateEngine {
384 pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
386 Self {
387 syntax_set,
388 syntax_index,
389 checkpoint_markers: MarkerList::new(),
390 checkpoint_states: HashMap::new(),
391 dirty_from: None,
392 cache: None,
393 last_buffer_len: 0,
394 ts_language: None,
395 stats: HighlightStats::default(),
396 scope_category_cache: HashMap::new(),
397 }
398 }
399
400 pub fn with_language(
402 syntax_set: Arc<SyntaxSet>,
403 syntax_index: usize,
404 ts_language: Option<Language>,
405 ) -> Self {
406 Self {
407 syntax_set,
408 syntax_index,
409 checkpoint_markers: MarkerList::new(),
410 checkpoint_states: HashMap::new(),
411 dirty_from: None,
412 cache: None,
413 last_buffer_len: 0,
414 ts_language,
415 stats: HighlightStats::default(),
416 scope_category_cache: HashMap::new(),
417 }
418 }
419
    /// Returns the accumulated highlighting statistics.
    pub fn stats(&self) -> &HighlightStats {
        &self.stats
    }
424
    /// Resets every statistics counter to zero.
    pub fn reset_stats(&mut self) {
        self.stats = HighlightStats::default();
    }
429
    /// Returns the tree-sitter language passed to `with_language`, if any.
    pub fn language(&self) -> Option<&Language> {
        self.ts_language.as_ref()
    }
434
435 pub fn notify_insert(&mut self, position: usize, length: usize) {
438 self.checkpoint_markers.adjust_for_insert(position, length);
439 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
440 if let Some(cache) = &mut self.cache {
441 for span in &mut cache.spans {
442 if span.range.start >= position {
443 span.range.start += length;
444 span.range.end += length;
445 } else if span.range.end > position {
446 span.range.end += length;
447 }
448 }
449 if cache.range.end >= position {
450 cache.range.end += length;
451 if position < cache.range.end {
452 cache.tail_state = None;
453 }
454 }
455 }
456 }
457
458 pub fn notify_delete(&mut self, position: usize, length: usize) {
460 self.checkpoint_markers.adjust_for_delete(position, length);
461 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
462 if let Some(cache) = &mut self.cache {
463 let delete_end = position + length;
464 cache.spans.retain_mut(|span| {
465 if span.range.start >= delete_end {
466 span.range.start -= length;
467 span.range.end -= length;
468 true
469 } else if span.range.end <= position {
470 true
471 } else if span.range.start >= position && span.range.end <= delete_end {
472 false
473 } else {
474 if span.range.start < position {
475 span.range.end = position.min(span.range.end);
476 } else {
477 span.range.start = position;
478 span.range.end = position + span.range.end.saturating_sub(delete_end);
479 }
480 span.range.start < span.range.end
481 }
482 });
483 if cache.range.end > delete_end {
484 cache.range.end -= length;
485 } else if cache.range.end > position {
486 cache.range.end = position;
487 }
488 if position < cache.range.end {
489 cache.tail_state = None;
490 }
491 }
492 }
493
494 fn maybe_create_checkpoint(
500 &mut self,
501 current_offset: usize,
502 state: &syntect::parsing::ParseState,
503 current_scopes: &syntect::parsing::ScopeStack,
504 ) {
505 let nearby = self.checkpoint_markers.query_range(
506 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
507 current_offset + CHECKPOINT_INTERVAL / 2,
508 );
509 if nearby.is_empty() {
510 let marker_id = self.checkpoint_markers.create(current_offset, true);
511 self.checkpoint_states
512 .insert(marker_id, (state.clone(), current_scopes.clone()));
513 }
514 }
515
516 fn parse_line_into_spans(
527 &mut self,
528 state: &mut syntect::parsing::ParseState,
529 current_scopes: &mut syntect::parsing::ScopeStack,
530 prepared: &PreparedLine,
531 current_offset: usize,
532 mut on_span: impl FnMut(usize, usize, HighlightCategory),
533 ) -> bool {
534 let ops = match state.parse_line(&prepared.line_for_syntect, &self.syntax_set) {
535 Ok(ops) => ops,
536 Err(_) => return false,
537 };
538
539 let line_content_len = prepared.line_content_len;
540 let mut syntect_offset = 0;
541
542 for (op_offset, op) in ops {
543 let clamped_op_offset = op_offset.min(line_content_len);
544 if clamped_op_offset > syntect_offset {
545 if let Some(category) = self.scope_stack_to_category(current_scopes) {
546 on_span(
547 current_offset + syntect_offset,
548 current_offset + clamped_op_offset,
549 category,
550 );
551 }
552 }
553 syntect_offset = clamped_op_offset;
554 #[allow(clippy::let_underscore_must_use)]
555 let _ = current_scopes.apply(&op);
556 }
557
558 if syntect_offset < line_content_len {
559 if let Some(category) = self.scope_stack_to_category(current_scopes) {
560 on_span(
561 current_offset + syntect_offset,
562 current_offset + line_content_len,
563 category,
564 );
565 }
566 }
567 true
568 }
569
570 #[cfg(test)]
578 pub fn cache_commit_for_test(&self) -> (usize, bool) {
579 match &self.cache {
580 Some(c) => (c.range.end, c.tail_state.is_some()),
581 None => (0, false),
582 }
583 }
584
    /// Returns themed highlight spans overlapping `viewport_start..viewport_end`.
    ///
    /// Buffers of at most `MAX_PARSE_BYTES` are highlighted as a whole; for
    /// larger buffers only the viewport widened by `context_bytes` on each
    /// side is considered. The cheapest applicable strategy is chosen, in
    /// this order: serve from cache, extend the cache forward, partially
    /// re-parse after an edit, or run a full parse.
    pub fn highlight_viewport(
        &mut self,
        buffer: &Buffer,
        viewport_start: usize,
        viewport_end: usize,
        theme: &Theme,
        context_bytes: usize,
    ) -> Vec<HighlightSpan> {
        let buf_len = buffer.len();
        let (desired_parse_start, parse_end) = if buf_len <= MAX_PARSE_BYTES {
            (0, buf_len)
        } else {
            let s = viewport_start.saturating_sub(context_bytes);
            let e = (viewport_end + context_bytes).min(buf_len);
            (s, e)
        };

        // Take the pending dirty position; branches that do not consume it
        // must put it back.
        let dirty = self.dirty_from.take();
        // True when the cached range contains the start of the desired range
        // (it does not by itself guarantee coverage up to `parse_end`).
        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
        });
        let exact_cache_hit = cache_covers_viewport
            && dirty.is_none()
            && self.last_buffer_len == buffer.len()
            && self
                .cache
                .as_ref()
                .is_some_and(|c| c.range.end >= parse_end);

        if exact_cache_hit {
            self.stats.cache_hits += 1;
            return self.filter_cached_spans(viewport_start, viewport_end, theme);
        }

        // Clean cache that simply ends too early: continue parsing from its
        // saved tail state instead of starting over.
        if dirty.is_none()
            && cache_covers_viewport
            && self.last_buffer_len == buffer.len()
            && self
                .cache
                .as_ref()
                .is_some_and(|c| c.range.end < parse_end && c.tail_state.is_some())
        {
            return self.extend_cache_forward(
                buffer,
                parse_end,
                viewport_start,
                viewport_end,
                theme,
            );
        }

        if cache_covers_viewport && dirty.is_some() {
            if let Some(dirty_pos) = dirty {
                if dirty_pos < parse_end {
                    // Edit inside the range of interest: try an incremental
                    // re-parse; fall through to a full parse if it declines.
                    if let Some(result) = self.try_partial_update(
                        buffer,
                        dirty_pos,
                        desired_parse_start,
                        parse_end,
                        viewport_start,
                        viewport_end,
                        theme,
                    ) {
                        return result;
                    }
                } else {
                    // Edit lies beyond the range of interest: keep it pending
                    // and answer from the cache.
                    // NOTE(review): this path does not check that the cache
                    // reaches `parse_end` — confirm that is intended.
                    self.dirty_from = Some(dirty_pos);
                    self.stats.cache_hits += 1;
                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
                }
            }
        } else if let Some(d) = dirty {
            self.dirty_from = Some(d);
        }

        self.full_parse(
            buffer,
            desired_parse_start,
            parse_end,
            viewport_start,
            viewport_end,
            theme,
            context_bytes,
        )
    }
675
676 fn filter_cached_spans(
678 &self,
679 viewport_start: usize,
680 viewport_end: usize,
681 theme: &Theme,
682 ) -> Vec<HighlightSpan> {
683 let cache = self.cache.as_ref().unwrap();
684 cache
685 .spans
686 .iter()
687 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
688 .map(|span| HighlightSpan {
689 range: span.range.clone(),
690 color: highlight_color(span.category, theme),
691 bg: highlight_bg(span.category, theme),
692 category: Some(span.category),
693 })
694 .collect()
695 }
696
697 #[allow(clippy::too_many_arguments)]
701 fn try_partial_update(
702 &mut self,
703 buffer: &Buffer,
704 dirty_pos: usize,
705 desired_parse_start: usize,
706 parse_end: usize,
707 viewport_start: usize,
708 viewport_end: usize,
709 theme: &Theme,
710 ) -> Option<Vec<HighlightSpan>> {
711 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
712
713 let (actual_start, mut state, mut current_scopes) = {
715 let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
716 let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
717 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
718 if let Some((id, cp_pos, _)) = nearest {
719 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
720 (cp_pos, s.clone(), sc.clone())
721 } else {
722 return None; }
724 } else if parse_end <= MAX_PARSE_BYTES {
725 (
726 0,
727 syntect::parsing::ParseState::new(syntax),
728 syntect::parsing::ScopeStack::new(),
729 )
730 } else {
731 return None; }
733 };
734
735 let mut markers_ahead: Vec<(MarkerId, usize)> = self
737 .checkpoint_markers
738 .query_range(dirty_pos, parse_end)
739 .into_iter()
740 .map(|(id, start, _)| (id, start))
741 .collect();
742 markers_ahead.sort_by_key(|(_, pos)| *pos);
743 let mut marker_idx = 0;
744
745 let content_end = parse_end.min(buffer.len());
747 if actual_start >= content_end {
748 return None;
749 }
750 let content = buffer.slice_bytes(actual_start..content_end);
751 let content_str = match std::str::from_utf8(&content) {
752 Ok(s) => s,
753 Err(_) => return None,
754 };
755
756 let mut new_spans = Vec::new();
757 let content_bytes = content_str.as_bytes();
758 let mut pos = 0;
759 let mut current_offset = actual_start;
760 let mut converged_at: Option<usize> = None;
761 let mut budget_hit_at: Option<usize> = None;
762 let mut bytes_since_checkpoint: usize = 0;
763
764 while pos < content_bytes.len() {
765 if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
767 self.maybe_create_checkpoint(current_offset, &state, ¤t_scopes);
768 bytes_since_checkpoint = 0;
769 }
770
771 let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
772 let collect_spans =
774 current_offset + line_byte_len > desired_parse_start.max(actual_start);
775 if let Some(prepared) = prepared {
776 let _ = self.parse_line_into_spans(
777 &mut state,
778 &mut current_scopes,
779 &prepared,
780 current_offset,
781 |byte_start, byte_end, category| {
782 if !collect_spans {
783 return;
784 }
785 let clamped_start = byte_start.max(actual_start);
786 if clamped_start < byte_end {
787 new_spans.push(CachedSpan {
788 range: clamped_start..byte_end,
789 category,
790 });
791 }
792 },
793 );
794 }
795
796 pos = line_end;
797 current_offset += line_byte_len;
798 bytes_since_checkpoint += line_byte_len;
799
800 while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
802 {
803 let (marker_id, _) = markers_ahead[marker_idx];
804 marker_idx += 1;
805 if let Some(stored) = self.checkpoint_states.get(&marker_id) {
806 if *stored == (state.clone(), current_scopes.clone()) {
807 self.stats.convergences += 1;
808 converged_at = Some(current_offset);
809 break;
810 }
811 }
812 self.stats.checkpoints_updated += 1;
813 self.checkpoint_states
814 .insert(marker_id, (state.clone(), current_scopes.clone()));
815 }
816
817 if converged_at.is_some() {
818 break;
819 }
820
821 if current_offset.saturating_sub(dirty_pos) >= CONVERGENCE_BUDGET {
825 budget_hit_at = Some(current_offset);
826 break;
827 }
828 }
829
830 self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
831
832 let (splice_end, dirty_after) = if let Some(c) = converged_at {
835 (c, None)
836 } else if let Some(b) = budget_hit_at {
837 (b, Some(b))
838 } else {
839 (current_offset, None)
840 };
841
842 self.stats.cache_misses += 1; Self::merge_adjacent_spans(&mut new_spans);
845
846 if let Some(cache) = &mut self.cache {
847 let splice_start = actual_start;
848 cache
849 .spans
850 .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
851 cache.spans.extend(new_spans);
852 cache.spans.sort_by_key(|s| s.range.start);
853 Self::merge_adjacent_spans(&mut cache.spans);
854 if splice_end > cache.range.end {
855 cache.range.end = splice_end;
856 }
857 cache.tail_state = None;
858 }
859
860 self.last_buffer_len = buffer.len();
861 self.dirty_from = dirty_after;
862
863 Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
864 }
865
866 fn extend_cache_forward(
870 &mut self,
871 buffer: &Buffer,
872 parse_end: usize,
873 viewport_start: usize,
874 viewport_end: usize,
875 theme: &Theme,
876 ) -> Vec<HighlightSpan> {
877 self.stats.cache_misses += 1;
878 let buf_len = buffer.len();
879 let parse_end = parse_end.min(buf_len);
880
881 let (extension_start, mut state, mut current_scopes) = {
882 let cache = self
883 .cache
884 .as_ref()
885 .expect("extend_cache_forward: cache must exist");
886 let (s, sc) = cache
887 .tail_state
888 .as_ref()
889 .expect("extend_cache_forward: tail_state must exist")
890 .clone();
891 (cache.range.end, s, sc)
892 };
893
894 if parse_end <= extension_start {
895 return self.filter_cached_spans(viewport_start, viewport_end, theme);
896 }
897
898 let content = buffer.slice_bytes(extension_start..parse_end);
899 let content_str = match std::str::from_utf8(&content) {
900 Ok(s) => s,
901 Err(_) => return self.filter_cached_spans(viewport_start, viewport_end, theme),
902 };
903
904 let mut new_spans = Vec::new();
905 let content_bytes = content_str.as_bytes();
906 let mut pos = 0;
907 let mut current_offset = extension_start;
908 let mut bytes_since_checkpoint: usize = 0;
909 let mut safe_offset = extension_start;
919 let mut safe_state = state.clone();
920 let mut safe_scopes = current_scopes.clone();
921
922 while pos < content_bytes.len() {
923 if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
924 self.maybe_create_checkpoint(current_offset, &state, ¤t_scopes);
925 bytes_since_checkpoint = 0;
926 }
927
928 let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
929 let mut newline_terminated = false;
930 if let Some(prepared) = prepared {
931 let parse_ok = self.parse_line_into_spans(
932 &mut state,
933 &mut current_scopes,
934 &prepared,
935 current_offset,
936 |byte_start, byte_end, category| {
937 new_spans.push(CachedSpan {
938 range: byte_start..byte_end,
939 category,
940 });
941 },
942 );
943 if parse_ok {
944 newline_terminated = prepared.ends_with_newline;
945 }
946 }
947
948 pos = line_end;
949 current_offset += line_byte_len;
950 bytes_since_checkpoint += line_byte_len;
951
952 if newline_terminated {
953 safe_offset = current_offset;
954 safe_state = state.clone();
955 safe_scopes = current_scopes.clone();
956 }
957 }
958
959 self.stats.bytes_parsed += parse_end - extension_start;
960
961 Self::merge_adjacent_spans(&mut new_spans);
962
963 let (safe_spans, unsafe_spans): (Vec<_>, Vec<_>) = new_spans
969 .into_iter()
970 .partition(|s| s.range.end <= safe_offset);
971
972 let cache = self
973 .cache
974 .as_mut()
975 .expect("extend_cache_forward: cache must still exist");
976 cache.spans.extend(safe_spans);
977 Self::merge_adjacent_spans(&mut cache.spans);
978 cache.range.end = safe_offset;
979 cache.tail_state = Some((safe_state, safe_scopes));
980 self.last_buffer_len = buf_len;
981
982 let mut result = self.filter_cached_spans(viewport_start, viewport_end, theme);
983 result.extend(
984 unsafe_spans
985 .into_iter()
986 .filter(|s| s.range.start < viewport_end && s.range.end > viewport_start)
987 .map(|s| HighlightSpan {
988 range: s.range,
989 color: highlight_color(s.category, theme),
990 bg: highlight_bg(s.category, theme),
991 category: Some(s.category),
992 }),
993 );
994 result
995 }
996
997 #[allow(clippy::too_many_arguments)]
1000 fn full_parse(
1001 &mut self,
1002 buffer: &Buffer,
1003 desired_parse_start: usize,
1004 parse_end: usize,
1005 viewport_start: usize,
1006 viewport_end: usize,
1007 theme: &Theme,
1008 _context_bytes: usize,
1009 ) -> Vec<HighlightSpan> {
1010 self.stats.cache_misses += 1;
1011 self.dirty_from = None; if parse_end <= desired_parse_start {
1014 return Vec::new();
1015 }
1016
1017 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
1018 let (actual_start, mut state, mut current_scopes, create_checkpoints) =
1019 self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
1020
1021 let content = buffer.slice_bytes(actual_start..parse_end);
1022 let content_str = match std::str::from_utf8(&content) {
1023 Ok(s) => s,
1024 Err(_) => return Vec::new(),
1025 };
1026
1027 let mut spans = Vec::new();
1028 let content_bytes = content_str.as_bytes();
1029 let mut pos = 0;
1030 let mut current_offset = actual_start;
1031 let mut bytes_since_checkpoint: usize = 0;
1032 let mut safe_offset = actual_start;
1038 let mut safe_state = state.clone();
1039 let mut safe_scopes = current_scopes.clone();
1040
1041 while pos < content_bytes.len() {
1042 if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
1043 self.maybe_create_checkpoint(current_offset, &state, ¤t_scopes);
1044 bytes_since_checkpoint = 0;
1045 }
1046
1047 let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
1048 let collect_spans = current_offset + line_byte_len > desired_parse_start;
1052 let mut newline_terminated = false;
1053 if let Some(prepared) = prepared {
1054 let parse_ok = self.parse_line_into_spans(
1055 &mut state,
1056 &mut current_scopes,
1057 &prepared,
1058 current_offset,
1059 |byte_start, byte_end, category| {
1060 if !collect_spans {
1061 return;
1062 }
1063 let clamped_start = byte_start.max(desired_parse_start);
1064 if clamped_start < byte_end {
1065 spans.push(CachedSpan {
1066 range: clamped_start..byte_end,
1067 category,
1068 });
1069 }
1070 },
1071 );
1072 if parse_ok {
1073 newline_terminated = prepared.ends_with_newline;
1074 }
1075 }
1076
1077 pos = line_end;
1078 current_offset += line_byte_len;
1079 bytes_since_checkpoint += line_byte_len;
1080
1081 if newline_terminated {
1082 safe_offset = current_offset;
1083 safe_state = state.clone();
1084 safe_scopes = current_scopes.clone();
1085 }
1086
1087 let markers_here: Vec<(MarkerId, usize)> = self
1092 .checkpoint_markers
1093 .query_range(current_offset.saturating_sub(line_byte_len), current_offset)
1094 .into_iter()
1095 .map(|(id, start, _)| (id, start))
1096 .collect();
1097 for (marker_id, _) in markers_here {
1098 self.checkpoint_states
1099 .insert(marker_id, (state.clone(), current_scopes.clone()));
1100 }
1101 }
1102
1103 self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
1104
1105 Self::merge_adjacent_spans(&mut spans);
1106
1107 let cache_range_end = safe_offset.max(desired_parse_start);
1113 let cached_spans: Vec<CachedSpan> = spans
1114 .iter()
1115 .filter(|s| s.range.end <= cache_range_end)
1116 .cloned()
1117 .collect();
1118
1119 self.cache = Some(TextMateCache {
1120 range: desired_parse_start..cache_range_end,
1121 spans: cached_spans,
1122 tail_state: Some((safe_state, safe_scopes)),
1123 });
1124 self.last_buffer_len = buffer.len();
1125
1126 spans
1127 .into_iter()
1128 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
1129 .map(|span| {
1130 let cat = span.category;
1131 HighlightSpan {
1132 range: span.range,
1133 color: highlight_color(cat, theme),
1134 bg: highlight_bg(cat, theme),
1135 category: Some(cat),
1136 }
1137 })
1138 .collect()
1139 }
1140
1141 fn find_parse_resume_point(
1143 &self,
1144 desired_start: usize,
1145 parse_end: usize,
1146 syntax: &syntect::parsing::SyntaxReference,
1147 ) -> (
1148 usize,
1149 syntect::parsing::ParseState,
1150 syntect::parsing::ScopeStack,
1151 bool,
1152 ) {
1153 use syntect::parsing::{ParseState, ScopeStack};
1154
1155 let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
1159 let markers = self
1160 .checkpoint_markers
1161 .query_range(search_start, desired_start + 1);
1162 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
1163
1164 if let Some((id, cp_pos, _)) = nearest {
1165 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
1166 return (cp_pos, s.clone(), sc.clone(), true);
1167 }
1168 }
1169
1170 if parse_end <= MAX_PARSE_BYTES {
1171 (0, ParseState::new(syntax), ScopeStack::new(), true)
1173 } else {
1174 (
1177 desired_start,
1178 ParseState::new(syntax),
1179 ScopeStack::new(),
1180 true,
1181 )
1182 }
1183 }
1184
1185 fn scope_stack_to_category(
1189 &mut self,
1190 scopes: &syntect::parsing::ScopeStack,
1191 ) -> Option<HighlightCategory> {
1192 for scope in scopes.as_slice().iter().rev() {
1193 let cat = match self.scope_category_cache.get(scope) {
1194 Some(c) => *c,
1195 None => {
1196 let computed = scope_to_category(&scope.build_string());
1197 self.scope_category_cache.insert(*scope, computed);
1198 computed
1199 }
1200 };
1201 if let Some(c) = cat {
1202 return Some(c);
1203 }
1204 }
1205 None
1206 }
1207
1208 fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
1210 if spans.len() < 2 {
1211 return;
1212 }
1213
1214 let mut write_idx = 0;
1215 for read_idx in 1..spans.len() {
1216 if spans[write_idx].category == spans[read_idx].category
1217 && spans[write_idx].range.end == spans[read_idx].range.start
1218 {
1219 spans[write_idx].range.end = spans[read_idx].range.end;
1220 } else {
1221 write_idx += 1;
1222 if write_idx != read_idx {
1223 spans[write_idx] = spans[read_idx].clone();
1224 }
1225 }
1226 }
1227 spans.truncate(write_idx + 1);
1228 }
1229
    /// Currently a no-op: the body is empty and `_edit_range` is ignored.
    // NOTE(review): edits appear to be tracked through `notify_insert` /
    // `notify_delete` instead — confirm that nothing relies on this call.
    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
    }
1240
1241 pub fn invalidate_all(&mut self) {
1243 self.cache = None;
1244 let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1245 for id in ids {
1246 self.checkpoint_markers.delete(id);
1247 }
1248 self.checkpoint_states.clear();
1249 self.dirty_from = None;
1250 }
1251
1252 pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1257 let cache = self.cache.as_ref()?;
1258 cache
1259 .spans
1260 .iter()
1261 .find(|span| span.range.start <= position && position < span.range.end)
1262 .map(|span| span.category)
1263 }
1264
1265 pub fn syntax_name(&self) -> &str {
1267 &self.syntax_set.syntaxes()[self.syntax_index].name
1268 }
1269}
1270
1271impl HighlightEngine {
1272 pub fn from_entry(
1279 entry: &crate::primitives::grammar::GrammarEntry,
1280 registry: &GrammarRegistry,
1281 ) -> Self {
1282 let syntax_set = registry.syntax_set_arc();
1283 if let Some(index) = entry.engines.syntect {
1284 return Self::TextMate(Box::new(TextMateEngine::with_language(
1285 syntax_set,
1286 index,
1287 entry.engines.tree_sitter,
1288 )));
1289 }
1290 if let Some(lang) = entry.engines.tree_sitter {
1291 if let Ok(highlighter) = Highlighter::new(lang) {
1292 return Self::TreeSitter(Box::new(highlighter));
1293 }
1294 }
1295 Self::None
1296 }
1297
1298 pub fn for_file(path: &Path, first_line: Option<&str>, registry: &GrammarRegistry) -> Self {
1306 if let Some(entry) = registry.find_by_path(path, first_line) {
1307 return Self::from_entry(entry, registry);
1308 }
1309 Self::None
1310 }
1311
1312 pub fn for_syntax_name(name: &str, registry: &GrammarRegistry) -> Self {
1318 if let Some(entry) = registry.find_by_name(name) {
1319 return Self::from_entry(entry, registry);
1320 }
1321 Self::None
1322 }
1323
    /// Returns highlight spans for `viewport_start..viewport_end` by
    /// forwarding to the active backend with the same arguments.
    ///
    /// Yields an empty vector when no backend is configured.
    pub fn highlight_viewport(
        &mut self,
        buffer: &Buffer,
        viewport_start: usize,
        viewport_end: usize,
        theme: &Theme,
        context_bytes: usize,
    ) -> Vec<HighlightSpan> {
        match self {
            Self::TreeSitter(h) => {
                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
            }
            Self::TextMate(h) => {
                h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
            }
            Self::None => Vec::new(),
        }
    }
1346
1347 pub fn notify_insert(&mut self, position: usize, length: usize) {
1349 if let Self::TextMate(h) = self {
1350 h.notify_insert(position, length);
1351 }
1352 }
1353
1354 pub fn notify_delete(&mut self, position: usize, length: usize) {
1356 if let Self::TextMate(h) = self {
1357 h.notify_delete(position, length);
1358 }
1359 }
1360
1361 pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1363 match self {
1364 Self::TreeSitter(h) => h.invalidate_range(edit_range),
1365 Self::TextMate(h) => h.invalidate_range(edit_range),
1366 Self::None => {}
1367 }
1368 }
1369
1370 pub fn invalidate_all(&mut self) {
1372 match self {
1373 Self::TreeSitter(h) => h.invalidate_all(),
1374 Self::TextMate(h) => h.invalidate_all(),
1375 Self::None => {}
1376 }
1377 }
1378
1379 pub fn notify_edits(&mut self, edits: &[(usize, usize, usize)]) {
1391 for &(pos, del_len, ins_len) in edits {
1392 if del_len > 0 {
1393 self.notify_delete(pos, del_len);
1394 }
1395 if ins_len > 0 {
1396 self.notify_insert(pos, ins_len);
1397 }
1398 let edit_end = pos + del_len.max(ins_len);
1399 self.invalidate_range(pos..edit_end);
1400 }
1401 }
1402
1403 pub fn has_highlighting(&self) -> bool {
1405 !matches!(self, Self::None)
1406 }
1407
1408 pub fn backend_name(&self) -> &str {
1410 match self {
1411 Self::TreeSitter(_) => "tree-sitter",
1412 Self::TextMate(_) => "textmate",
1413 Self::None => "none",
1414 }
1415 }
1416
1417 pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1419 if let Self::TextMate(h) = self {
1420 Some(h.stats())
1421 } else {
1422 None
1423 }
1424 }
1425
1426 pub fn reset_highlight_stats(&mut self) {
1428 if let Self::TextMate(h) = self {
1429 h.reset_stats();
1430 }
1431 }
1432
1433 pub fn syntax_name(&self) -> Option<&str> {
1435 match self {
1436 Self::TreeSitter(_) => None, Self::TextMate(h) => Some(h.syntax_name()),
1438 Self::None => None,
1439 }
1440 }
1441
1442 pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1447 match self {
1448 Self::TreeSitter(h) => h.category_at_position(position),
1449 Self::TextMate(h) => h.category_at_position(position),
1450 Self::None => None,
1451 }
1452 }
1453
1454 pub fn language(&self) -> Option<&Language> {
1457 match self {
1458 Self::TreeSitter(h) => Some(h.language()),
1459 Self::TextMate(h) => h.language(),
1460 Self::None => None,
1461 }
1462 }
1463}
1464
1465pub fn highlight_string(
1471 code: &str,
1472 lang_hint: &str,
1473 registry: &GrammarRegistry,
1474 theme: &Theme,
1475) -> Vec<HighlightSpan> {
1476 use syntect::parsing::{ParseState, ScopeStack};
1477
1478 let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1480 Some(s) => s,
1481 None => return Vec::new(),
1482 };
1483
1484 let syntax_set = registry.syntax_set();
1485 let mut state = ParseState::new(syntax);
1486 let mut spans = Vec::new();
1487 let mut current_scopes = ScopeStack::new();
1488 let mut current_offset = 0;
1489
1490 for line in code.split_inclusive('\n') {
1492 let line_start = current_offset;
1493 let line_len = line.len();
1494
1495 let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1497 let line_for_syntect = if line.ends_with('\n') {
1498 format!("{}\n", line_content)
1499 } else {
1500 line_content.to_string()
1501 };
1502
1503 let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1504 Ok(ops) => ops,
1505 Err(_) => {
1506 current_offset += line_len;
1507 continue;
1508 }
1509 };
1510
1511 let mut syntect_offset = 0;
1512 let line_content_len = line_content.len();
1513
1514 for (op_offset, op) in ops {
1515 let clamped_op_offset = op_offset.min(line_content_len);
1516 if clamped_op_offset > syntect_offset {
1517 if let Some(category) = scope_stack_to_category(¤t_scopes) {
1518 let byte_start = line_start + syntect_offset;
1519 let byte_end = line_start + clamped_op_offset;
1520 if byte_start < byte_end {
1521 spans.push(HighlightSpan {
1522 range: byte_start..byte_end,
1523 color: highlight_color(category, theme),
1524 bg: highlight_bg(category, theme),
1525 category: Some(category),
1526 });
1527 }
1528 }
1529 }
1530 syntect_offset = clamped_op_offset;
1531 #[allow(clippy::let_underscore_must_use)]
1533 let _ = current_scopes.apply(&op);
1534 }
1535
1536 if syntect_offset < line_content_len {
1538 if let Some(category) = scope_stack_to_category(¤t_scopes) {
1539 let byte_start = line_start + syntect_offset;
1540 let byte_end = line_start + line_content_len;
1541 if byte_start < byte_end {
1542 spans.push(HighlightSpan {
1543 range: byte_start..byte_end,
1544 color: highlight_color(category, theme),
1545 bg: highlight_bg(category, theme),
1546 category: Some(category),
1547 });
1548 }
1549 }
1550 }
1551
1552 current_offset += line_len;
1553 }
1554
1555 merge_adjacent_highlight_spans(&mut spans);
1557
1558 spans
1559}
1560
1561fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1563 for scope in scopes.as_slice().iter().rev() {
1564 let scope_str = scope.build_string();
1565 if let Some(cat) = scope_to_category(&scope_str) {
1566 return Some(cat);
1567 }
1568 }
1569 None
1570}
1571
1572fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1574 if spans.len() < 2 {
1575 return;
1576 }
1577
1578 let mut write_idx = 0;
1579 for read_idx in 1..spans.len() {
1580 if spans[write_idx].color == spans[read_idx].color
1581 && spans[write_idx].range.end == spans[read_idx].range.start
1582 {
1583 spans[write_idx].range.end = spans[read_idx].range.end;
1584 } else {
1585 write_idx += 1;
1586 if write_idx != read_idx {
1587 spans[write_idx] = spans[read_idx].clone();
1588 }
1589 }
1590 }
1591 spans.truncate(write_idx + 1);
1592}
1593
1594#[cfg(test)]
1595mod tests {
1596 use crate::model::filesystem::StdFileSystem;
1597 use std::sync::Arc;
1598
1599 fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1600 Arc::new(StdFileSystem)
1601 }
1602 use super::*;
1603 use crate::view::theme;
1604
/// The default engine reports no highlighting and the `"none"` backend.
#[test]
fn test_highlight_engine_default() {
    let default_engine = HighlightEngine::default();
    let highlights = default_engine.has_highlighting();
    assert!(!highlights);
    assert_eq!(default_engine.backend_name(), "none");
}
1611
/// Checks which backend `for_file` selects per extension: `.rs` and `.py`
/// get TextMate, `.js`, `.ts` and `.tsx` get tree-sitter; every one of them
/// exposes a language.
#[test]
fn test_textmate_backend_selection() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let engine_for = |file_name: &str| HighlightEngine::for_file(Path::new(file_name), None, &registry);

    let rust = engine_for("test.rs");
    assert_eq!(rust.backend_name(), "textmate");
    assert!(rust.language().is_some());

    let python = engine_for("test.py");
    assert_eq!(python.backend_name(), "textmate");
    assert!(python.language().is_some());

    let javascript = engine_for("test.js");
    assert_eq!(javascript.backend_name(), "tree-sitter");
    assert!(javascript.language().is_some());

    let typescript = engine_for("test.ts");
    assert_eq!(typescript.backend_name(), "tree-sitter");
    assert!(typescript.language().is_some());

    let tsx = engine_for("test.tsx");
    assert_eq!(tsx.backend_name(), "tree-sitter");
    assert!(tsx.language().is_some());
}
1643
/// A tree-sitter highlighter for Rust can be constructed directly.
#[test]
fn test_tree_sitter_direct() {
    assert!(Highlighter::new(Language::Rust).is_ok());
}
1650
/// Resolving an unknown extension must complete without panicking.
///
/// No particular backend is asserted; the backend name is only queried.
#[test]
fn test_unknown_extension() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);

    let unknown_path = Path::new("test.unknown_xyz_123");
    let engine = HighlightEngine::for_file(unknown_path, None, &registry);
    let _backend = engine.backend_name();
}
1662
/// Highlighting a viewport (100..200) that lies entirely past the end of an
/// empty buffer must not panic and must produce no spans.
///
/// The check only runs when the `.rs` engine is the TextMate backend.
#[test]
fn test_highlight_viewport_empty_buffer_no_panic() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);

    let empty_buffer = Buffer::from_str("", 0, test_fs());
    let light_theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    match &mut engine {
        HighlightEngine::TextMate(tm) => {
            let spans = tm.highlight_viewport(&empty_buffer, 100, 200, &light_theme, 10);
            assert!(spans.is_empty());
        }
        HighlightEngine::TreeSitter(_) | HighlightEngine::None => {}
    }
}
1691
/// Span byte offsets must stay aligned with the buffer when lines end in
/// CRLF.
///
/// The buffer holds three `public\r\n` lines, so the keyword occupies bytes
/// 0..6, 8..14 and 16..22; each of those ranges must be covered by a span.
#[test]
fn test_textmate_engine_crlf_byte_offsets() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("test.java"), None, &registry);

    let content = b"public\r\npublic\r\npublic\r\n";
    let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    let HighlightEngine::TextMate(ref mut tm) = engine else {
        panic!("Expected TextMate engine for .java file");
    };

    let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
    let ranges: Vec<_> = spans.iter().map(|s| &s.range).collect();

    eprintln!("Spans: {:?}", ranges);

    // True when a single span fully contains start..end.
    let covered = |start: usize, end: usize| -> bool {
        spans
            .iter()
            .any(|s| s.range.start <= start && end <= s.range.end)
    };

    assert!(
        covered(0, 6),
        "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
        ranges
    );

    assert!(
        covered(8, 14),
        "Should have span covering bytes 8-14 (line 2 'public'). \
         If this fails, CRLF offset drift is occurring. Spans: {:?}",
        ranges
    );

    assert!(
        covered(16, 22),
        "Should have span covering bytes 16-22 (line 3 'public'). \
         If this fails, CRLF offset drift is occurring. Spans: {:?}",
        ranges
    );
}
1761
/// When the buffer ends in an unterminated line, the cache must commit only
/// up to the last newline and keep a saved tail state there.
///
/// The check only runs when the `.diff` engine is the TextMate backend.
#[test]
fn test_partial_trailing_line_not_committed_to_cache() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("commit.diff"), None, &registry);
    let theme = Theme::load_builtin(theme::THEME_DARK).unwrap();

    // One complete line followed by a line without a terminating newline.
    let content = "+complete\n+partial";
    let buffer = Buffer::from_str(content, 0, test_fs());
    let expected_commit_end = "+complete\n".len();

    match &mut engine {
        HighlightEngine::TextMate(tm) => {
            let _ = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
            let (cache_end, has_tail) = tm.cache_commit_for_test();
            assert_eq!(
                cache_end,
                expected_commit_end,
                "cache should commit at the last newline, not into the partial \
                 trailing line — committing past the newline causes streaming \
                 forward-extension to parse the line's continuation in the wrong \
                 grammar context, losing the diff bg."
            );
            assert!(has_tail, "tail state should be saved at the safe boundary");
        }
        HighlightEngine::TreeSitter(_) | HighlightEngine::None => {}
    }
}
1800
/// Every byte of an inserted (`+`) diff line must carry the theme's
/// `diff_add_bg` background, even when the line's content contains template
/// literals and other punctuation.
///
/// Lines starting with `+++` (the file header) are excluded from the check.
#[test]
fn test_diff_inserted_line_is_fully_covered() {
    let registry =
        GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
    let mut engine = HighlightEngine::for_file(Path::new("commit.diff"), None, &registry);
    let theme = Theme::load_builtin(theme::THEME_DARK).unwrap();

    let content =
        "diff --git a/file.ts b/file.ts\n\
         index aaa..bbb 100644\n\
         --- a/file.ts\n\
         +++ b/file.ts\n\
         @@ -1,3 +1,5 @@\n\
         +${seen[g.subtree] > 1 ? `**Seen ${seen[g.subtree]}× — likely cross-subtree type seam.**` : \"\"}\n\
         + const k = `${b.fn}::${(b.what || \"\").slice(0, 80)}`;\n";
    let buffer = Buffer::from_str(content, 0, test_fs());

    let HighlightEngine::TextMate(ref mut tm) = engine else {
        panic!("Expected TextMate engine for .diff file");
    };
    let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);

    let bytes = content.as_bytes();
    let mut line_start = 0;
    for line in content.split('\n') {
        let line_end = line_start + line.len();
        if line.starts_with('+') && !line.starts_with("+++") {
            for byte_pos in line_start..line_end {
                let span = spans
                    .iter()
                    .find(|s| s.range.start <= byte_pos && s.range.end > byte_pos);
                let bg = span.and_then(|s| s.bg);
                // The fixture contains multi-byte characters, so the offending
                // byte is printed via `escape_ascii`; slicing `content` at an
                // arbitrary byte offset would panic on a non-boundary and hide
                // this message.
                assert_eq!(
                    bg,
                    Some(theme.diff_add_bg),
                    "byte {} (`{}`) of `+` line starting at {} should carry diff_add_bg; \
                     got span={:?}",
                    byte_pos,
                    bytes[byte_pos].escape_ascii(),
                    line_start,
                    span,
                );
            }
        }
        // Skip past the `\n` that `split` consumed.
        line_start = line_end + 1;
    }
}
1856
/// `git-rebase-todo` resolves to the TextMate backend with highlighting.
#[test]
fn test_git_rebase_todo_highlighting() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);

    let todo_path = Path::new("git-rebase-todo");
    let todo_engine = HighlightEngine::for_file(todo_path, None, &registry);
    assert_eq!(todo_engine.backend_name(), "textmate");
    assert!(todo_engine.has_highlighting());
}
1867
/// `COMMIT_EDITMSG` and `MERGE_MSG` both resolve to the TextMate backend
/// with highlighting.
#[test]
fn test_git_commit_message_highlighting() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let engine_for = |file_name: &str| HighlightEngine::for_file(Path::new(file_name), None, &registry);

    let commit_msg = engine_for("COMMIT_EDITMSG");
    assert_eq!(commit_msg.backend_name(), "textmate");
    assert!(commit_msg.has_highlighting());

    let merge_msg = engine_for("MERGE_MSG");
    assert_eq!(merge_msg.backend_name(), "textmate");
    assert!(merge_msg.has_highlighting());
}
1883
/// `.gitignore` and `.dockerignore` both resolve to the TextMate backend
/// with highlighting.
#[test]
fn test_gitignore_highlighting() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let engine_for = |file_name: &str| HighlightEngine::for_file(Path::new(file_name), None, &registry);

    let gitignore = engine_for(".gitignore");
    assert_eq!(gitignore.backend_name(), "textmate");
    assert!(gitignore.has_highlighting());

    let dockerignore = engine_for(".dockerignore");
    assert_eq!(dockerignore.backend_name(), "textmate");
    assert!(dockerignore.has_highlighting());
}
1899
/// `.gitconfig` and `.gitmodules` both resolve to the TextMate backend with
/// highlighting.
#[test]
fn test_gitconfig_highlighting() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let engine_for = |file_name: &str| HighlightEngine::for_file(Path::new(file_name), None, &registry);

    let gitconfig = engine_for(".gitconfig");
    assert_eq!(gitconfig.backend_name(), "textmate");
    assert!(gitconfig.has_highlighting());

    let gitmodules = engine_for(".gitmodules");
    assert_eq!(gitmodules.backend_name(), "textmate");
    assert!(gitmodules.has_highlighting());
}
1915
/// `.gitattributes` resolves to the TextMate backend with highlighting.
#[test]
fn test_gitattributes_highlighting() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);

    let attributes_path = Path::new(".gitattributes");
    let attributes_engine = HighlightEngine::for_file(attributes_path, None, &registry);
    assert_eq!(attributes_engine.backend_name(), "textmate");
    assert!(attributes_engine.has_highlighting());
}
1926
/// `punctuation.definition.comment*` scopes map to the `Comment` category.
#[test]
fn test_comment_delimiter_uses_comment_color() {
    let comment = Some(HighlightCategory::Comment);

    assert_eq!(scope_to_category("punctuation.definition.comment"), comment);
    assert_eq!(
        scope_to_category("punctuation.definition.comment.python"),
        comment
    );
    assert_eq!(
        scope_to_category("punctuation.definition.comment.begin"),
        comment
    );
}
1943
/// `punctuation.definition.string.*` scopes map to the `String` category.
#[test]
fn test_string_delimiter_uses_string_color() {
    let string = Some(HighlightCategory::String);

    assert_eq!(
        scope_to_category("punctuation.definition.string.begin"),
        string
    );
    assert_eq!(
        scope_to_category("punctuation.definition.string.end"),
        string
    );
}
1956
/// Section, bracket and several `punctuation.definition.*` scopes map to the
/// `PunctuationBracket` category.
#[test]
fn test_punctuation_bracket() {
    let bracket = Some(HighlightCategory::PunctuationBracket);

    assert_eq!(scope_to_category("punctuation.section"), bracket);
    assert_eq!(
        scope_to_category("punctuation.section.block.begin.c"),
        bracket
    );
    assert_eq!(scope_to_category("punctuation.bracket"), bracket);
    assert_eq!(
        scope_to_category("punctuation.definition.array.begin.toml"),
        bracket
    );
    assert_eq!(
        scope_to_category("punctuation.definition.block.code.typst"),
        bracket
    );
    assert_eq!(
        scope_to_category("punctuation.definition.group.typst"),
        bracket
    );
    assert_eq!(
        scope_to_category("punctuation.definition.inline-table.begin.toml"),
        bracket
    );
    assert_eq!(
        scope_to_category("punctuation.definition.tag.end.svelte"),
        bracket
    );
}
1994
/// Separator, terminator and accessor scopes map to the
/// `PunctuationDelimiter` category.
#[test]
fn test_punctuation_delimiter() {
    let delimiter = Some(HighlightCategory::PunctuationDelimiter);

    assert_eq!(scope_to_category("punctuation.separator"), delimiter);
    assert_eq!(
        scope_to_category("punctuation.terminator.statement.c"),
        delimiter
    );
    assert_eq!(scope_to_category("punctuation.accessor"), delimiter);
}
2010
/// After the first highlight of a 200-line file, scrolling to the start,
/// middle and end must be served from the cache: no additional misses, three
/// hits, and no additional bytes parsed.
#[test]
fn test_small_file_scroll_is_cache_hit() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);

    let content: String = (0..200)
        .map(|i| format!("fn f_{i}() {{ let x = {i}; }}\n"))
        .collect();
    let buffer = Buffer::from_str(&content, 0, test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    let HighlightEngine::TextMate(ref mut tm) = engine else {
        panic!("expected TextMate engine for .rs");
    };

    // Initial highlight: populates the cache.
    let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
    let stats_after_first = tm.stats().clone();
    assert_eq!(
        stats_after_first.cache_hits, 0,
        "first call cannot hit cache"
    );
    assert_eq!(
        stats_after_first.cache_misses, 1,
        "first call must be a miss"
    );

    // Probe the start, the middle and the tail of the buffer.
    let total_len = buffer.len();
    let mid = total_len / 2;
    let near_end = total_len.saturating_sub(200);
    for (vs, ve) in [(0, 200), (mid, mid + 200), (near_end, total_len)] {
        let _ = tm.highlight_viewport(&buffer, vs, ve, &theme, 10_000);
    }

    let stats_after_scroll = tm.stats().clone();
    assert_eq!(
        stats_after_scroll.cache_misses,
        1,
        "scrolling must not add cache misses (got extra: {})",
        stats_after_scroll.cache_misses - 1
    );
    assert_eq!(
        stats_after_scroll.cache_hits, 3,
        "all three scroll probes must hit the cache"
    );
    assert_eq!(
        stats_after_scroll.bytes_parsed, stats_after_first.bytes_parsed,
        "scrolling must not parse any new bytes"
    );
}
2067
/// A single-byte insert in the middle of a cached 200-line file must be
/// re-highlighted by parsing fewer bytes than the whole file.
#[test]
fn test_small_file_edit_uses_partial_update() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);

    let content: String = (0..200)
        .map(|i| format!("fn f_{i}() {{ let x = {i}; }}\n"))
        .collect();
    let buffer = Buffer::from_str(&content, 0, test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    let HighlightEngine::TextMate(ref mut tm) = engine else {
        panic!("expected TextMate engine for .rs");
    };

    // Warm the cache and record the baseline amount of parsed bytes.
    let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
    let baseline = tm.stats().bytes_parsed;
    let buf_len = buffer.len();
    assert!(
        buf_len > 4000,
        "test needs a buffer larger than the partial-update region"
    );

    // Report an insert at the midpoint, then highlight again.
    tm.notify_insert(buf_len / 2, 1);
    let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
    let parsed = tm.stats().bytes_parsed - baseline;

    assert!(
        parsed < buf_len,
        "edit must not trigger a whole-file reparse (parsed {parsed}, file {buf_len})"
    );
}
2113
/// A bulk edit reported through `HighlightEngine::notify_edits` must be
/// re-highlighted by parsing fewer bytes than the whole file.
#[test]
fn test_bulk_edit_uses_partial_update() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);

    let content: String = (0..200)
        .map(|i| format!("fn f_{i}() {{ let x = {i}; }}\n"))
        .collect();
    let buffer = Buffer::from_str(&content, 0, test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    // Warm the cache and record the baseline amount of parsed bytes.
    let _ = engine.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
    let HighlightEngine::TextMate(tm) = &engine else {
        panic!("expected TextMate engine for .rs");
    };
    let bytes_before_edit = tm.stats().bytes_parsed;

    let buf_len = buffer.len();
    assert!(
        buf_len > 4000,
        "test needs a buffer larger than the partial-update region"
    );

    // One edit at the midpoint: 8 bytes deleted, 1 byte inserted.
    let edit_pos = buf_len / 2;
    engine.notify_edits(&[(edit_pos, 8usize, 1usize)]);

    let _ = engine.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
    let HighlightEngine::TextMate(tm) = &engine else {
        unreachable!();
    };
    let parsed = tm.stats().bytes_parsed - bytes_before_edit;

    assert!(
        parsed < buf_len,
        "bulk edit must not trigger a whole-file reparse \
         (parsed {parsed}, file {buf_len})"
    );
}
2164
/// A bulk edit near the end of a 400-line file, far from the highlighted
/// viewport (0..200 with 1 000 context bytes), must not cause a whole-file
/// reparse on the next highlight.
#[test]
fn test_bulk_edit_outside_cache_keeps_textmate_partial_update() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);

    let content: String = (0..400)
        .map(|i| format!("fn f_{i}() {{ let x = {i}; }}\n"))
        .collect();
    let buffer = Buffer::from_str(&content, 0, test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    let _ = engine.highlight_viewport(&buffer, 0, 200, &theme, 1_000);
    let HighlightEngine::TextMate(tm) = &engine else {
        panic!("expected TextMate engine for .rs");
    };
    let bytes_before = tm.stats().bytes_parsed;

    // Edit 100 bytes before the end of the buffer: 3 deleted, 1 inserted.
    let far_pos = buffer.len() - 100;
    engine.notify_edits(&[(far_pos, 3, 1)]);

    let _ = engine.highlight_viewport(&buffer, 0, 200, &theme, 1_000);
    let HighlightEngine::TextMate(tm) = &engine else {
        unreachable!();
    };
    let parsed = tm.stats().bytes_parsed - bytes_before;
    let buf_len = buffer.len();
    assert!(
        parsed < buf_len,
        "bulk edit outside the viewport must not force a whole-file \
         reparse (parsed {parsed}, file {buf_len})"
    );
}
2209
/// With checkpoints cleared after an edit notification, a re-highlight of a
/// file four times the size of `CONVERGENCE_BUDGET` must parse at most the
/// budget plus 4096 bytes of slack, and must leave `dirty_from` set.
#[test]
fn test_partial_update_budget_caps_work() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);

    // Grow the file until it is at least four budgets long.
    let minimum_len = CONVERGENCE_BUDGET * 4;
    let mut content = String::new();
    while content.len() < minimum_len {
        content.push_str("fn name() { let mut v = 0; v += 1; }\n");
    }
    let buffer = Buffer::from_str(&content, 0, test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    let HighlightEngine::TextMate(ref mut tm) = engine else {
        panic!("expected TextMate engine for .rs");
    };

    let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
    tm.notify_insert(100, 0);
    tm.checkpoint_states.clear();

    let bytes_before = tm.stats().bytes_parsed;
    let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
    let parsed = tm.stats().bytes_parsed - bytes_before;

    let ceiling = CONVERGENCE_BUDGET + 4096;
    assert!(
        parsed <= ceiling,
        "partial update parsed {parsed}, expected <= {} \
         (budget {CONVERGENCE_BUDGET} + slack)",
        ceiling
    );

    assert!(
        tm.dirty_from.is_some(),
        "budget exit must keep dirty_from set"
    );
}
2259
/// For a file more than twice `MAX_PARSE_BYTES` long, highlighting a
/// 1 000-byte viewport deep inside it must parse at most four times the
/// window size (viewport plus context on both sides).
#[test]
fn test_large_file_uses_windowed_parse() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, &registry);

    let line = "fn long_name_for_padding() { let v = 1; v + 1; }\n";
    let bytes_needed = MAX_PARSE_BYTES * 2;
    let lines_needed = bytes_needed / line.len() + 100;
    let content = line.repeat(lines_needed);
    assert!(content.len() > MAX_PARSE_BYTES * 2);
    let buffer = Buffer::from_str(&content, 0, test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    let HighlightEngine::TextMate(ref mut tm) = engine else {
        panic!("expected TextMate engine for .rs");
    };

    // Viewport placed well beyond MAX_PARSE_BYTES.
    let context_bytes = 10_000usize;
    let viewport_start = MAX_PARSE_BYTES + 200_000;
    let viewport_len = 1000;
    let viewport_end = viewport_start + viewport_len;
    let _ = tm.highlight_viewport(&buffer, viewport_start, viewport_end, &theme, context_bytes);
    let parsed = tm.stats().bytes_parsed;

    let window = viewport_len + 2 * context_bytes;
    assert!(
        parsed <= window * 4,
        "large file windowed parse should be ~{window} bytes, got {parsed} \
         (file {})",
        buffer.len()
    );
}
2311
/// String highlighting from a template literal must not leak into later
/// code: neither the `constructor` keyword nor the class's final closing
/// brace may be categorised as `String`.
#[test]
fn test_javascript_template_literal_does_not_bleed() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("repro.js"), None, &registry);

    let source = "class ExampleClass {\n\
                  \texampleFunction = exampleArg => `${exampleArg}`;\n\
                  \n\
                  \tconstructor() {\n\
                  \t\t// constructor body\n\
                  \t}\n\
                  \n\
                  \t/* multiline comment */\n\
                  }\n";
    let buffer = Buffer::from_str(source, 0, test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
    let string_category = Some(HighlightCategory::String);

    let ctor_pos = source.find("constructor").expect("locate constructor");
    let ctor_cat = engine.category_at_position(ctor_pos);
    assert_ne!(
        ctor_cat,
        string_category,
        "constructor keyword must not inherit string state from earlier \
         template literal (got {:?})",
        ctor_cat,
    );

    let last_brace = source.rfind('}').expect("locate closing brace");
    let brace_cat = engine.category_at_position(last_brace);
    assert_ne!(
        brace_cat,
        string_category,
        "closing class brace must not be highlighted as string \
         (got {:?})",
        brace_cat,
    );
}
2363
/// Inside `` `${name}` `` the substituted identifier must be `Variable`,
/// while the closing `}` of the substitution and the closing backtick must
/// both be `String`.
#[test]
fn test_javascript_template_substitution_closing_tokens_are_string() {
    let loader = crate::primitives::grammar::LocalGrammarLoader::embedded_only();
    let registry = GrammarRegistry::load(&loader);
    let mut engine = HighlightEngine::for_file(Path::new("tmpl.js"), None, &registry);

    let source = "const x = `${name}`;\n";
    let buffer = Buffer::from_str(source, 0, test_fs());
    let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();

    let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);

    // Byte positions of the three tokens under test.
    let close_brace = source
        .find("}`")
        .expect("locate substitution closing brace");
    let close_backtick = close_brace + 1;
    let name_pos = source.find("name").expect("locate identifier");

    let string_category = Some(HighlightCategory::String);

    let name_cat = engine.category_at_position(name_pos);
    assert_eq!(
        name_cat,
        Some(HighlightCategory::Variable),
        "substitution identifier should be Variable (got {:?})",
        name_cat,
    );

    let brace_cat = engine.category_at_position(close_brace);
    assert_eq!(
        brace_cat,
        string_category,
        "closing }} of ${{…}} must be String (got {:?})",
        brace_cat,
    );

    let backtick_cat = engine.category_at_position(close_backtick);
    assert_eq!(
        backtick_cat,
        string_category,
        "closing backtick of template literal must be String \
         (got {:?})",
        backtick_cat,
    );
}
2428}