1use crate::model::buffer::Buffer;
39use crate::model::marker::{MarkerId, MarkerList};
40use crate::primitives::grammar::GrammarRegistry;
41use crate::primitives::highlighter::{
42 highlight_bg, highlight_color, HighlightCategory, HighlightSpan, Highlighter, Language,
43};
44use crate::view::theme::Theme;
45use std::collections::HashMap;
46use std::ops::Range;
47use std::path::Path;
48use std::sync::Arc;
49use syntect::parsing::SyntaxSet;
50
51fn scope_to_category(scope: &str) -> Option<HighlightCategory> {
53 let scope_lower = scope.to_lowercase();
54
55 if scope_lower.starts_with("comment") {
57 return Some(HighlightCategory::Comment);
58 }
59
60 if scope_lower.starts_with("string") {
62 return Some(HighlightCategory::String);
63 }
64
65 if scope_lower.starts_with("markup.heading") || scope_lower.starts_with("entity.name.section") {
69 return Some(HighlightCategory::Keyword); }
71 if scope_lower.starts_with("markup.bold") {
73 return Some(HighlightCategory::Constant); }
75 if scope_lower.starts_with("markup.italic") {
77 return Some(HighlightCategory::Variable); }
79 if scope_lower.starts_with("markup.raw") || scope_lower.starts_with("markup.inline.raw") {
81 return Some(HighlightCategory::String); }
83 if scope_lower.starts_with("markup.underline.link") {
85 return Some(HighlightCategory::Function); }
87 if scope_lower.starts_with("markup.underline") {
89 return Some(HighlightCategory::Function);
90 }
91 if scope_lower.starts_with("markup.quote") {
93 return Some(HighlightCategory::Comment); }
95 if scope_lower.starts_with("markup.list") {
97 return Some(HighlightCategory::Operator); }
99 if scope_lower.starts_with("markup.strikethrough") {
101 return Some(HighlightCategory::Comment); }
103
104 if scope_lower.starts_with("markup.inserted") {
116 return Some(HighlightCategory::Inserted);
117 }
118 if scope_lower.starts_with("markup.deleted") {
119 return Some(HighlightCategory::Deleted);
120 }
121 if scope_lower.starts_with("markup.changed") || scope_lower.starts_with("meta.diff.range") {
122 return Some(HighlightCategory::Changed);
123 }
124 if scope_lower.starts_with("meta.diff.header") {
125 return Some(HighlightCategory::Type);
126 }
127
128 if scope_lower.starts_with("keyword.control")
130 || scope_lower.starts_with("keyword.other")
131 || scope_lower.starts_with("keyword.declaration")
132 || scope_lower.starts_with("keyword")
133 {
134 if !scope_lower.starts_with("keyword.operator") {
136 return Some(HighlightCategory::Keyword);
137 }
138 }
139
140 if scope_lower.starts_with("punctuation.definition.comment") {
144 return Some(HighlightCategory::Comment);
145 }
146 if scope_lower.starts_with("punctuation.definition.string") {
147 return Some(HighlightCategory::String);
148 }
149
150 if scope_lower.starts_with("keyword.operator") {
152 return Some(HighlightCategory::Operator);
153 }
154
155 if scope_lower.starts_with("punctuation.section")
159 || scope_lower.starts_with("punctuation.bracket")
160 || scope_lower.starts_with("punctuation.definition.array")
161 || scope_lower.starts_with("punctuation.definition.block")
162 || scope_lower.starts_with("punctuation.definition.brackets")
163 || scope_lower.starts_with("punctuation.definition.group")
164 || scope_lower.starts_with("punctuation.definition.inline-table")
165 || scope_lower.starts_with("punctuation.definition.section")
166 || scope_lower.starts_with("punctuation.definition.table")
167 || scope_lower.starts_with("punctuation.definition.tag")
168 {
169 return Some(HighlightCategory::PunctuationBracket);
170 }
171
172 if scope_lower.starts_with("punctuation.separator")
174 || scope_lower.starts_with("punctuation.terminator")
175 || scope_lower.starts_with("punctuation.accessor")
176 {
177 return Some(HighlightCategory::PunctuationDelimiter);
178 }
179
180 if scope_lower.starts_with("entity.name.function")
182 || scope_lower.starts_with("support.function")
183 || scope_lower.starts_with("meta.function-call")
184 || scope_lower.starts_with("variable.function")
185 {
186 return Some(HighlightCategory::Function);
187 }
188
189 if scope_lower.starts_with("entity.name.type")
191 || scope_lower.starts_with("entity.name.class")
192 || scope_lower.starts_with("entity.name.struct")
193 || scope_lower.starts_with("entity.name.enum")
194 || scope_lower.starts_with("entity.name.interface")
195 || scope_lower.starts_with("entity.name.trait")
196 || scope_lower.starts_with("support.type")
197 || scope_lower.starts_with("support.class")
198 {
199 return Some(HighlightCategory::Type);
200 }
201
202 if scope_lower.starts_with("storage.type") || scope_lower.starts_with("storage.modifier") {
204 return Some(HighlightCategory::Keyword);
205 }
206
207 if scope_lower.starts_with("constant.numeric")
209 || scope_lower.starts_with("constant.language.boolean")
210 {
211 return Some(HighlightCategory::Number);
212 }
213 if scope_lower.starts_with("constant") {
214 return Some(HighlightCategory::Constant);
215 }
216
217 if scope_lower.starts_with("variable.language") {
219 return Some(HighlightCategory::VariableBuiltin);
220 }
221 if scope_lower.starts_with("variable.parameter") || scope_lower.starts_with("variable.other") {
222 return Some(HighlightCategory::Variable);
223 }
224
225 if scope_lower.starts_with("entity.name.tag")
227 || scope_lower.starts_with("support.other.property")
228 || scope_lower.starts_with("meta.object-literal.key")
229 || scope_lower.starts_with("variable.other.property")
230 || scope_lower.starts_with("variable.other.object.property")
231 {
232 return Some(HighlightCategory::Property);
233 }
234
235 if scope_lower.starts_with("entity.other.attribute")
237 || scope_lower.starts_with("meta.attribute")
238 || scope_lower.starts_with("entity.name.decorator")
239 {
240 return Some(HighlightCategory::Attribute);
241 }
242
243 if scope_lower.starts_with("variable") {
245 return Some(HighlightCategory::Variable);
246 }
247
248 None
249}
250
/// Highlighting backend chosen for a buffer.
///
/// The engine payloads are boxed; `None` is the `Default` variant and
/// produces no highlighting.
#[derive(Default)]
pub enum HighlightEngine {
    /// Tree-sitter based highlighter.
    TreeSitter(Box<Highlighter>),
    /// TextMate-grammar based engine driven by syntect.
    TextMate(Box<TextMateEngine>),
    /// No highlighting available.
    #[default]
    None,
}
262
/// Incremental highlighter built on syntect's TextMate grammar parser.
///
/// Keeps a span cache for the most recently parsed range plus a set of
/// checkpoints (marker position + parser state) so that parsing can resume
/// near an edit instead of restarting from the beginning of the buffer.
pub struct TextMateEngine {
    /// Shared syntax definitions passed to `ParseState::parse_line`.
    syntax_set: Arc<SyntaxSet>,
    /// Index of the active syntax within `syntax_set.syntaxes()`.
    syntax_index: usize,
    /// Buffer positions of checkpoints; shifted on insert/delete notifications.
    checkpoint_markers: MarkerList,
    /// Parser state and scope stack recorded for each checkpoint marker.
    checkpoint_states:
        HashMap<MarkerId, (syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
    /// Lowest edited offset not yet re-highlighted, if any.
    dirty_from: Option<usize>,
    /// Cached spans for the most recently parsed range.
    cache: Option<TextMateCache>,
    /// Buffer length observed when the cache was last written.
    last_buffer_len: usize,
    /// Optional tree-sitter language associated with this syntax; exposed via `language()`.
    ts_language: Option<Language>,
    /// Counters describing cache and parse activity.
    stats: HighlightStats,
    /// Memoized result of `scope_to_category` per syntect scope.
    scope_category_cache: HashMap<syntect::parsing::Scope, Option<HighlightCategory>>,
}
279
/// Counters describing how much work the TextMate engine has performed.
#[derive(Debug, Default, Clone)]
pub struct HighlightStats {
    /// Total number of bytes run through the parser.
    pub bytes_parsed: usize,
    /// Requests answered directly from the span cache.
    pub cache_hits: usize,
    /// Requests that required parsing (partial update, forward extension or full parse).
    pub cache_misses: usize,
    /// Checkpoint states overwritten during partial updates.
    pub checkpoints_updated: usize,
    /// Partial updates that stopped early because the parser state matched a stored checkpoint.
    pub convergences: usize,
}
294
/// Cached highlight spans for a contiguous byte range of the buffer.
#[derive(Debug, Clone)]
struct TextMateCache {
    /// Byte range of the buffer that `spans` describes.
    range: Range<usize>,
    /// Spans within `range`, kept sorted by start offset.
    spans: Vec<CachedSpan>,
    /// Parser state and scope stack at `range.end`, used to extend the cache
    /// forward. Reset to `None` when an edit before `range.end` makes it stale.
    tail_state: Option<(syntect::parsing::ParseState, syntect::parsing::ScopeStack)>,
}
303
/// A highlighted byte range stored in the cache, independent of any theme.
#[derive(Debug, Clone)]
struct CachedSpan {
    /// Absolute byte range in the buffer.
    range: Range<usize>,
    /// Category used to look up colours when the span is returned.
    category: crate::primitives::highlighter::HighlightCategory,
}
309
/// Buffers up to this size (1 MiB) are parsed in full; it is also the
/// look-back window used when searching for a checkpoint to resume from.
const MAX_PARSE_BYTES: usize = 1024 * 1024;

/// Minimum number of bytes parsed between checkpoint attempts. A checkpoint
/// is only created when no marker lies within half this distance on either
/// side of the candidate offset.
const CHECKPOINT_INTERVAL: usize = 256;

/// A partial update stops once it has parsed at least this many bytes
/// (64 KiB) past the dirty position without converging.
const CONVERGENCE_BUDGET: usize = 64 * 1024;
319
/// Returns the offset just past the line that starts at `pos`.
///
/// A line ends after `\n`, `\r\n`, or a lone `\r`. When no terminator follows
/// `pos`, the length of `content_bytes` is returned; when `pos` is already
/// past the end, `pos` itself is returned.
fn find_line_end(content_bytes: &[u8], pos: usize) -> usize {
    let tail = match content_bytes.get(pos..) {
        Some(tail) => tail,
        None => return pos,
    };

    match tail.iter().position(|&b| b == b'\n' || b == b'\r') {
        None => content_bytes.len(),
        Some(rel) => {
            // A CR immediately followed by LF counts as one two-byte terminator.
            let is_crlf = tail[rel] == b'\r' && tail.get(rel + 1) == Some(&b'\n');
            pos + rel + if is_crlf { 2 } else { 1 }
        }
    }
}
341
/// A single line readied for syntect, as produced by `prepare_line_at`.
struct PreparedLine {
    /// Text handed to the parser: the line content followed by `\n`, or the
    /// bare content when the line is an unterminated final line.
    line_for_syntect: String,
    /// Byte length of the line content without trailing `\r` / `\n`.
    line_content_len: usize,
    /// Whether the original line ended with `\n`.
    ends_with_newline: bool,
}
355
356fn prepare_line_at(content_bytes: &[u8], pos: usize) -> (usize, usize, Option<PreparedLine>) {
361 let line_end = find_line_end(content_bytes, pos);
362 let line_bytes = &content_bytes[pos..line_end];
363 let line_byte_len = line_bytes.len();
364 let prepared = std::str::from_utf8(line_bytes).ok().map(|line_str| {
365 let line_content = line_str.trim_end_matches(&['\r', '\n'][..]);
366 let ends_with_newline = line_str.ends_with('\n');
367 let is_streaming_tail = line_end == content_bytes.len() && !ends_with_newline;
368 let line_for_syntect = if is_streaming_tail {
369 line_content.to_string()
370 } else {
371 format!("{}\n", line_content)
372 };
373 PreparedLine {
374 line_for_syntect,
375 line_content_len: line_content.len(),
376 ends_with_newline,
377 }
378 });
379 (line_end, line_byte_len, prepared)
380}
381
382impl TextMateEngine {
383 pub fn new(syntax_set: Arc<SyntaxSet>, syntax_index: usize) -> Self {
385 Self {
386 syntax_set,
387 syntax_index,
388 checkpoint_markers: MarkerList::new(),
389 checkpoint_states: HashMap::new(),
390 dirty_from: None,
391 cache: None,
392 last_buffer_len: 0,
393 ts_language: None,
394 stats: HighlightStats::default(),
395 scope_category_cache: HashMap::new(),
396 }
397 }
398
399 pub fn with_language(
401 syntax_set: Arc<SyntaxSet>,
402 syntax_index: usize,
403 ts_language: Option<Language>,
404 ) -> Self {
405 Self {
406 syntax_set,
407 syntax_index,
408 checkpoint_markers: MarkerList::new(),
409 checkpoint_states: HashMap::new(),
410 dirty_from: None,
411 cache: None,
412 last_buffer_len: 0,
413 ts_language,
414 stats: HighlightStats::default(),
415 scope_category_cache: HashMap::new(),
416 }
417 }
418
419 pub fn stats(&self) -> &HighlightStats {
421 &self.stats
422 }
423
424 pub fn reset_stats(&mut self) {
426 self.stats = HighlightStats::default();
427 }
428
429 pub fn language(&self) -> Option<&Language> {
431 self.ts_language.as_ref()
432 }
433
434 pub fn notify_insert(&mut self, position: usize, length: usize) {
437 self.checkpoint_markers.adjust_for_insert(position, length);
438 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
439 if let Some(cache) = &mut self.cache {
440 for span in &mut cache.spans {
441 if span.range.start >= position {
442 span.range.start += length;
443 span.range.end += length;
444 } else if span.range.end > position {
445 span.range.end += length;
446 }
447 }
448 if cache.range.end >= position {
449 cache.range.end += length;
450 if position < cache.range.end {
451 cache.tail_state = None;
452 }
453 }
454 }
455 }
456
457 pub fn notify_delete(&mut self, position: usize, length: usize) {
459 self.checkpoint_markers.adjust_for_delete(position, length);
460 self.dirty_from = Some(self.dirty_from.map_or(position, |d| d.min(position)));
461 if let Some(cache) = &mut self.cache {
462 let delete_end = position + length;
463 cache.spans.retain_mut(|span| {
464 if span.range.start >= delete_end {
465 span.range.start -= length;
466 span.range.end -= length;
467 true
468 } else if span.range.end <= position {
469 true
470 } else if span.range.start >= position && span.range.end <= delete_end {
471 false
472 } else {
473 if span.range.start < position {
474 span.range.end = position.min(span.range.end);
475 } else {
476 span.range.start = position;
477 span.range.end = position + span.range.end.saturating_sub(delete_end);
478 }
479 span.range.start < span.range.end
480 }
481 });
482 if cache.range.end > delete_end {
483 cache.range.end -= length;
484 } else if cache.range.end > position {
485 cache.range.end = position;
486 }
487 if position < cache.range.end {
488 cache.tail_state = None;
489 }
490 }
491 }
492
493 fn maybe_create_checkpoint(
499 &mut self,
500 current_offset: usize,
501 state: &syntect::parsing::ParseState,
502 current_scopes: &syntect::parsing::ScopeStack,
503 ) {
504 let nearby = self.checkpoint_markers.query_range(
505 current_offset.saturating_sub(CHECKPOINT_INTERVAL / 2),
506 current_offset + CHECKPOINT_INTERVAL / 2,
507 );
508 if nearby.is_empty() {
509 let marker_id = self.checkpoint_markers.create(current_offset, true);
510 self.checkpoint_states
511 .insert(marker_id, (state.clone(), current_scopes.clone()));
512 }
513 }
514
515 fn parse_line_into_spans(
526 &mut self,
527 state: &mut syntect::parsing::ParseState,
528 current_scopes: &mut syntect::parsing::ScopeStack,
529 prepared: &PreparedLine,
530 current_offset: usize,
531 mut on_span: impl FnMut(usize, usize, HighlightCategory),
532 ) -> bool {
533 let ops = match state.parse_line(&prepared.line_for_syntect, &self.syntax_set) {
534 Ok(ops) => ops,
535 Err(_) => return false,
536 };
537
538 let line_content_len = prepared.line_content_len;
539 let mut syntect_offset = 0;
540
541 for (op_offset, op) in ops {
542 let clamped_op_offset = op_offset.min(line_content_len);
543 if clamped_op_offset > syntect_offset {
544 if let Some(category) = self.scope_stack_to_category(current_scopes) {
545 on_span(
546 current_offset + syntect_offset,
547 current_offset + clamped_op_offset,
548 category,
549 );
550 }
551 }
552 syntect_offset = clamped_op_offset;
553 #[allow(clippy::let_underscore_must_use)]
554 let _ = current_scopes.apply(&op);
555 }
556
557 if syntect_offset < line_content_len {
558 if let Some(category) = self.scope_stack_to_category(current_scopes) {
559 on_span(
560 current_offset + syntect_offset,
561 current_offset + line_content_len,
562 category,
563 );
564 }
565 }
566 true
567 }
568
569 #[cfg(test)]
577 pub fn cache_commit_for_test(&self) -> (usize, bool) {
578 match &self.cache {
579 Some(c) => (c.range.end, c.tail_state.is_some()),
580 None => (0, false),
581 }
582 }
583
    /// Returns the highlight spans that intersect `viewport_start..viewport_end`.
    ///
    /// Buffers no longer than `MAX_PARSE_BYTES` are parsed in full; larger
    /// buffers are parsed only for the viewport widened by `context_bytes` on
    /// each side. The cheapest applicable strategy is chosen, in this order:
    ///
    /// 1. serve directly from the cache;
    /// 2. extend the cache forward from its saved tail state;
    /// 3. re-parse only around the pending edit (`try_partial_update`);
    /// 4. fall back to `full_parse`.
    pub fn highlight_viewport(
        &mut self,
        buffer: &Buffer,
        viewport_start: usize,
        viewport_end: usize,
        theme: &Theme,
        context_bytes: usize,
    ) -> Vec<HighlightSpan> {
        let buf_len = buffer.len();
        let (desired_parse_start, parse_end) = if buf_len <= MAX_PARSE_BYTES {
            (0, buf_len)
        } else {
            let s = viewport_start.saturating_sub(context_bytes);
            let e = (viewport_end + context_bytes).min(buf_len);
            (s, e)
        };

        // Take the pending edit position; each path below decides whether to
        // put it back.
        let dirty = self.dirty_from.take();
        // Despite the name this only checks that the cache begins at or before
        // the desired start and reaches at least that far; coverage of the end
        // is checked separately below.
        let cache_covers_viewport = self.cache.as_ref().is_some_and(|c| {
            c.range.start <= desired_parse_start && c.range.end >= desired_parse_start
        });
        let exact_cache_hit = cache_covers_viewport
            && dirty.is_none()
            && self.last_buffer_len == buffer.len()
            && self
                .cache
                .as_ref()
                .is_some_and(|c| c.range.end >= parse_end);

        // 1. Nothing changed and the cache spans the whole requested range.
        if exact_cache_hit {
            self.stats.cache_hits += 1;
            return self.filter_cached_spans(viewport_start, viewport_end, theme);
        }

        // 2. Nothing changed, the cache stops short of `parse_end`, and the
        //    parser state at its end is still available.
        if dirty.is_none()
            && cache_covers_viewport
            && self.last_buffer_len == buffer.len()
            && self
                .cache
                .as_ref()
                .is_some_and(|c| c.range.end < parse_end && c.tail_state.is_some())
        {
            return self.extend_cache_forward(
                buffer,
                parse_end,
                viewport_start,
                viewport_end,
                theme,
            );
        }

        // 3. There is a pending edit and a cache to patch.
        if cache_covers_viewport && dirty.is_some() {
            if let Some(dirty_pos) = dirty {
                if dirty_pos < parse_end {
                    if let Some(result) = self.try_partial_update(
                        buffer,
                        dirty_pos,
                        desired_parse_start,
                        parse_end,
                        viewport_start,
                        viewport_end,
                        theme,
                    ) {
                        return result;
                    }
                    // Partial update declined: fall through to a full parse.
                } else {
                    // The edit lies at or beyond the requested range: keep it
                    // pending and answer from the cache.
                    self.dirty_from = Some(dirty_pos);
                    self.stats.cache_hits += 1;
                    return self.filter_cached_spans(viewport_start, viewport_end, theme);
                }
            }
        } else if let Some(d) = dirty {
            // Restored here, but `full_parse` below resets `dirty_from` again.
            self.dirty_from = Some(d);
        }

        // 4. Fallback: parse the requested range and rebuild the cache.
        self.full_parse(
            buffer,
            desired_parse_start,
            parse_end,
            viewport_start,
            viewport_end,
            theme,
            context_bytes,
        )
    }
674
675 fn filter_cached_spans(
677 &self,
678 viewport_start: usize,
679 viewport_end: usize,
680 theme: &Theme,
681 ) -> Vec<HighlightSpan> {
682 let cache = self.cache.as_ref().unwrap();
683 cache
684 .spans
685 .iter()
686 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
687 .map(|span| HighlightSpan {
688 range: span.range.clone(),
689 color: highlight_color(span.category, theme),
690 bg: highlight_bg(span.category, theme),
691 category: Some(span.category),
692 })
693 .collect()
694 }
695
696 #[allow(clippy::too_many_arguments)]
700 fn try_partial_update(
701 &mut self,
702 buffer: &Buffer,
703 dirty_pos: usize,
704 desired_parse_start: usize,
705 parse_end: usize,
706 viewport_start: usize,
707 viewport_end: usize,
708 theme: &Theme,
709 ) -> Option<Vec<HighlightSpan>> {
710 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
711
712 let (actual_start, mut state, mut current_scopes) = {
714 let search_start = dirty_pos.saturating_sub(MAX_PARSE_BYTES);
715 let markers = self.checkpoint_markers.query_range(search_start, dirty_pos);
716 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
717 if let Some((id, cp_pos, _)) = nearest {
718 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
719 (cp_pos, s.clone(), sc.clone())
720 } else {
721 return None; }
723 } else if parse_end <= MAX_PARSE_BYTES {
724 (
725 0,
726 syntect::parsing::ParseState::new(syntax),
727 syntect::parsing::ScopeStack::new(),
728 )
729 } else {
730 return None; }
732 };
733
734 let mut markers_ahead: Vec<(MarkerId, usize)> = self
736 .checkpoint_markers
737 .query_range(dirty_pos, parse_end)
738 .into_iter()
739 .map(|(id, start, _)| (id, start))
740 .collect();
741 markers_ahead.sort_by_key(|(_, pos)| *pos);
742 let mut marker_idx = 0;
743
744 let content_end = parse_end.min(buffer.len());
746 if actual_start >= content_end {
747 return None;
748 }
749 let content = buffer.slice_bytes(actual_start..content_end);
750 let content_str = match std::str::from_utf8(&content) {
751 Ok(s) => s,
752 Err(_) => return None,
753 };
754
755 let mut new_spans = Vec::new();
756 let content_bytes = content_str.as_bytes();
757 let mut pos = 0;
758 let mut current_offset = actual_start;
759 let mut converged_at: Option<usize> = None;
760 let mut budget_hit_at: Option<usize> = None;
761 let mut bytes_since_checkpoint: usize = 0;
762
763 while pos < content_bytes.len() {
764 if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
766 self.maybe_create_checkpoint(current_offset, &state, ¤t_scopes);
767 bytes_since_checkpoint = 0;
768 }
769
770 let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
771 let collect_spans =
773 current_offset + line_byte_len > desired_parse_start.max(actual_start);
774 if let Some(prepared) = prepared {
775 let _ = self.parse_line_into_spans(
776 &mut state,
777 &mut current_scopes,
778 &prepared,
779 current_offset,
780 |byte_start, byte_end, category| {
781 if !collect_spans {
782 return;
783 }
784 let clamped_start = byte_start.max(actual_start);
785 if clamped_start < byte_end {
786 new_spans.push(CachedSpan {
787 range: clamped_start..byte_end,
788 category,
789 });
790 }
791 },
792 );
793 }
794
795 pos = line_end;
796 current_offset += line_byte_len;
797 bytes_since_checkpoint += line_byte_len;
798
799 while marker_idx < markers_ahead.len() && markers_ahead[marker_idx].1 <= current_offset
801 {
802 let (marker_id, _) = markers_ahead[marker_idx];
803 marker_idx += 1;
804 if let Some(stored) = self.checkpoint_states.get(&marker_id) {
805 if *stored == (state.clone(), current_scopes.clone()) {
806 self.stats.convergences += 1;
807 converged_at = Some(current_offset);
808 break;
809 }
810 }
811 self.stats.checkpoints_updated += 1;
812 self.checkpoint_states
813 .insert(marker_id, (state.clone(), current_scopes.clone()));
814 }
815
816 if converged_at.is_some() {
817 break;
818 }
819
820 if current_offset.saturating_sub(dirty_pos) >= CONVERGENCE_BUDGET {
824 budget_hit_at = Some(current_offset);
825 break;
826 }
827 }
828
829 self.stats.bytes_parsed += current_offset.saturating_sub(actual_start);
830
831 let (splice_end, dirty_after) = if let Some(c) = converged_at {
834 (c, None)
835 } else if let Some(b) = budget_hit_at {
836 (b, Some(b))
837 } else {
838 (current_offset, None)
839 };
840
841 self.stats.cache_misses += 1; Self::merge_adjacent_spans(&mut new_spans);
844
845 if let Some(cache) = &mut self.cache {
846 let splice_start = actual_start;
847 cache
848 .spans
849 .retain(|span| span.range.end <= splice_start || span.range.start >= splice_end);
850 cache.spans.extend(new_spans);
851 cache.spans.sort_by_key(|s| s.range.start);
852 Self::merge_adjacent_spans(&mut cache.spans);
853 if splice_end > cache.range.end {
854 cache.range.end = splice_end;
855 }
856 cache.tail_state = None;
857 }
858
859 self.last_buffer_len = buffer.len();
860 self.dirty_from = dirty_after;
861
862 Some(self.filter_cached_spans(viewport_start, viewport_end, theme))
863 }
864
865 fn extend_cache_forward(
869 &mut self,
870 buffer: &Buffer,
871 parse_end: usize,
872 viewport_start: usize,
873 viewport_end: usize,
874 theme: &Theme,
875 ) -> Vec<HighlightSpan> {
876 self.stats.cache_misses += 1;
877 let buf_len = buffer.len();
878 let parse_end = parse_end.min(buf_len);
879
880 let (extension_start, mut state, mut current_scopes) = {
881 let cache = self
882 .cache
883 .as_ref()
884 .expect("extend_cache_forward: cache must exist");
885 let (s, sc) = cache
886 .tail_state
887 .as_ref()
888 .expect("extend_cache_forward: tail_state must exist")
889 .clone();
890 (cache.range.end, s, sc)
891 };
892
893 if parse_end <= extension_start {
894 return self.filter_cached_spans(viewport_start, viewport_end, theme);
895 }
896
897 let content = buffer.slice_bytes(extension_start..parse_end);
898 let content_str = match std::str::from_utf8(&content) {
899 Ok(s) => s,
900 Err(_) => return self.filter_cached_spans(viewport_start, viewport_end, theme),
901 };
902
903 let mut new_spans = Vec::new();
904 let content_bytes = content_str.as_bytes();
905 let mut pos = 0;
906 let mut current_offset = extension_start;
907 let mut bytes_since_checkpoint: usize = 0;
908 let mut safe_offset = extension_start;
918 let mut safe_state = state.clone();
919 let mut safe_scopes = current_scopes.clone();
920
921 while pos < content_bytes.len() {
922 if bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
923 self.maybe_create_checkpoint(current_offset, &state, ¤t_scopes);
924 bytes_since_checkpoint = 0;
925 }
926
927 let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
928 let mut newline_terminated = false;
929 if let Some(prepared) = prepared {
930 let parse_ok = self.parse_line_into_spans(
931 &mut state,
932 &mut current_scopes,
933 &prepared,
934 current_offset,
935 |byte_start, byte_end, category| {
936 new_spans.push(CachedSpan {
937 range: byte_start..byte_end,
938 category,
939 });
940 },
941 );
942 if parse_ok {
943 newline_terminated = prepared.ends_with_newline;
944 }
945 }
946
947 pos = line_end;
948 current_offset += line_byte_len;
949 bytes_since_checkpoint += line_byte_len;
950
951 if newline_terminated {
952 safe_offset = current_offset;
953 safe_state = state.clone();
954 safe_scopes = current_scopes.clone();
955 }
956 }
957
958 self.stats.bytes_parsed += parse_end - extension_start;
959
960 Self::merge_adjacent_spans(&mut new_spans);
961
962 let (safe_spans, unsafe_spans): (Vec<_>, Vec<_>) = new_spans
968 .into_iter()
969 .partition(|s| s.range.end <= safe_offset);
970
971 let cache = self
972 .cache
973 .as_mut()
974 .expect("extend_cache_forward: cache must still exist");
975 cache.spans.extend(safe_spans);
976 Self::merge_adjacent_spans(&mut cache.spans);
977 cache.range.end = safe_offset;
978 cache.tail_state = Some((safe_state, safe_scopes));
979 self.last_buffer_len = buf_len;
980
981 let mut result = self.filter_cached_spans(viewport_start, viewport_end, theme);
982 result.extend(
983 unsafe_spans
984 .into_iter()
985 .filter(|s| s.range.start < viewport_end && s.range.end > viewport_start)
986 .map(|s| HighlightSpan {
987 range: s.range,
988 color: highlight_color(s.category, theme),
989 bg: highlight_bg(s.category, theme),
990 category: Some(s.category),
991 }),
992 );
993 result
994 }
995
996 #[allow(clippy::too_many_arguments)]
999 fn full_parse(
1000 &mut self,
1001 buffer: &Buffer,
1002 desired_parse_start: usize,
1003 parse_end: usize,
1004 viewport_start: usize,
1005 viewport_end: usize,
1006 theme: &Theme,
1007 _context_bytes: usize,
1008 ) -> Vec<HighlightSpan> {
1009 self.stats.cache_misses += 1;
1010 self.dirty_from = None; if parse_end <= desired_parse_start {
1013 return Vec::new();
1014 }
1015
1016 let syntax = &self.syntax_set.syntaxes()[self.syntax_index];
1017 let (actual_start, mut state, mut current_scopes, create_checkpoints) =
1018 self.find_parse_resume_point(desired_parse_start, parse_end, syntax);
1019
1020 let content = buffer.slice_bytes(actual_start..parse_end);
1021 let content_str = match std::str::from_utf8(&content) {
1022 Ok(s) => s,
1023 Err(_) => return Vec::new(),
1024 };
1025
1026 let mut spans = Vec::new();
1027 let content_bytes = content_str.as_bytes();
1028 let mut pos = 0;
1029 let mut current_offset = actual_start;
1030 let mut bytes_since_checkpoint: usize = 0;
1031 let mut safe_offset = actual_start;
1037 let mut safe_state = state.clone();
1038 let mut safe_scopes = current_scopes.clone();
1039
1040 while pos < content_bytes.len() {
1041 if create_checkpoints && bytes_since_checkpoint >= CHECKPOINT_INTERVAL {
1042 self.maybe_create_checkpoint(current_offset, &state, ¤t_scopes);
1043 bytes_since_checkpoint = 0;
1044 }
1045
1046 let (line_end, line_byte_len, prepared) = prepare_line_at(content_bytes, pos);
1047 let collect_spans = current_offset + line_byte_len > desired_parse_start;
1051 let mut newline_terminated = false;
1052 if let Some(prepared) = prepared {
1053 let parse_ok = self.parse_line_into_spans(
1054 &mut state,
1055 &mut current_scopes,
1056 &prepared,
1057 current_offset,
1058 |byte_start, byte_end, category| {
1059 if !collect_spans {
1060 return;
1061 }
1062 let clamped_start = byte_start.max(desired_parse_start);
1063 if clamped_start < byte_end {
1064 spans.push(CachedSpan {
1065 range: clamped_start..byte_end,
1066 category,
1067 });
1068 }
1069 },
1070 );
1071 if parse_ok {
1072 newline_terminated = prepared.ends_with_newline;
1073 }
1074 }
1075
1076 pos = line_end;
1077 current_offset += line_byte_len;
1078 bytes_since_checkpoint += line_byte_len;
1079
1080 if newline_terminated {
1081 safe_offset = current_offset;
1082 safe_state = state.clone();
1083 safe_scopes = current_scopes.clone();
1084 }
1085
1086 let markers_here: Vec<(MarkerId, usize)> = self
1091 .checkpoint_markers
1092 .query_range(current_offset.saturating_sub(line_byte_len), current_offset)
1093 .into_iter()
1094 .map(|(id, start, _)| (id, start))
1095 .collect();
1096 for (marker_id, _) in markers_here {
1097 self.checkpoint_states
1098 .insert(marker_id, (state.clone(), current_scopes.clone()));
1099 }
1100 }
1101
1102 self.stats.bytes_parsed += parse_end.saturating_sub(actual_start);
1103
1104 Self::merge_adjacent_spans(&mut spans);
1105
1106 let cache_range_end = safe_offset.max(desired_parse_start);
1112 let cached_spans: Vec<CachedSpan> = spans
1113 .iter()
1114 .filter(|s| s.range.end <= cache_range_end)
1115 .cloned()
1116 .collect();
1117
1118 self.cache = Some(TextMateCache {
1119 range: desired_parse_start..cache_range_end,
1120 spans: cached_spans,
1121 tail_state: Some((safe_state, safe_scopes)),
1122 });
1123 self.last_buffer_len = buffer.len();
1124
1125 spans
1126 .into_iter()
1127 .filter(|span| span.range.start < viewport_end && span.range.end > viewport_start)
1128 .map(|span| {
1129 let cat = span.category;
1130 HighlightSpan {
1131 range: span.range,
1132 color: highlight_color(cat, theme),
1133 bg: highlight_bg(cat, theme),
1134 category: Some(cat),
1135 }
1136 })
1137 .collect()
1138 }
1139
1140 fn find_parse_resume_point(
1142 &self,
1143 desired_start: usize,
1144 parse_end: usize,
1145 syntax: &syntect::parsing::SyntaxReference,
1146 ) -> (
1147 usize,
1148 syntect::parsing::ParseState,
1149 syntect::parsing::ScopeStack,
1150 bool,
1151 ) {
1152 use syntect::parsing::{ParseState, ScopeStack};
1153
1154 let search_start = desired_start.saturating_sub(MAX_PARSE_BYTES);
1158 let markers = self
1159 .checkpoint_markers
1160 .query_range(search_start, desired_start + 1);
1161 let nearest = markers.into_iter().max_by_key(|(_, start, _)| *start);
1162
1163 if let Some((id, cp_pos, _)) = nearest {
1164 if let Some((s, sc)) = self.checkpoint_states.get(&id) {
1165 return (cp_pos, s.clone(), sc.clone(), true);
1166 }
1167 }
1168
1169 if parse_end <= MAX_PARSE_BYTES {
1170 (0, ParseState::new(syntax), ScopeStack::new(), true)
1172 } else {
1173 (
1176 desired_start,
1177 ParseState::new(syntax),
1178 ScopeStack::new(),
1179 true,
1180 )
1181 }
1182 }
1183
1184 fn scope_stack_to_category(
1188 &mut self,
1189 scopes: &syntect::parsing::ScopeStack,
1190 ) -> Option<HighlightCategory> {
1191 for scope in scopes.as_slice().iter().rev() {
1192 let cat = match self.scope_category_cache.get(scope) {
1193 Some(c) => *c,
1194 None => {
1195 let computed = scope_to_category(&scope.build_string());
1196 self.scope_category_cache.insert(*scope, computed);
1197 computed
1198 }
1199 };
1200 if let Some(c) = cat {
1201 return Some(c);
1202 }
1203 }
1204 None
1205 }
1206
1207 fn merge_adjacent_spans(spans: &mut Vec<CachedSpan>) {
1209 if spans.len() < 2 {
1210 return;
1211 }
1212
1213 let mut write_idx = 0;
1214 for read_idx in 1..spans.len() {
1215 if spans[write_idx].category == spans[read_idx].category
1216 && spans[write_idx].range.end == spans[read_idx].range.start
1217 {
1218 spans[write_idx].range.end = spans[read_idx].range.end;
1219 } else {
1220 write_idx += 1;
1221 if write_idx != read_idx {
1222 spans[write_idx] = spans[read_idx].clone();
1223 }
1224 }
1225 }
1226 spans.truncate(write_idx + 1);
1227 }
1228
    /// Currently a no-op: `_edit_range` is ignored and no state is changed.
    pub fn invalidate_range(&mut self, _edit_range: Range<usize>) {
    }
1239
1240 pub fn invalidate_all(&mut self) {
1242 self.cache = None;
1243 let ids: Vec<MarkerId> = self.checkpoint_states.keys().copied().collect();
1244 for id in ids {
1245 self.checkpoint_markers.delete(id);
1246 }
1247 self.checkpoint_states.clear();
1248 self.dirty_from = None;
1249 }
1250
1251 pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1256 let cache = self.cache.as_ref()?;
1257 cache
1258 .spans
1259 .iter()
1260 .find(|span| span.range.start <= position && position < span.range.end)
1261 .map(|span| span.category)
1262 }
1263
1264 pub fn syntax_name(&self) -> &str {
1266 &self.syntax_set.syntaxes()[self.syntax_index].name
1267 }
1268}
1269
1270impl HighlightEngine {
1271 pub fn from_entry(
1278 entry: &crate::primitives::grammar::GrammarEntry,
1279 registry: &GrammarRegistry,
1280 ) -> Self {
1281 let syntax_set = registry.syntax_set_arc();
1282 if let Some(index) = entry.engines.syntect {
1283 return Self::TextMate(Box::new(TextMateEngine::with_language(
1284 syntax_set,
1285 index,
1286 entry.engines.tree_sitter,
1287 )));
1288 }
1289 if let Some(lang) = entry.engines.tree_sitter {
1290 if let Ok(highlighter) = Highlighter::new(lang) {
1291 return Self::TreeSitter(Box::new(highlighter));
1292 }
1293 }
1294 Self::None
1295 }
1296
1297 pub fn for_file(path: &Path, first_line: Option<&str>, registry: &GrammarRegistry) -> Self {
1305 if let Some(entry) = registry.find_by_path(path, first_line) {
1306 return Self::from_entry(entry, registry);
1307 }
1308 Self::None
1309 }
1310
1311 pub fn for_syntax_name(name: &str, registry: &GrammarRegistry) -> Self {
1317 if let Some(entry) = registry.find_by_name(name) {
1318 return Self::from_entry(entry, registry);
1319 }
1320 Self::None
1321 }
1322
1323 pub fn highlight_viewport(
1328 &mut self,
1329 buffer: &Buffer,
1330 viewport_start: usize,
1331 viewport_end: usize,
1332 theme: &Theme,
1333 context_bytes: usize,
1334 ) -> Vec<HighlightSpan> {
1335 match self {
1336 Self::TreeSitter(h) => {
1337 h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1338 }
1339 Self::TextMate(h) => {
1340 h.highlight_viewport(buffer, viewport_start, viewport_end, theme, context_bytes)
1341 }
1342 Self::None => Vec::new(),
1343 }
1344 }
1345
1346 pub fn notify_insert(&mut self, position: usize, length: usize) {
1348 if let Self::TextMate(h) = self {
1349 h.notify_insert(position, length);
1350 }
1351 }
1352
1353 pub fn notify_delete(&mut self, position: usize, length: usize) {
1355 if let Self::TextMate(h) = self {
1356 h.notify_delete(position, length);
1357 }
1358 }
1359
1360 pub fn invalidate_range(&mut self, edit_range: Range<usize>) {
1362 match self {
1363 Self::TreeSitter(h) => h.invalidate_range(edit_range),
1364 Self::TextMate(h) => h.invalidate_range(edit_range),
1365 Self::None => {}
1366 }
1367 }
1368
1369 pub fn invalidate_all(&mut self) {
1371 match self {
1372 Self::TreeSitter(h) => h.invalidate_all(),
1373 Self::TextMate(h) => h.invalidate_all(),
1374 Self::None => {}
1375 }
1376 }
1377
1378 pub fn notify_edits(&mut self, edits: &[(usize, usize, usize)]) {
1390 for &(pos, del_len, ins_len) in edits {
1391 if del_len > 0 {
1392 self.notify_delete(pos, del_len);
1393 }
1394 if ins_len > 0 {
1395 self.notify_insert(pos, ins_len);
1396 }
1397 let edit_end = pos + del_len.max(ins_len);
1398 self.invalidate_range(pos..edit_end);
1399 }
1400 }
1401
1402 pub fn has_highlighting(&self) -> bool {
1404 !matches!(self, Self::None)
1405 }
1406
1407 pub fn backend_name(&self) -> &str {
1409 match self {
1410 Self::TreeSitter(_) => "tree-sitter",
1411 Self::TextMate(_) => "textmate",
1412 Self::None => "none",
1413 }
1414 }
1415
1416 pub fn highlight_stats(&self) -> Option<&HighlightStats> {
1418 if let Self::TextMate(h) = self {
1419 Some(h.stats())
1420 } else {
1421 None
1422 }
1423 }
1424
1425 pub fn reset_highlight_stats(&mut self) {
1427 if let Self::TextMate(h) = self {
1428 h.reset_stats();
1429 }
1430 }
1431
1432 pub fn syntax_name(&self) -> Option<&str> {
1434 match self {
1435 Self::TreeSitter(_) => None, Self::TextMate(h) => Some(h.syntax_name()),
1437 Self::None => None,
1438 }
1439 }
1440
1441 pub fn category_at_position(&self, position: usize) -> Option<HighlightCategory> {
1446 match self {
1447 Self::TreeSitter(h) => h.category_at_position(position),
1448 Self::TextMate(h) => h.category_at_position(position),
1449 Self::None => None,
1450 }
1451 }
1452
1453 pub fn language(&self) -> Option<&Language> {
1456 match self {
1457 Self::TreeSitter(h) => Some(h.language()),
1458 Self::TextMate(h) => h.language(),
1459 Self::None => None,
1460 }
1461 }
1462}
1463
1464pub fn highlight_string(
1470 code: &str,
1471 lang_hint: &str,
1472 registry: &GrammarRegistry,
1473 theme: &Theme,
1474) -> Vec<HighlightSpan> {
1475 use syntect::parsing::{ParseState, ScopeStack};
1476
1477 let syntax = match registry.syntax_set().find_syntax_by_token(lang_hint) {
1479 Some(s) => s,
1480 None => return Vec::new(),
1481 };
1482
1483 let syntax_set = registry.syntax_set();
1484 let mut state = ParseState::new(syntax);
1485 let mut spans = Vec::new();
1486 let mut current_scopes = ScopeStack::new();
1487 let mut current_offset = 0;
1488
1489 for line in code.split_inclusive('\n') {
1491 let line_start = current_offset;
1492 let line_len = line.len();
1493
1494 let line_content = line.trim_end_matches(&['\r', '\n'][..]);
1496 let line_for_syntect = if line.ends_with('\n') {
1497 format!("{}\n", line_content)
1498 } else {
1499 line_content.to_string()
1500 };
1501
1502 let ops = match state.parse_line(&line_for_syntect, syntax_set) {
1503 Ok(ops) => ops,
1504 Err(_) => {
1505 current_offset += line_len;
1506 continue;
1507 }
1508 };
1509
1510 let mut syntect_offset = 0;
1511 let line_content_len = line_content.len();
1512
1513 for (op_offset, op) in ops {
1514 let clamped_op_offset = op_offset.min(line_content_len);
1515 if clamped_op_offset > syntect_offset {
1516 if let Some(category) = scope_stack_to_category(¤t_scopes) {
1517 let byte_start = line_start + syntect_offset;
1518 let byte_end = line_start + clamped_op_offset;
1519 if byte_start < byte_end {
1520 spans.push(HighlightSpan {
1521 range: byte_start..byte_end,
1522 color: highlight_color(category, theme),
1523 bg: highlight_bg(category, theme),
1524 category: Some(category),
1525 });
1526 }
1527 }
1528 }
1529 syntect_offset = clamped_op_offset;
1530 #[allow(clippy::let_underscore_must_use)]
1532 let _ = current_scopes.apply(&op);
1533 }
1534
1535 if syntect_offset < line_content_len {
1537 if let Some(category) = scope_stack_to_category(¤t_scopes) {
1538 let byte_start = line_start + syntect_offset;
1539 let byte_end = line_start + line_content_len;
1540 if byte_start < byte_end {
1541 spans.push(HighlightSpan {
1542 range: byte_start..byte_end,
1543 color: highlight_color(category, theme),
1544 bg: highlight_bg(category, theme),
1545 category: Some(category),
1546 });
1547 }
1548 }
1549 }
1550
1551 current_offset += line_len;
1552 }
1553
1554 merge_adjacent_highlight_spans(&mut spans);
1556
1557 spans
1558}
1559
1560fn scope_stack_to_category(scopes: &syntect::parsing::ScopeStack) -> Option<HighlightCategory> {
1562 for scope in scopes.as_slice().iter().rev() {
1563 let scope_str = scope.build_string();
1564 if let Some(cat) = scope_to_category(&scope_str) {
1565 return Some(cat);
1566 }
1567 }
1568 None
1569}
1570
1571fn merge_adjacent_highlight_spans(spans: &mut Vec<HighlightSpan>) {
1573 if spans.len() < 2 {
1574 return;
1575 }
1576
1577 let mut write_idx = 0;
1578 for read_idx in 1..spans.len() {
1579 if spans[write_idx].color == spans[read_idx].color
1580 && spans[write_idx].range.end == spans[read_idx].range.start
1581 {
1582 spans[write_idx].range.end = spans[read_idx].range.end;
1583 } else {
1584 write_idx += 1;
1585 if write_idx != read_idx {
1586 spans[write_idx] = spans[read_idx].clone();
1587 }
1588 }
1589 }
1590 spans.truncate(write_idx + 1);
1591}
1592
1593#[cfg(test)]
1594mod tests {
1595 use crate::model::filesystem::StdFileSystem;
1596 use std::sync::Arc;
1597
1598 fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
1599 Arc::new(StdFileSystem)
1600 }
1601 use super::*;
1602 use crate::view::theme;
1603
1604 #[test]
1605 fn test_highlight_engine_default() {
1606 let engine = HighlightEngine::default();
1607 assert!(!engine.has_highlighting());
1608 assert_eq!(engine.backend_name(), "none");
1609 }
1610
1611 #[test]
1612 fn test_textmate_backend_selection() {
1613 let registry =
1614 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1615
1616 let engine = HighlightEngine::for_file(Path::new("test.rs"), None, ®istry);
1618 assert_eq!(engine.backend_name(), "textmate");
1619 assert!(engine.language().is_some());
1621
1622 let engine = HighlightEngine::for_file(Path::new("test.py"), None, ®istry);
1623 assert_eq!(engine.backend_name(), "textmate");
1624 assert!(engine.language().is_some());
1625
1626 let engine = HighlightEngine::for_file(Path::new("test.js"), None, ®istry);
1630 assert_eq!(engine.backend_name(), "tree-sitter");
1631 assert!(engine.language().is_some());
1632
1633 let engine = HighlightEngine::for_file(Path::new("test.ts"), None, ®istry);
1635 assert_eq!(engine.backend_name(), "tree-sitter");
1636 assert!(engine.language().is_some());
1637
1638 let engine = HighlightEngine::for_file(Path::new("test.tsx"), None, ®istry);
1639 assert_eq!(engine.backend_name(), "tree-sitter");
1640 assert!(engine.language().is_some());
1641 }
1642
1643 #[test]
1644 fn test_tree_sitter_direct() {
1645 let highlighter = Highlighter::new(Language::TypeScript);
1649 assert!(highlighter.is_ok());
1650 }
1651
1652 #[test]
1653 fn test_unknown_extension() {
1654 let registry =
1655 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1656
1657 let engine = HighlightEngine::for_file(Path::new("test.unknown_xyz_123"), None, ®istry);
1659 let _ = engine.backend_name();
1662 }
1663
1664 #[test]
1665 fn test_highlight_viewport_empty_buffer_no_panic() {
1666 let registry =
1675 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1676
1677 let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, ®istry);
1678
1679 let buffer = Buffer::from_str("", 0, test_fs());
1681 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1682
1683 if let HighlightEngine::TextMate(ref mut tm) = engine {
1687 let spans = tm.highlight_viewport(&buffer, 100, 200, &theme, 10);
1689 assert!(spans.is_empty());
1690 }
1691 }
1692
1693 #[test]
1697 fn test_textmate_engine_crlf_byte_offsets() {
1698 let registry =
1699 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1700
1701 let mut engine = HighlightEngine::for_file(Path::new("test.java"), None, ®istry);
1702
1703 let content = b"public\r\npublic\r\npublic\r\n";
1709 let buffer = Buffer::from_bytes(content.to_vec(), test_fs());
1710 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
1711
1712 if let HighlightEngine::TextMate(ref mut tm) = engine {
1713 let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1715
1716 eprintln!(
1723 "Spans: {:?}",
1724 spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1725 );
1726
1727 let has_span_at = |start: usize, end: usize| -> bool {
1729 spans
1730 .iter()
1731 .any(|s| s.range.start <= start && s.range.end >= end)
1732 };
1733
1734 assert!(
1736 has_span_at(0, 6),
1737 "Should have span covering bytes 0-6 (line 1 'public'). Spans: {:?}",
1738 spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1739 );
1740
1741 assert!(
1744 has_span_at(8, 14),
1745 "Should have span covering bytes 8-14 (line 2 'public'). \
1746 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1747 spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1748 );
1749
1750 assert!(
1753 has_span_at(16, 22),
1754 "Should have span covering bytes 16-22 (line 3 'public'). \
1755 If this fails, CRLF offset drift is occurring. Spans: {:?}",
1756 spans.iter().map(|s| &s.range).collect::<Vec<_>>()
1757 );
1758 } else {
1759 panic!("Expected TextMate engine for .java file");
1760 }
1761 }
1762
1763 #[test]
1777 fn test_partial_trailing_line_not_committed_to_cache() {
1778 let registry =
1779 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1780 let mut engine = HighlightEngine::for_file(Path::new("commit.diff"), None, ®istry);
1781 let theme = Theme::load_builtin(theme::THEME_DARK).unwrap();
1782
1783 let content = "+complete\n+partial";
1785 let buffer = Buffer::from_str(content, 0, test_fs());
1786
1787 if let HighlightEngine::TextMate(ref mut tm) = engine {
1788 let _ = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1789 let (cache_end, has_tail) = tm.cache_commit_for_test();
1790 assert_eq!(
1791 cache_end,
1792 "+complete\n".len(),
1793 "cache should commit at the last newline, not into the partial \
1794 trailing line — committing past the newline causes streaming \
1795 forward-extension to parse the line's continuation in the wrong \
1796 grammar context, losing the diff bg."
1797 );
1798 assert!(has_tail, "tail state should be saved at the safe boundary");
1799 }
1800 }
1801
1802 #[test]
1807 fn test_diff_inserted_line_is_fully_covered() {
1808 let registry =
1809 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1810 let mut engine = HighlightEngine::for_file(Path::new("commit.diff"), None, ®istry);
1811 let theme = Theme::load_builtin(theme::THEME_DARK).unwrap();
1812
1813 let content =
1814 "diff --git a/file.ts b/file.ts\n\
1815 index aaa..bbb 100644\n\
1816 --- a/file.ts\n\
1817 +++ b/file.ts\n\
1818 @@ -1,3 +1,5 @@\n\
1819 +${seen[g.subtree] > 1 ? `**Seen ${seen[g.subtree]}× — likely cross-subtree type seam.**` : \"\"}\n\
1820 + const k = `${b.fn}::${(b.what || \"\").slice(0, 80)}`;\n";
1821 let buffer = Buffer::from_str(content, 0, test_fs());
1822
1823 if let HighlightEngine::TextMate(ref mut tm) = engine {
1824 let spans = tm.highlight_viewport(&buffer, 0, content.len(), &theme, 0);
1825
1826 let bytes = content.as_bytes();
1827 let mut line_start = 0;
1828 while line_start < bytes.len() {
1829 let mut line_end = line_start;
1830 while line_end < bytes.len() && bytes[line_end] != b'\n' {
1831 line_end += 1;
1832 }
1833 if bytes[line_start] == b'+' && !content[line_start..line_end].starts_with("+++") {
1834 for byte_pos in line_start..line_end {
1835 let span = spans
1836 .iter()
1837 .find(|s| s.range.start <= byte_pos && s.range.end > byte_pos);
1838 let bg = span.and_then(|s| s.bg);
1839 assert_eq!(
1840 bg,
1841 Some(theme.diff_add_bg),
1842 "byte {} (`{}`) of `+` line starting at {} should carry diff_add_bg; \
1843 got span={:?}",
1844 byte_pos,
1845 content[byte_pos..byte_pos + 1].escape_debug(),
1846 line_start,
1847 span,
1848 );
1849 }
1850 }
1851 line_start = line_end + 1;
1852 }
1853 } else {
1854 panic!("Expected TextMate engine for .diff file");
1855 }
1856 }
1857
1858 #[test]
1859 fn test_git_rebase_todo_highlighting() {
1860 let registry =
1861 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1862
1863 let engine = HighlightEngine::for_file(Path::new("git-rebase-todo"), None, ®istry);
1865 assert_eq!(engine.backend_name(), "textmate");
1866 assert!(engine.has_highlighting());
1867 }
1868
1869 #[test]
1870 fn test_git_commit_message_highlighting() {
1871 let registry =
1872 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1873
1874 let engine = HighlightEngine::for_file(Path::new("COMMIT_EDITMSG"), None, ®istry);
1876 assert_eq!(engine.backend_name(), "textmate");
1877 assert!(engine.has_highlighting());
1878
1879 let engine = HighlightEngine::for_file(Path::new("MERGE_MSG"), None, ®istry);
1881 assert_eq!(engine.backend_name(), "textmate");
1882 assert!(engine.has_highlighting());
1883 }
1884
1885 #[test]
1886 fn test_gitignore_highlighting() {
1887 let registry =
1888 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1889
1890 let engine = HighlightEngine::for_file(Path::new(".gitignore"), None, ®istry);
1892 assert_eq!(engine.backend_name(), "textmate");
1893 assert!(engine.has_highlighting());
1894
1895 let engine = HighlightEngine::for_file(Path::new(".dockerignore"), None, ®istry);
1897 assert_eq!(engine.backend_name(), "textmate");
1898 assert!(engine.has_highlighting());
1899 }
1900
1901 #[test]
1902 fn test_gitconfig_highlighting() {
1903 let registry =
1904 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1905
1906 let engine = HighlightEngine::for_file(Path::new(".gitconfig"), None, ®istry);
1908 assert_eq!(engine.backend_name(), "textmate");
1909 assert!(engine.has_highlighting());
1910
1911 let engine = HighlightEngine::for_file(Path::new(".gitmodules"), None, ®istry);
1913 assert_eq!(engine.backend_name(), "textmate");
1914 assert!(engine.has_highlighting());
1915 }
1916
1917 #[test]
1918 fn test_gitattributes_highlighting() {
1919 let registry =
1920 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
1921
1922 let engine = HighlightEngine::for_file(Path::new(".gitattributes"), None, ®istry);
1924 assert_eq!(engine.backend_name(), "textmate");
1925 assert!(engine.has_highlighting());
1926 }
1927
1928 #[test]
1929 fn test_comment_delimiter_uses_comment_color() {
1930 assert_eq!(
1932 scope_to_category("punctuation.definition.comment"),
1933 Some(HighlightCategory::Comment)
1934 );
1935 assert_eq!(
1936 scope_to_category("punctuation.definition.comment.python"),
1937 Some(HighlightCategory::Comment)
1938 );
1939 assert_eq!(
1940 scope_to_category("punctuation.definition.comment.begin"),
1941 Some(HighlightCategory::Comment)
1942 );
1943 }
1944
1945 #[test]
1946 fn test_variable_builtin_category() {
1947 assert_eq!(
1948 scope_to_category("variable.language.this"),
1949 Some(HighlightCategory::VariableBuiltin)
1950 );
1951 assert_eq!(
1952 scope_to_category("variable.language.super"),
1953 Some(HighlightCategory::VariableBuiltin)
1954 );
1955 }
1956
1957 #[test]
1958 fn test_string_delimiter_uses_string_color() {
1959 assert_eq!(
1961 scope_to_category("punctuation.definition.string.begin"),
1962 Some(HighlightCategory::String)
1963 );
1964 assert_eq!(
1965 scope_to_category("punctuation.definition.string.end"),
1966 Some(HighlightCategory::String)
1967 );
1968 }
1969
1970 #[test]
1971 fn test_punctuation_bracket() {
1972 assert_eq!(
1974 scope_to_category("punctuation.section"),
1975 Some(HighlightCategory::PunctuationBracket)
1976 );
1977 assert_eq!(
1978 scope_to_category("punctuation.section.block.begin.c"),
1979 Some(HighlightCategory::PunctuationBracket)
1980 );
1981 assert_eq!(
1982 scope_to_category("punctuation.bracket"),
1983 Some(HighlightCategory::PunctuationBracket)
1984 );
1985 assert_eq!(
1987 scope_to_category("punctuation.definition.array.begin.toml"),
1988 Some(HighlightCategory::PunctuationBracket)
1989 );
1990 assert_eq!(
1991 scope_to_category("punctuation.definition.block.code.typst"),
1992 Some(HighlightCategory::PunctuationBracket)
1993 );
1994 assert_eq!(
1995 scope_to_category("punctuation.definition.group.typst"),
1996 Some(HighlightCategory::PunctuationBracket)
1997 );
1998 assert_eq!(
1999 scope_to_category("punctuation.definition.inline-table.begin.toml"),
2000 Some(HighlightCategory::PunctuationBracket)
2001 );
2002 assert_eq!(
2003 scope_to_category("punctuation.definition.tag.end.svelte"),
2004 Some(HighlightCategory::PunctuationBracket)
2005 );
2006 }
2007
2008 #[test]
2009 fn test_punctuation_delimiter() {
2010 assert_eq!(
2011 scope_to_category("punctuation.separator"),
2012 Some(HighlightCategory::PunctuationDelimiter)
2013 );
2014 assert_eq!(
2015 scope_to_category("punctuation.terminator.statement.c"),
2016 Some(HighlightCategory::PunctuationDelimiter)
2017 );
2018 assert_eq!(
2019 scope_to_category("punctuation.accessor"),
2020 Some(HighlightCategory::PunctuationDelimiter)
2021 );
2022 }
2023
2024 #[test]
2025 fn test_storage_type_keyword() {
2026 assert_eq!(
2027 scope_to_category("storage.type"),
2028 Some(HighlightCategory::Keyword)
2029 );
2030 assert_eq!(
2031 scope_to_category("storage.type.class"),
2032 Some(HighlightCategory::Keyword)
2033 );
2034 assert_ne!(
2035 scope_to_category("storage.type"),
2036 Some(HighlightCategory::Type)
2037 );
2038 }
2039
2040 #[test]
2044 fn test_small_file_scroll_is_cache_hit() {
2045 let registry =
2046 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2047 let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, ®istry);
2048
2049 let mut content = String::new();
2050 for i in 0..200 {
2051 content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2052 }
2053 let buffer = Buffer::from_str(&content, 0, test_fs());
2054 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2055
2056 let HighlightEngine::TextMate(ref mut tm) = engine else {
2057 panic!("expected TextMate engine for .rs");
2058 };
2059
2060 let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2062 let stats_after_first = tm.stats().clone();
2063 assert_eq!(
2064 stats_after_first.cache_hits, 0,
2065 "first call cannot hit cache"
2066 );
2067 assert_eq!(
2068 stats_after_first.cache_misses, 1,
2069 "first call must be a miss"
2070 );
2071
2072 let mid = buffer.len() / 2;
2074 let near_end = buffer.len().saturating_sub(200);
2075 let probes = [(0, 200), (mid, mid + 200), (near_end, buffer.len())];
2076 for (vs, ve) in probes {
2077 let _ = tm.highlight_viewport(&buffer, vs, ve, &theme, 10_000);
2078 }
2079
2080 let stats_after_scroll = tm.stats().clone();
2081 assert_eq!(
2082 stats_after_scroll.cache_misses,
2083 1,
2084 "scrolling must not add cache misses (got extra: {})",
2085 stats_after_scroll.cache_misses - 1
2086 );
2087 assert_eq!(
2088 stats_after_scroll.cache_hits, 3,
2089 "all three scroll probes must hit the cache"
2090 );
2091 assert_eq!(
2092 stats_after_scroll.bytes_parsed, stats_after_first.bytes_parsed,
2093 "scrolling must not parse any new bytes"
2094 );
2095 }
2096
2097 #[test]
2101 fn test_small_file_edit_uses_partial_update() {
2102 let registry =
2103 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2104 let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, ®istry);
2105
2106 let mut content = String::new();
2107 for i in 0..200 {
2108 content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2109 }
2110 let buffer = Buffer::from_str(&content, 0, test_fs());
2111 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2112
2113 let HighlightEngine::TextMate(ref mut tm) = engine else {
2114 panic!("expected TextMate engine for .rs");
2115 };
2116
2117 let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2119 let bytes_before_edit = tm.stats().bytes_parsed;
2120 let buf_len = buffer.len();
2121 assert!(
2122 buf_len > 4000,
2123 "test needs a buffer larger than the partial-update region"
2124 );
2125
2126 let edit_pos = buf_len / 2;
2128 tm.notify_insert(edit_pos, 1);
2129 let _ = tm.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2134 let bytes_after_edit = tm.stats().bytes_parsed;
2135 let parsed = bytes_after_edit - bytes_before_edit;
2136
2137 assert!(
2138 parsed < buf_len,
2139 "edit must not trigger a whole-file reparse (parsed {parsed}, file {buf_len})"
2140 );
2141 }
2142
2143 #[test]
2149 fn test_bulk_edit_uses_partial_update() {
2150 let registry =
2151 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2152 let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, ®istry);
2153
2154 let mut content = String::new();
2155 for i in 0..200 {
2156 content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2157 }
2158 let buffer = Buffer::from_str(&content, 0, test_fs());
2159 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2160
2161 let _ = engine.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2163 let bytes_before_edit = match &engine {
2164 HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2165 _ => panic!("expected TextMate engine for .rs"),
2166 };
2167 let buf_len = buffer.len();
2168 assert!(
2169 buf_len > 4000,
2170 "test needs a buffer larger than the partial-update region"
2171 );
2172
2173 let edit_pos = buf_len / 2;
2177 let edits = vec![(edit_pos, 8usize, 1usize)];
2178 engine.notify_edits(&edits);
2179
2180 let _ = engine.highlight_viewport(&buffer, 0, 100, &theme, 10_000);
2181 let bytes_after_edit = match &engine {
2182 HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2183 _ => unreachable!(),
2184 };
2185 let parsed = bytes_after_edit - bytes_before_edit;
2186
2187 assert!(
2188 parsed < buf_len,
2189 "bulk edit must not trigger a whole-file reparse \
2190 (parsed {parsed}, file {buf_len})"
2191 );
2192 }
2193
2194 #[test]
2200 fn test_bulk_edit_outside_cache_keeps_textmate_partial_update() {
2201 let registry =
2202 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2203 let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, ®istry);
2204
2205 let mut content = String::new();
2206 for i in 0..400 {
2207 content.push_str(&format!("fn f_{i}() {{ let x = {i}; }}\n"));
2208 }
2209 let buffer = Buffer::from_str(&content, 0, test_fs());
2210 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2211
2212 let _ = engine.highlight_viewport(&buffer, 0, 200, &theme, 1_000);
2214 let bytes_before = match &engine {
2215 HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2216 _ => panic!("expected TextMate engine for .rs"),
2217 };
2218
2219 let far_pos = buffer.len() - 100;
2221 engine.notify_edits(&[(far_pos, 3, 1)]);
2222
2223 let _ = engine.highlight_viewport(&buffer, 0, 200, &theme, 1_000);
2226 let bytes_after = match &engine {
2227 HighlightEngine::TextMate(h) => h.stats().bytes_parsed,
2228 _ => unreachable!(),
2229 };
2230 let parsed = bytes_after - bytes_before;
2231 let buf_len = buffer.len();
2232 assert!(
2233 parsed < buf_len,
2234 "bulk edit outside the viewport must not force a whole-file \
2235 reparse (parsed {parsed}, file {buf_len})"
2236 );
2237 }
2238
2239 #[test]
2243 fn test_partial_update_budget_caps_work() {
2244 let registry =
2245 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2246 let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, ®istry);
2247
2248 let mut content = String::new();
2250 while content.len() < (CONVERGENCE_BUDGET * 4) {
2251 content.push_str("fn name() { let mut v = 0; v += 1; }\n");
2252 }
2253 let buffer = Buffer::from_str(&content, 0, test_fs());
2254 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2255
2256 let HighlightEngine::TextMate(ref mut tm) = engine else {
2257 panic!("expected TextMate engine for .rs");
2258 };
2259
2260 let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2262 tm.notify_insert(100, 0);
2266 tm.checkpoint_states.clear();
2267
2268 let bytes_before = tm.stats().bytes_parsed;
2269 let _ = tm.highlight_viewport(&buffer, 0, 200, &theme, 10_000);
2270 let parsed = tm.stats().bytes_parsed - bytes_before;
2271
2272 assert!(
2276 parsed <= CONVERGENCE_BUDGET + 4096,
2277 "partial update parsed {parsed}, expected <= {} \
2278 (budget {CONVERGENCE_BUDGET} + slack)",
2279 CONVERGENCE_BUDGET + 4096
2280 );
2281
2282 assert!(
2284 tm.dirty_from.is_some(),
2285 "budget exit must keep dirty_from set"
2286 );
2287 }
2288
2289 #[test]
2299 fn test_large_file_uses_windowed_parse() {
2300 let registry =
2301 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2302 let mut engine = HighlightEngine::for_file(Path::new("test.rs"), None, ®istry);
2303
2304 let line = "fn long_name_for_padding() { let v = 1; v + 1; }\n";
2307 let bytes_needed = MAX_PARSE_BYTES * 2;
2308 let lines_needed = bytes_needed / line.len() + 100;
2309 let mut content = String::with_capacity(lines_needed * line.len());
2310 for _ in 0..lines_needed {
2311 content.push_str(line);
2312 }
2313 assert!(content.len() > MAX_PARSE_BYTES * 2);
2314 let buffer = Buffer::from_str(&content, 0, test_fs());
2315 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2316
2317 let HighlightEngine::TextMate(ref mut tm) = engine else {
2318 panic!("expected TextMate engine for .rs");
2319 };
2320
2321 let context_bytes = 10_000usize;
2324 let viewport_start = MAX_PARSE_BYTES + 200_000;
2325 let viewport_end = viewport_start + 1000;
2326 let _ = tm.highlight_viewport(&buffer, viewport_start, viewport_end, &theme, context_bytes);
2327 let parsed = tm.stats().bytes_parsed;
2328
2329 let window = (viewport_end - viewport_start) + 2 * context_bytes;
2333 assert!(
2334 parsed <= window * 4,
2335 "large file windowed parse should be ~{window} bytes, got {parsed} \
2336 (file {})",
2337 buffer.len()
2338 );
2339 }
2340
2341 #[test]
2348 fn test_javascript_template_literal_does_not_bleed() {
2349 let registry =
2350 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2351 let mut engine = HighlightEngine::for_file(Path::new("repro.js"), None, ®istry);
2352
2353 let source = "class ExampleClass {\n\
2355 \texampleFunction = exampleArg => `${exampleArg}`;\n\
2356 \n\
2357 \tconstructor() {\n\
2358 \t\t// constructor body\n\
2359 \t}\n\
2360 \n\
2361 \t/* multiline comment */\n\
2362 }\n";
2363 let buffer = Buffer::from_str(source, 0, test_fs());
2364 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2365
2366 let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
2367
2368 let ctor_pos = source.find("constructor").expect("locate constructor");
2371 let ctor_cat = engine.category_at_position(ctor_pos);
2372 assert_ne!(
2373 ctor_cat,
2374 Some(HighlightCategory::String),
2375 "constructor keyword must not inherit string state from earlier \
2376 template literal (got {:?})",
2377 ctor_cat,
2378 );
2379
2380 let last_brace = source.rfind('}').expect("locate closing brace");
2383 let brace_cat = engine.category_at_position(last_brace);
2384 assert_ne!(
2385 brace_cat,
2386 Some(HighlightCategory::String),
2387 "closing class brace must not be highlighted as string \
2388 (got {:?})",
2389 brace_cat,
2390 );
2391 }
2392
2393 #[test]
2403 fn test_javascript_template_substitution_closing_tokens_are_string() {
2404 let registry =
2405 GrammarRegistry::load(&crate::primitives::grammar::LocalGrammarLoader::embedded_only());
2406 let mut engine = HighlightEngine::for_file(Path::new("tmpl.js"), None, ®istry);
2407
2408 let source = "const x = `${name}`;\n";
2411 let buffer = Buffer::from_str(source, 0, test_fs());
2412 let theme = Theme::load_builtin(theme::THEME_LIGHT).unwrap();
2413
2414 let _ = engine.highlight_viewport(&buffer, 0, source.len(), &theme, 0);
2415
2416 let close_brace = source
2419 .find("}`")
2420 .expect("locate substitution closing brace");
2421 let close_backtick = close_brace + 1;
2422
2423 let name_pos = source.find("name").expect("locate identifier");
2427 let name_cat = engine.category_at_position(name_pos);
2428 assert_eq!(
2429 name_cat,
2430 Some(HighlightCategory::Variable),
2431 "substitution identifier should be Variable (got {:?})",
2432 name_cat,
2433 );
2434
2435 let brace_cat = engine.category_at_position(close_brace);
2442 assert_eq!(
2443 brace_cat,
2444 Some(HighlightCategory::String),
2445 "closing }} of ${{…}} must be String (got {:?})",
2446 brace_cat,
2447 );
2448 let backtick_cat = engine.category_at_position(close_backtick);
2449 assert_eq!(
2450 backtick_cat,
2451 Some(HighlightCategory::String),
2452 "closing backtick of template literal must be String \
2453 (got {:?})",
2454 backtick_cat,
2455 );
2456 }
2457}