1use crate::highlighter::{HighlightTheme, LanguageRegistry};
2use crate::input::RopeExt;
3
4use anyhow::{anyhow, Context, Result};
5use gpui::{HighlightStyle, SharedString};
6
7use ropey::{ChunkCursor, Rope};
8use std::{
9 collections::{BTreeSet, HashMap},
10 ops::Range,
11 usize,
12};
13use sum_tree::Bias;
14use tree_sitter::{
15 InputEdit, Node, Parser, Point, Query, QueryCursor, QueryMatch, StreamingIterator, Tree,
16};
17
/// Tree-sitter based syntax highlighter for a single document.
///
/// Owns the parser, the most recent parse tree, a copy of the text that tree
/// was parsed from, and the compiled queries used to produce highlight ranges.
// NOTE(review): `unused` is allowed for the whole struct; several of the
// pattern/capture index fields are only written in the code visible here —
// confirm whether they are still needed.
#[allow(unused)]
pub struct SyntaxHighlighter {
    /// Name of the language this highlighter was built for (copied from the registry config).
    language: SharedString,
    /// Query compiled from the injections, locals and highlights sources, concatenated in that order.
    query: Option<Query>,
    /// Highlight queries of the languages that can be injected, keyed by language name.
    injection_queries: HashMap<SharedString, Query>,

    /// Number of patterns in `query` that start before the locals section.
    locals_pattern_index: usize,
    /// Number of patterns in `query` that start before the highlights section.
    highlights_pattern_index: usize,
    /// Per pattern of `query`: `true` when it has a negated `local` property predicate.
    non_local_variable_patterns: Vec<bool>,
    /// Index of the `injection.content` capture in `query`, if the query defines it.
    injection_content_capture_index: Option<u32>,
    /// Index of the `injection.language` capture in `query`, if the query defines it.
    injection_language_capture_index: Option<u32>,
    /// Index of the `local.scope` capture in `query`, if the query defines it.
    local_scope_capture_index: Option<u32>,
    /// Index of the `local.definition` capture in `query`, if the query defines it.
    local_def_capture_index: Option<u32>,
    /// Index of the `local.definition-value` capture in `query`, if the query defines it.
    local_def_value_capture_index: Option<u32>,
    /// Index of the `local.reference` capture in `query`, if the query defines it.
    local_ref_capture_index: Option<u32>,

    /// Copy of the text that `tree` was parsed from.
    text: Rope,
    /// Parser configured with this highlighter's language.
    parser: Parser,
    /// Most recent parse tree; `None` until the first successful `update`.
    tree: Option<Tree>,
}
43
44struct TextProvider<'a>(&'a Rope);
45struct ByteChunks<'a> {
46 cursor: ChunkCursor<'a>,
47 end: usize,
48}
49impl<'a> tree_sitter::TextProvider<&'a [u8]> for TextProvider<'a> {
50 type I = ByteChunks<'a>;
51
52 fn text(&mut self, node: tree_sitter::Node) -> Self::I {
53 let range = node.byte_range();
54 let cursor = self.0.chunk_cursor_at(range.start);
55
56 ByteChunks {
57 cursor,
58 end: range.end,
59 }
60 }
61}
62
63impl<'a> Iterator for ByteChunks<'a> {
64 type Item = &'a [u8];
65
66 fn next(&mut self) -> Option<Self::Item> {
67 let cursor = &mut self.cursor;
68 let end = self.end;
69
70 if cursor.next() && cursor.byte_offset() < end {
71 Some(cursor.chunk().as_bytes())
72 } else {
73 None
74 }
75 }
76}
77
/// Summary of one or more [`HighlightItem`]s, used as the `sum_tree` summary type.
#[derive(Debug, Default, Clone)]
struct HighlightSummary {
    /// Number of items covered by this summary.
    count: usize,
    /// Range start of the item that was added last.
    start: usize,
    /// Range end of the item that was added last.
    end: usize,
    /// Smallest range start among the covered items.
    min_start: usize,
    /// Largest range end among the covered items.
    max_end: usize,
}
86
/// A highlighted byte range together with the capture name that produced it.
#[derive(Debug, Default, Clone)]
struct HighlightItem {
    /// Byte range of the highlighted text.
    range: Range<usize>,
    /// Capture name from the query (looked up in the theme to get a style).
    name: SharedString,
}
95
96impl HighlightItem {
97 pub fn new(range: Range<usize>, name: impl Into<SharedString>) -> Self {
98 Self {
99 range,
100 name: name.into(),
101 }
102 }
103}
104
105impl sum_tree::Item for HighlightItem {
106 type Summary = HighlightSummary;
107 fn summary(&self, _cx: &()) -> Self::Summary {
108 HighlightSummary {
109 count: 1,
110 start: self.range.start,
111 end: self.range.end,
112 min_start: self.range.start,
113 max_end: self.range.end,
114 }
115 }
116}
117
118impl sum_tree::Summary for HighlightSummary {
119 type Context<'a> = &'a ();
120 fn zero(_: Self::Context<'_>) -> Self {
121 HighlightSummary {
122 count: 0,
123 start: usize::MIN,
124 end: usize::MAX,
125 min_start: usize::MAX,
126 max_end: usize::MIN,
127 }
128 }
129
130 fn add_summary(&mut self, other: &Self, _: Self::Context<'_>) {
131 self.min_start = self.min_start.min(other.min_start);
132 self.max_end = self.max_end.max(other.max_end);
133 self.start = other.start;
134 self.end = other.end;
135 self.count += other.count;
136 }
137}
138
139impl<'a> sum_tree::Dimension<'a, HighlightSummary> for usize {
140 fn zero(_: &()) -> Self {
141 0
142 }
143
144 fn add_summary(&mut self, _: &'a HighlightSummary, _: &()) {}
145}
146
147impl<'a> sum_tree::Dimension<'a, HighlightSummary> for Range<usize> {
148 fn zero(_: &()) -> Self {
149 Default::default()
150 }
151
152 fn add_summary(&mut self, summary: &'a HighlightSummary, _: &()) {
153 self.start = summary.start;
154 self.end = summary.end;
155 }
156}
157
158impl SyntaxHighlighter {
159 pub fn new(lang: &str) -> Self {
161 match Self::build_combined_injections_query(&lang) {
162 Ok(result) => result,
163 Err(err) => {
164 tracing::warn!(
165 "SyntaxHighlighter init failed, fallback to use `text`, {}",
166 err
167 );
168 Self::build_combined_injections_query("text").unwrap()
169 }
170 }
171 }
172
173 fn build_combined_injections_query(lang: &str) -> Result<Self> {
177 let Some(config) = LanguageRegistry::singleton().language(&lang) else {
178 return Err(anyhow!(
179 "language {:?} is not registered in `LanguageRegistry`",
180 lang
181 ));
182 };
183
184 let mut parser = Parser::new();
185 parser
186 .set_language(&config.language)
187 .context("parse set_language")?;
188
189 let mut query_source = String::new();
191 query_source.push_str(&config.injections);
192 let locals_query_offset = query_source.len();
193 query_source.push_str(&config.locals);
194 let highlights_query_offset = query_source.len();
195 query_source.push_str(&config.highlights);
196
197 let query = Query::new(&config.language, &query_source).context("new query")?;
200
201 let mut locals_pattern_index = 0;
202 let mut highlights_pattern_index = 0;
203 for i in 0..(query.pattern_count()) {
204 let pattern_offset = query.start_byte_for_pattern(i);
205 if pattern_offset < highlights_query_offset {
206 if pattern_offset < highlights_query_offset {
207 highlights_pattern_index += 1;
208 }
209 if pattern_offset < locals_query_offset {
210 locals_pattern_index += 1;
211 }
212 }
213 }
214
215 let non_local_variable_patterns = (0..query.pattern_count())
240 .map(|i| {
241 query
242 .property_predicates(i)
243 .iter()
244 .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
245 })
246 .collect();
247
248 let mut injection_content_capture_index = None;
250 let mut injection_language_capture_index = None;
251 let mut local_def_capture_index = None;
252 let mut local_def_value_capture_index = None;
253 let mut local_ref_capture_index = None;
254 let mut local_scope_capture_index = None;
255 for (i, name) in query.capture_names().iter().enumerate() {
256 let i = Some(i as u32);
257 match *name {
258 "injection.content" => injection_content_capture_index = i,
259 "injection.language" => injection_language_capture_index = i,
260 "local.definition" => local_def_capture_index = i,
261 "local.definition-value" => local_def_value_capture_index = i,
262 "local.reference" => local_ref_capture_index = i,
263 "local.scope" => local_scope_capture_index = i,
264 _ => {}
265 }
266 }
267
268 let mut injection_queries = HashMap::new();
269 for inj_language in config.injection_languages.iter() {
270 if let Some(inj_config) = LanguageRegistry::singleton().language(&inj_language) {
271 match Query::new(&inj_config.language, &inj_config.highlights) {
272 Ok(q) => {
273 injection_queries.insert(inj_config.name.clone(), q);
274 }
275 Err(e) => {
276 tracing::error!(
277 "failed to build injection query for {:?}: {:?}",
278 inj_config.name,
279 e
280 );
281 }
282 }
283 }
284 }
285
286 Ok(Self {
289 language: config.name.clone(),
290 query: Some(query),
291 injection_queries,
292
293 locals_pattern_index,
294 highlights_pattern_index,
295 non_local_variable_patterns,
296 injection_content_capture_index,
297 injection_language_capture_index,
298 local_scope_capture_index,
299 local_def_capture_index,
300 local_def_value_capture_index,
301 local_ref_capture_index,
302 text: Rope::new(),
303 parser,
304 tree: None,
305 })
306 }
307
308 pub fn is_empty(&self) -> bool {
309 self.text.len() == 0
310 }
311
312 pub fn update(&mut self, edit: Option<InputEdit>, text: &Rope) {
316 if self.text.eq(text) {
317 return;
318 }
319
320 let edit = edit.unwrap_or(InputEdit {
321 start_byte: 0,
322 old_end_byte: 0,
323 new_end_byte: text.len(),
324 start_position: Point::new(0, 0),
325 old_end_position: Point::new(0, 0),
326 new_end_position: Point::new(0, 0),
327 });
328
329 let mut old_tree = self
330 .tree
331 .take()
332 .unwrap_or(self.parser.parse("", None).unwrap());
333 old_tree.edit(&edit);
334
335 let new_tree = self.parser.parse_with_options(
336 &mut move |offset, _| {
337 if offset >= text.len() {
338 ""
339 } else {
340 let (chunk, chunk_byte_ix) = text.chunk(offset);
341 &chunk[offset - chunk_byte_ix..]
342 }
343 },
344 Some(&old_tree),
345 None,
346 );
347
348 let Some(new_tree) = new_tree else {
349 return;
350 };
351
352 self.tree = Some(new_tree);
353 self.text = text.clone();
354 }
355
356 fn match_styles(&self, range: Range<usize>) -> Vec<HighlightItem> {
358 let mut highlights = vec![];
359 let Some(tree) = &self.tree else {
360 return highlights;
361 };
362
363 let Some(query) = &self.query else {
364 return highlights;
365 };
366
367 let root_node = tree.root_node();
368
369 let source = &self.text;
370 let mut cursor = QueryCursor::new();
371 cursor.set_byte_range(range);
372 let mut matches = cursor.matches(&query, root_node, TextProvider(&source));
373
374 while let Some(query_match) = matches.next() {
375 if let (Some(language_name), Some(content_node), _) =
378 self.injection_for_match(None, query, query_match)
379 {
380 let styles = self.handle_injection(&language_name, content_node);
381 for (node_range, highlight_name) in styles {
382 highlights.push(HighlightItem::new(node_range.clone(), highlight_name));
383 }
384
385 continue;
386 }
387
388 for cap in query_match.captures {
389 let node = cap.node;
390
391 let Some(highlight_name) = query.capture_names().get(cap.index as usize) else {
392 continue;
393 };
394
395 let node_range: Range<usize> = node.start_byte()..node.end_byte();
396 let highlight_name = SharedString::from(highlight_name.to_string());
397
398 let last_item = highlights.last();
400 let last_range = last_item.map(|item| &item.range).unwrap_or(&(0..0));
401 let last_highlight_name = last_item.map(|item| item.name.clone());
402
403 if last_range.end <= node_range.start
404 && last_highlight_name.as_ref() == Some(&highlight_name)
405 {
406 highlights.push(HighlightItem::new(
407 last_range.start..node_range.end,
408 highlight_name.clone(),
409 ));
410 } else if last_range == &node_range {
411 highlights.push(HighlightItem::new(
415 node_range,
416 last_highlight_name.unwrap_or(highlight_name),
417 ));
418 } else {
419 highlights.push(HighlightItem::new(node_range, highlight_name.clone()));
420 }
421 }
422 }
423
424 highlights
430 }
431
432 fn handle_injection(
434 &self,
435 injection_language: &str,
436 node: Node,
437 ) -> Vec<(Range<usize>, String)> {
438 let start_offset = self.text.clip_offset(node.start_byte(), Bias::Left);
440 let end_offset = self.text.clip_offset(node.end_byte(), Bias::Right);
441
442 let mut cache = vec![];
443 let Some(query) = &self.injection_queries.get(injection_language) else {
444 return cache;
445 };
446
447 let content = self.text.slice(start_offset..end_offset);
448 if content.len() == 0 {
449 return cache;
450 };
451 let content = content.to_string();
453
454 let Some(config) = LanguageRegistry::singleton().language(injection_language) else {
455 return cache;
456 };
457 let mut parser = Parser::new();
458 if parser.set_language(&config.language).is_err() {
459 return cache;
460 }
461
462 let source = content.as_bytes();
463 let Some(tree) = parser.parse(source, None) else {
464 return cache;
465 };
466
467 let mut query_cursor = QueryCursor::new();
468 let mut matches = query_cursor.matches(query, tree.root_node(), source);
469
470 let mut last_end = start_offset;
471 while let Some(m) = matches.next() {
472 for cap in m.captures {
473 let cap_node = cap.node;
474
475 let node_range: Range<usize> =
476 start_offset + cap_node.start_byte()..start_offset + cap_node.end_byte();
477
478 if node_range.start < last_end {
479 continue;
480 }
481 if node_range.end > end_offset {
482 break;
483 }
484
485 if let Some(highlight_name) = query.capture_names().get(cap.index as usize) {
486 last_end = node_range.end;
487 cache.push((node_range, highlight_name.to_string()));
488 }
489 }
490 }
491
492 cache
493 }
494
495 fn injection_for_match<'a>(
503 &self,
504 parent_name: Option<SharedString>,
505 query: &'a Query,
506 query_match: &QueryMatch<'a, 'a>,
507 ) -> (Option<SharedString>, Option<Node<'a>>, bool) {
508 let content_capture_index = self.injection_content_capture_index;
509 let mut language_name: Option<SharedString> = None;
512 let mut content_node = None;
513
514 for capture in query_match.captures {
515 let index = Some(capture.index);
516 if index == content_capture_index {
517 content_node = Some(capture.node);
518 }
519 }
520
521 let mut include_children = false;
522 for prop in query.property_settings(query_match.pattern_index) {
523 match prop.key.as_ref() {
524 "injection.language" => {
528 if language_name.is_none() {
529 language_name = prop
530 .value
531 .as_ref()
532 .map(std::convert::AsRef::as_ref)
533 .map(ToString::to_string)
534 .map(SharedString::from);
535 }
536 }
537
538 "injection.self" => {
542 if language_name.is_none() {
543 language_name = Some(self.language.clone());
544 }
545 }
546
547 "injection.parent" => {
551 if language_name.is_none() {
552 language_name = parent_name.clone();
553 }
554 }
555
556 "injection.include-children" => include_children = true,
561 _ => {}
562 }
563 }
564
565 (language_name, content_node, include_children)
566 }
567
568 pub fn styles(
592 &self,
593 range: &Range<usize>,
594 theme: &HighlightTheme,
595 ) -> Vec<(Range<usize>, HighlightStyle)> {
596 let mut styles = vec![];
597 let start_offset = range.start;
598
599 let highlights = self.match_styles(range.clone());
600
601 for item in highlights {
603 let node_range = &item.range;
605 let name = &item.name;
606
607 let mut node_range = node_range.start.max(range.start)..node_range.end.min(range.end);
609 if node_range.start > node_range.end {
610 node_range.end = node_range.start;
611 }
612
613 styles.push((node_range, theme.style(name.as_ref()).unwrap_or_default()));
614 }
615
616 if styles.len() == 0 {
618 return vec![(start_offset..range.end, HighlightStyle::default())];
619 }
620
621 let styles = unique_styles(&range, styles);
622
623 styles
630 }
631}
632
633pub(crate) fn unique_styles(
648 total_range: &Range<usize>,
649 styles: Vec<(Range<usize>, HighlightStyle)>,
650) -> Vec<(Range<usize>, HighlightStyle)> {
651 if styles.is_empty() {
652 return styles;
653 }
654
655 let mut intervals = BTreeSet::new();
656 let mut significant_intervals = BTreeSet::new();
657
658 intervals.insert(total_range.start);
663 intervals.insert(total_range.end);
664 for (range, _) in &styles {
665 intervals.insert(range.start);
666 intervals.insert(range.end);
667 significant_intervals.insert(range.end); }
669
670 let intervals: Vec<usize> = intervals.into_iter().collect();
671 let mut result = Vec::with_capacity(intervals.len().saturating_sub(1));
672
673 for i in 0..intervals.len().saturating_sub(1) {
679 let interval = intervals[i]..intervals[i + 1];
680 if interval.start >= interval.end {
681 continue;
682 }
683
684 let mut top_style: Option<HighlightStyle> = None;
686 for (range, style) in &styles {
687 if range.start <= interval.start && interval.end <= range.end {
688 if let Some(top_style) = &mut top_style {
689 merge_highlight_style(top_style, style);
690 } else {
691 top_style = Some(*style);
692 }
693 }
694 }
695
696 if let Some(style) = top_style {
697 result.push((interval, style));
698 } else {
699 result.push((interval, HighlightStyle::default()));
700 }
701 }
702
703 let mut merged: Vec<(Range<usize>, HighlightStyle)> = Vec::with_capacity(result.len());
705 for (range, style) in result {
706 if let Some((last_range, last_style)) = merged.last_mut() {
707 if last_range.end == range.start
708 && *last_style == style
709 && !significant_intervals.contains(&range.start)
710 {
711 last_range.end = range.end;
713 continue;
714 }
715 }
716 merged.push((range, style));
717 }
718
719 merged
720}
721
722fn merge_highlight_style(style: &mut HighlightStyle, other: &HighlightStyle) {
724 if let Some(color) = other.color {
725 style.color = Some(color);
726 }
727 if let Some(font_weight) = other.font_weight {
728 style.font_weight = Some(font_weight);
729 }
730 if let Some(font_style) = other.font_style {
731 style.font_style = Some(font_style);
732 }
733 if let Some(background_color) = other.background_color {
734 style.background_color = Some(background_color);
735 }
736 if let Some(underline) = other.underline {
737 style.underline = Some(underline);
738 }
739 if let Some(strikethrough) = other.strikethrough {
740 style.strikethrough = Some(strikethrough);
741 }
742 if let Some(fade_out) = other.fade_out {
743 style.fade_out = Some(fade_out);
744 }
745}
746
#[cfg(test)]
mod tests {
    use gpui::Hsla;

    use super::*;
    use crate::Colorize as _;

    /// Default style with only the text color set.
    fn color_style(color: Hsla) -> HighlightStyle {
        let mut colored = HighlightStyle::default();
        colored.color = Some(color);
        colored
    }

    /// Runs `unique_styles` on `left` and checks that the resulting ranges and
    /// colors equal `right`, panicking with a readable report otherwise.
    #[track_caller]
    fn assert_unique_styles(
        range: Range<usize>,
        left: Vec<(Range<usize>, HighlightStyle)>,
        right: Vec<(Range<usize>, HighlightStyle)>,
    ) {
        /// Human readable name of a color for failure messages.
        fn color_name(c: Option<Hsla>) -> String {
            match c {
                None => "clean".to_string(),
                Some(c) if c == gpui::red() => "red".to_string(),
                Some(c) if c == gpui::green() => "green".to_string(),
                Some(c) if c == gpui::blue() => "blue".to_string(),
                Some(c) => c.to_hex(),
            }
        }

        let actual = unique_styles(&range, left);
        if actual.len() != right.len() {
            println!("\n---------------------------------------------");
            for (range, style) in &actual {
                println!("({:?}, {})", range, color_name(style.color));
            }
            println!("---------------------------------------------");
            panic!("left {} styles, right {} styles", actual.len(), right.len());
        }

        for ((actual_range, actual_style), (expected_range, expected_style)) in
            actual.into_iter().zip(right)
        {
            let same = actual_style.color == expected_style.color && actual_range == expected_range;
            if !same {
                panic!(
                    "\n left: ({:?}, {})\nright: ({:?}, {})\n",
                    actual_range,
                    color_name(actual_style.color),
                    expected_range,
                    color_name(expected_style.color)
                );
            }
        }
    }

    #[test]
    fn test_unique_styles() {
        let red = color_style(gpui::red());
        let green = color_style(gpui::green());
        let blue = color_style(gpui::blue());
        let clean = HighlightStyle::default();

        // Overlapping, duplicated and adjacent input ranges.
        let input = vec![
            (2..10, clean),
            (2..10, clean),
            (5..11, red),
            (2..6, clean),
            (10..15, green),
            (15..30, clean),
            (29..35, blue),
            (35..40, green),
            (45..60, blue),
        ];

        // Expected flattened segments covering the whole of 0..65.
        let expected = vec![
            (0..5, clean),
            (5..6, red),
            (6..10, red),
            (10..11, green),
            (11..15, green),
            (15..29, clean),
            (29..30, blue),
            (30..35, blue),
            (35..40, green),
            (40..45, clean),
            (45..60, blue),
            (60..65, clean),
        ];

        assert_unique_styles(0..65, input, expected);
    }
}