1use std::collections::HashMap;
2
3use thiserror::Error;
4use tree_sitter::StreamingIterator;
5use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter as TsHighlighter};
6
7unsafe extern "C" {
8 fn tree_sitter_sql() -> *const ();
10}
11
12const MARKDOWN_LANGUAGE: tree_sitter_language::LanguageFn = tree_sitter_md::LANGUAGE;
13const MARKDOWN_INLINE_LANGUAGE: tree_sitter_language::LanguageFn = tree_sitter_md::INLINE_LANGUAGE;
14const MARKDOWN_HIGHLIGHTS_QUERY: &str = tree_sitter_md::HIGHLIGHT_QUERY_BLOCK;
15const MARKDOWN_INJECTIONS_QUERY: &str = tree_sitter_md::INJECTION_QUERY_BLOCK;
16const MARKDOWN_INLINE_HIGHLIGHTS_QUERY: &str = tree_sitter_md::HIGHLIGHT_QUERY_INLINE;
17const MARKDOWN_INLINE_INJECTIONS_QUERY: &str = tree_sitter_md::INJECTION_QUERY_INLINE;
18
19const SQL_LANGUAGE: tree_sitter_language::LanguageFn =
20 unsafe { tree_sitter_language::LanguageFn::from_raw(tree_sitter_sql) };
21const SQL_HIGHLIGHTS_QUERY: &str = include_str!("../vendor/tree-sitter-sql/queries/highlights.scm");
22
/// Languages a caller can ask the highlighter to treat as the top-level grammar.
///
/// The declaration order matters: injection sorting casts variants with `as u8`,
/// so reordering variants changes tie-breaking between identical regions.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Grammar {
    /// ObjectScript source.
    ObjectScript,
    /// SQL source.
    Sql,
    /// Python source.
    Python,
    /// Markdown source.
    Markdown,
    /// MDX source; the highlighter currently serves it with the SQL configuration.
    Mdx,
}
31
32const SUPPORTED_GRAMMARS: [&str; 5] = ["objectscript", "sql", "python", "markdown", "mdx"];
33
34impl Grammar {
35 #[must_use]
40 pub fn from_name(input: &str) -> Option<Self> {
41 let normalized = normalize_language_name(input);
42 grammar_from_normalized_name(&normalized)
43 }
44
45 #[must_use]
47 pub fn canonical_name(self) -> &'static str {
48 match self {
49 Self::ObjectScript => "objectscript",
50 Self::Sql => "sql",
51 Self::Python => "python",
52 Self::Markdown => "markdown",
53 Self::Mdx => "mdx",
54 }
55 }
56
57 #[must_use]
59 pub fn supported_names() -> &'static [&'static str] {
60 &SUPPORTED_GRAMMARS
61 }
62}
63
/// A highlight attribute: a capture name paired with the numeric id spans refer to.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Attr {
    /// Identifier referenced by [`Span::attr_id`].
    pub id: usize,
    /// Capture name as it appears in the highlight queries, without the leading `@`.
    pub capture_name: String,
}

impl Attr {
    /// Returns the capture name prefixed with `@`, e.g. `@keyword`.
    #[must_use]
    pub fn theme_key(&self) -> String {
        let mut key = String::with_capacity(self.capture_name.len() + 1);
        key.push('@');
        key.push_str(&self.capture_name);
        key
    }
}
77
/// A half-open byte range `[start_byte, end_byte)` tagged with a highlight attribute.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Span {
    /// Id of the [`Attr`] that applies to this range.
    pub attr_id: usize,
    /// Offset of the first byte covered by the span.
    pub start_byte: usize,
    /// Offset one past the last byte covered by the span.
    pub end_byte: usize,
}
84
85#[derive(Debug, Clone, Eq, PartialEq)]
86pub struct HighlightResult {
87 pub attrs: Vec<Attr>,
88 pub spans: Vec<Span>,
89}
90
91#[derive(Debug, Error)]
92pub enum HighlightError {
93 #[error("failed to build highlight configuration: {0}")]
94 Query(#[from] tree_sitter::QueryError),
95 #[error("highlighting failed: {0}")]
96 Highlight(#[from] tree_sitter_highlight::Error),
97 #[error("failed to configure parser language: {0}")]
98 Language(#[from] tree_sitter::LanguageError),
99 #[error("failed to parse source for injection analysis")]
100 Parse,
101}
102
103pub struct SpanHighlighter {
104 highlighter: TsHighlighter,
105 attrs: Vec<Attr>,
106 objectscript: HighlightConfiguration,
107 sql: HighlightConfiguration,
108 python: HighlightConfiguration,
109 markdown: HighlightConfiguration,
110 markdown_inline: HighlightConfiguration,
111 objectscript_injection_query: tree_sitter::Query,
112 objectscript_injection_content_capture: Option<u32>,
113 objectscript_injection_language_capture: Option<u32>,
114}
115
116#[derive(Debug, Clone, Copy, Eq, PartialEq)]
117struct InjectionRegion {
118 grammar: Grammar,
119 start_byte: usize,
120 end_byte: usize,
121}
122
123impl SpanHighlighter {
124 pub fn new() -> Result<Self, HighlightError> {
134 let objectscript_language: tree_sitter::Language =
135 tree_sitter_objectscript::LANGUAGE_OBJECTSCRIPT_PLAYGROUND.into();
136 let mut objectscript = new_config(
137 objectscript_language.clone(),
138 "objectscript",
139 tree_sitter_objectscript::OBJECTSCRIPT_HIGHLIGHTS_QUERY,
140 tree_sitter_objectscript::OBJECTSCRIPT_INJECTIONS_QUERY,
141 )?;
142 let mut sql = new_config(SQL_LANGUAGE.into(), "sql", SQL_HIGHLIGHTS_QUERY, "")?;
143 let mut python = new_config(
144 tree_sitter_python::LANGUAGE.into(),
145 "python",
146 tree_sitter_python::HIGHLIGHTS_QUERY,
147 "",
148 )?;
149 let mut markdown = new_config(
150 MARKDOWN_LANGUAGE.into(),
151 "markdown",
152 MARKDOWN_HIGHLIGHTS_QUERY,
153 MARKDOWN_INJECTIONS_QUERY,
154 )?;
155 let mut markdown_inline = new_config(
156 MARKDOWN_INLINE_LANGUAGE.into(),
157 "markdown_inline",
158 MARKDOWN_INLINE_HIGHLIGHTS_QUERY,
159 MARKDOWN_INLINE_INJECTIONS_QUERY,
160 )?;
161 let objectscript_injection_query = tree_sitter::Query::new(
162 &objectscript_language,
163 tree_sitter_objectscript::OBJECTSCRIPT_INJECTIONS_QUERY,
164 )?;
165 let mut objectscript_injection_content_capture = None;
166 let mut objectscript_injection_language_capture = None;
167 for (idx, name) in objectscript_injection_query
168 .capture_names()
169 .iter()
170 .enumerate()
171 {
172 let idx = Some(idx as u32);
173 match *name {
174 "injection.content" => objectscript_injection_content_capture = idx,
175 "injection.language" => objectscript_injection_language_capture = idx,
176 _ => {}
177 }
178 }
179
180 let mut recognized = Vec::<String>::new();
181 let mut capture_index_by_name = HashMap::<String, usize>::new();
182 for config in [&objectscript, &sql, &python, &markdown, &markdown_inline] {
183 for name in config.names() {
184 if capture_index_by_name.contains_key(*name) {
185 continue;
186 }
187 let id = recognized.len();
188 let owned = (*name).to_string();
189 capture_index_by_name.insert(owned.clone(), id);
190 recognized.push(owned);
191 }
192 }
193 let recognized_refs = recognized.iter().map(String::as_str).collect::<Vec<_>>();
194 objectscript.configure(&recognized_refs);
195 sql.configure(&recognized_refs);
196 python.configure(&recognized_refs);
197 markdown.configure(&recognized_refs);
198 markdown_inline.configure(&recognized_refs);
199 let attrs = recognized
200 .into_iter()
201 .enumerate()
202 .map(|(id, capture_name)| Attr { id, capture_name })
203 .collect::<Vec<_>>();
204
205 Ok(Self {
206 highlighter: TsHighlighter::new(),
207 attrs,
208 objectscript,
209 sql,
210 python,
211 markdown,
212 markdown_inline,
213 objectscript_injection_query,
214 objectscript_injection_content_capture,
215 objectscript_injection_language_capture,
216 })
217 }
218
219 pub fn highlight(
229 &mut self,
230 source: &[u8],
231 flavor: Grammar,
232 ) -> Result<HighlightResult, HighlightError> {
233 let mut result = self.highlight_base(source, flavor)?;
234 if flavor == Grammar::ObjectScript {
235 self.apply_objectscript_injections(source, &mut result)?;
236 }
237 Ok(result)
238 }
239
240 fn highlight_base(
249 &mut self,
250 source: &[u8],
251 flavor: Grammar,
252 ) -> Result<HighlightResult, HighlightError> {
253 let config = match flavor {
254 Grammar::ObjectScript => &self.objectscript,
255 Grammar::Sql => &self.sql,
256 Grammar::Python => &self.python,
257 Grammar::Markdown => &self.markdown,
258 Grammar::Mdx => &self.sql,
260 };
261
262 let attrs = self.attrs.clone();
263
264 let injections = InjectionConfigs {
265 objectscript: &self.objectscript,
266 sql: &self.sql,
267 python: &self.python,
268 markdown: &self.markdown,
269 markdown_inline: &self.markdown_inline,
270 };
271
272 let events = self
273 .highlighter
274 .highlight(config, source, None, move |language_name| {
275 injections.resolve(language_name)
276 })?;
277 let mut spans = Vec::new();
278 let mut active_stack = Vec::new();
279
280 for event in events {
281 match event? {
282 HighlightEvent::HighlightStart(highlight) => active_stack.push(highlight.0),
283 HighlightEvent::HighlightEnd => {
284 active_stack.pop();
285 }
286 HighlightEvent::Source { start, end } => {
287 if let Some(&attr_id) = active_stack.last() {
288 push_merged(
289 &mut spans,
290 Span {
291 attr_id,
292 start_byte: start,
293 end_byte: end,
294 },
295 );
296 }
297 }
298 }
299 }
300
301 Ok(HighlightResult { attrs, spans })
302 }
303
304 pub fn highlight_lines<S: AsRef<str>>(
310 &mut self,
311 lines: &[S],
312 flavor: Grammar,
313 ) -> Result<HighlightResult, HighlightError> {
314 let source = lines
315 .iter()
316 .map(AsRef::as_ref)
317 .collect::<Vec<_>>()
318 .join("\n");
319 self.highlight(source.as_bytes(), flavor)
320 }
321
322 fn apply_objectscript_injections(
331 &mut self,
332 source: &[u8],
333 base: &mut HighlightResult,
334 ) -> Result<(), HighlightError> {
335 let injections = self.find_objectscript_injections(source)?;
336 if injections.is_empty() {
337 return Ok(());
338 }
339
340 let mut attrs = base.attrs.clone();
341 let mut attr_ids_by_name = attrs
342 .iter()
343 .map(|attr| (attr.capture_name.clone(), attr.id))
344 .collect::<HashMap<_, _>>();
345 let mut injected_spans = Vec::new();
346
347 for injection in &injections {
348 let nested_source = &source[injection.start_byte..injection.end_byte];
349 let nested = self.highlight_base(nested_source, injection.grammar)?;
350 let remap = remap_attr_ids(&nested.attrs, &mut attrs, &mut attr_ids_by_name);
351 for span in nested.spans {
352 let Some(&mapped_attr_id) = remap.get(span.attr_id) else {
353 continue;
354 };
355 injected_spans.push(Span {
356 attr_id: mapped_attr_id,
357 start_byte: span.start_byte + injection.start_byte,
358 end_byte: span.end_byte + injection.start_byte,
359 });
360 }
361 }
362
363 let mut spans = exclude_ranges(
364 &base.spans,
365 &injections
366 .iter()
367 .map(|inj| (inj.start_byte, inj.end_byte))
368 .collect::<Vec<_>>(),
369 );
370 spans.extend(injected_spans);
371
372 base.attrs = attrs;
373 base.spans = normalize_spans(spans);
374 Ok(())
375 }
376
377 fn find_objectscript_injections(
383 &self,
384 source: &[u8],
385 ) -> Result<Vec<InjectionRegion>, HighlightError> {
386 let mut parser = tree_sitter::Parser::new();
387 let objectscript_language: tree_sitter::Language =
388 tree_sitter_objectscript::LANGUAGE_OBJECTSCRIPT_PLAYGROUND.into();
389 parser.set_language(&objectscript_language)?;
390 let tree = parser.parse(source, None).ok_or(HighlightError::Parse)?;
391 let mut cursor = tree_sitter::QueryCursor::new();
392
393 let mut injections = Vec::new();
394 let mut matches =
395 cursor.matches(&self.objectscript_injection_query, tree.root_node(), source);
396 while let Some(mat) = matches.next() {
397 let Some(injection) = self.injection_region_for_match(source, &mat) else {
398 continue;
399 };
400 injections.push(injection);
401 }
402
403 if injections.is_empty() {
404 return Ok(injections);
405 }
406
407 injections.sort_by(|a, b| {
408 a.start_byte
409 .cmp(&b.start_byte)
410 .then(b.end_byte.cmp(&a.end_byte))
411 .then((a.grammar as u8).cmp(&(b.grammar as u8)))
412 });
413 injections.dedup_by(|a, b| {
414 a.grammar == b.grammar && a.start_byte == b.start_byte && a.end_byte == b.end_byte
415 });
416
417 let mut non_overlapping = Vec::with_capacity(injections.len());
418 let mut last_end = 0usize;
419 for injection in injections {
420 if injection.start_byte < last_end {
421 continue;
422 }
423 last_end = injection.end_byte;
424 non_overlapping.push(injection);
425 }
426 Ok(non_overlapping)
427 }
428
429 fn injection_region_for_match<'a>(
433 &self,
434 source: &'a [u8],
435 mat: &tree_sitter::QueryMatch<'a, 'a>,
436 ) -> Option<InjectionRegion> {
437 let mut language_name = None;
438 let mut content_node = None;
439
440 for capture in mat.captures {
441 let index = Some(capture.index);
442 if index == self.objectscript_injection_language_capture {
443 language_name = capture.node.utf8_text(source).ok();
444 } else if index == self.objectscript_injection_content_capture {
445 content_node = Some(capture.node);
446 }
447 }
448
449 for prop in self
450 .objectscript_injection_query
451 .property_settings(mat.pattern_index)
452 {
453 match prop.key.as_ref() {
454 "injection.language" => {
455 if language_name.is_none() {
456 language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
457 }
458 }
459 "injection.self" | "injection.parent" => {
460 if language_name.is_none() {
461 language_name = Some("objectscript");
462 }
463 }
464 _ => {}
465 }
466 }
467
468 let grammar = language_name.and_then(Grammar::from_name)?;
469 let content_node = content_node?;
470 let start_byte = content_node.start_byte();
471 let end_byte = content_node.end_byte();
472 if start_byte >= end_byte {
473 return None;
474 }
475
476 Some(InjectionRegion {
477 grammar,
478 start_byte,
479 end_byte,
480 })
481 }
482}
483
484struct InjectionConfigs<'a> {
485 objectscript: &'a HighlightConfiguration,
486 sql: &'a HighlightConfiguration,
487 python: &'a HighlightConfiguration,
488 markdown: &'a HighlightConfiguration,
489 markdown_inline: &'a HighlightConfiguration,
490}
491
492impl<'a> InjectionConfigs<'a> {
493 fn resolve(&self, language_name: &str) -> Option<&'a HighlightConfiguration> {
497 let normalized = normalize_language_name(language_name);
498 if normalized == "markdowninline" {
499 return Some(self.markdown_inline);
500 }
501
502 let grammar = grammar_from_normalized_name(&normalized)?;
503 match grammar {
504 Grammar::ObjectScript => Some(self.objectscript),
505 Grammar::Sql => Some(self.sql),
506 Grammar::Python => Some(self.python),
507 Grammar::Markdown => Some(self.markdown),
508 Grammar::Mdx => Some(self.sql),
509 }
510 }
511}
512
/// Reduces a language name to its ASCII letters and digits, lower-cased.
///
/// Every other character (whitespace, punctuation, non-ASCII) is dropped, so
/// `"Markdown_Inline"` becomes `"markdowninline"`.
fn normalize_language_name(input: &str) -> String {
    let mut normalized = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch.is_ascii_alphanumeric() {
            normalized.push(ch.to_ascii_lowercase());
        }
    }
    normalized
}
522
523fn grammar_from_normalized_name(normalized: &str) -> Option<Grammar> {
525 match normalized {
526 "objectscript" | "os" | "playground" | "objectscriptplayground" => {
527 Some(Grammar::ObjectScript)
528 }
529 "sql" | "tsql" | "plsql" | "mysql" | "postgres" | "postgresql" => Some(Grammar::Sql),
530 "python" | "py" => Some(Grammar::Python),
531 "markdown" | "md" | "gfm" => Some(Grammar::Markdown),
532 "mdx" => Some(Grammar::Mdx),
533 _ => None,
534 }
535}
536
537fn new_config(
544 language: tree_sitter::Language,
545 language_name: &str,
546 highlights: &str,
547 injections: &str,
548) -> Result<HighlightConfiguration, tree_sitter::QueryError> {
549 let mut config =
550 HighlightConfiguration::new(language, language_name, highlights, injections, "")?;
551 let recognized = config
552 .names()
553 .iter()
554 .map(|name| (*name).to_string())
555 .collect::<Vec<_>>();
556 let recognized_refs = recognized.iter().map(String::as_str).collect::<Vec<_>>();
557 config.configure(&recognized_refs);
558 Ok(config)
559}
560
561fn push_merged(spans: &mut Vec<Span>, next: Span) {
564 if next.start_byte >= next.end_byte {
565 return;
566 }
567
568 if let Some(last) = spans.last_mut() {
569 if last.attr_id == next.attr_id && last.end_byte == next.start_byte {
570 last.end_byte = next.end_byte;
571 return;
572 }
573 }
574
575 spans.push(next);
576}
577
578fn remap_attr_ids(
582 incoming: &[Attr],
583 attrs: &mut Vec<Attr>,
584 attr_ids_by_name: &mut HashMap<String, usize>,
585) -> Vec<usize> {
586 let mut remap = vec![0usize; incoming.len()];
587 for attr in incoming {
588 let mapped_attr_id = if let Some(&mapped_attr_id) = attr_ids_by_name.get(&attr.capture_name)
589 {
590 mapped_attr_id
591 } else {
592 let mapped_attr_id = attrs.len();
593 attrs.push(Attr {
594 id: mapped_attr_id,
595 capture_name: attr.capture_name.clone(),
596 });
597 attr_ids_by_name.insert(attr.capture_name.clone(), mapped_attr_id);
598 mapped_attr_id
599 };
600 if let Some(slot) = remap.get_mut(attr.id) {
601 *slot = mapped_attr_id;
602 }
603 }
604 remap
605}
606
607fn exclude_ranges(spans: &[Span], ranges: &[(usize, usize)]) -> Vec<Span> {
609 if ranges.is_empty() {
610 return spans.to_vec();
611 }
612
613 let mut out: Vec<Span> = Vec::with_capacity(spans.len());
614 let mut range_idx = 0usize;
615 for span in spans {
616 while range_idx < ranges.len() && ranges[range_idx].1 <= span.start_byte {
617 range_idx += 1;
618 }
619
620 let mut cursor = span.start_byte;
621 let mut idx = range_idx;
622 while idx < ranges.len() {
623 let (range_start, range_end) = ranges[idx];
624 if range_start >= span.end_byte {
625 break;
626 }
627
628 if range_end <= cursor {
629 idx += 1;
630 continue;
631 }
632
633 if cursor < range_start {
634 push_merged(
635 &mut out,
636 Span {
637 attr_id: span.attr_id,
638 start_byte: cursor,
639 end_byte: range_start.min(span.end_byte),
640 },
641 );
642 }
643
644 if range_end >= span.end_byte {
645 cursor = span.end_byte;
646 break;
647 }
648
649 cursor = range_end;
650 idx += 1;
651 }
652
653 if cursor < span.end_byte {
654 push_merged(
655 &mut out,
656 Span {
657 attr_id: span.attr_id,
658 start_byte: cursor,
659 end_byte: span.end_byte,
660 },
661 );
662 }
663 }
664 out
665}
666
667fn normalize_spans(mut spans: Vec<Span>) -> Vec<Span> {
669 spans.sort_by(|a, b| {
670 a.start_byte
671 .cmp(&b.start_byte)
672 .then(a.end_byte.cmp(&b.end_byte))
673 .then(a.attr_id.cmp(&b.attr_id))
674 });
675
676 let mut out: Vec<Span> = Vec::with_capacity(spans.len());
677 for mut span in spans {
678 if let Some(last) = out.last() {
679 if span.start_byte < last.end_byte {
680 if span.end_byte <= last.end_byte {
681 continue;
682 }
683 span.start_byte = last.end_byte;
684 }
685 }
686 push_merged(&mut out, span);
687 }
688 out
689}
690
#[cfg(test)]
mod tests {
    use super::{Grammar, HighlightResult, SpanHighlighter};

    /// Returns `true` when a span tagged with `capture_name` covers exactly
    /// `expected_text` in `source`.
    ///
    /// Only the first attribute carrying `capture_name` is considered.
    fn has_capture_for_text(
        result: &HighlightResult,
        source: &[u8],
        capture_name: &str,
        expected_text: &[u8],
    ) -> bool {
        let Some(wanted) = result
            .attrs
            .iter()
            .find(|attr| attr.capture_name == capture_name)
        else {
            return false;
        };

        result
            .spans
            .iter()
            .filter(|span| span.attr_id == wanted.id)
            .any(|span| &source[span.start_byte..span.end_byte] == expected_text)
    }

    /// A numeric literal inside an ObjectScript method is captured as `number`.
    #[test]
    fn highlights_numeric_literal_as_number() {
        let source = br#"
Class Demo.Highlight
{
    ClassMethod Main()
    {
        set x = 42
    }
}
"#;
        let mut highlighter = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = highlighter
            .highlight(source, Grammar::ObjectScript)
            .expect("failed to highlight");

        assert!(
            has_capture_for_text(&highlighted, source, "number", b"42"),
            "expected highlighted span for numeric literal"
        );
    }

    /// Canonical names and aliases resolve case-insensitively; unknown names do not.
    #[test]
    fn parses_supported_grammar_aliases() {
        let expectations = [
            ("objectscript", Some(Grammar::ObjectScript)),
            ("SQL", Some(Grammar::Sql)),
            ("py", Some(Grammar::Python)),
            ("md", Some(Grammar::Markdown)),
            ("mdx", Some(Grammar::Mdx)),
        ];
        for (name, expected) in expectations {
            assert_eq!(Grammar::from_name(name), expected);
        }
        assert!(Grammar::from_name("unknown").is_none());
    }

    /// `SELECT` in plain SQL is captured as `keyword`.
    #[test]
    fn highlights_sql_keyword() {
        let source = b"SELECT 42 FROM Demo";
        let mut highlighter = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = highlighter
            .highlight(source, Grammar::Sql)
            .expect("failed to highlight SQL");

        assert!(
            has_capture_for_text(&highlighted, source, "keyword", b"SELECT"),
            "expected SELECT to be highlighted as keyword"
        );
    }

    /// The body of a `%SQLQuery` class query is highlighted with the SQL grammar.
    #[test]
    fn objectscript_sqlquery_body_is_highlighted_as_sql() {
        let source = br#"
Class Test
{
    Query ListEmployees() As %SQLQuery
    {
SELECT ID,Name FROM Employee
    }
}
"#;
        let mut highlighter = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = highlighter
            .highlight(source, Grammar::ObjectScript)
            .expect("failed to highlight ObjectScript with SQL injection");

        assert!(
            has_capture_for_text(&highlighted, source, "keyword", b"SELECT"),
            "expected SQL SELECT in %SQLQuery body to be highlighted as keyword"
        );
    }

    /// An integer literal in Python is captured as `number`.
    #[test]
    fn highlights_python_number() {
        let source = b"def f(x):\n    return x + 1\n";
        let mut highlighter = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = highlighter
            .highlight(source, Grammar::Python)
            .expect("failed to highlight Python");

        assert!(
            has_capture_for_text(&highlighted, source, "number", b"1"),
            "expected numeric literal to be highlighted in Python"
        );
    }

    /// The text of a Markdown ATX heading is captured as `text.title`.
    #[test]
    fn highlights_markdown_heading() {
        let source = b"# Heading\n";
        let mut highlighter = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = highlighter
            .highlight(source, Grammar::Markdown)
            .expect("failed to highlight Markdown");

        assert!(
            has_capture_for_text(&highlighted, source, "text.title", b"Heading"),
            "expected heading text to be highlighted in Markdown"
        );
    }

    /// MDX input is highlighted with the SQL configuration.
    #[test]
    fn mdx_falls_back_to_sql_keyword_highlighting() {
        let source = b"SELECT 1 FROM Cube";
        let mut highlighter = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = highlighter
            .highlight(source, Grammar::Mdx)
            .expect("failed to highlight MDX fallback");

        assert!(
            has_capture_for_text(&highlighted, source, "keyword", b"SELECT"),
            "expected MDX fallback to highlight SQL keywords"
        );
    }
}