Skip to main content

highlight_spans/
lib.rs

1use std::collections::HashMap;
2
3use thiserror::Error;
4use tree_sitter::StreamingIterator;
5use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter as TsHighlighter};
6
// FFI declaration for the SQL grammar entry point. The symbol is presumably
// provided by the C parser built from `vendor/tree-sitter-sql` — confirm
// against the crate's build script.
unsafe extern "C" {
    /// Returns the SQL Tree-sitter language handle from the vendored parser.
    ///
    /// This file only uses it as the argument to
    /// `tree_sitter_language::LanguageFn::from_raw` when defining `SQL_LANGUAGE`.
    fn tree_sitter_sql() -> *const ();
}
11
// Markdown block-level and inline grammars, together with the highlight and
// injection queries that `tree_sitter_md` exposes for each of them.
const MARKDOWN_LANGUAGE: tree_sitter_language::LanguageFn = tree_sitter_md::LANGUAGE;
const MARKDOWN_INLINE_LANGUAGE: tree_sitter_language::LanguageFn = tree_sitter_md::INLINE_LANGUAGE;
const MARKDOWN_HIGHLIGHTS_QUERY: &str = tree_sitter_md::HIGHLIGHT_QUERY_BLOCK;
const MARKDOWN_INJECTIONS_QUERY: &str = tree_sitter_md::INJECTION_QUERY_BLOCK;
const MARKDOWN_INLINE_HIGHLIGHTS_QUERY: &str = tree_sitter_md::HIGHLIGHT_QUERY_INLINE;
const MARKDOWN_INLINE_INJECTIONS_QUERY: &str = tree_sitter_md::INJECTION_QUERY_INLINE;

// SQL grammar handle wrapping the `tree_sitter_sql` symbol declared in this
// file's `extern "C"` block.
// SAFETY: NOTE(review) — soundness presumably relies on `tree_sitter_sql` being a
// genuine Tree-sitter language constructor with the declared signature; confirm
// against the vendored parser and the `LanguageFn::from_raw` contract.
const SQL_LANGUAGE: tree_sitter_language::LanguageFn =
    unsafe { tree_sitter_language::LanguageFn::from_raw(tree_sitter_sql) };
// Highlight query for the vendored SQL grammar, embedded at compile time.
const SQL_HIGHLIGHTS_QUERY: &str = include_str!("../vendor/tree-sitter-sql/queries/highlights.scm");
22
23#[derive(Debug, Clone, Copy, Eq, PartialEq)]
24pub enum Grammar {
25    ObjectScript,
26    Sql,
27    Python,
28    Markdown,
29    Mdx,
30}
31
32const SUPPORTED_GRAMMARS: [&str; 5] = ["objectscript", "sql", "python", "markdown", "mdx"];
33
34impl Grammar {
35    /// Parses a grammar name or alias into a [`Grammar`] value.
36    ///
37    /// The input is normalized to lowercase alphanumeric characters, so values
38    /// such as `"ObjectScript"`, `"objectscript-playground"`, and `"os"` are accepted.
39    #[must_use]
40    pub fn from_name(input: &str) -> Option<Self> {
41        let normalized = normalize_language_name(input);
42        grammar_from_normalized_name(&normalized)
43    }
44
45    /// Returns the canonical lowercase name for this grammar.
46    #[must_use]
47    pub fn canonical_name(self) -> &'static str {
48        match self {
49            Self::ObjectScript => "objectscript",
50            Self::Sql => "sql",
51            Self::Python => "python",
52            Self::Markdown => "markdown",
53            Self::Mdx => "mdx",
54        }
55    }
56
57    /// Returns the canonical grammar names accepted by the CLI-facing APIs.
58    #[must_use]
59    pub fn supported_names() -> &'static [&'static str] {
60        &SUPPORTED_GRAMMARS
61    }
62}
63
/// One entry of the capture table: a numeric id paired with its capture name.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Attr {
    /// Identifier that spans refer to through `attr_id`.
    pub id: usize,
    /// Capture name without the leading `@` (for example `keyword`).
    pub capture_name: String,
}

impl Attr {
    /// Builds the theme lookup key for this capture: the capture name prefixed
    /// with `@` (for example `"@keyword"`).
    #[must_use]
    pub fn theme_key(&self) -> String {
        let mut key = String::with_capacity(self.capture_name.len() + 1);
        key.push('@');
        key.push_str(&self.capture_name);
        key
    }
}
77
/// A highlighted byte range of the source buffer.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Span {
    /// Id of the [`Attr`] (capture) applied to this range.
    pub attr_id: usize,
    /// Byte offset where the range begins (inclusive).
    pub start_byte: usize,
    /// Byte offset where the range ends (exclusive; the range is used as
    /// `source[start_byte..end_byte]`).
    pub end_byte: usize,
}
84
/// Output of a highlight pass: the capture table plus the spans that refer to it.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct HighlightResult {
    /// Capture table; each span's `attr_id` matches the `id` of one entry.
    pub attrs: Vec<Attr>,
    /// Highlighted byte ranges of the source buffer.
    pub spans: Vec<Span>,
}
90
/// Errors produced while building a [`SpanHighlighter`] or highlighting a buffer.
#[derive(Debug, Error)]
pub enum HighlightError {
    /// A highlight or injection query could not be compiled.
    #[error("failed to build highlight configuration: {0}")]
    Query(#[from] tree_sitter::QueryError),
    /// The Tree-sitter highlighter reported an error.
    #[error("highlighting failed: {0}")]
    Highlight(#[from] tree_sitter_highlight::Error),
    /// A parser rejected the language it was asked to use.
    #[error("failed to configure parser language: {0}")]
    Language(#[from] tree_sitter::LanguageError),
    /// The parser returned no tree while looking for injection regions.
    #[error("failed to parse source for injection analysis")]
    Parse,
}
102
/// Highlighter that turns source buffers into [`HighlightResult`] values for
/// every supported [`Grammar`].
pub struct SpanHighlighter {
    /// Underlying Tree-sitter highlighter, reused for every highlight pass.
    highlighter: TsHighlighter,
    /// Unified capture table shared by all configurations; cloned into each result.
    attrs: Vec<Attr>,
    /// Highlight configuration for ObjectScript.
    objectscript: HighlightConfiguration,
    /// Highlight configuration for SQL (also used for MDX).
    sql: HighlightConfiguration,
    /// Highlight configuration for Python.
    python: HighlightConfiguration,
    /// Highlight configuration for block-level Markdown.
    markdown: HighlightConfiguration,
    /// Highlight configuration for inline Markdown.
    markdown_inline: HighlightConfiguration,
    /// ObjectScript injection query, compiled separately so injection regions
    /// can be located with a plain query cursor.
    objectscript_injection_query: tree_sitter::Query,
    /// Index of the `injection.content` capture in the query above, if it has one.
    objectscript_injection_content_capture: Option<u32>,
    /// Index of the `injection.language` capture in the query above, if it has one.
    objectscript_injection_language_capture: Option<u32>,
}
115
/// A byte range of an ObjectScript buffer that should be highlighted with
/// another grammar.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
struct InjectionRegion {
    /// Grammar used to highlight the region.
    grammar: Grammar,
    /// Start byte of the injected content node (inclusive).
    start_byte: usize,
    /// End byte of the injected content node (exclusive); always greater than
    /// `start_byte` for regions built by `injection_region_for_match`.
    end_byte: usize,
}
122
123impl SpanHighlighter {
124    /// Creates a highlighter configured for all supported grammars and injections.
125    ///
126    /// This preloads Tree-sitter highlight configurations for ObjectScript, SQL,
127    /// Python, and Markdown variants, and builds a unified capture table.
128    ///
129    /// # Errors
130    ///
131    /// Returns an error if any grammar query cannot be compiled or if parser
132    /// language configuration fails.
133    pub fn new() -> Result<Self, HighlightError> {
134        let objectscript_language: tree_sitter::Language =
135            tree_sitter_objectscript::LANGUAGE_OBJECTSCRIPT_PLAYGROUND.into();
136        let mut objectscript = new_config(
137            objectscript_language.clone(),
138            "objectscript",
139            tree_sitter_objectscript::OBJECTSCRIPT_HIGHLIGHTS_QUERY,
140            tree_sitter_objectscript::OBJECTSCRIPT_INJECTIONS_QUERY,
141        )?;
142        let mut sql = new_config(SQL_LANGUAGE.into(), "sql", SQL_HIGHLIGHTS_QUERY, "")?;
143        let mut python = new_config(
144            tree_sitter_python::LANGUAGE.into(),
145            "python",
146            tree_sitter_python::HIGHLIGHTS_QUERY,
147            "",
148        )?;
149        let mut markdown = new_config(
150            MARKDOWN_LANGUAGE.into(),
151            "markdown",
152            MARKDOWN_HIGHLIGHTS_QUERY,
153            MARKDOWN_INJECTIONS_QUERY,
154        )?;
155        let mut markdown_inline = new_config(
156            MARKDOWN_INLINE_LANGUAGE.into(),
157            "markdown_inline",
158            MARKDOWN_INLINE_HIGHLIGHTS_QUERY,
159            MARKDOWN_INLINE_INJECTIONS_QUERY,
160        )?;
161        let objectscript_injection_query = tree_sitter::Query::new(
162            &objectscript_language,
163            tree_sitter_objectscript::OBJECTSCRIPT_INJECTIONS_QUERY,
164        )?;
165        let mut objectscript_injection_content_capture = None;
166        let mut objectscript_injection_language_capture = None;
167        for (idx, name) in objectscript_injection_query
168            .capture_names()
169            .iter()
170            .enumerate()
171        {
172            let idx = Some(idx as u32);
173            match *name {
174                "injection.content" => objectscript_injection_content_capture = idx,
175                "injection.language" => objectscript_injection_language_capture = idx,
176                _ => {}
177            }
178        }
179
180        let mut recognized = Vec::<String>::new();
181        let mut capture_index_by_name = HashMap::<String, usize>::new();
182        for config in [&objectscript, &sql, &python, &markdown, &markdown_inline] {
183            for name in config.names() {
184                if capture_index_by_name.contains_key(*name) {
185                    continue;
186                }
187                let id = recognized.len();
188                let owned = (*name).to_string();
189                capture_index_by_name.insert(owned.clone(), id);
190                recognized.push(owned);
191            }
192        }
193        let recognized_refs = recognized.iter().map(String::as_str).collect::<Vec<_>>();
194        objectscript.configure(&recognized_refs);
195        sql.configure(&recognized_refs);
196        python.configure(&recognized_refs);
197        markdown.configure(&recognized_refs);
198        markdown_inline.configure(&recognized_refs);
199        let attrs = recognized
200            .into_iter()
201            .enumerate()
202            .map(|(id, capture_name)| Attr { id, capture_name })
203            .collect::<Vec<_>>();
204
205        Ok(Self {
206            highlighter: TsHighlighter::new(),
207            attrs,
208            objectscript,
209            sql,
210            python,
211            markdown,
212            markdown_inline,
213            objectscript_injection_query,
214            objectscript_injection_content_capture,
215            objectscript_injection_language_capture,
216        })
217    }
218
219    /// Highlights a source buffer and returns capture attributes plus byte spans.
220    ///
221    /// When `flavor` is [`Grammar::ObjectScript`], language injections are resolved
222    /// and applied to injected regions (for example embedded SQL blocks).
223    ///
224    /// # Errors
225    ///
226    /// Returns an error if Tree-sitter highlighting fails or if injection parsing
227    /// cannot be completed.
228    pub fn highlight(
229        &mut self,
230        source: &[u8],
231        flavor: Grammar,
232    ) -> Result<HighlightResult, HighlightError> {
233        let mut result = self.highlight_base(source, flavor)?;
234        if flavor == Grammar::ObjectScript {
235            self.apply_objectscript_injections(source, &mut result)?;
236        }
237        Ok(result)
238    }
239
240    /// Runs the base Tree-sitter highlight pass for a single grammar.
241    ///
242    /// Unlike [`Self::highlight`], this does not apply post-processing for
243    /// ObjectScript injection regions.
244    ///
245    /// # Errors
246    ///
247    /// Returns an error if Tree-sitter fails to emit highlight events.
248    fn highlight_base(
249        &mut self,
250        source: &[u8],
251        flavor: Grammar,
252    ) -> Result<HighlightResult, HighlightError> {
253        let config = match flavor {
254            Grammar::ObjectScript => &self.objectscript,
255            Grammar::Sql => &self.sql,
256            Grammar::Python => &self.python,
257            Grammar::Markdown => &self.markdown,
258            // InterSystems MDX is OLAP query syntax; use SQL highlighting as a temporary fallback.
259            Grammar::Mdx => &self.sql,
260        };
261
262        let attrs = self.attrs.clone();
263
264        let injections = InjectionConfigs {
265            objectscript: &self.objectscript,
266            sql: &self.sql,
267            python: &self.python,
268            markdown: &self.markdown,
269            markdown_inline: &self.markdown_inline,
270        };
271
272        let events = self
273            .highlighter
274            .highlight(config, source, None, move |language_name| {
275                injections.resolve(language_name)
276            })?;
277        let mut spans = Vec::new();
278        let mut active_stack = Vec::new();
279
280        for event in events {
281            match event? {
282                HighlightEvent::HighlightStart(highlight) => active_stack.push(highlight.0),
283                HighlightEvent::HighlightEnd => {
284                    active_stack.pop();
285                }
286                HighlightEvent::Source { start, end } => {
287                    if let Some(&attr_id) = active_stack.last() {
288                        push_merged(
289                            &mut spans,
290                            Span {
291                                attr_id,
292                                start_byte: start,
293                                end_byte: end,
294                            },
295                        );
296                    }
297                }
298            }
299        }
300
301        Ok(HighlightResult { attrs, spans })
302    }
303
304    /// Highlights line-oriented input by joining lines with `\n`.
305    ///
306    /// # Errors
307    ///
308    /// Returns the same errors as [`Self::highlight`].
309    pub fn highlight_lines<S: AsRef<str>>(
310        &mut self,
311        lines: &[S],
312        flavor: Grammar,
313    ) -> Result<HighlightResult, HighlightError> {
314        let source = lines
315            .iter()
316            .map(AsRef::as_ref)
317            .collect::<Vec<_>>()
318            .join("\n");
319        self.highlight(source.as_bytes(), flavor)
320    }
321
322    /// Replaces ObjectScript injection regions in `base` with injected highlights.
323    ///
324    /// This method removes spans from injected byte ranges and merges spans produced
325    /// by the injected language highlighter.
326    ///
327    /// # Errors
328    ///
329    /// Returns an error if injection discovery or nested highlighting fails.
330    fn apply_objectscript_injections(
331        &mut self,
332        source: &[u8],
333        base: &mut HighlightResult,
334    ) -> Result<(), HighlightError> {
335        let injections = self.find_objectscript_injections(source)?;
336        if injections.is_empty() {
337            return Ok(());
338        }
339
340        let mut attrs = base.attrs.clone();
341        let mut attr_ids_by_name = attrs
342            .iter()
343            .map(|attr| (attr.capture_name.clone(), attr.id))
344            .collect::<HashMap<_, _>>();
345        let mut injected_spans = Vec::new();
346
347        for injection in &injections {
348            let nested_source = &source[injection.start_byte..injection.end_byte];
349            let nested = self.highlight_base(nested_source, injection.grammar)?;
350            let remap = remap_attr_ids(&nested.attrs, &mut attrs, &mut attr_ids_by_name);
351            for span in nested.spans {
352                let Some(&mapped_attr_id) = remap.get(span.attr_id) else {
353                    continue;
354                };
355                injected_spans.push(Span {
356                    attr_id: mapped_attr_id,
357                    start_byte: span.start_byte + injection.start_byte,
358                    end_byte: span.end_byte + injection.start_byte,
359                });
360            }
361        }
362
363        let mut spans = exclude_ranges(
364            &base.spans,
365            &injections
366                .iter()
367                .map(|inj| (inj.start_byte, inj.end_byte))
368                .collect::<Vec<_>>(),
369        );
370        spans.extend(injected_spans);
371
372        base.attrs = attrs;
373        base.spans = normalize_spans(spans);
374        Ok(())
375    }
376
377    /// Finds non-overlapping ObjectScript injection regions in the source buffer.
378    ///
379    /// # Errors
380    ///
381    /// Returns an error if parsing or query execution for injection analysis fails.
382    fn find_objectscript_injections(
383        &self,
384        source: &[u8],
385    ) -> Result<Vec<InjectionRegion>, HighlightError> {
386        let mut parser = tree_sitter::Parser::new();
387        let objectscript_language: tree_sitter::Language =
388            tree_sitter_objectscript::LANGUAGE_OBJECTSCRIPT_PLAYGROUND.into();
389        parser.set_language(&objectscript_language)?;
390        let tree = parser.parse(source, None).ok_or(HighlightError::Parse)?;
391        let mut cursor = tree_sitter::QueryCursor::new();
392
393        let mut injections = Vec::new();
394        let mut matches =
395            cursor.matches(&self.objectscript_injection_query, tree.root_node(), source);
396        while let Some(mat) = matches.next() {
397            let Some(injection) = self.injection_region_for_match(source, &mat) else {
398                continue;
399            };
400            injections.push(injection);
401        }
402
403        if injections.is_empty() {
404            return Ok(injections);
405        }
406
407        injections.sort_by(|a, b| {
408            a.start_byte
409                .cmp(&b.start_byte)
410                .then(b.end_byte.cmp(&a.end_byte))
411                .then((a.grammar as u8).cmp(&(b.grammar as u8)))
412        });
413        injections.dedup_by(|a, b| {
414            a.grammar == b.grammar && a.start_byte == b.start_byte && a.end_byte == b.end_byte
415        });
416
417        let mut non_overlapping = Vec::with_capacity(injections.len());
418        let mut last_end = 0usize;
419        for injection in injections {
420            if injection.start_byte < last_end {
421                continue;
422            }
423            last_end = injection.end_byte;
424            non_overlapping.push(injection);
425        }
426        Ok(non_overlapping)
427    }
428
429    /// Converts a query match to an [`InjectionRegion`] when captures are complete.
430    ///
431    /// Returns `None` when language or content captures are missing, unknown, or empty.
432    fn injection_region_for_match<'a>(
433        &self,
434        source: &'a [u8],
435        mat: &tree_sitter::QueryMatch<'a, 'a>,
436    ) -> Option<InjectionRegion> {
437        let mut language_name = None;
438        let mut content_node = None;
439
440        for capture in mat.captures {
441            let index = Some(capture.index);
442            if index == self.objectscript_injection_language_capture {
443                language_name = capture.node.utf8_text(source).ok();
444            } else if index == self.objectscript_injection_content_capture {
445                content_node = Some(capture.node);
446            }
447        }
448
449        for prop in self
450            .objectscript_injection_query
451            .property_settings(mat.pattern_index)
452        {
453            match prop.key.as_ref() {
454                "injection.language" => {
455                    if language_name.is_none() {
456                        language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
457                    }
458                }
459                "injection.self" | "injection.parent" => {
460                    if language_name.is_none() {
461                        language_name = Some("objectscript");
462                    }
463                }
464                _ => {}
465            }
466        }
467
468        let grammar = language_name.and_then(Grammar::from_name)?;
469        let content_node = content_node?;
470        let start_byte = content_node.start_byte();
471        let end_byte = content_node.end_byte();
472        if start_byte >= end_byte {
473            return None;
474        }
475
476        Some(InjectionRegion {
477            grammar,
478            start_byte,
479            end_byte,
480        })
481    }
482}
483
484struct InjectionConfigs<'a> {
485    objectscript: &'a HighlightConfiguration,
486    sql: &'a HighlightConfiguration,
487    python: &'a HighlightConfiguration,
488    markdown: &'a HighlightConfiguration,
489    markdown_inline: &'a HighlightConfiguration,
490}
491
492impl<'a> InjectionConfigs<'a> {
493    /// Resolves an injected language name to a highlight configuration.
494    ///
495    /// Unknown language names return `None` so Tree-sitter skips injection highlighting.
496    fn resolve(&self, language_name: &str) -> Option<&'a HighlightConfiguration> {
497        let normalized = normalize_language_name(language_name);
498        if normalized == "markdowninline" {
499            return Some(self.markdown_inline);
500        }
501
502        let grammar = grammar_from_normalized_name(&normalized)?;
503        match grammar {
504            Grammar::ObjectScript => Some(self.objectscript),
505            Grammar::Sql => Some(self.sql),
506            Grammar::Python => Some(self.python),
507            Grammar::Markdown => Some(self.markdown),
508            Grammar::Mdx => Some(self.sql),
509        }
510    }
511}
512
/// Reduces a language name to its lowercase ASCII letters and digits.
///
/// Every other character (punctuation, whitespace, non-ASCII text) is dropped.
fn normalize_language_name(input: &str) -> String {
    let mut normalized = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch.is_ascii_alphanumeric() {
            normalized.push(ch.to_ascii_lowercase());
        }
    }
    normalized
}
522
523/// Maps a normalized language name to a supported [`Grammar`].
524fn grammar_from_normalized_name(normalized: &str) -> Option<Grammar> {
525    match normalized {
526        "objectscript" | "os" | "playground" | "objectscriptplayground" => {
527            Some(Grammar::ObjectScript)
528        }
529        "sql" | "tsql" | "plsql" | "mysql" | "postgres" | "postgresql" => Some(Grammar::Sql),
530        "python" | "py" => Some(Grammar::Python),
531        "markdown" | "md" | "gfm" => Some(Grammar::Markdown),
532        "mdx" => Some(Grammar::Mdx),
533        _ => None,
534    }
535}
536
537/// Builds and configures a Tree-sitter highlight configuration.
538///
539/// # Errors
540///
541/// Returns an error when the highlight or injection query is invalid for the
542/// provided language.
543fn new_config(
544    language: tree_sitter::Language,
545    language_name: &str,
546    highlights: &str,
547    injections: &str,
548) -> Result<HighlightConfiguration, tree_sitter::QueryError> {
549    let mut config =
550        HighlightConfiguration::new(language, language_name, highlights, injections, "")?;
551    let recognized = config
552        .names()
553        .iter()
554        .map(|name| (*name).to_string())
555        .collect::<Vec<_>>();
556    let recognized_refs = recognized.iter().map(String::as_str).collect::<Vec<_>>();
557    config.configure(&recognized_refs);
558    Ok(config)
559}
560
561/// Pushes a span into `spans`, merging with the previous span when adjacent and
562/// sharing the same attribute id.
563fn push_merged(spans: &mut Vec<Span>, next: Span) {
564    if next.start_byte >= next.end_byte {
565        return;
566    }
567
568    if let Some(last) = spans.last_mut() {
569        if last.attr_id == next.attr_id && last.end_byte == next.start_byte {
570            last.end_byte = next.end_byte;
571            return;
572        }
573    }
574
575    spans.push(next);
576}
577
578/// Remaps incoming attribute ids to ids in the destination attribute table.
579///
580/// Existing destination ids are reused by capture name; new capture names are appended.
581fn remap_attr_ids(
582    incoming: &[Attr],
583    attrs: &mut Vec<Attr>,
584    attr_ids_by_name: &mut HashMap<String, usize>,
585) -> Vec<usize> {
586    let mut remap = vec![0usize; incoming.len()];
587    for attr in incoming {
588        let mapped_attr_id = if let Some(&mapped_attr_id) = attr_ids_by_name.get(&attr.capture_name)
589        {
590            mapped_attr_id
591        } else {
592            let mapped_attr_id = attrs.len();
593            attrs.push(Attr {
594                id: mapped_attr_id,
595                capture_name: attr.capture_name.clone(),
596            });
597            attr_ids_by_name.insert(attr.capture_name.clone(), mapped_attr_id);
598            mapped_attr_id
599        };
600        if let Some(slot) = remap.get_mut(attr.id) {
601            *slot = mapped_attr_id;
602        }
603    }
604    remap
605}
606
607/// Removes byte `ranges` from `spans`, splitting spans as needed.
608fn exclude_ranges(spans: &[Span], ranges: &[(usize, usize)]) -> Vec<Span> {
609    if ranges.is_empty() {
610        return spans.to_vec();
611    }
612
613    let mut out: Vec<Span> = Vec::with_capacity(spans.len());
614    let mut range_idx = 0usize;
615    for span in spans {
616        while range_idx < ranges.len() && ranges[range_idx].1 <= span.start_byte {
617            range_idx += 1;
618        }
619
620        let mut cursor = span.start_byte;
621        let mut idx = range_idx;
622        while idx < ranges.len() {
623            let (range_start, range_end) = ranges[idx];
624            if range_start >= span.end_byte {
625                break;
626            }
627
628            if range_end <= cursor {
629                idx += 1;
630                continue;
631            }
632
633            if cursor < range_start {
634                push_merged(
635                    &mut out,
636                    Span {
637                        attr_id: span.attr_id,
638                        start_byte: cursor,
639                        end_byte: range_start.min(span.end_byte),
640                    },
641                );
642            }
643
644            if range_end >= span.end_byte {
645                cursor = span.end_byte;
646                break;
647            }
648
649            cursor = range_end;
650            idx += 1;
651        }
652
653        if cursor < span.end_byte {
654            push_merged(
655                &mut out,
656                Span {
657                    attr_id: span.attr_id,
658                    start_byte: cursor,
659                    end_byte: span.end_byte,
660                },
661            );
662        }
663    }
664    out
665}
666
667/// Sorts spans and enforces a non-overlapping, merge-friendly representation.
668fn normalize_spans(mut spans: Vec<Span>) -> Vec<Span> {
669    spans.sort_by(|a, b| {
670        a.start_byte
671            .cmp(&b.start_byte)
672            .then(a.end_byte.cmp(&b.end_byte))
673            .then(a.attr_id.cmp(&b.attr_id))
674    });
675
676    let mut out: Vec<Span> = Vec::with_capacity(spans.len());
677    for mut span in spans {
678        if let Some(last) = out.last() {
679            if span.start_byte < last.end_byte {
680                if span.end_byte <= last.end_byte {
681                    continue;
682                }
683                span.start_byte = last.end_byte;
684            }
685        }
686        push_merged(&mut out, span);
687    }
688    out
689}
690
#[cfg(test)]
mod tests {
    use super::{Grammar, HighlightResult, SpanHighlighter};

    /// Reports whether some span tagged `capture_name` covers exactly `expected_text`.
    fn has_capture_for_text(
        result: &HighlightResult,
        source: &[u8],
        capture_name: &str,
        expected_text: &[u8],
    ) -> bool {
        let Some(wanted) = result
            .attrs
            .iter()
            .find(|attr| attr.capture_name == capture_name)
        else {
            return false;
        };

        result
            .spans
            .iter()
            .filter(|span| span.attr_id == wanted.id)
            .any(|span| &source[span.start_byte..span.end_byte] == expected_text)
    }

    /// An ObjectScript numeric literal is tagged as `number`.
    #[test]
    fn highlights_numeric_literal_as_number() {
        let source = br#"
Class Demo.Highlight
{
  ClassMethod Main()
  {
    set x = 42
  }
}
"#;
        let mut engine = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = engine
            .highlight(source, Grammar::ObjectScript)
            .expect("failed to highlight");

        let found = has_capture_for_text(&highlighted, source, "number", b"42");
        assert!(found, "expected highlighted span for numeric literal");
    }

    /// Canonical names and aliases resolve to the right grammar.
    #[test]
    fn parses_supported_grammar_aliases() {
        let expectations = [
            ("objectscript", Some(Grammar::ObjectScript)),
            ("SQL", Some(Grammar::Sql)),
            ("py", Some(Grammar::Python)),
            ("md", Some(Grammar::Markdown)),
            ("mdx", Some(Grammar::Mdx)),
        ];
        for (name, expected) in expectations {
            assert_eq!(Grammar::from_name(name), expected);
        }
        assert!(Grammar::from_name("unknown").is_none());
    }

    /// SQL keywords are captured as `keyword`.
    #[test]
    fn highlights_sql_keyword() {
        let source = b"SELECT 42 FROM Demo";
        let mut engine = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = engine
            .highlight(source, Grammar::Sql)
            .expect("failed to highlight SQL");

        let found = has_capture_for_text(&highlighted, source, "keyword", b"SELECT");
        assert!(found, "expected SELECT to be highlighted as keyword");
    }

    /// The body of a `%SQLQuery` is highlighted through SQL injection handling.
    #[test]
    fn objectscript_sqlquery_body_is_highlighted_as_sql() {
        let source = br#"
Class Test
{
  Query ListEmployees() As %SQLQuery
  {
SELECT ID,Name FROM Employee
  }
}
"#;
        let mut engine = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = engine
            .highlight(source, Grammar::ObjectScript)
            .expect("failed to highlight ObjectScript with SQL injection");

        let found = has_capture_for_text(&highlighted, source, "keyword", b"SELECT");
        assert!(
            found,
            "expected SQL SELECT in %SQLQuery body to be highlighted as keyword"
        );
    }

    /// Python numeric literals are highlighted as `number`.
    #[test]
    fn highlights_python_number() {
        let source = b"def f(x):\n    return x + 1\n";
        let mut engine = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = engine
            .highlight(source, Grammar::Python)
            .expect("failed to highlight Python");

        let found = has_capture_for_text(&highlighted, source, "number", b"1");
        assert!(found, "expected numeric literal to be highlighted in Python");
    }

    /// Markdown heading text is captured as `text.title`.
    #[test]
    fn highlights_markdown_heading() {
        let source = b"# Heading\n";
        let mut engine = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = engine
            .highlight(source, Grammar::Markdown)
            .expect("failed to highlight Markdown");

        let found = has_capture_for_text(&highlighted, source, "text.title", b"Heading");
        assert!(found, "expected heading text to be highlighted in Markdown");
    }

    /// MDX currently falls back to SQL keyword highlighting.
    #[test]
    fn mdx_falls_back_to_sql_keyword_highlighting() {
        let source = b"SELECT 1 FROM Cube";
        let mut engine = SpanHighlighter::new().expect("failed to build highlighter");
        let highlighted = engine
            .highlight(source, Grammar::Mdx)
            .expect("failed to highlight MDX fallback");

        let found = has_capture_for_text(&highlighted, source, "keyword", b"SELECT");
        assert!(found, "expected MDX fallback to highlight SQL keywords");
    }
}
836}