typst_library/text/
raw.rs

1use std::cell::LazyCell;
2use std::ops::Range;
3use std::sync::{Arc, LazyLock};
4
5use comemo::Tracked;
6use ecow::{eco_format, EcoString, EcoVec};
7use syntect::highlighting as synt;
8use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
9use typst_syntax::{split_newlines, LinkedNode, Span, Spanned};
10use typst_utils::ManuallyHash;
11use unicode_segmentation::UnicodeSegmentation;
12
13use super::Lang;
14use crate::diag::{At, FileError, SourceResult, StrResult};
15use crate::engine::Engine;
16use crate::foundations::{
17    cast, elem, scope, Bytes, Content, Derived, NativeElement, OneOrMultiple, Packed,
18    PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem,
19};
20use crate::html::{tag, HtmlElem};
21use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
22use crate::loading::{DataSource, Load};
23use crate::model::{Figurable, ParElem};
24use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
25use crate::visualize::Color;
26use crate::World;
27
/// Raw text with optional syntax highlighting.
///
/// Displays the text verbatim and in a monospace font. This is typically used
/// to embed computer code into your document.
///
/// # Example
/// ````example
/// Adding `rbx` to `rcx` gives
/// the desired result.
///
/// What is ```rust fn main()``` in Rust
/// would be ```c int main()``` in C.
///
/// ```rust
/// fn main() {
///     println!("Hello World!");
/// }
/// ```
///
/// This has ``` `backticks` ``` in it
/// (but the spaces are trimmed). And
/// ``` here``` the leading space is
/// also trimmed.
/// ````
///
/// You can also construct a [`raw`] element programmatically from a string (and
/// provide the language tag via the optional [`lang`]($raw.lang) argument).
/// ```example
/// #raw("fn " + "main() {}", lang: "rust")
/// ```
///
/// # Syntax
/// This function also has dedicated syntax. You can enclose text in 1 or 3+
/// backticks (`` ` ``) to make it raw. Two backticks produce empty raw text.
/// This works both in markup and code.
///
/// When you use three or more backticks, you can additionally specify a
/// language tag for syntax highlighting directly after the opening backticks.
/// Within raw blocks, everything (except for the language tag, if applicable)
/// is rendered as is, in particular, there are no escape sequences.
///
/// The language tag is an identifier that directly follows the opening
/// backticks only if there are three or more backticks. If your text starts
/// with something that looks like an identifier, but no syntax highlighting is
/// needed, start the text with a single space (which will be trimmed) or use
/// the single backtick syntax. If your text should start or end with a
/// backtick, put a space before or after it (it will be trimmed).
#[elem(
    scope,
    title = "Raw Text / Code",
    Synthesize,
    Show,
    ShowSet,
    LocalName,
    Figurable,
    PlainText
)]
pub struct RawElem {
    /// The raw text.
    ///
    /// You can also use raw blocks creatively to create custom syntaxes for
    /// your automations.
    ///
    /// ````example
    /// // Parse numbers in raw blocks with the
    /// // `mydsl` tag and sum them up.
    /// #show raw.where(lang: "mydsl"): it => {
    ///   let sum = 0
    ///   for part in it.text.split("+") {
    ///     sum += int(part.trim())
    ///   }
    ///   sum
    /// }
    ///
    /// ```mydsl
    /// 1 + 2 + 3 + 4 + 5
    /// ```
    /// ````
    #[required]
    pub text: RawContent,

    /// Whether the raw text is displayed as a separate block.
    ///
    /// In markup mode, using one-backtick notation makes this `{false}`.
    /// Using three-backtick notation makes it `{true}` if the enclosed content
    /// contains at least one line break.
    ///
    /// ````example
    /// // Display inline code in a small box
    /// // that retains the correct baseline.
    /// #show raw.where(block: false): box.with(
    ///   fill: luma(240),
    ///   inset: (x: 3pt, y: 0pt),
    ///   outset: (y: 3pt),
    ///   radius: 2pt,
    /// )
    ///
    /// // Display block code in a larger block
    /// // with more padding.
    /// #show raw.where(block: true): block.with(
    ///   fill: luma(240),
    ///   inset: 10pt,
    ///   radius: 4pt,
    /// )
    ///
    /// With `rg`, you can search through your files quickly.
    /// This example searches the current directory recursively
    /// for the text `Hello World`:
    ///
    /// ```bash
    /// rg "Hello World"
    /// ```
    /// ````
    #[default(false)]
    pub block: bool,

    /// The language to syntax-highlight in.
    ///
    /// Apart from typical language tags known from Markdown, this supports the
    /// `{"typ"}`, `{"typc"}`, and `{"typm"}` tags for
    /// [Typst markup]($reference/syntax/#markup),
    /// [Typst code]($reference/syntax/#code), and
    /// [Typst math]($reference/syntax/#math), respectively.
    ///
    /// ````example
    /// ```typ
    /// This is *Typst!*
    /// ```
    ///
    /// This is ```typ also *Typst*```, but inline!
    /// ````
    #[borrowed]
    pub lang: Option<EcoString>,

    /// The horizontal alignment that each line in a raw block should have.
    /// This option is ignored if this is not a raw block (if `block: false`
    /// was specified or single backticks were used in markup mode).
    ///
    /// By default, this is set to `{start}`, meaning that raw text is
    /// aligned towards the start of the text direction inside the block,
    /// regardless of the current context's alignment (allowing you to
    /// center the raw block itself without centering the text inside it,
    /// for example).
    ///
    /// ````example
    /// #set raw(align: center)
    ///
    /// ```typc
    /// let f(x) = x
    /// code = "centered"
    /// ```
    /// ````
    #[default(HAlignment::Start)]
    pub align: HAlignment,

    /// Additional syntax definitions to load. The syntax definitions should be
    /// in the [`sublime-syntax` file format](https://www.sublimetext.com/docs/syntax.html).
    ///
    /// You can pass any of the following values:
    ///
    /// - A path string to load a syntax file from the given path. For more
    ///   details about paths, see the [Paths section]($syntax/#paths).
    /// - Raw bytes from which the syntax should be decoded.
    /// - An array where each item is one of the above.
    ///
    /// ````example
    /// #set raw(syntaxes: "SExpressions.sublime-syntax")
    ///
    /// ```sexp
    /// (defun factorial (x)
    ///   (if (zerop x)
    ///     ; with a comment
    ///     1
    ///     (* x (factorial (- x 1)))))
    /// ```
    /// ````
    #[parse(match args.named("syntaxes")? {
        Some(sources) => Some(RawSyntax::load(engine.world, sources)?),
        None => None,
    })]
    #[fold]
    pub syntaxes: Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>,

    /// The theme to use for syntax highlighting. Themes should be in the
    /// [`tmTheme` file format](https://www.sublimetext.com/docs/color_schemes_tmtheme.html).
    ///
    /// You can pass any of the following values:
    ///
    /// - `{none}`: Disables syntax highlighting.
    /// - `{auto}`: Highlights with Typst's default theme.
    /// - A path string to load a theme file from the given path. For more
    ///   details about paths, see the [Paths section]($syntax/#paths).
    /// - Raw bytes from which the theme should be decoded.
    ///
    /// Applying a theme only affects the color of specifically highlighted
    /// text. It does not consider the theme's foreground and background
    /// properties, so that you retain control over the color of raw text. You
    /// can apply the foreground color yourself with the [`text`] function and
    /// the background with a [filled block]($block.fill). You could also use
    /// the [`xml`] function to extract these properties from the theme.
    ///
    /// ````example
    /// #set raw(theme: "halcyon.tmTheme")
    /// #show raw: it => block(
    ///   fill: rgb("#1d2433"),
    ///   inset: 8pt,
    ///   radius: 5pt,
    ///   text(fill: rgb("#a2aabc"), it)
    /// )
    ///
    /// ```typ
    /// = Chapter 1
    /// #let hi = "Hello World"
    /// ```
    /// ````
    #[parse(match args.named::<Spanned<Smart<Option<DataSource>>>>("theme")? {
        Some(Spanned { v: Smart::Custom(Some(source)), span }) => Some(Smart::Custom(
            Some(RawTheme::load(engine.world, Spanned::new(source, span))?)
        )),
        Some(Spanned { v: Smart::Custom(None), .. }) => Some(Smart::Custom(None)),
        Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
        None => None,
    })]
    #[borrowed]
    pub theme: Smart<Option<Derived<DataSource, RawTheme>>>,

    /// The size of a tab stop in spaces. A tab is replaced with enough spaces
    /// to align with the next multiple of the size.
    ///
    /// ````example
    /// #set raw(tab-size: 8)
    /// ```tsv
    /// Year	Month	Day
    /// 2000	2	3
    /// 2001	2	1
    /// 2002	3	10
    /// ```
    /// ````
    #[default(2)]
    pub tab_size: usize,

    /// The stylized lines of raw text.
    ///
    /// Made accessible for the [`raw.line` element]($raw.line).
    /// Allows more styling control in `show` rules.
    #[synthesized]
    pub lines: Vec<Packed<RawLine>>,
}
276
// Scope of the `raw` element. It contains the `RawLine` element, which is
// declared with `name = "line"` and referred to as `raw.line` in the docs.
#[scope]
impl RawElem {
    #[elem]
    type RawLine;
}
282
283impl RawElem {
284    /// The supported language names and tags.
285    pub fn languages() -> Vec<(&'static str, Vec<&'static str>)> {
286        RAW_SYNTAXES
287            .syntaxes()
288            .iter()
289            .map(|syntax| {
290                (
291                    syntax.name.as_str(),
292                    syntax.file_extensions.iter().map(|s| s.as_str()).collect(),
293                )
294            })
295            .chain([
296                ("Typst", vec!["typ"]),
297                ("Typst (code)", vec!["typc"]),
298                ("Typst (math)", vec!["typm"]),
299            ])
300            .collect()
301    }
302}
303
304impl Synthesize for Packed<RawElem> {
305    fn synthesize(&mut self, _: &mut Engine, styles: StyleChain) -> SourceResult<()> {
306        let seq = self.highlight(styles);
307        self.push_lines(seq);
308        Ok(())
309    }
310}
311
312impl Packed<RawElem> {
313    #[comemo::memoize]
314    fn highlight(&self, styles: StyleChain) -> Vec<Packed<RawLine>> {
315        let elem = self.as_ref();
316        let lines = preprocess(&elem.text, styles, self.span());
317
318        let count = lines.len() as i64;
319        let lang = elem
320            .lang(styles)
321            .as_ref()
322            .as_ref()
323            .map(|s| s.to_lowercase())
324            .or(Some("txt".into()));
325
326        let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| {
327            lines.into_iter().enumerate().map(|(i, (line, line_span))| {
328                Packed::new(RawLine::new(
329                    i as i64 + 1,
330                    count,
331                    line.clone(),
332                    TextElem::packed(line).spanned(line_span),
333                ))
334                .spanned(line_span)
335            })
336        };
337
338        let syntaxes = LazyCell::new(|| elem.syntaxes(styles));
339        let theme: &synt::Theme = match elem.theme(styles) {
340            Smart::Auto => &RAW_THEME,
341            Smart::Custom(Some(theme)) => theme.derived.get(),
342            Smart::Custom(None) => return non_highlighted_result(lines).collect(),
343        };
344
345        let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK);
346
347        let mut seq = vec![];
348        if matches!(lang.as_deref(), Some("typ" | "typst" | "typc" | "typm")) {
349            let text =
350                lines.iter().map(|(s, _)| s.clone()).collect::<Vec<_>>().join("\n");
351            let root = match lang.as_deref() {
352                Some("typc") => typst_syntax::parse_code(&text),
353                Some("typm") => typst_syntax::parse_math(&text),
354                _ => typst_syntax::parse(&text),
355            };
356
357            ThemedHighlighter::new(
358                &text,
359                LinkedNode::new(&root),
360                synt::Highlighter::new(theme),
361                &mut |i, _, range, style| {
362                    // Find span and start of line.
363                    // Note: Dedent is already applied to the text
364                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
365                    let span_offset = text[..range.start]
366                        .rfind('\n')
367                        .map_or(0, |i| range.start - (i + 1));
368                    styled(&text[range], foreground, style, span, span_offset)
369                },
370                &mut |i, range, line| {
371                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
372                    seq.push(
373                        Packed::new(RawLine::new(
374                            (i + 1) as i64,
375                            count,
376                            EcoString::from(&text[range]),
377                            Content::sequence(line.drain(..)),
378                        ))
379                        .spanned(span),
380                    );
381                },
382            )
383            .highlight();
384        } else if let Some((syntax_set, syntax)) = lang.and_then(|token| {
385            // Prefer user-provided syntaxes over built-in ones.
386            syntaxes
387                .derived
388                .iter()
389                .map(|syntax| syntax.get())
390                .chain(std::iter::once(&*RAW_SYNTAXES))
391                .find_map(|set| {
392                    set.find_syntax_by_token(&token).map(|syntax| (set, syntax))
393                })
394        }) {
395            let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
396            for (i, (line, line_span)) in lines.into_iter().enumerate() {
397                let mut line_content = vec![];
398                let mut span_offset = 0;
399                for (style, piece) in highlighter
400                    .highlight_line(line.as_str(), syntax_set)
401                    .into_iter()
402                    .flatten()
403                {
404                    line_content.push(styled(
405                        piece,
406                        foreground,
407                        style,
408                        line_span,
409                        span_offset,
410                    ));
411                    span_offset += piece.len();
412                }
413
414                seq.push(
415                    Packed::new(RawLine::new(
416                        i as i64 + 1,
417                        count,
418                        line,
419                        Content::sequence(line_content),
420                    ))
421                    .spanned(line_span),
422                );
423            }
424        } else {
425            seq.extend(non_highlighted_result(lines));
426        };
427
428        seq
429    }
430}
431
432impl Show for Packed<RawElem> {
433    #[typst_macros::time(name = "raw", span = self.span())]
434    fn show(&self, _: &mut Engine, styles: StyleChain) -> SourceResult<Content> {
435        let lines = self.lines().map(|v| v.as_slice()).unwrap_or_default();
436
437        let mut seq = EcoVec::with_capacity((2 * lines.len()).saturating_sub(1));
438        for (i, line) in lines.iter().enumerate() {
439            if i != 0 {
440                seq.push(LinebreakElem::shared().clone());
441            }
442
443            seq.push(line.clone().pack());
444        }
445
446        let mut realized = Content::sequence(seq);
447
448        if TargetElem::target_in(styles).is_html() {
449            return Ok(HtmlElem::new(if self.block(styles) {
450                tag::pre
451            } else {
452                tag::code
453            })
454            .with_body(Some(realized))
455            .pack()
456            .spanned(self.span()));
457        }
458
459        if self.block(styles) {
460            // Align the text before inserting it into the block.
461            realized = realized.aligned(self.align(styles).into());
462            realized = BlockElem::new()
463                .with_body(Some(BlockBody::Content(realized)))
464                .pack()
465                .spanned(self.span());
466        }
467
468        Ok(realized)
469    }
470}
471
472impl ShowSet for Packed<RawElem> {
473    fn show_set(&self, styles: StyleChain) -> Styles {
474        let mut out = Styles::new();
475        out.set(TextElem::set_overhang(false));
476        out.set(TextElem::set_lang(Lang::ENGLISH));
477        out.set(TextElem::set_hyphenate(Smart::Custom(false)));
478        out.set(TextElem::set_size(TextSize(Em::new(0.8).into())));
479        out.set(TextElem::set_font(FontList(vec![FontFamily::new("DejaVu Sans Mono")])));
480        out.set(TextElem::set_cjk_latin_spacing(Smart::Custom(None)));
481        if self.block(styles) {
482            out.set(ParElem::set_justify(false));
483        }
484        out
485    }
486}
487
// NOTE(review): `KEY` is presumably the lookup key for the element's
// localized name (e.g. in figure captions) — confirm against `LocalName`.
impl LocalName for Packed<RawElem> {
    const KEY: &'static str = "raw";
}
491
// Marker impl without any items; it matches the `Figurable` capability listed
// in the `#[elem(...)]` attribute of `RawElem`.
impl Figurable for Packed<RawElem> {}
493
494impl PlainText for Packed<RawElem> {
495    fn plain_text(&self, text: &mut EcoString) {
496        text.push_str(&self.text.get());
497    }
498}
499
/// The content of the raw text.
///
/// Either a single string, or a list of lines where every line carries its
/// own span. `RawContent::get` joins the lines back into one string.
#[derive(Debug, Clone, Hash, PartialEq)]
pub enum RawContent {
    /// From a string.
    Text(EcoString),
    /// From lines of text, each paired with its span.
    Lines(EcoVec<(EcoString, Span)>),
}
508
509impl RawContent {
510    /// Returns or synthesizes the text content of the raw text.
511    fn get(&self) -> EcoString {
512        match self.clone() {
513            RawContent::Text(text) => text,
514            RawContent::Lines(lines) => {
515                let mut lines = lines.into_iter().map(|(s, _)| s);
516                if lines.len() <= 1 {
517                    lines.next().unwrap_or_default()
518                } else {
519                    lines.collect::<Vec<_>>().join("\n").into()
520                }
521            }
522        }
523    }
524}
525
// Conversions between `RawContent` and Typst values: converting into a value
// uses the joined text from `get`, and a string is cast to the `Text` variant.
cast! {
    RawContent,
    self => self.get().into_value(),
    v: EcoString => Self::Text(v),
}
531
/// A loaded syntax.
///
/// Wraps a syntect `SyntaxSet` in a shared `Arc`. The set is stored in a
/// `ManuallyHash` along with a 128-bit hash of the data it was decoded from
/// (see `RawSyntax::decode`).
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawSyntax(Arc<ManuallyHash<SyntaxSet>>);
535
536impl RawSyntax {
537    /// Load syntaxes from sources.
538    fn load(
539        world: Tracked<dyn World + '_>,
540        sources: Spanned<OneOrMultiple<DataSource>>,
541    ) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
542        let data = sources.load(world)?;
543        let list = sources
544            .v
545            .0
546            .iter()
547            .zip(&data)
548            .map(|(source, data)| Self::decode(source, data))
549            .collect::<StrResult<_>>()
550            .at(sources.span)?;
551        Ok(Derived::new(sources.v, list))
552    }
553
554    /// Decode a syntax from a loaded source.
555    #[comemo::memoize]
556    #[typst_macros::time(name = "load syntaxes")]
557    fn decode(source: &DataSource, data: &Bytes) -> StrResult<RawSyntax> {
558        let src = data.as_str().map_err(FileError::from)?;
559        let syntax = SyntaxDefinition::load_from_str(src, false, None).map_err(
560            |err| match source {
561                DataSource::Path(path) => {
562                    eco_format!("failed to parse syntax file `{path}` ({err})")
563                }
564                DataSource::Bytes(_) => {
565                    eco_format!("failed to parse syntax ({err})")
566                }
567            },
568        )?;
569
570        let mut builder = SyntaxSetBuilder::new();
571        builder.add(syntax);
572
573        Ok(RawSyntax(Arc::new(ManuallyHash::new(
574            builder.build(),
575            typst_utils::hash128(data),
576        ))))
577    }
578
579    /// Return the underlying syntax set.
580    fn get(&self) -> &SyntaxSet {
581        self.0.as_ref()
582    }
583}
584
/// A loaded syntect theme.
///
/// The theme is shared through an `Arc` and stored in a `ManuallyHash` along
/// with a 128-bit hash of the data it was decoded from (see
/// `RawTheme::decode`).
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
588
589impl RawTheme {
590    /// Load a theme from a data source.
591    fn load(
592        world: Tracked<dyn World + '_>,
593        source: Spanned<DataSource>,
594    ) -> SourceResult<Derived<DataSource, Self>> {
595        let data = source.load(world)?;
596        let theme = Self::decode(&data).at(source.span)?;
597        Ok(Derived::new(source.v, theme))
598    }
599
600    /// Decode a theme from bytes.
601    #[comemo::memoize]
602    fn decode(data: &Bytes) -> StrResult<RawTheme> {
603        let mut cursor = std::io::Cursor::new(data.as_slice());
604        let theme = synt::ThemeSet::load_from_reader(&mut cursor)
605            .map_err(|err| eco_format!("failed to parse theme ({err})"))?;
606        Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data)))))
607    }
608
609    /// Get the underlying syntect theme.
610    pub fn get(&self) -> &synt::Theme {
611        self.0.as_ref()
612    }
613}
614
/// A highlighted line of raw text.
///
/// This is a helper element that is synthesized by [`raw`] elements.
///
/// It allows you to access various properties of the line, such as the line
/// number, the raw non-highlighted text, and the highlighted text. Together
/// with the total line count, the line number also tells you whether the line
/// is the first or last line of the raw block.
#[elem(name = "line", title = "Raw Text / Code Line", Show, PlainText)]
pub struct RawLine {
    /// The line number of the raw line inside of the raw block, starting
    /// at 1.
    #[required]
    pub number: i64,

    /// The total number of lines in the raw block.
    #[required]
    pub count: i64,

    /// The line of raw text.
    #[required]
    pub text: EcoString,

    /// The highlighted raw text.
    #[required]
    pub body: Content,
}
640
641impl Show for Packed<RawLine> {
642    #[typst_macros::time(name = "raw.line", span = self.span())]
643    fn show(&self, _: &mut Engine, _styles: StyleChain) -> SourceResult<Content> {
644        Ok(self.body.clone())
645    }
646}
647
648impl PlainText for Packed<RawLine> {
649    fn plain_text(&self, text: &mut EcoString) {
650        text.push_str(&self.text);
651    }
652}
653
/// Wrapper struct for the state required to highlight typst code.
///
/// The highlighter walks the syntax tree below `node`, styles every leaf
/// through `style_fn` and hands each finished line to `line_fn`.
struct ThemedHighlighter<'a> {
    /// The code being highlighted.
    code: &'a str,
    /// The current node being highlighted.
    node: LinkedNode<'a>,
    /// The highlighter.
    highlighter: synt::Highlighter<'a>,
    /// The current scopes: one entry per ancestor (and the node itself) that
    /// has a highlighting tag.
    scopes: Vec<syntect::parsing::Scope>,
    /// The styled pieces of the line that is currently being built.
    current_line: Vec<Content>,
    /// The byte range of the current line within `code`.
    range: Range<usize>,
    /// The zero-based index of the current line.
    line: usize,
    /// The function to style a piece of text.
    style_fn: StyleFn<'a>,
    /// The function to append a line.
    line_fn: LineFn<'a>,
}
675
// Shorthands for highlighter closures.
//
// `StyleFn` is called for every piece of a leaf node with the zero-based line
// index, the node, the byte range of the piece within the code and the style
// to apply. It returns the styled content.
//
// `LineFn` is called for every finished line with the zero-based line index,
// the byte range of the line within the code and the pieces collected for it.
type StyleFn<'a> =
    &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
680
681impl<'a> ThemedHighlighter<'a> {
682    pub fn new(
683        code: &'a str,
684        top: LinkedNode<'a>,
685        highlighter: synt::Highlighter<'a>,
686        style_fn: StyleFn<'a>,
687        line_fn: LineFn<'a>,
688    ) -> Self {
689        Self {
690            code,
691            node: top,
692            highlighter,
693            range: 0..0,
694            scopes: Vec::new(),
695            current_line: Vec::new(),
696            line: 0,
697            style_fn,
698            line_fn,
699        }
700    }
701
702    pub fn highlight(&mut self) {
703        self.highlight_inner();
704
705        if !self.current_line.is_empty() {
706            (self.line_fn)(
707                self.line,
708                self.range.start..self.code.len(),
709                &mut self.current_line,
710            );
711
712            self.current_line.clear();
713        }
714    }
715
716    fn highlight_inner(&mut self) {
717        if self.node.children().len() == 0 {
718            let style = self.highlighter.style_for_stack(&self.scopes);
719            let segment = &self.code[self.node.range()];
720
721            let mut len = 0;
722            for (i, line) in split_newlines(segment).into_iter().enumerate() {
723                if i != 0 {
724                    (self.line_fn)(
725                        self.line,
726                        self.range.start..self.range.end + len - 1,
727                        &mut self.current_line,
728                    );
729                    self.range.start = self.range.end + len;
730                    self.line += 1;
731                }
732
733                let offset = self.node.range().start + len;
734                let token_range = offset..(offset + line.len());
735                self.current_line.push((self.style_fn)(
736                    self.line,
737                    &self.node,
738                    token_range,
739                    style,
740                ));
741
742                len += line.len() + 1;
743            }
744
745            self.range.end += segment.len();
746        }
747
748        for child in self.node.children() {
749            let mut scopes = self.scopes.clone();
750            if let Some(tag) = typst_syntax::highlight(&child) {
751                scopes.push(syntect::parsing::Scope::new(tag.tm_scope()).unwrap())
752            }
753
754            std::mem::swap(&mut scopes, &mut self.scopes);
755            self.node = child;
756            self.highlight_inner();
757            std::mem::swap(&mut scopes, &mut self.scopes);
758        }
759    }
760}
761
762fn preprocess(
763    text: &RawContent,
764    styles: StyleChain,
765    span: Span,
766) -> EcoVec<(EcoString, Span)> {
767    if let RawContent::Lines(lines) = text {
768        if lines.iter().all(|(s, _)| !s.contains('\t')) {
769            return lines.clone();
770        }
771    }
772
773    let mut text = text.get();
774    if text.contains('\t') {
775        let tab_size = RawElem::tab_size_in(styles);
776        text = align_tabs(&text, tab_size);
777    }
778    split_newlines(&text)
779        .into_iter()
780        .map(|line| (line.into(), span))
781        .collect()
782}
783
784/// Style a piece of text with a syntect style.
785fn styled(
786    piece: &str,
787    foreground: synt::Color,
788    style: synt::Style,
789    span: Span,
790    span_offset: usize,
791) -> Content {
792    let mut body = TextElem::packed(piece).spanned(span);
793
794    if span_offset > 0 {
795        body = body.styled(TextElem::set_span_offset(span_offset));
796    }
797
798    if style.foreground != foreground {
799        body = body.styled(TextElem::set_fill(to_typst(style.foreground).into()));
800    }
801
802    if style.font_style.contains(synt::FontStyle::BOLD) {
803        body = body.strong().spanned(span);
804    }
805
806    if style.font_style.contains(synt::FontStyle::ITALIC) {
807        body = body.emph().spanned(span);
808    }
809
810    if style.font_style.contains(synt::FontStyle::UNDERLINE) {
811        body = body.underlined().spanned(span);
812    }
813
814    body
815}
816
817fn to_typst(synt::Color { r, g, b, a }: synt::Color) -> Color {
818    Color::from_u8(r, g, b, a)
819}
820
821fn to_syn(color: Color) -> synt::Color {
822    let [r, g, b, a] = color.to_rgb().to_vec4_u8();
823    synt::Color { r, g, b, a }
824}
825
826/// Create a syntect theme item.
827fn item(
828    scope: &str,
829    color: Option<&str>,
830    font_style: Option<synt::FontStyle>,
831) -> synt::ThemeItem {
832    synt::ThemeItem {
833        scope: scope.parse().unwrap(),
834        style: synt::StyleModifier {
835            foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())),
836            background: None,
837            font_style,
838        },
839    }
840}
841
842/// Replace tabs with spaces to align with multiples of `tab_size`.
843fn align_tabs(text: &str, tab_size: usize) -> EcoString {
844    let replacement = " ".repeat(tab_size);
845    let divisor = tab_size.max(1);
846    let amount = text.chars().filter(|&c| c == '\t').count();
847
848    let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size);
849    let mut column = 0;
850
851    for grapheme in text.graphemes(true) {
852        match grapheme {
853            "\t" => {
854                let required = tab_size - column % divisor;
855                res.push_str(&replacement[..required]);
856                column += required;
857            }
858            "\n" => {
859                res.push_str(grapheme);
860                column = 0;
861            }
862            _ => {
863                res.push_str(grapheme);
864                column += 1;
865            }
866        }
867    }
868
869    res
870}
871
/// The syntect syntax definitions.
///
/// Syntax set is generated from the syntaxes from the `bat` project
/// <https://github.com/sharkdp/bat/tree/master/assets/syntaxes>
///
/// The set is created lazily on first access through
/// `two_face::syntax::extra_no_newlines`.
pub static RAW_SYNTAXES: LazyLock<syntect::parsing::SyntaxSet> =
    LazyLock::new(two_face::syntax::extra_no_newlines);
878
/// The default theme used for syntax highlighting.
///
/// The theme is built lazily on first access. Its settings are left at their
/// defaults; only the scope rules below assign colors and font styles.
pub static RAW_THEME: LazyLock<synt::Theme> = LazyLock::new(|| synt::Theme {
    name: Some("Typst Light".into()),
    author: Some("The Typst Project Developers".into()),
    settings: synt::ThemeSettings::default(),
    // Each entry maps a scope selector to an optional foreground color and an
    // optional font style (see `item`).
    scopes: vec![
        item("comment", Some("#8a8a8a"), None),
        item("constant.character.escape", Some("#1d6c76"), None),
        item("markup.bold", None, Some(synt::FontStyle::BOLD)),
        item("markup.italic", None, Some(synt::FontStyle::ITALIC)),
        item("markup.underline", None, Some(synt::FontStyle::UNDERLINE)),
        item("markup.raw", Some("#818181"), None),
        item("string.other.math.typst", None, None),
        item("punctuation.definition.math", Some("#298e0d"), None),
        item("keyword.operator.math", Some("#1d6c76"), None),
        item("markup.heading, entity.name.section", None, Some(synt::FontStyle::BOLD)),
        item(
            "markup.heading.typst",
            None,
            Some(synt::FontStyle::BOLD | synt::FontStyle::UNDERLINE),
        ),
        item("punctuation.definition.list", Some("#8b41b1"), None),
        item("markup.list.term", None, Some(synt::FontStyle::BOLD)),
        item("entity.name.label, markup.other.reference", Some("#1d6c76"), None),
        item("keyword, constant.language, variable.language", Some("#d73a49"), None),
        item("storage.type, storage.modifier", Some("#d73a49"), None),
        item("constant", Some("#b60157"), None),
        item("string", Some("#298e0d"), None),
        item("entity.name, variable.function, support", Some("#4b69c6"), None),
        item("support.macro", Some("#16718d"), None),
        item("meta.annotation", Some("#301414"), None),
        item("entity.other, meta.interpolation", Some("#8b41b1"), None),
        item("meta.diff.range", Some("#8b41b1"), None),
        item("markup.inserted, meta.diff.header.to-file", Some("#298e0d"), None),
        item("markup.deleted, meta.diff.header.from-file", Some("#d73a49"), None),
    ],
});