typst_library/text/
raw.rs

1use std::cell::LazyCell;
2use std::ops::Range;
3use std::sync::{Arc, LazyLock};
4
5use comemo::Tracked;
6use ecow::{EcoString, EcoVec};
7use syntect::highlighting::{self as synt};
8use syntect::parsing::{ParseSyntaxError, SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
9use typst_syntax::{LinkedNode, Span, Spanned, split_newlines};
10use typst_utils::ManuallyHash;
11use unicode_segmentation::UnicodeSegmentation;
12
13use super::Lang;
14use crate::World;
15use crate::diag::{
16    LineCol, LoadError, LoadResult, LoadedWithin, ReportPos, SourceResult,
17};
18use crate::engine::Engine;
19use crate::foundations::{
20    Bytes, Content, Derived, OneOrMultiple, Packed, PlainText, ShowSet, Smart,
21    StyleChain, Styles, Synthesize, Target, TargetElem, cast, elem, scope,
22};
23use crate::introspection::{Locatable, Tagged};
24use crate::layout::{Em, HAlignment};
25use crate::loading::{DataSource, Load};
26use crate::model::{Figurable, ParElem};
27use crate::routines::Routines;
28use crate::text::{FontFamily, FontList, LocalName, TextElem, TextSize};
29use crate::visualize::Color;
30
31/// Raw text with optional syntax highlighting.
32///
33/// Displays the text verbatim and in a monospace font. This is typically used
34/// to embed computer code into your document.
35///
36/// Note that text given to this element cannot contain arbitrary formatting,
37/// such as `[*strong*]` or `[_emphasis_]`, as it is displayed verbatim. If
38/// you'd like to display any kind of content with a monospace font, instead of
39/// using [`raw`], you should change its font to a monospace font using the
40/// [`text`]($text) function.
41///
42/// # Example
43/// ````example
44/// Adding `rbx` to `rcx` gives
45/// the desired result.
46///
47/// What is ```rust fn main()``` in Rust
48/// would be ```c int main()``` in C.
49///
50/// ```rust
51/// fn main() {
52///     println!("Hello World!");
53/// }
54/// ```
55///
56/// This has ``` `backticks` ``` in it
57/// (but the spaces are trimmed). And
58/// ``` here``` the leading space is
59/// also trimmed.
60/// ````
61///
62/// You can also construct a [`raw`] element programmatically from a string (and
63/// provide the language tag via the optional [`lang`]($raw.lang) argument).
64/// ```example
65/// #raw("fn " + "main() {}", lang: "rust")
66/// ```
67///
68/// # Syntax
69/// This function also has dedicated syntax. You can enclose text in 1 or 3+
70/// backticks (`` ` ``) to make it raw. Two backticks produce empty raw text.
71/// This works both in markup and code.
72///
73/// When you use three or more backticks, you can additionally specify a
74/// language tag for syntax highlighting directly after the opening backticks.
75/// Within raw blocks, everything (except for the language tag, if applicable)
76/// is rendered as is, in particular, there are no escape sequences.
77///
78/// The language tag is an identifier that directly follows the opening
79/// backticks only if there are three or more backticks. If your text starts
80/// with something that looks like an identifier, but no syntax highlighting is
81/// needed, start the text with a single space (which will be trimmed) or use
82/// the single backtick syntax. If your text should start or end with a
83/// backtick, put a space before or after it (it will be trimmed).
84///
85/// If no syntax highlighting is available by default for your specified
86/// language tag (or if you want to override the built-in definition), you may
87/// provide a custom syntax specification file to the
88/// [`syntaxes`]($raw.syntaxes) field.
89///
90/// # Styling
91/// By default, the `raw` element uses the `DejaVu Sans Mono` font (included
92/// with Typst), with a smaller font size of `{0.8em}` (that is, 80% of
93/// the global font size). This is because monospace fonts tend to be visually
94/// larger than non-monospace fonts.
95///
96/// You can customize these properties with show-set rules:
97///
98/// ````example
99/// // Switch to Cascadia Code for both
100/// // inline and block raw.
101/// #show raw: set text(font: "Cascadia Code")
102///
103/// // Reset raw blocks to the same size as normal text,
104/// // but keep inline raw at the reduced size.
105/// #show raw.where(block: true): set text(1em / 0.8)
106///
107/// Now using the `Cascadia Code` font for raw text.
108/// Here's some Python code. It looks larger now:
109///
110/// ```py
111/// def python():
112///   return 5 + 5
113/// ```
114/// ````
115///
116/// In addition, you can customize the syntax highlighting colors by setting
117/// a custom theme through the [`theme`]($raw.theme) field.
118///
119/// For complete customization of the appearance of a raw block, a show rule
120/// on [`raw.line`]($raw.line) could be helpful, such as to add line numbers.
121///
122/// Note that, in raw text, typesetting features like
123/// [hyphenation]($text.hyphenate), [overhang]($text.overhang),
124/// [CJK-Latin spacing]($text.cjk-latin-spacing) (and
125/// [justification]($par.justify) for [raw blocks]($raw.block)) will be
126/// disabled by default.
127#[elem(
128    scope,
129    title = "Raw Text / Code",
130    Synthesize,
131    Locatable,
132    Tagged,
133    ShowSet,
134    LocalName,
135    Figurable,
136    PlainText
137)]
138pub struct RawElem {
139    /// The raw text.
140    ///
141    /// You can also use raw blocks creatively to create custom syntaxes for
142    /// your automations.
143    ///
144    /// ````example:"Implementing a DSL using raw and show rules"
145    /// // Parse numbers in raw blocks with the
146    /// // `mydsl` tag and sum them up.
147    /// #show raw.where(lang: "mydsl"): it => {
148    ///   let sum = 0
149    ///   for part in it.text.split("+") {
150    ///     sum += int(part.trim())
151    ///   }
152    ///   sum
153    /// }
154    ///
155    /// ```mydsl
156    /// 1 + 2 + 3 + 4 + 5
157    /// ```
158    /// ````
159    #[required]
160    pub text: RawContent,
161
162    /// Whether the raw text is displayed as a separate block.
163    ///
164    /// In markup mode, using one-backtick notation makes this `{false}`.
165    /// Using three-backtick notation makes it `{true}` if the enclosed content
166    /// contains at least one line break.
167    ///
168    /// ````example
169    /// // Display inline code in a small box
170    /// // that retains the correct baseline.
171    /// #show raw.where(block: false): box.with(
172    ///   fill: luma(240),
173    ///   inset: (x: 3pt, y: 0pt),
174    ///   outset: (y: 3pt),
175    ///   radius: 2pt,
176    /// )
177    ///
178    /// // Display block code in a larger block
179    /// // with more padding.
180    /// #show raw.where(block: true): block.with(
181    ///   fill: luma(240),
182    ///   inset: 10pt,
183    ///   radius: 4pt,
184    /// )
185    ///
186    /// With `rg`, you can search through your files quickly.
187    /// This example searches the current directory recursively
188    /// for the text `Hello World`:
189    ///
190    /// ```bash
191    /// rg "Hello World"
192    /// ```
193    /// ````
194    #[default(false)]
195    pub block: bool,
196
197    /// The language to syntax-highlight in.
198    ///
199    /// Apart from typical language tags known from Markdown, this supports the
200    /// `{"typ"}`, `{"typc"}`, and `{"typm"}` tags for
201    /// [Typst markup]($reference/syntax/#markup),
202    /// [Typst code]($reference/syntax/#code), and
203    /// [Typst math]($reference/syntax/#math), respectively.
204    ///
205    /// ````example
206    /// ```typ
207    /// This is *Typst!*
208    /// ```
209    ///
210    /// This is ```typ also *Typst*```, but inline!
211    /// ````
212    pub lang: Option<EcoString>,
213
214    /// The horizontal alignment that each line in a raw block should have.
215    /// This option is ignored if this is not a raw block (if `block: false`
216    /// was specified or single backticks were used in markup mode).
217    ///
218    /// By default, this is set to `{start}`, meaning that raw text is
219    /// aligned towards the start of the text direction inside the block
220    /// by default, regardless of the current context's alignment (allowing
221    /// you to center the raw block itself without centering the text inside
222    /// it, for example).
223    ///
224    /// ````example
225    /// #set raw(align: center)
226    ///
227    /// ```typc
228    /// let f(x) = x
229    /// code = "centered"
230    /// ```
231    /// ````
232    #[default(HAlignment::Start)]
233    pub align: HAlignment,
234
235    /// Additional syntax definitions to load. The syntax definitions should be
236    /// in the [`sublime-syntax` file format](https://www.sublimetext.com/docs/syntax.html).
237    ///
238    /// You can pass any of the following values:
239    ///
240    /// - A path string to load a syntax file from the given path. For more
241    ///   details about paths, see the [Paths section]($syntax/#paths).
242    /// - Raw bytes from which the syntax should be decoded.
243    /// - An array where each item is one of the above.
244    ///
245    /// ````example
246    /// #set raw(syntaxes: "SExpressions.sublime-syntax")
247    ///
248    /// ```sexp
249    /// (defun factorial (x)
250    ///   (if (zerop x)
251    ///     ; with a comment
252    ///     1
253    ///     (* x (factorial (- x 1)))))
254    /// ```
255    /// ````
    // When a `syntaxes` argument is given, its sources are loaded through
    // `RawSyntax::load` right here during argument parsing; the field keeps
    // both the original sources and the decoded syntax sets (`Derived`).
256    #[parse(match args.named("syntaxes")? {
257        Some(sources) => Some(RawSyntax::load(engine.world, sources)?),
258        None => None,
259    })]
260    #[fold]
261    pub syntaxes: Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>,
262
263    /// The theme to use for syntax highlighting. Themes should be in the
264    /// [`tmTheme` file format](https://www.sublimetext.com/docs/color_schemes_tmtheme.html).
265    ///
266    /// You can pass any of the following values:
267    ///
268    /// - `{none}`: Disables syntax highlighting.
269    /// - `{auto}`: Highlights with Typst's default theme.
270    /// - A path string to load a theme file from the given path. For more
271    ///   details about paths, see the [Paths section]($syntax/#paths).
272    /// - Raw bytes from which the theme should be decoded.
273    ///
274    /// Applying a theme only affects the color of specifically highlighted
275    /// text. It does not consider the theme's foreground and background
276    /// properties, so that you retain control over the color of raw text. You
277    /// can apply the foreground color yourself with the [`text`] function and
278    /// the background with a [filled block]($block.fill). You could also use
279    /// the [`xml`] function to extract these properties from the theme.
280    ///
281    /// ````example
282    /// #set raw(theme: "halcyon.tmTheme")
283    /// #show raw: it => block(
284    ///   fill: rgb("#1d2433"),
285    ///   inset: 8pt,
286    ///   radius: 5pt,
287    ///   text(fill: rgb("#a2aabc"), it)
288    /// )
289    ///
290    /// ```typ
291    /// = Chapter 1
292    /// #let hi = "Hello World"
293    /// ```
294    /// ````
    // Only a concrete data source is loaded (via `RawTheme::load`, carrying
    // the argument's span); `none` and `auto` are passed through unchanged.
295    #[parse(match args.named::<Spanned<Smart<Option<DataSource>>>>("theme")? {
296        Some(Spanned { v: Smart::Custom(Some(source)), span }) => Some(Smart::Custom(
297            Some(RawTheme::load(engine.world, Spanned::new(source, span))?)
298        )),
299        Some(Spanned { v: Smart::Custom(None), .. }) => Some(Smart::Custom(None)),
300        Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
301        None => None,
302    })]
303    pub theme: Smart<Option<Derived<DataSource, RawTheme>>>,
304
305    /// The size for a tab stop in spaces. A tab is replaced with enough spaces to
306    /// align with the next multiple of the size.
307    ///
308    /// ````example
309    /// #set raw(tab-size: 8)
310    /// ```tsv
311    /// Year	Month	Day
312    /// 2000	2	3
313    /// 2001	2	1
314    /// 2002	3	10
315    /// ```
316    /// ````
317    #[default(2)]
318    pub tab_size: usize,
319
320    /// The stylized lines of raw text.
321    ///
322    /// Made accessible for the [`raw.line` element]($raw.line).
323    /// Allows more styling control in `show` rules.
    // Assigned in `Synthesize::synthesize` from the result of `highlight`.
324    #[synthesized]
325    pub lines: Vec<Packed<RawLine>>,
326}
327
// Scope of the `raw` element: declares `RawLine` (element name `line`) as an
// associated element, which is what the docs refer to as `raw.line`.
328#[scope]
329impl RawElem {
330    #[elem]
331    type RawLine;
332}
333
334impl RawElem {
335    /// The supported language names and tags.
336    pub fn languages() -> Vec<(&'static str, Vec<&'static str>)> {
337        RAW_SYNTAXES
338            .syntaxes()
339            .iter()
340            .map(|syntax| {
341                (
342                    syntax.name.as_str(),
343                    syntax.file_extensions.iter().map(|s| s.as_str()).collect(),
344                )
345            })
346            .chain([
347                ("Typst", vec!["typ"]),
348                ("Typst (code)", vec!["typc"]),
349                ("Typst (math)", vec!["typm"]),
350            ])
351            .collect()
352    }
353}
354
355impl Synthesize for Packed<RawElem> {
356    fn synthesize(
357        &mut self,
358        engine: &mut Engine,
359        styles: StyleChain,
360    ) -> SourceResult<()> {
361        let seq = self.highlight(engine.routines, styles);
362        self.lines = Some(seq);
363        Ok(())
364    }
365}
366
367impl Packed<RawElem> {
368    #[comemo::memoize]
369    fn highlight(&self, routines: &Routines, styles: StyleChain) -> Vec<Packed<RawLine>> {
370        let elem = self.as_ref();
371        let lines = preprocess(&elem.text, styles, self.span());
372
373        let count = lines.len() as i64;
374        let lang = elem
375            .lang
376            .get_ref(styles)
377            .as_ref()
378            .map(|s| s.to_lowercase())
379            .or(Some("txt".into()));
380
381        let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| {
382            lines.into_iter().enumerate().map(|(i, (line, line_span))| {
383                Packed::new(RawLine::new(
384                    i as i64 + 1,
385                    count,
386                    line.clone(),
387                    TextElem::packed(line).spanned(line_span),
388                ))
389                .spanned(line_span)
390            })
391        };
392
393        let syntaxes = LazyCell::new(|| elem.syntaxes.get_cloned(styles));
394        let theme: &synt::Theme = match elem.theme.get_ref(styles) {
395            Smart::Auto => &RAW_THEME,
396            Smart::Custom(Some(theme)) => theme.derived.get(),
397            Smart::Custom(None) => return non_highlighted_result(lines).collect(),
398        };
399
400        let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK);
401        let target = styles.get(TargetElem::target);
402
403        let mut seq = vec![];
404        if matches!(lang.as_deref(), Some("typ" | "typst" | "typc" | "typm")) {
405            let text =
406                lines.iter().map(|(s, _)| s.clone()).collect::<Vec<_>>().join("\n");
407            let root = match lang.as_deref() {
408                Some("typc") => typst_syntax::parse_code(&text),
409                Some("typm") => typst_syntax::parse_math(&text),
410                _ => typst_syntax::parse(&text),
411            };
412
413            ThemedHighlighter::new(
414                &text,
415                LinkedNode::new(&root),
416                synt::Highlighter::new(theme),
417                &mut |i, _, range, style| {
418                    // Find span and start of line.
419                    // Note: Dedent is already applied to the text
420                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
421                    let span_offset = text[..range.start]
422                        .rfind('\n')
423                        .map_or(0, |i| range.start - (i + 1));
424                    styled(
425                        routines,
426                        target,
427                        &text[range],
428                        foreground,
429                        style,
430                        span,
431                        span_offset,
432                    )
433                },
434                &mut |i, range, line| {
435                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
436                    seq.push(
437                        Packed::new(RawLine::new(
438                            (i + 1) as i64,
439                            count,
440                            EcoString::from(&text[range]),
441                            Content::sequence(line.drain(..)),
442                        ))
443                        .spanned(span),
444                    );
445                },
446            )
447            .highlight();
448        } else if let Some((syntax_set, syntax)) = lang.and_then(|token| {
449            // Prefer user-provided syntaxes over built-in ones.
450            syntaxes
451                .derived
452                .iter()
453                .map(|syntax| syntax.get())
454                .chain(std::iter::once(&*RAW_SYNTAXES))
455                .find_map(|set| {
456                    set.find_syntax_by_token(&token).map(|syntax| (set, syntax))
457                })
458        }) {
459            let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
460            for (i, (line, line_span)) in lines.into_iter().enumerate() {
461                let mut line_content = vec![];
462                let mut span_offset = 0;
463                for (style, piece) in highlighter
464                    .highlight_line(line.as_str(), syntax_set)
465                    .into_iter()
466                    .flatten()
467                {
468                    line_content.push(styled(
469                        routines,
470                        target,
471                        piece,
472                        foreground,
473                        style,
474                        line_span,
475                        span_offset,
476                    ));
477                    span_offset += piece.len();
478                }
479
480                seq.push(
481                    Packed::new(RawLine::new(
482                        i as i64 + 1,
483                        count,
484                        line,
485                        Content::sequence(line_content),
486                    ))
487                    .spanned(line_span),
488                );
489            }
490        } else {
491            seq.extend(non_highlighted_result(lines));
492        };
493
494        seq
495    }
496}
497
498impl ShowSet for Packed<RawElem> {
499    fn show_set(&self, styles: StyleChain) -> Styles {
500        let mut out = Styles::new();
501        out.set(TextElem::overhang, false);
502        out.set(TextElem::lang, Lang::ENGLISH);
503        out.set(TextElem::hyphenate, Smart::Custom(false));
504        out.set(TextElem::size, TextSize(Em::new(0.8).into()));
505        out.set(TextElem::font, FontList(vec![FontFamily::new("DejaVu Sans Mono")]));
506        out.set(TextElem::cjk_latin_spacing, Smart::Custom(None));
507        if self.block.get(styles) {
508            out.set(ParElem::justify, false);
509        }
510        out
511    }
512}
513
// Supplies the `LocalName` key for raw elements.
514impl LocalName for Packed<RawElem> {
    // NOTE(review): presumably the lookup key for the element's localized
    // name (e.g. as a figure supplement) — confirm against `LocalName`.
515    const KEY: &'static str = "raw";
516}
517
// Marker impl: opts raw elements into `Figurable`; the impl body is empty.
518impl Figurable for Packed<RawElem> {}
519
520impl PlainText for Packed<RawElem> {
521    fn plain_text(&self, text: &mut EcoString) {
522        text.push_str(&self.text.get());
523    }
524}
525
526/// The content of the raw text.
527#[derive(Debug, Clone, Hash)]
528#[allow(
529    clippy::derived_hash_with_manual_eq,
530    reason = "https://github.com/typst/typst/pull/6560#issuecomment-3045393640"
531)]
532pub enum RawContent {
533    /// From a string.
534    Text(EcoString),
535    /// From lines of text.
536    Lines(EcoVec<(EcoString, Span)>),
537}
538
539impl RawContent {
540    /// Returns or synthesizes the text content of the raw text.
541    fn get(&self) -> EcoString {
542        match self.clone() {
543            RawContent::Text(text) => text,
544            RawContent::Lines(lines) => {
545                let mut lines = lines.into_iter().map(|(s, _)| s);
546                if lines.len() <= 1 {
547                    lines.next().unwrap_or_default()
548                } else {
549                    lines.collect::<Vec<_>>().join("\n").into()
550                }
551            }
552        }
553    }
554}
555
556impl PartialEq for RawContent {
557    fn eq(&self, other: &Self) -> bool {
558        match (self, other) {
559            (RawContent::Text(a), RawContent::Text(b)) => a == b,
560            (lines @ RawContent::Lines(_), RawContent::Text(text))
561            | (RawContent::Text(text), lines @ RawContent::Lines(_)) => {
562                *text == lines.get()
563            }
564            (RawContent::Lines(a), RawContent::Lines(b)) => Iterator::eq(
565                a.iter().map(|(line, _)| line),
566                b.iter().map(|(line, _)| line),
567            ),
568        }
569    }
570}
571
// Conversions between `RawContent` and Typst values.
572cast! {
573    RawContent,
    // To a value: the text as returned by `RawContent::get`.
574    self => self.get().into_value(),
    // From a string: stored as the `Text` variant.
575    v: EcoString => Self::Text(v),
576}
577
578/// A loaded syntax.
579#[derive(Debug, Clone, PartialEq, Hash)]
580pub struct RawSyntax(Arc<ManuallyHash<SyntaxSet>>);
581
582impl RawSyntax {
583    /// Load syntaxes from sources.
584    fn load(
585        world: Tracked<dyn World + '_>,
586        sources: Spanned<OneOrMultiple<DataSource>>,
587    ) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
588        let loaded = sources.load(world)?;
589        let list = loaded
590            .iter()
591            .map(|data| Self::decode(&data.data).within(data))
592            .collect::<SourceResult<_>>()?;
593        Ok(Derived::new(sources.v, list))
594    }
595
596    /// Decode a syntax from a loaded source.
597    #[comemo::memoize]
598    #[typst_macros::time(name = "load syntaxes")]
599    fn decode(bytes: &Bytes) -> LoadResult<RawSyntax> {
600        let str = bytes.as_str()?;
601
602        let syntax = SyntaxDefinition::load_from_str(str, false, None)
603            .map_err(format_syntax_error)?;
604
605        let mut builder = SyntaxSetBuilder::new();
606        builder.add(syntax);
607
608        Ok(RawSyntax(Arc::new(ManuallyHash::new(
609            builder.build(),
610            typst_utils::hash128(bytes),
611        ))))
612    }
613
614    /// Return the underlying syntax set.
615    fn get(&self) -> &SyntaxSet {
616        self.0.as_ref()
617    }
618}
619
620fn format_syntax_error(error: ParseSyntaxError) -> LoadError {
621    let pos = syntax_error_pos(&error);
622    LoadError::new(pos, "failed to parse syntax", error)
623}
624
625fn syntax_error_pos(error: &ParseSyntaxError) -> ReportPos {
626    match error {
627        ParseSyntaxError::InvalidYaml(scan_error) => {
628            let m = scan_error.marker();
629            ReportPos::full(
630                m.index()..m.index(),
631                LineCol::one_based(m.line(), m.col() + 1),
632            )
633        }
634        _ => ReportPos::None,
635    }
636}
637
638/// A loaded syntect theme.
639#[derive(Debug, Clone, PartialEq, Hash)]
640pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
641
642impl RawTheme {
643    /// Load a theme from a data source.
644    fn load(
645        world: Tracked<dyn World + '_>,
646        source: Spanned<DataSource>,
647    ) -> SourceResult<Derived<DataSource, Self>> {
648        let loaded = source.load(world)?;
649        let theme = Self::decode(&loaded.data).within(&loaded)?;
650        Ok(Derived::new(source.v, theme))
651    }
652
653    /// Decode a theme from bytes.
654    #[comemo::memoize]
655    fn decode(bytes: &Bytes) -> LoadResult<RawTheme> {
656        let mut cursor = std::io::Cursor::new(bytes.as_slice());
657        let theme =
658            synt::ThemeSet::load_from_reader(&mut cursor).map_err(format_theme_error)?;
659        Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(bytes)))))
660    }
661
662    /// Get the underlying syntect theme.
663    pub fn get(&self) -> &synt::Theme {
664        self.0.as_ref()
665    }
666}
667
668fn format_theme_error(error: syntect::LoadingError) -> LoadError {
669    let pos = match &error {
670        syntect::LoadingError::ParseSyntax(err, _) => syntax_error_pos(err),
671        _ => ReportPos::None,
672    };
673    LoadError::new(pos, "failed to parse theme", error)
674}
675
676/// A highlighted line of raw text.
677///
678/// This is a helper element that is synthesized by [`raw`] elements.
679///
680/// It allows you to access various properties of the line, such as the line
681/// number, the raw non-highlighted text, the highlighted text, and whether it
682/// is the first or last line of the raw block.
// All four fields are required and are passed positionally to `RawLine::new`
// (number, count, text, body) by `Packed<RawElem>::highlight`.
683#[elem(name = "line", title = "Raw Text / Code Line", Tagged, PlainText)]
684pub struct RawLine {
685    /// The line number of the raw line inside of the raw block, starts at 1.
686    #[required]
687    pub number: i64,
688
689    /// The total number of lines in the raw block.
690    #[required]
691    pub count: i64,
692
693    /// The line of raw text.
694    #[required]
695    pub text: EcoString,
696
697    /// The highlighted raw text.
698    #[required]
699    pub body: Content,
700}
701
702impl PlainText for Packed<RawLine> {
703    fn plain_text(&self, text: &mut EcoString) {
704        text.push_str(&self.text);
705    }
706}
707
708/// Wrapper struct for the state required to highlight Typst code.
///
/// The highlighter walks the syntax tree starting at `node`, turns every leaf
/// into styled pieces via `style_fn`, and reports each completed line through
/// `line_fn`.
709struct ThemedHighlighter<'a> {
710    /// The code being highlighted.
711    code: &'a str,
712    /// The current node being highlighted.
713    node: LinkedNode<'a>,
714    /// The highlighter.
715    highlighter: synt::Highlighter<'a>,
716    /// The current scopes.
717    scopes: Vec<syntect::parsing::Scope>,
718    /// The current highlighted line.
719    current_line: Vec<Content>,
720    /// The range of the current line.
    // Byte range into `code`; the line callback receives a range starting at
    // `range.start`.
721    range: Range<usize>,
722    /// The current line number.
    // Zero-based: starts at 0 and is incremented after each completed line.
723    line: usize,
724    /// The function to style a piece of text.
725    style_fn: StyleFn<'a>,
726    /// The function to append a line.
727    line_fn: LineFn<'a>,
728}
729
730// Shorthands for highlighter closures.
// `StyleFn` receives the current line index, the leaf node, the byte range of
// the piece within the code and the resolved style, and returns the styled
// content for that piece.
731type StyleFn<'a> =
732    &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
// `LineFn` receives the line index, the byte range of the line within the code
// and the pieces collected for that line.
733type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
734
735impl<'a> ThemedHighlighter<'a> {
736    pub fn new(
737        code: &'a str,
738        top: LinkedNode<'a>,
739        highlighter: synt::Highlighter<'a>,
740        style_fn: StyleFn<'a>,
741        line_fn: LineFn<'a>,
742    ) -> Self {
743        Self {
744            code,
745            node: top,
746            highlighter,
747            range: 0..0,
748            scopes: Vec::new(),
749            current_line: Vec::new(),
750            line: 0,
751            style_fn,
752            line_fn,
753        }
754    }
755
756    pub fn highlight(&mut self) {
757        self.highlight_inner();
758
759        if !self.current_line.is_empty() {
760            (self.line_fn)(
761                self.line,
762                self.range.start..self.code.len(),
763                &mut self.current_line,
764            );
765
766            self.current_line.clear();
767        }
768    }
769
770    fn highlight_inner(&mut self) {
771        if self.node.children().len() == 0 {
772            let style = self.highlighter.style_for_stack(&self.scopes);
773            let segment = &self.code[self.node.range()];
774
775            let mut len = 0;
776            for (i, line) in split_newlines(segment).into_iter().enumerate() {
777                if i != 0 {
778                    (self.line_fn)(
779                        self.line,
780                        self.range.start..self.range.end + len - 1,
781                        &mut self.current_line,
782                    );
783                    self.range.start = self.range.end + len;
784                    self.line += 1;
785                }
786
787                let offset = self.node.range().start + len;
788                let token_range = offset..(offset + line.len());
789                self.current_line.push((self.style_fn)(
790                    self.line,
791                    &self.node,
792                    token_range,
793                    style,
794                ));
795
796                len += line.len() + 1;
797            }
798
799            self.range.end += segment.len();
800        }
801
802        for child in self.node.children() {
803            let mut scopes = self.scopes.clone();
804            if let Some(tag) = typst_syntax::highlight(&child) {
805                scopes.push(syntect::parsing::Scope::new(tag.tm_scope()).unwrap())
806            }
807
808            std::mem::swap(&mut scopes, &mut self.scopes);
809            self.node = child;
810            self.highlight_inner();
811            std::mem::swap(&mut scopes, &mut self.scopes);
812        }
813    }
814}
815
816fn preprocess(
817    text: &RawContent,
818    styles: StyleChain,
819    span: Span,
820) -> EcoVec<(EcoString, Span)> {
821    if let RawContent::Lines(lines) = text
822        && lines.iter().all(|(s, _)| !s.contains('\t'))
823    {
824        return lines.clone();
825    }
826
827    let mut text = text.get();
828    if text.contains('\t') {
829        let tab_size = styles.get(RawElem::tab_size);
830        text = align_tabs(&text, tab_size);
831    }
832    split_newlines(&text)
833        .into_iter()
834        .map(|line| (line.into(), span))
835        .collect()
836}
837
838/// Style a piece of text with a syntect style.
839fn styled(
840    routines: &Routines,
841    target: Target,
842    piece: &str,
843    foreground: synt::Color,
844    style: synt::Style,
845    span: Span,
846    span_offset: usize,
847) -> Content {
848    let mut body = TextElem::packed(piece).spanned(span);
849
850    if span_offset > 0 {
851        body = body.set(TextElem::span_offset, span_offset);
852    }
853
854    if style.foreground != foreground {
855        let color = to_typst(style.foreground);
856        body = match target {
857            Target::Html => (routines.html_span_filled)(body, color),
858            Target::Paged => body.set(TextElem::fill, color.into()),
859        };
860    }
861
862    if style.font_style.contains(synt::FontStyle::BOLD) {
863        body = body.strong().spanned(span);
864    }
865
866    if style.font_style.contains(synt::FontStyle::ITALIC) {
867        body = body.emph().spanned(span);
868    }
869
870    if style.font_style.contains(synt::FontStyle::UNDERLINE) {
871        body = body.underlined().spanned(span);
872    }
873
874    body
875}
876
877fn to_typst(synt::Color { r, g, b, a }: synt::Color) -> Color {
878    Color::from_u8(r, g, b, a)
879}
880
881fn to_syn(color: Color) -> synt::Color {
882    let (r, g, b, a) = color.to_rgb().into_format::<u8, u8>().into_components();
883    synt::Color { r, g, b, a }
884}
885
886/// Create a syntect theme item.
887fn item(
888    scope: &str,
889    color: Option<&str>,
890    font_style: Option<synt::FontStyle>,
891) -> synt::ThemeItem {
892    synt::ThemeItem {
893        scope: scope.parse().unwrap(),
894        style: synt::StyleModifier {
895            foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())),
896            background: None,
897            font_style,
898        },
899    }
900}
901
902/// Replace tabs with spaces to align with multiples of `tab_size`.
903fn align_tabs(text: &str, tab_size: usize) -> EcoString {
904    let replacement = " ".repeat(tab_size);
905    let divisor = tab_size.max(1);
906    let amount = text.chars().filter(|&c| c == '\t').count();
907
908    let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size);
909    let mut column = 0;
910
911    for grapheme in text.graphemes(true) {
912        let c = grapheme.parse::<char>();
913        if c == Ok('\t') {
914            let required = tab_size - column % divisor;
915            res.push_str(&replacement[..required]);
916            column += required;
917        } else if c.is_ok_and(typst_syntax::is_newline) || grapheme == "\r\n" {
918            res.push_str(grapheme);
919            column = 0;
920        } else {
921            res.push_str(grapheme);
922            column += 1;
923        }
924    }
925
926    res
927}
928
929/// The syntect syntax definitions.
930///
931/// Syntax set is generated from the syntaxes from the `bat` project
932/// <https://github.com/sharkdp/bat/tree/master/assets/syntaxes>
///
/// The set is created lazily, on first access, by calling
/// `two_face::syntax::extra_no_newlines`.
933pub static RAW_SYNTAXES: LazyLock<syntect::parsing::SyntaxSet> =
934    LazyLock::new(two_face::syntax::extra_no_newlines);
935
936/// The default theme used for syntax highlighting.
///
/// Built lazily on first access. The theme uses default settings and consists
/// only of the scope items listed below, each created with `item` from a scope
/// selector, an optional foreground color and an optional font style.
937pub static RAW_THEME: LazyLock<synt::Theme> = LazyLock::new(|| synt::Theme {
938    name: Some("Typst Light".into()),
939    author: Some("The Typst Project Developers".into()),
940    settings: synt::ThemeSettings::default(),
941    scopes: vec![
942        item("comment", Some("#74747c"), None),
943        item("constant.character.escape", Some("#1d6c76"), None),
        // Markup emphasis only changes the font style, not the color.
944        item("markup.bold", None, Some(synt::FontStyle::BOLD)),
945        item("markup.italic", None, Some(synt::FontStyle::ITALIC)),
946        item("markup.underline", None, Some(synt::FontStyle::UNDERLINE)),
947        item("markup.raw", Some("#6b6b6f"), None),
        // Listed with neither a color nor a font style.
948        item("string.other.math.typst", None, None),
949        item("punctuation.definition.math", Some("#198810"), None),
950        item("keyword.operator.math", Some("#1d6c76"), None),
951        item("markup.heading, entity.name.section", None, Some(synt::FontStyle::BOLD)),
952        item(
953            "markup.heading.typst",
954            None,
955            Some(synt::FontStyle::BOLD | synt::FontStyle::UNDERLINE),
956        ),
957        item("punctuation.definition.list", Some("#8b41b1"), None),
958        item("markup.list.term", None, Some(synt::FontStyle::BOLD)),
959        item("entity.name.label, markup.other.reference", Some("#1d6c76"), None),
960        item("keyword, constant.language, variable.language", Some("#d73948"), None),
961        item("storage.type, storage.modifier", Some("#d73948"), None),
962        item("constant", Some("#b60157"), None),
963        item("string", Some("#198810"), None),
964        item("entity.name, variable.function, support", Some("#4b69c6"), None),
965        item("support.macro", Some("#16718d"), None),
966        item("meta.annotation", Some("#301414"), None),
967        item("entity.other, meta.interpolation", Some("#8b41b1"), None),
        // Diff scopes.
968        item("meta.diff.range", Some("#8b41b1"), None),
969        item("markup.inserted, meta.diff.header.to-file", Some("#198810"), None),
970        item("markup.deleted, meta.diff.header.from-file", Some("#d73948"), None),
        // JSON: keys and string values get different colors.
971        item("meta.mapping.key.json string.quoted.double.json", Some("#4b69c6"), None),
972        item("meta.mapping.value.json string.quoted.double.json", Some("#198810"), None),
973    ],
974});