Skip to main content

typst_library/text/
raw.rs

1use std::cell::LazyCell;
2use std::ops::Range;
3use std::sync::{Arc, LazyLock};
4
5use comemo::Tracked;
6use ecow::{EcoString, EcoVec};
7use syntect::highlighting::{self as synt};
8use syntect::parsing::{ParseSyntaxError, SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
9use typst_syntax::{LinkedNode, Span, Spanned, split_newlines};
10use typst_utils::ManuallyHash;
11use unicode_segmentation::UnicodeSegmentation;
12
13use super::Lang;
14use crate::World;
15use crate::diag::{
16    LineCol, LoadError, LoadResult, LoadedWithin, ReportTextPos, SourceResult,
17};
18use crate::engine::Engine;
19use crate::foundations::{
20    Bytes, Content, Derived, OneOrMultiple, Packed, PlainText, ShowSet, Smart,
21    StyleChain, Styles, Synthesize, Target, TargetElem, cast, elem, scope,
22};
23use crate::introspection::{Locatable, Tagged};
24use crate::layout::{Em, HAlignment};
25use crate::loading::{DataSource, Load};
26use crate::model::{Figurable, ParElem};
27use crate::routines::Routines;
28use crate::text::{FontFamily, FontList, LocalName, TextElem, TextSize};
29use crate::visualize::Color;
30
31/// Raw text with optional syntax highlighting.
32///
33/// Displays the text verbatim and in a monospace font. This is typically used
34/// to embed computer code into a document.
35///
36/// Text given to this element will ignore markup syntax, such as `[*strong*]`
37/// or `[_emphasis_]`, and will be displayed verbatim. If you would like to
38/// display content with a monospace font while still allowing markup syntax,
39/// instead of using @raw, you can explicitly set the text font to a monospace
40/// font with the @text.font parameter.
41///
42/// Raw elements are mainly produced with their @raw:syntax[dedicated syntax] by
43/// enclosing text with either one or three-plus backtick characters (``` ` ```)
44/// on both sides. When using three or more backticks, text immediately after
45/// the initial backticks will be treated as a @raw.lang[language tag] used for
46/// syntax highlighting, and the raw text begins after the first whitespace.
47///
48/// = Example <example>
49/// ````example
50/// Adding `rbx` to `rcx` gives
51/// the desired result.
52///
53/// What is ```rust fn main()``` in Rust
54/// would be ```c int main()``` in C.
55///
56/// ```rust
57/// fn main() {
58///     println!("Hello World!");
59/// }
60/// ```
61///
62/// This has ``` `backticks` ``` in it
63/// (but the spaces are trimmed). And
64/// ``` here``` the leading space is
65/// also trimmed.
66/// ````
67///
68/// You can also construct a @raw element programmatically from a string (and
69/// provide the language tag via the optional @raw.lang[`lang`] parameter).
70///
71/// ```example
72/// #raw("fn " + "main() {}", lang: "rust")
73/// ```
74///
75/// If no syntax highlighting is available by default for your specified
76/// language tag (or if you want to override the built-in definition), you may
77/// provide a custom syntax specification file to the @raw.syntaxes[`syntaxes`]
78/// parameter.
79///
80/// = Styling <styling>
81/// By default, the `raw` element uses the `DejaVu Sans Mono` font (included
82/// with Typst), with a smaller font size of `{0.8em}` (that is, 80% of the
83/// global font size). This is because monospace fonts tend to be visually
84/// larger than non-monospace fonts.
85///
86/// You can customize these properties with show-set rules:
87///
88/// ````example
89/// // Switch to Cascadia Code for both
90/// // inline and block raw.
91/// #show raw: set text(font: "Cascadia Code")
92///
93/// // Reset raw blocks to the same size as normal text,
94/// // but keep inline raw at the reduced size.
95/// #show raw.where(block: true): set text(1em / 0.8)
96///
97/// Now using the `Cascadia Code` font for raw text.
98/// Here's some Python code. It looks larger now:
99///
100/// ```py
101/// def python():
102///   return 5 + 5
103/// ```
104/// ````
105///
106/// In addition, you can customize the syntax highlighting colors by setting a
107/// custom theme through the @raw.theme[`theme`] parameter.
108///
109/// For complete customization of the appearance of a raw block, a show rule on
110/// @raw.line could be helpful, such as to add line numbers.
111///
112/// Note that in raw text, typesetting features like
113/// @text.hyphenate[hyphenation], @text.overhang[overhang],
114/// @text.cjk-latin-spacing[CJK-Latin spacing], and (for raw blocks)
115/// @par.justify[justification] will be disabled by default.
116///
117/// = Syntax <syntax>
118/// This function has dedicated syntax that produces a raw element in both
119/// markup and code mode. You can enclose text in one or three-plus backtick
120/// characters (``` ` ```) on both sides to make it raw. The number of backticks
121/// must be the same on both sides, and the enclosed text cannot contain a group
122/// of that many backticks in a row. Writing just two backticks (``` `` ```)
123/// produces empty raw text.
124///
125/// Notable differences from Markdown include that single backticks can enclose
126/// text spanning multiple lines without removing indentation, and that the
127/// three-plus backtick syntax still interprets language tags when used inline.
128///
129/// Raw text enclosed in _single_ backticks has no way to specify a language tag
130/// and is always treated as inline for use within a paragraph, i.e. the
131/// @raw.block[`block`] parameter is `{false}`.
132///
133/// Raw syntax using _three or more_ backticks has the following properties:
134///
135/// - *After the initial backticks, the raw block is only terminated by a
136///   sequence of the same number of backticks*
137///
138///   To include text containing a sequence of backticks, the initial and final
139///   backticks must have at least one more backtick than the sequence.
140///
141/// - *If the raw text contains a linebreak, it will be block-level, otherwise
142///   it will be inline*
143///
144///   This sets the @raw.block[`block`] parameter to `{true}` or `{false}`
145///   accordingly.
146///
147/// - *Text immediately after the initial backticks, up to the first whitespace,
148///   is treated as a _language tag_ used for syntax highlighting*
149///
150///   The specific rules for which text can be treated as the language tag are
151///   planned to change, and are @raw:language-tag-changes[explained in detail
152///   below.]
153///
154/// - *The initial and final lines have special trimming behavior*
155///
156///   For the initial line, if all characters following the initial backticks or
157///   language tag are whitespace, the entire line will be trimmed. However, if
158///   there are non-whitespace characters on that line, only a single space
159///   immediately following the initial backticks or language tag will be
160///   trimmed if present.
161///
162///   If the final line is entirely whitespace up to the closing backticks, it
163///   will be trimmed. Otherwise, if the last non-whitespace character of the
164///   final line is a backtick, then one space character will be trimmed from
165///   the end of the line if present.
166///
167/// - *Common indentation at the beginning of lines is trimmed*
168///
169///   Typst will remove initial whitespace at the beginning of lines in the raw
170///   text that is shared between all lines, i.e. common indentation. Although
171///   this excludes text on the line with the initial backticks.
172///
173///   Typst first finds the line with the fewest initial whitespace characters
174///   that contains some non-whitespace characters, including the line with the
175///   closing backticks. Then Typst trims characters from every line equal to
176///   the number of initial whitespace characters in that line. Lines which are
177///   only whitespace will remove the same number of characters until they are
178///   empty, but will keep any extra trailing whitespace.
179///
180///   #let code-point = "https://www.unicode.org/glossary/#code_point"
181///
182///   Note that this check treats tabs and spaces as equivalent characters for
183///   simplicity, and that it operates on numbers of #link(code-point)[Unicode
184///   code points], i.e. characters, not on byte lengths.
185///
186/// These properties of the three-plus backtick syntax allow for some use cases
187/// that may not be obvious:
188///
189/// - To write text containing a sequence of backticks, enclose it with one or
190///   more backticks than the sequence:
191///   ````` ```` enclosed```backticks```` `````
192///
193/// - To write text that starts or ends with a backtick, add a space inside the
194///   opening and closing backticks: ```` ``` `backticks` ``` ````
195///
196/// - To write inline text highlighted with a language tag, add a space between
197///   the language tag and the text ````rust ```rust fn main() {}``` ````
198///
199/// - To write inline text without any language tag, add a space after the
200///   initial backticks: ```` ``` text``` ```` or use the single backtick
201///   syntax: ``` `text` ```
202///
203/// == Embedding strings with raw syntax <embedding-strings>
204/// A common use-case for raw syntax is to embed data as strings with formatting
205/// by accessing the `.text` field on raw content to get the underlying string.
206/// This may also be paired with the @bytes constructor to convert the string to
207/// bytes.
208///
209/// ````example
210/// An inline YAML dictionary via `.text`
211///
212/// #yaml(bytes(
213///   ```yaml
214///   Magic:
215///     limited-by: Mana
216///   Pokémon:
217///     limited-by: Energy
218///   Yu-Gi-Oh:
219///     limited-by: false
220///   ```.text
221///   //  ^^^^ used as a string
222/// ))
223/// ````
224///
225/// == Language tag changes <language-tag-changes>
226///
227/// When using raw syntax with three or more backticks, text immediately after
228/// the initial backticks (up to the first whitespace) is treated as a
229/// @raw.lang[language tag]. However in the current version of Typst, only text
230/// that would be a valid Typst identifier is treated as the language tag. The
231/// first character not valid for an identifier will be interpreted as starting
232/// the raw text.
233///
234/// For example, in the current version of Typst, if a raw block starts with
235/// `C++`, the identifier `C` will be the language tag, and the raw text will
236/// start with `++`. If a raw block starts with `++C`, it will have no language
237/// tag and the raw text will start with `++C`.
238///
239/// To use language tags that are not valid as identifiers in the current
240/// version of Typst, you must use the @raw.lang[`lang`] parameter, either by
241/// calling the constructor with a string: ```typ #raw("text", lang: "...")```,
242/// or by writing a set rule: ```typ #set raw(lang: "...")```.
243///
244/// In the next version of Typst, _all text_ up to the first whitespace or
245/// backtick will be treated as the language tag, allowing a wider character set
246/// for language tags. Tags including spaces or backticks will still need to be
247/// set manually via the @raw.lang[`lang`] parameter.
248///
249/// Typst will alert you if your raw blocks will be interpreted differently in
250/// the next Typst version by emitting a warning.
#[elem(
    scope,
    title = "Raw Text / Code",
    Synthesize,
    Locatable,
    Tagged,
    ShowSet,
    LocalName,
    Figurable,
    PlainText
)]
pub struct RawElem {
    /// The raw text.
    ///
    /// You can also use raw blocks creatively to create custom syntaxes for
    /// your automations.
    ///
    /// #example(
    ///   title: "Implementing a DSL using raw and show rules",
    ///   ````
    ///   // Parse numbers in raw blocks with the
    ///   // `mydsl` tag and sum them up.
    ///   #show raw.where(lang: "mydsl"): it => {
    ///     let sum = 0
    ///     for part in it.text.split("+") {
    ///       sum += int(part.trim())
    ///     }
    ///     sum
    ///   }
    ///
    ///   ```mydsl
    ///   1 + 2 + 3 + 4 + 5
    ///   ```
    ///   ````
    /// )
    // Held as `RawContent`, which is either one string or a list of lines
    // that each carry their own span.
    #[required]
    pub text: RawContent,

    /// Whether the raw text is displayed as a separate block.
    ///
    /// In markup mode, using one-backtick notation makes this `{false}`. Using
    /// three-backtick notation makes it `{true}` if the enclosed content
    /// contains at least one line break.
    ///
    /// ````example
    /// // Display inline code in a small box
    /// // that retains the correct baseline.
    /// #show raw.where(block: false): box.with(
    ///   fill: luma(240),
    ///   inset: (x: 3pt, y: 0pt),
    ///   outset: (y: 3pt),
    ///   radius: 2pt,
    /// )
    ///
    /// // Display block code in a larger block
    /// // with more padding.
    /// #show raw.where(block: true): block.with(
    ///   fill: luma(240),
    ///   inset: 10pt,
    ///   radius: 4pt,
    /// )
    ///
    /// With `rg`, you can search through your files quickly.
    /// This example searches the current directory recursively
    /// for the text `Hello World`:
    ///
    /// ```bash
    /// rg "Hello World"
    /// ```
    /// ````
    #[default(false)]
    pub block: bool,

    /// The language to interpret the raw text as for syntax highlighting.
    ///
    /// In @html[HTML export], this sets the `data-lang` attribute of the
    /// generated @html.code element.
    ///
    /// Apart from typical language tags known from Markdown, this supports the
    /// `{"typ"}`, `{"typc"}`, and `{"typm"}` tags for
    /// @reference:syntax:markup[Typst markup],
    /// @reference:syntax:code[Typst code], and
    /// @reference:syntax:math[Typst math], respectively.
    ///
    /// ````example
    /// ```typ
    /// This is *Typst!*
    /// ```
    ///
    /// This is ```typ also *Typst*```, but inline!
    /// ````
    pub lang: Option<EcoString>,

    /// The horizontal alignment that each line in a raw block should have. This
    /// option is ignored if this is not a raw block (if specified
    /// `block: false` or single backticks were used in markup mode).
    ///
    /// By default, this is set to `{start}`, meaning that raw text is aligned
    /// towards the start of the text direction inside the block by default,
    /// regardless of the current context's alignment (allowing you to center
    /// the raw block itself without centering the text inside it, for example).
    ///
    /// ````example
    /// #set raw(align: center)
    ///
    /// ```typc
    /// let f(x) = x
    /// code = "centered"
    /// ```
    /// ````
    #[default(HAlignment::Start)]
    pub align: HAlignment,

    /// Additional syntax definitions to load. The syntax definitions should be
    /// in the
    /// #link("https://www.sublimetext.com/docs/syntax.html")[`sublime-syntax`
    /// file format].
    ///
    /// You can pass any of the following values:
    ///
    /// - A path string or @path to load a syntax file from.
    /// - Raw bytes from which the syntax should be decoded.
    /// - An array where each item is one of the above.
    ///
    /// ````example
    /// #set raw(syntaxes: "SExpressions.sublime-syntax")
    ///
    /// ```sexp
    /// (defun factorial (x)
    ///   (if (zerop x)
    ///     ; with a comment
    ///     1
    ///     (* x (factorial (- x 1)))))
    /// ```
    /// ````
    // The sources are loaded and decoded while the arguments are parsed (see
    // `RawSyntax::load`), so the field carries both the sources as given and
    // the syntax sets derived from them.
    #[parse(match args.named("syntaxes")? {
        Some(sources) => Some(RawSyntax::load(engine.world, sources)?),
        None => None,
    })]
    #[fold]
    pub syntaxes: Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>,

    /// The theme to use for syntax highlighting. Themes should be in the
    /// #link("https://www.sublimetext.com/docs/color_schemes_tmtheme.html")[`tmTheme` file format].
    ///
    /// You can pass any of the following values:
    ///
    /// - `{none}`: Disables syntax highlighting.
    /// - `{auto}`: Highlights with Typst's default theme.
    /// - A path string or @path to load a theme file from.
    /// - Raw bytes from which the theme should be decoded.
    ///
    /// Applying a theme only affects the color of specifically highlighted
    /// text. It does not consider the theme's foreground and background
    /// properties, so that you retain control over the color of raw text. You
    /// can apply the foreground color yourself with the @text function and the
    /// background with a @block.fill[filled block]. You could also use the @xml
    /// function to extract these properties from the theme.
    ///
    /// ````example
    /// #set raw(theme: "halcyon.tmTheme")
    /// #show raw: it => block(
    ///   fill: rgb("#1d2433"),
    ///   inset: 8pt,
    ///   radius: 5pt,
    ///   text(fill: rgb("#a2aabc"), it)
    /// )
    ///
    /// ```typ
    /// = Chapter 1
    /// #let hi = "Hello World"
    /// ```
    /// ````
    // Only an actual data source needs loading (`RawTheme::load`); `none` and
    // `auto` are passed through unchanged. The argument's span is handed to
    // the loader together with the source.
    #[parse(match args.named::<Spanned<Smart<Option<DataSource>>>>("theme")? {
        Some(Spanned { v: Smart::Custom(Some(source)), span }) => Some(Smart::Custom(
            Some(RawTheme::load(engine.world, Spanned::new(source, span))?)
        )),
        Some(Spanned { v: Smart::Custom(None), .. }) => Some(Smart::Custom(None)),
        Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
        None => None,
    })]
    pub theme: Smart<Option<Derived<DataSource, RawTheme>>>,

    /// The size for a tab stop in spaces. A tab is replaced with enough spaces
    /// to align with the next multiple of the size.
    ///
    /// ````example
    /// #set raw(tab-size: 8)
    /// ```tsv
    /// Year	Month	Day
    /// 2000	2	3
    /// 2001	2	1
    /// 2002	3	10
    /// ```
    /// ````
    #[default(2)]
    pub tab_size: usize,

    /// The stylized lines of raw text.
    ///
    /// Made accessible for the @raw.line[`raw.line` element]. Allows more
    /// styling control in `show` rules.
    // Written by the `Synthesize` impl with the result of `highlight`.
    #[synthesized]
    pub lines: Vec<Packed<RawLine>>,
}
456
// Places the `RawLine` element in the scope of `RawElem`; the documentation
// in this file refers to it as `raw.line`.
#[scope]
impl RawElem {
    #[elem]
    type RawLine;
}
462
463impl RawElem {
464    /// The supported language names and tags.
465    pub fn languages() -> Vec<(&'static str, Vec<&'static str>)> {
466        RAW_SYNTAXES
467            .syntaxes()
468            .iter()
469            .map(|syntax| {
470                (
471                    syntax.name.as_str(),
472                    syntax.file_extensions.iter().map(|s| s.as_str()).collect(),
473                )
474            })
475            .chain([
476                ("Typst", vec!["typ"]),
477                ("Typst (code)", vec!["typc"]),
478                ("Typst (math)", vec!["typm"]),
479            ])
480            .collect()
481    }
482}
483
484impl Synthesize for Packed<RawElem> {
485    fn synthesize(
486        &mut self,
487        engine: &mut Engine,
488        styles: StyleChain,
489    ) -> SourceResult<()> {
490        let seq = self.highlight(engine.library.routines, styles);
491        self.lines = Some(seq);
492        Ok(())
493    }
494}
495
496impl Packed<RawElem> {
497    #[comemo::memoize]
498    fn highlight(&self, routines: &Routines, styles: StyleChain) -> Vec<Packed<RawLine>> {
499        let elem = self.as_ref();
500        let lines = preprocess(&elem.text, styles, self.span());
501
502        let count = lines.len() as i64;
503        let lang = elem
504            .lang
505            .get_ref(styles)
506            .as_ref()
507            .map(|s| s.to_lowercase())
508            .or(Some("txt".into()));
509
510        let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| {
511            lines.into_iter().enumerate().map(|(i, (line, line_span))| {
512                Packed::new(RawLine::new(
513                    i as i64 + 1,
514                    count,
515                    line.clone(),
516                    TextElem::packed(line).spanned(line_span),
517                ))
518                .spanned(line_span)
519            })
520        };
521
522        let syntaxes = LazyCell::new(|| elem.syntaxes.get_cloned(styles));
523        let theme: &synt::Theme = match elem.theme.get_ref(styles) {
524            Smart::Auto => &RAW_THEME,
525            Smart::Custom(Some(theme)) => theme.derived.get(),
526            Smart::Custom(None) => return non_highlighted_result(lines).collect(),
527        };
528
529        let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK);
530        let target = styles.get(TargetElem::target);
531
532        let mut seq = vec![];
533        if matches!(lang.as_deref(), Some("typ" | "typst" | "typc" | "typm")) {
534            let text =
535                lines.iter().map(|(s, _)| s.clone()).collect::<Vec<_>>().join("\n");
536            let root = match lang.as_deref() {
537                Some("typc") => typst_syntax::parse_code(&text),
538                Some("typm") => typst_syntax::parse_math(&text),
539                _ => typst_syntax::parse(&text),
540            };
541
542            ThemedHighlighter::new(
543                &text,
544                LinkedNode::new(&root),
545                synt::Highlighter::new(theme),
546                &mut |i, _, range, style| {
547                    // Find span and start of line.
548                    // Note: Dedent is already applied to the text
549                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
550                    let span_offset = text[..range.start]
551                        .rfind('\n')
552                        .map_or(0, |i| range.start - (i + 1));
553                    styled(
554                        routines,
555                        target,
556                        &text[range],
557                        foreground,
558                        style,
559                        span,
560                        span_offset,
561                    )
562                },
563                &mut |i, range, line| {
564                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
565                    seq.push(
566                        Packed::new(RawLine::new(
567                            (i + 1) as i64,
568                            count,
569                            EcoString::from(&text[range]),
570                            Content::sequence(line.drain(..)),
571                        ))
572                        .spanned(span),
573                    );
574                },
575            )
576            .highlight();
577        } else if let Some((syntax_set, syntax)) = lang.and_then(|token| {
578            // Prefer user-provided syntaxes over built-in ones.
579            syntaxes
580                .derived
581                .iter()
582                .map(|syntax| syntax.get())
583                .chain(std::iter::once(&*RAW_SYNTAXES))
584                .find_map(|set| {
585                    set.find_syntax_by_token(&token).map(|syntax| (set, syntax))
586                })
587        }) {
588            let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
589            for (i, (line, line_span)) in lines.into_iter().enumerate() {
590                let mut line_content = vec![];
591                let mut span_offset = 0;
592                for (style, piece) in highlighter
593                    .highlight_line(line.as_str(), syntax_set)
594                    .into_iter()
595                    .flatten()
596                {
597                    line_content.push(styled(
598                        routines,
599                        target,
600                        piece,
601                        foreground,
602                        style,
603                        line_span,
604                        span_offset,
605                    ));
606                    span_offset += piece.len();
607                }
608
609                seq.push(
610                    Packed::new(RawLine::new(
611                        i as i64 + 1,
612                        count,
613                        line,
614                        Content::sequence(line_content),
615                    ))
616                    .spanned(line_span),
617                );
618            }
619        } else {
620            seq.extend(non_highlighted_result(lines));
621        };
622
623        seq
624    }
625}
626
627impl ShowSet for Packed<RawElem> {
628    fn show_set(&self, styles: StyleChain) -> Styles {
629        let mut out = Styles::new();
630        out.set(TextElem::overhang, false);
631        out.set(TextElem::lang, Lang::ENGLISH);
632        out.set(TextElem::hyphenate, Smart::Custom(false));
633        out.set(TextElem::size, TextSize(Em::new(0.8).into()));
634        out.set(TextElem::font, FontList(vec![FontFamily::new("DejaVu Sans Mono")]));
635        out.set(TextElem::cjk_latin_spacing, Smart::Custom(None));
636        if self.block.get(styles) {
637            out.set(ParElem::justify, false);
638        }
639        out
640    }
641}
642
impl LocalName for Packed<RawElem> {
    // Key that the `LocalName` trait associates with raw elements; how it is
    // resolved to a localized name is defined outside this file.
    const KEY: &'static str = "raw";
}
646
// Empty impl: raw elements implement `Figurable` without overriding anything.
impl Figurable for Packed<RawElem> {}
648
649impl PlainText for Packed<RawElem> {
650    fn plain_text(&self, text: &mut EcoString) {
651        text.push_str(&self.text.get());
652    }
653}
654
// Cast from content: succeeds if the content unpacks to a `RawElem` and
// otherwise fails with the message "expected raw text".
cast! {
    RawElem,
    v: Content => v.unpack::<Self>().map_err(|_| "expected raw text")?
}
659
/// The content of the raw text.
///
/// Both variants describe the same kind of data. Equality (see the
/// `PartialEq` impl) compares the text only and ignores spans.
#[derive(Debug, Clone, Hash)]
pub enum RawContent {
    /// From a string.
    Text(EcoString),
    /// From lines of text. Each line is stored together with a span.
    Lines(EcoVec<(EcoString, Span)>),
}
668
669impl RawContent {
670    /// Returns or synthesizes the text content of the raw text.
671    fn get(&self) -> EcoString {
672        match self.clone() {
673            RawContent::Text(text) => text,
674            RawContent::Lines(lines) => {
675                let mut lines = lines.into_iter().map(|(s, _)| s);
676                if lines.len() <= 1 {
677                    lines.next().unwrap_or_default()
678                } else {
679                    lines.collect::<Vec<_>>().join("\n").into()
680                }
681            }
682        }
683    }
684}
685
686impl PartialEq for RawContent {
687    fn eq(&self, other: &Self) -> bool {
688        match (self, other) {
689            (RawContent::Text(a), RawContent::Text(b)) => a == b,
690            (lines @ RawContent::Lines(_), RawContent::Text(text))
691            | (RawContent::Text(text), lines @ RawContent::Lines(_)) => {
692                *text == lines.get()
693            }
694            (RawContent::Lines(a), RawContent::Lines(b)) => Iterator::eq(
695                a.iter().map(|(line, _)| line),
696                b.iter().map(|(line, _)| line),
697            ),
698        }
699    }
700}
701
// Converting into a value yields the text as one string (see
// `RawContent::get`); a string casts to the `Text` variant.
cast! {
    RawContent,
    self => self.get().into_value(),
    v: EcoString => Self::Text(v),
}
707
/// A loaded syntax.
///
/// Wraps a syntect `SyntaxSet` behind an `Arc`. The set is stored in a
/// `ManuallyHash` whose hash is supplied by `RawSyntax::decode` as the
/// 128-bit hash of the bytes the syntax was decoded from.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawSyntax(Arc<ManuallyHash<SyntaxSet>>);
711
712impl RawSyntax {
713    /// Load syntaxes from sources.
714    fn load(
715        world: Tracked<dyn World + '_>,
716        sources: Spanned<OneOrMultiple<DataSource>>,
717    ) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
718        let loaded = sources.load(world)?;
719        let list = loaded
720            .iter()
721            .map(|data| Self::decode(&data.data).within(data))
722            .collect::<SourceResult<_>>()?;
723        Ok(Derived::new(sources.v, list))
724    }
725
726    /// Decode a syntax from a loaded source.
727    #[comemo::memoize]
728    #[typst_macros::time(name = "load syntaxes")]
729    fn decode(bytes: &Bytes) -> LoadResult<RawSyntax> {
730        let str = bytes.as_str()?;
731
732        let syntax = SyntaxDefinition::load_from_str(str, false, None)
733            .map_err(format_syntax_error)?;
734
735        let mut builder = SyntaxSetBuilder::new();
736        builder.add(syntax);
737
738        Ok(RawSyntax(Arc::new(ManuallyHash::new(
739            builder.build(),
740            typst_utils::hash128(bytes),
741        ))))
742    }
743
744    /// Return the underlying syntax set.
745    fn get(&self) -> &SyntaxSet {
746        self.0.as_ref()
747    }
748}
749
750fn format_syntax_error(error: ParseSyntaxError) -> LoadError {
751    let pos = syntax_error_pos(&error);
752    LoadError::text(pos, "failed to parse syntax", error)
753}
754
755fn syntax_error_pos(error: &ParseSyntaxError) -> ReportTextPos {
756    match error {
757        ParseSyntaxError::InvalidYaml(scan_error) => {
758            let m = scan_error.marker();
759            ReportTextPos::full(
760                m.index()..m.index(),
761                LineCol::one_based(m.line(), m.col() + 1),
762            )
763        }
764        _ => ReportTextPos::None,
765    }
766}
767
/// A loaded syntect theme.
///
/// Wraps a syntect `Theme` behind an `Arc`. The theme is stored in a
/// `ManuallyHash` whose hash is supplied by `RawTheme::decode` as the 128-bit
/// hash of the bytes the theme was decoded from.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
771
772impl RawTheme {
773    /// Load a theme from a data source.
774    fn load(
775        world: Tracked<dyn World + '_>,
776        source: Spanned<DataSource>,
777    ) -> SourceResult<Derived<DataSource, Self>> {
778        let loaded = source.load(world)?;
779        let theme = Self::decode(&loaded.data).within(&loaded)?;
780        Ok(Derived::new(source.v, theme))
781    }
782
783    /// Decode a theme from bytes.
784    #[comemo::memoize]
785    fn decode(bytes: &Bytes) -> LoadResult<RawTheme> {
786        let mut cursor = std::io::Cursor::new(bytes.as_slice());
787        let theme =
788            synt::ThemeSet::load_from_reader(&mut cursor).map_err(format_theme_error)?;
789        Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(bytes)))))
790    }
791
792    /// Get the underlying syntect theme.
793    pub fn get(&self) -> &synt::Theme {
794        self.0.as_ref()
795    }
796}
797
798fn format_theme_error(error: syntect::LoadingError) -> LoadError {
799    let pos = match &error {
800        syntect::LoadingError::ParseSyntax(err, _) => syntax_error_pos(err),
801        _ => ReportTextPos::None,
802    };
803    LoadError::text(pos, "failed to parse theme", error)
804}
805
/// A highlighted line of raw text.
///
/// This is a helper element that is synthesized by @raw elements.
///
/// It allows you to access various properties of the line, such as the line
/// number, the raw non-highlighted text, the highlighted text, and whether it
/// is the first or last line of the raw block.
// Instances are built in `Packed<RawElem>::highlight`, one per line of text.
#[elem(name = "line", title = "Raw Text / Code Line", Tagged, PlainText)]
pub struct RawLine {
    /// The line number of the raw line inside of the raw block, starts at 1.
    #[required]
    pub number: i64,

    /// The total number of lines in the raw block.
    #[required]
    pub count: i64,

    /// The line of raw text.
    #[required]
    pub text: EcoString,

    /// The highlighted raw text.
    // Plain, unstyled text if no highlighting was applied to the line.
    #[required]
    pub body: Content,
}
831
832impl PlainText for Packed<RawLine> {
833    fn plain_text(&self, text: &mut EcoString) {
834        text.push_str(&self.text);
835    }
836}
837
/// Wrapper struct for the state required to highlight Typst code.
///
/// The highlighter does not build the output itself. It reports styled pieces
/// and finished lines through the two callbacks `style_fn` and `line_fn`.
struct ThemedHighlighter<'a> {
    /// The code being highlighted.
    code: &'a str,
    /// The current node being highlighted.
    node: LinkedNode<'a>,
    /// The highlighter.
    highlighter: synt::Highlighter<'a>,
    /// The current scopes.
    scopes: Vec<syntect::parsing::Scope>,
    /// The current highlighted line.
    current_line: Vec<Content>,
    /// The range of the current line (byte offsets into `code`).
    range: Range<usize>,
    /// The current line number (starts at zero).
    line: usize,
    /// The function to style a piece of text.
    style_fn: StyleFn<'a>,
    /// The function to append a line.
    line_fn: LineFn<'a>,
}
859
// Shorthands for highlighter closures.

/// Styles one piece of a leaf node. It is called with the current line
/// number, the leaf node, the range of the piece within the highlighted code,
/// and the resolved syntect style, and returns the styled content.
type StyleFn<'a> =
    &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
/// Receives a finished line. It is called with the line number, the range of
/// the line, and the pieces collected for it.
///
/// NOTE(review): the highlighter does not clear the vector itself between
/// lines found inside a leaf, so the callback is presumably expected to drain
/// it. Confirm against the callers.
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
864
865impl<'a> ThemedHighlighter<'a> {
866    pub fn new(
867        code: &'a str,
868        top: LinkedNode<'a>,
869        highlighter: synt::Highlighter<'a>,
870        style_fn: StyleFn<'a>,
871        line_fn: LineFn<'a>,
872    ) -> Self {
873        Self {
874            code,
875            node: top,
876            highlighter,
877            range: 0..0,
878            scopes: Vec::new(),
879            current_line: Vec::new(),
880            line: 0,
881            style_fn,
882            line_fn,
883        }
884    }
885
886    pub fn highlight(&mut self) {
887        self.highlight_inner();
888
889        if !self.current_line.is_empty() {
890            (self.line_fn)(
891                self.line,
892                self.range.start..self.code.len(),
893                &mut self.current_line,
894            );
895
896            self.current_line.clear();
897        }
898    }
899
900    fn highlight_inner(&mut self) {
901        if self.node.children().len() == 0 {
902            let style = self.highlighter.style_for_stack(&self.scopes);
903            let segment = &self.code[self.node.range()];
904
905            let mut len = 0;
906            for (i, line) in split_newlines(segment).into_iter().enumerate() {
907                if i != 0 {
908                    (self.line_fn)(
909                        self.line,
910                        self.range.start..self.range.end + len - 1,
911                        &mut self.current_line,
912                    );
913                    self.range.start = self.range.end + len;
914                    self.line += 1;
915                }
916
917                let offset = self.node.range().start + len;
918                let token_range = offset..(offset + line.len());
919                self.current_line.push((self.style_fn)(
920                    self.line,
921                    &self.node,
922                    token_range,
923                    style,
924                ));
925
926                len += line.len() + 1;
927            }
928
929            self.range.end += segment.len();
930        }
931
932        for child in self.node.children() {
933            let mut scopes = self.scopes.clone();
934            if let Some(tag) = typst_syntax::highlight(&child) {
935                scopes.push(syntect::parsing::Scope::new(tag.tm_scope()).unwrap())
936            }
937
938            std::mem::swap(&mut scopes, &mut self.scopes);
939            self.node = child;
940            self.highlight_inner();
941            std::mem::swap(&mut scopes, &mut self.scopes);
942        }
943    }
944}
945
946fn preprocess(
947    text: &RawContent,
948    styles: StyleChain,
949    span: Span,
950) -> EcoVec<(EcoString, Span)> {
951    if let RawContent::Lines(lines) = text
952        && lines.iter().all(|(s, _)| !s.contains('\t'))
953    {
954        return lines.clone();
955    }
956
957    let mut text = text.get();
958    if text.contains('\t') {
959        let tab_size = styles.get(RawElem::tab_size);
960        text = align_tabs(&text, tab_size);
961    }
962    split_newlines(&text)
963        .into_iter()
964        .map(|line| (line.into(), span))
965        .collect()
966}
967
968/// Style a piece of text with a syntect style.
969fn styled(
970    routines: &Routines,
971    target: Target,
972    piece: &str,
973    foreground: synt::Color,
974    style: synt::Style,
975    span: Span,
976    span_offset: usize,
977) -> Content {
978    let mut body = TextElem::packed(piece).spanned(span);
979
980    if span_offset > 0 {
981        body = body.set(TextElem::span_offset, span_offset);
982    }
983
984    if style.foreground != foreground {
985        let color = to_typst(style.foreground);
986        body = match target {
987            Target::Html => (routines.html_span_filled)(body, color),
988            _ => body.set(TextElem::fill, color.into()),
989        };
990    }
991
992    if style.font_style.contains(synt::FontStyle::BOLD) {
993        body = body.strong().spanned(span);
994    }
995
996    if style.font_style.contains(synt::FontStyle::ITALIC) {
997        body = body.emph().spanned(span);
998    }
999
1000    if style.font_style.contains(synt::FontStyle::UNDERLINE) {
1001        body = body.underlined().spanned(span);
1002    }
1003
1004    body
1005}
1006
1007fn to_typst(synt::Color { r, g, b, a }: synt::Color) -> Color {
1008    Color::from_u8(r, g, b, a)
1009}
1010
1011fn to_syn(color: Color) -> synt::Color {
1012    let (r, g, b, a) = color.to_rgb().into_format::<u8, u8>().into_components();
1013    synt::Color { r, g, b, a }
1014}
1015
1016/// Create a syntect theme item.
1017fn item(
1018    scope: &str,
1019    color: Option<&str>,
1020    font_style: Option<synt::FontStyle>,
1021) -> synt::ThemeItem {
1022    synt::ThemeItem {
1023        scope: scope.parse().unwrap(),
1024        style: synt::StyleModifier {
1025            foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())),
1026            background: None,
1027            font_style,
1028        },
1029    }
1030}
1031
1032/// Replace tabs with spaces to align with multiples of `tab_size`.
1033fn align_tabs(text: &str, tab_size: usize) -> EcoString {
1034    let replacement = " ".repeat(tab_size);
1035    let divisor = tab_size.max(1);
1036    let amount = text.chars().filter(|&c| c == '\t').count();
1037
1038    let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size);
1039    let mut column = 0;
1040
1041    for grapheme in text.graphemes(true) {
1042        let c = grapheme.parse::<char>();
1043        if c == Ok('\t') {
1044            let required = tab_size - column % divisor;
1045            res.push_str(&replacement[..required]);
1046            column += required;
1047        } else if c.is_ok_and(typst_syntax::is_newline) || grapheme == "\r\n" {
1048            res.push_str(grapheme);
1049            column = 0;
1050        } else {
1051            res.push_str(grapheme);
1052            column += 1;
1053        }
1054    }
1055
1056    res
1057}
1058
1059/// The syntect syntax definitions.
1060///
1061/// Syntax set is generated from the syntaxes from the `bat` project
1062/// <https://github.com/sharkdp/bat/tree/master/assets/syntaxes>
1063pub static RAW_SYNTAXES: LazyLock<syntect::parsing::SyntaxSet> =
1064    LazyLock::new(two_face::syntax::extra_no_newlines);
1065
/// The default theme used for syntax highlighting.
///
/// The theme is built lazily on first access. Each entry maps one or more
/// scope selectors to an optional foreground color and an optional font
/// style; entries with no color leave the foreground unset.
pub static RAW_THEME: LazyLock<synt::Theme> = LazyLock::new(|| synt::Theme {
    name: Some("Typst Light".into()),
    author: Some("The Typst Project Developers".into()),
    // Default theme settings.
    settings: synt::ThemeSettings::default(),
    scopes: vec![
        // Comments and escape sequences.
        item("comment", Some("#74747c"), None),
        item("constant.character.escape", Some("#1d6c76"), None),
        // Inline markup: bold, italic, underline, and raw text.
        item("markup.bold", None, Some(synt::FontStyle::BOLD)),
        item("markup.italic", None, Some(synt::FontStyle::ITALIC)),
        item("markup.underline", None, Some(synt::FontStyle::UNDERLINE)),
        item("markup.raw", Some("#6b6b6f"), None),
        // Math.
        item("string.other.math.typst", None, None),
        item("punctuation.definition.math", Some("#198810"), None),
        item("keyword.operator.math, punctuation.math.typst", Some("#1d6c76"), None),
        // Headings.
        item("markup.heading, entity.name.section", None, Some(synt::FontStyle::BOLD)),
        item(
            "markup.heading.typst",
            None,
            Some(synt::FontStyle::BOLD | synt::FontStyle::UNDERLINE),
        ),
        // Lists, labels, and references.
        item("punctuation.definition.list", Some("#8b41b1"), None),
        item("markup.list.term", None, Some(synt::FontStyle::BOLD)),
        item("entity.name.label, markup.other.reference", Some("#1d6c76"), None),
        // Keywords, storage, constants, strings, and names.
        item("keyword, constant.language, variable.language", Some("#d73948"), None),
        item("storage.type, storage.modifier", Some("#d73948"), None),
        item("constant", Some("#b60157"), None),
        item("string", Some("#198810"), None),
        item("entity.name, variable.function, support", Some("#4b69c6"), None),
        item("support.macro", Some("#16718d"), None),
        item("meta.annotation", Some("#301414"), None),
        item("entity.other, meta.interpolation", Some("#8b41b1"), None),
        // Diffs.
        item("meta.diff.range", Some("#8b41b1"), None),
        item("markup.inserted, meta.diff.header.to-file", Some("#198810"), None),
        item("markup.deleted, meta.diff.header.from-file", Some("#d73948"), None),
        // JSON mapping keys and values.
        item("meta.mapping.key.json string.quoted.double.json", Some("#4b69c6"), None),
        item("meta.mapping.value.json string.quoted.double.json", Some("#198810"), None),
    ],
});