typst_library/text/raw.rs
1use std::cell::LazyCell;
2use std::ops::Range;
3use std::sync::{Arc, LazyLock};
4
5use comemo::Tracked;
6use ecow::{EcoString, EcoVec};
7use syntect::highlighting::{self as synt};
8use syntect::parsing::{ParseSyntaxError, SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
9use typst_syntax::{LinkedNode, Span, Spanned, split_newlines};
10use typst_utils::ManuallyHash;
11use unicode_segmentation::UnicodeSegmentation;
12
13use super::Lang;
14use crate::World;
15use crate::diag::{
16 LineCol, LoadError, LoadResult, LoadedWithin, ReportTextPos, SourceResult,
17};
18use crate::engine::Engine;
19use crate::foundations::{
20 Bytes, Content, Derived, OneOrMultiple, Packed, PlainText, ShowSet, Smart,
21 StyleChain, Styles, Synthesize, Target, TargetElem, cast, elem, scope,
22};
23use crate::introspection::{Locatable, Tagged};
24use crate::layout::{Em, HAlignment};
25use crate::loading::{DataSource, Load};
26use crate::model::{Figurable, ParElem};
27use crate::routines::Routines;
28use crate::text::{FontFamily, FontList, LocalName, TextElem, TextSize};
29use crate::visualize::Color;
30
31/// Raw text with optional syntax highlighting.
32///
33/// Displays the text verbatim and in a monospace font. This is typically used
34/// to embed computer code into a document.
35///
36/// Text given to this element will ignore markup syntax, such as `[*strong*]`
37/// or `[_emphasis_]`, and will be displayed verbatim. If you would like to
38/// display content with a monospace font while still allowing markup syntax,
39/// instead of using @raw, you can explicitly set the text font to a monospace
40/// font with the @text.font parameter.
41///
42/// Raw elements are mainly produced with their @raw:syntax[dedicated syntax] by
43/// enclosing text with either one or three-plus backtick characters (``` ` ```)
44/// on both sides. When using three or more backticks, text immediately after
45/// the initial backticks will be treated as a @raw.lang[language tag] used for
46/// syntax highlighting, and the raw text begins after the first whitespace.
47///
48/// = Example <example>
49/// ````example
50/// Adding `rbx` to `rcx` gives
51/// the desired result.
52///
53/// What is ```rust fn main()``` in Rust
54/// would be ```c int main()``` in C.
55///
56/// ```rust
57/// fn main() {
58/// println!("Hello World!");
59/// }
60/// ```
61///
62/// This has ``` `backticks` ``` in it
63/// (but the spaces are trimmed). And
64/// ``` here``` the leading space is
65/// also trimmed.
66/// ````
67///
68/// You can also construct a @raw element programmatically from a string (and
69/// provide the language tag via the optional @raw.lang[`lang`] parameter).
70///
71/// ```example
72/// #raw("fn " + "main() {}", lang: "rust")
73/// ```
74///
75/// If no syntax highlighting is available by default for your specified
76/// language tag (or if you want to override the built-in definition), you may
77/// provide a custom syntax specification file to the @raw.syntaxes[`syntaxes`]
78/// parameter.
79///
80/// = Styling <styling>
81/// By default, the `raw` element uses the `DejaVu Sans Mono` font (included
82/// with Typst), with a smaller font size of `{0.8em}` (that is, 80% of the
83/// global font size). This is because monospace fonts tend to be visually
84/// larger than non-monospace fonts.
85///
86/// You can customize these properties with show-set rules:
87///
88/// ````example
89/// // Switch to Cascadia Code for both
90/// // inline and block raw.
91/// #show raw: set text(font: "Cascadia Code")
92///
93/// // Reset raw blocks to the same size as normal text,
94/// // but keep inline raw at the reduced size.
95/// #show raw.where(block: true): set text(1em / 0.8)
96///
97/// Now using the `Cascadia Code` font for raw text.
98/// Here's some Python code. It looks larger now:
99///
100/// ```py
101/// def python():
102/// return 5 + 5
103/// ```
104/// ````
105///
106/// In addition, you can customize the syntax highlighting colors by setting a
107/// custom theme through the @raw.theme[`theme`] parameter.
108///
109/// For complete customization of the appearance of a raw block, a show rule on
110/// @raw.line could be helpful, such as to add line numbers.
111///
112/// Note that in raw text, typesetting features like
113/// @text.hyphenate[hyphenation], @text.overhang[overhang],
114/// @text.cjk-latin-spacing[CJK-Latin spacing], and (for raw blocks)
115/// @par.justify[justification] will be disabled by default.
116///
117/// = Syntax <syntax>
118/// This function has dedicated syntax that produces a raw element in both
119/// markup and code mode. You can enclose text in one or three-plus backtick
120/// characters (``` ` ```) on both sides to make it raw. The number of backticks
121/// must be the same on both sides, and the enclosed text cannot contain a group
122/// of that many backticks in a row. Writing just two backticks (``` `` ```)
123/// produces empty raw text.
124///
125/// Notable differences from Markdown include that single backticks can enclose
126/// text spanning multiple lines without removing indentation, and that the
127/// three-plus backtick syntax still interprets language tags when used inline.
128///
129/// Raw text enclosed in _single_ backticks has no way to specify a language tag
130/// and is always treated as inline for use within a paragraph, i.e. the
131/// @raw.block[`block`] parameter is `{false}`.
132///
133/// Raw syntax using _three or more_ backticks has the following properties:
134///
135/// - *After the initial backticks, the raw block is only terminated by a
136/// sequence of the same number of backticks*
137///
138/// To include text containing a sequence of backticks, the initial and final
139/// backticks must have at least one more backtick than the sequence.
140///
141/// - *If the raw text contains a linebreak, it will be block-level, otherwise
142/// it will be inline*
143///
144/// This sets the @raw.block[`block`] parameter to `{true}` or `{false}`
145/// accordingly.
146///
147/// - *Text immediately after the initial backticks, up to the first whitespace,
148/// is treated as a _language tag_ used for syntax highlighting*
149///
150/// The specific rules for which text can be treated as the language tag are
151/// planned to change, and are @raw:language-tag-changes[explained in detail
152/// below.]
153///
154/// - *The initial and final lines have special trimming behavior*
155///
156/// For the initial line, if all characters following the initial backticks or
157/// language tag are whitespace, the entire line will be trimmed. However, if
158/// there are non-whitespace characters on that line, only a single space
159/// immediately following the initial backticks or language tag will be
160/// trimmed if present.
161///
162/// If the final line is entirely whitespace up to the closing backticks, it
163/// will be trimmed. Otherwise, if the last non-whitespace character of the
164/// final line is a backtick, then one space character will be trimmed from
165/// the end of the line if present.
166///
167/// - *Common indentation at the beginning of lines is trimmed*
168///
169/// Typst will remove initial whitespace at the beginning of lines in the raw
170/// text that is shared between all lines, i.e. common indentation. Although
171/// this excludes text on the line with the initial backticks.
172///
173/// Typst first finds the line with the fewest initial whitespace characters
174/// that contains some non-whitespace characters, including the line with the
175/// closing backticks. Then Typst trims characters from every line equal to
176/// the number of initial whitespace characters in that line. Lines which are
177/// only whitespace will remove the same number of characters until they are
178/// empty, but will keep any extra trailing whitespace.
179///
180/// #let code-point = "https://www.unicode.org/glossary/#code_point"
181///
182/// Note that this check treats tabs and spaces as equivalent characters for
183/// simplicity, and that it operates on numbers of #link(code-point)[Unicode
184/// code points], i.e. characters, not on byte lengths.
185///
186/// These properties of the three-plus backtick syntax allow for some use cases
187/// that may not be obvious:
188///
189/// - To write text containing a sequence of backticks, enclose it with one or
190/// more backticks than the sequence:
191/// ````` ```` enclosed```backticks```` `````
192///
193/// - To write text that starts or ends with a backtick, add a space inside the
194/// opening and closing backticks: ```` ``` `backticks` ``` ````
195///
196/// - To write inline text highlighted with a language tag, add a space between
197/// the language tag and the text ````rust ```rust fn main() {}``` ````
198///
199/// - To write inline text without any language tag, add a space after the
200/// initial backticks: ```` ``` text``` ```` or use the single backtick
201/// syntax: ``` `text` ```
202///
203/// == Embedding strings with raw syntax <embedding-strings>
204/// A common use-case for raw syntax is to embed data as strings with formatting
205/// by accessing the `.text` field on raw content to get the underlying string.
206/// This may also be paired with the @bytes constructor to convert the string to
207/// bytes.
208///
209/// ````example
210/// An inline YAML dictionary via `.text`
211///
212/// #yaml(bytes(
213/// ```yaml
214/// Magic:
215/// limited-by: Mana
216/// Pokémon:
217/// limited-by: Energy
218/// Yu-Gi-Oh:
219/// limited-by: false
220/// ```.text
221/// // ^^^^ used as a string
222/// ))
223/// ````
224///
225/// == Language tag changes <language-tag-changes>
226///
227/// When using raw syntax with three or more backticks, text immediately after
228/// the initial backticks (up to the first whitespace) is treated as a
229/// @raw.lang[language tag]. However in the current version of Typst, only text
230/// that would be a valid Typst identifier is treated as the language tag. The
231/// first character not valid for an identifier will be interpreted as starting
232/// the raw text.
233///
/// For example, in the current version of Typst, if a raw block starts with
235/// `C++`, the identifier `C` will be the language tag, and the raw text will
236/// start with `++`. If a raw block starts with `++C`, it will have no language
237/// tag and the raw text will start with `++C`.
238///
239/// To use language tags that are not valid as identifiers in the current
240/// version of Typst, you must use the @raw.lang[`lang`] parameter, either by
241/// calling the constructor with a string: ```typ #raw("text", lang: "...")```,
242/// or by writing a set rule: ```typ #set raw(lang: "...")```.
243///
244/// In the next version of Typst, _all text_ up to the first whitespace or
245/// backtick will be treated as the language tag, allowing a wider character set
246/// for language tags. Tags including spaces or backticks will still need to be
247/// set manually via the @raw.lang[`lang`] parameter.
248///
249/// Typst will alert you if your raw blocks will be interpreted differently in
250/// the next Typst version by emitting a warning.
// Several of the capabilities listed below are implemented for
// `Packed<RawElem>` further down in this file: `Synthesize`, `ShowSet`,
// `LocalName`, `Figurable` and `PlainText`.
#[elem(
    scope,
    title = "Raw Text / Code",
    Synthesize,
    Locatable,
    Tagged,
    ShowSet,
    LocalName,
    Figurable,
    PlainText
)]
pub struct RawElem {
    /// The raw text.
    ///
    /// You can also use raw blocks creatively to create custom syntaxes for
    /// your automations.
    ///
    /// #example(
    ///   title: "Implementing a DSL using raw and show rules",
    ///   ````
    ///   // Parse numbers in raw blocks with the
    ///   // `mydsl` tag and sum them up.
    ///   #show raw.where(lang: "mydsl"): it => {
    ///     let sum = 0
    ///     for part in it.text.split("+") {
    ///       sum += int(part.trim())
    ///     }
    ///     sum
    ///   }
    ///
    ///   ```mydsl
    ///   1 + 2 + 3 + 4 + 5
    ///   ```
    ///   ````
    /// )
    // Stored as `RawContent`, which is either a single string or a list of
    // lines that each carry their own span.
    #[required]
    pub text: RawContent,

    /// Whether the raw text is displayed as a separate block.
    ///
    /// In markup mode, using one-backtick notation makes this `{false}`. Using
    /// three-backtick notation makes it `{true}` if the enclosed content
    /// contains at least one line break.
    ///
    /// ````example
    /// // Display inline code in a small box
    /// // that retains the correct baseline.
    /// #show raw.where(block: false): box.with(
    ///   fill: luma(240),
    ///   inset: (x: 3pt, y: 0pt),
    ///   outset: (y: 3pt),
    ///   radius: 2pt,
    /// )
    ///
    /// // Display block code in a larger block
    /// // with more padding.
    /// #show raw.where(block: true): block.with(
    ///   fill: luma(240),
    ///   inset: 10pt,
    ///   radius: 4pt,
    /// )
    ///
    /// With `rg`, you can search through your files quickly.
    /// This example searches the current directory recursively
    /// for the text `Hello World`:
    ///
    /// ```bash
    /// rg "Hello World"
    /// ```
    /// ````
    #[default(false)]
    pub block: bool,

    /// The language to interpret the raw text as for syntax highlighting.
    ///
    /// In @html[HTML export], this sets the `data-lang` attribute of the
    /// generated @html.code element.
    ///
    /// Apart from typical language tags known from Markdown, this supports the
    /// `{"typ"}`, `{"typc"}`, and `{"typm"}` tags for
    /// @reference:syntax:markup[Typst markup],
    /// @reference:syntax:code[Typst code], and
    /// @reference:syntax:math[Typst math], respectively.
    ///
    /// ````example
    /// ```typ
    /// This is *Typst!*
    /// ```
    ///
    /// This is ```typ also *Typst*```, but inline!
    /// ````
    // The highlighter in this file lowercases the tag before matching it and
    // falls back to `"txt"` when no language is set.
    pub lang: Option<EcoString>,

    /// The horizontal alignment that each line in a raw block should have. This
    /// option is ignored if this is not a raw block (if specified
    /// `block: false` or single backticks were used in markup mode).
    ///
    /// By default, this is set to `{start}`, meaning that raw text is aligned
    /// towards the start of the text direction inside the block by default,
    /// regardless of the current context's alignment (allowing you to center
    /// the raw block itself without centering the text inside it, for example).
    ///
    /// ````example
    /// #set raw(align: center)
    ///
    /// ```typc
    /// let f(x) = x
    /// code = "centered"
    /// ```
    /// ````
    #[default(HAlignment::Start)]
    pub align: HAlignment,

    /// Additional syntax definitions to load. The syntax definitions should be
    /// in the
    /// #link("https://www.sublimetext.com/docs/syntax.html")[`sublime-syntax`
    /// file format].
    ///
    /// You can pass any of the following values:
    ///
    /// - A path string or @path to load a syntax file from.
    /// - Raw bytes from which the syntax should be decoded.
    /// - An array where each item is one of the above.
    ///
    /// ````example
    /// #set raw(syntaxes: "SExpressions.sublime-syntax")
    ///
    /// ```sexp
    /// (defun factorial (x)
    ///   (if (zerop x)
    ///     ; with a comment
    ///     1
    ///     (* x (factorial (- x 1)))))
    /// ```
    /// ````
    // The sources are loaded and decoded while the arguments are parsed
    // (`RawSyntax::load`), so the field holds both the original sources and
    // the decoded syntax sets.
    #[parse(match args.named("syntaxes")? {
        Some(sources) => Some(RawSyntax::load(engine.world, sources)?),
        None => None,
    })]
    #[fold]
    pub syntaxes: Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>,

    /// The theme to use for syntax highlighting. Themes should be in the
    /// #link("https://www.sublimetext.com/docs/color_schemes_tmtheme.html")[`tmTheme` file format].
    ///
    /// You can pass any of the following values:
    ///
    /// - `{none}`: Disables syntax highlighting.
    /// - `{auto}`: Highlights with Typst's default theme.
    /// - A path string or @path to load a theme file from.
    /// - Raw bytes from which the theme should be decoded.
    ///
    /// Applying a theme only affects the color of specifically highlighted
    /// text. It does not consider the theme's foreground and background
    /// properties, so that you retain control over the color of raw text. You
    /// can apply the foreground color yourself with the @text function and the
    /// background with a @block.fill[filled block]. You could also use the @xml
    /// function to extract these properties from the theme.
    ///
    /// ````example
    /// #set raw(theme: "halcyon.tmTheme")
    /// #show raw: it => block(
    ///   fill: rgb("#1d2433"),
    ///   inset: 8pt,
    ///   radius: 5pt,
    ///   text(fill: rgb("#a2aabc"), it)
    /// )
    ///
    /// ```typ
    /// = Chapter 1
    /// #let hi = "Hello World"
    /// ```
    /// ````
    // Only a custom, non-`none` theme needs loading; `auto` and `none` are
    // passed through unchanged. The span of the argument is handed to
    // `RawTheme::load` together with the source.
    #[parse(match args.named::<Spanned<Smart<Option<DataSource>>>>("theme")? {
        Some(Spanned { v: Smart::Custom(Some(source)), span }) => Some(Smart::Custom(
            Some(RawTheme::load(engine.world, Spanned::new(source, span))?)
        )),
        Some(Spanned { v: Smart::Custom(None), .. }) => Some(Smart::Custom(None)),
        Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
        None => None,
    })]
    pub theme: Smart<Option<Derived<DataSource, RawTheme>>>,

    /// The size for a tab stop in spaces. A tab is replaced with enough spaces
    /// to align with the next multiple of the size.
    ///
    /// ````example
    /// #set raw(tab-size: 8)
    /// ```tsv
    /// Year Month Day
    /// 2000 2 3
    /// 2001 2 1
    /// 2002 3 10
    /// ```
    /// ````
    #[default(2)]
    pub tab_size: usize,

    /// The stylized lines of raw text.
    ///
    /// Made accessible for the @raw.line[`raw.line` element]. Allows more
    /// styling control in `show` rules.
    // Filled in by the `Synthesize` implementation below with the output of
    // `Packed<RawElem>::highlight`.
    #[synthesized]
    pub lines: Vec<Packed<RawLine>>,
}
456
// Declares `RawLine` inside the scope of `RawElem`. `RawLine` is given the
// element name `line` by its own `#[elem(name = "line", ...)]` attribute, and
// the documentation in this file refers to it as `raw.line`.
#[scope]
impl RawElem {
    #[elem]
    type RawLine;
}
462
463impl RawElem {
464 /// The supported language names and tags.
465 pub fn languages() -> Vec<(&'static str, Vec<&'static str>)> {
466 RAW_SYNTAXES
467 .syntaxes()
468 .iter()
469 .map(|syntax| {
470 (
471 syntax.name.as_str(),
472 syntax.file_extensions.iter().map(|s| s.as_str()).collect(),
473 )
474 })
475 .chain([
476 ("Typst", vec!["typ"]),
477 ("Typst (code)", vec!["typc"]),
478 ("Typst (math)", vec!["typm"]),
479 ])
480 .collect()
481 }
482}
483
484impl Synthesize for Packed<RawElem> {
485 fn synthesize(
486 &mut self,
487 engine: &mut Engine,
488 styles: StyleChain,
489 ) -> SourceResult<()> {
490 let seq = self.highlight(engine.library.routines, styles);
491 self.lines = Some(seq);
492 Ok(())
493 }
494}
495
impl Packed<RawElem> {
    /// Turns the raw text into one [`RawLine`] per line, applying syntax
    /// highlighting where possible.
    ///
    /// There are three ways a line body can be produced:
    /// - For the language tags `typ`, `typst`, `typc` and `typm`, the text is
    ///   parsed with `typst_syntax` and highlighted with
    ///   [`ThemedHighlighter`].
    /// - For any other tag that matches a syntax (user-provided syntaxes are
    ///   searched before the built-in `RAW_SYNTAXES`), syntect's line
    ///   highlighter is used.
    /// - Otherwise, or when the theme is `none`, each line becomes a plain,
    ///   unstyled text element.
    ///
    /// Line numbers passed to `RawLine::new` start at 1, and every line is
    /// given the total line count.
    #[comemo::memoize]
    fn highlight(&self, routines: &Routines, styles: StyleChain) -> Vec<Packed<RawLine>> {
        let elem = self.as_ref();
        // Lines with tabs already expanded; each line is paired with a span.
        let lines = preprocess(&elem.text, styles, self.span());

        let count = lines.len() as i64;
        // Language tags are matched in lowercase. Because of the `"txt"`
        // fallback, `lang` is always `Some` from here on.
        let lang = elem
            .lang
            .get_ref(styles)
            .as_ref()
            .map(|s| s.to_lowercase())
            .or(Some("txt".into()));

        // Fallback used when no highlighting takes place: the body of each
        // line is just its text.
        let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| {
            lines.into_iter().enumerate().map(|(i, (line, line_span))| {
                Packed::new(RawLine::new(
                    i as i64 + 1,
                    count,
                    line.clone(),
                    TextElem::packed(line).spanned(line_span),
                ))
                .spanned(line_span)
            })
        };

        // Wrapped in a `LazyCell` so the syntaxes are only resolved from the
        // style chain if the syntect branch below actually needs them.
        let syntaxes = LazyCell::new(|| elem.syntaxes.get_cloned(styles));
        let theme: &synt::Theme = match elem.theme.get_ref(styles) {
            Smart::Auto => &RAW_THEME,
            Smart::Custom(Some(theme)) => theme.derived.get(),
            // `theme: none` disables highlighting entirely.
            Smart::Custom(None) => return non_highlighted_result(lines).collect(),
        };

        // Pieces whose color equals the theme's foreground (black if the theme
        // has none) are left without an explicit fill by `styled`.
        let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK);
        let target = styles.get(TargetElem::target);

        let mut seq = vec![];
        if matches!(lang.as_deref(), Some("typ" | "typst" | "typc" | "typm")) {
            // The Typst parser works on a single string, so the lines are
            // joined with `\n` and split again by the highlighter.
            let text =
                lines.iter().map(|(s, _)| s.clone()).collect::<Vec<_>>().join("\n");
            let root = match lang.as_deref() {
                Some("typc") => typst_syntax::parse_code(&text),
                Some("typm") => typst_syntax::parse_math(&text),
                _ => typst_syntax::parse(&text),
            };

            ThemedHighlighter::new(
                &text,
                LinkedNode::new(&root),
                synt::Highlighter::new(theme),
                // Style callback: receives the zero-based line index and the
                // byte range of the piece within `text`.
                &mut |i, _, range, style| {
                    // Find span and start of line.
                    // Note: Dedent is already applied to the text
                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
                    // Byte offset of the piece relative to the start of its
                    // line, i.e. to the byte after the preceding `\n`.
                    let span_offset = text[..range.start]
                        .rfind('\n')
                        .map_or(0, |i| range.start - (i + 1));
                    styled(
                        routines,
                        target,
                        &text[range],
                        foreground,
                        style,
                        span,
                        span_offset,
                    )
                },
                // Line callback: receives the zero-based line index, the byte
                // range of the line within `text` and the pieces collected for
                // it, which are drained into the line body.
                &mut |i, range, line| {
                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
                    seq.push(
                        Packed::new(RawLine::new(
                            (i + 1) as i64,
                            count,
                            EcoString::from(&text[range]),
                            Content::sequence(line.drain(..)),
                        ))
                        .spanned(span),
                    );
                },
            )
            .highlight();
        } else if let Some((syntax_set, syntax)) = lang.and_then(|token| {
            // Prefer user-provided syntaxes over built-in ones.
            syntaxes
                .derived
                .iter()
                .map(|syntax| syntax.get())
                .chain(std::iter::once(&*RAW_SYNTAXES))
                .find_map(|set| {
                    set.find_syntax_by_token(&token).map(|syntax| (set, syntax))
                })
        }) {
            // One highlighter is fed all lines in order.
            let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
            for (i, (line, line_span)) in lines.into_iter().enumerate() {
                let mut line_content = vec![];
                // Byte offset of the current piece within the line.
                let mut span_offset = 0;
                // NOTE(review): the result of `highlight_line` is flattened via
                // `into_iter().flatten()`, so a failed highlight appears to
                // yield no pieces for the line instead of reporting an error.
                for (style, piece) in highlighter
                    .highlight_line(line.as_str(), syntax_set)
                    .into_iter()
                    .flatten()
                {
                    line_content.push(styled(
                        routines,
                        target,
                        piece,
                        foreground,
                        style,
                        line_span,
                        span_offset,
                    ));
                    span_offset += piece.len();
                }

                seq.push(
                    Packed::new(RawLine::new(
                        i as i64 + 1,
                        count,
                        line,
                        Content::sequence(line_content),
                    ))
                    .spanned(line_span),
                );
            }
        } else {
            // No syntax matches the language tag.
            seq.extend(non_highlighted_result(lines));
        };

        seq
    }
}
626
627impl ShowSet for Packed<RawElem> {
628 fn show_set(&self, styles: StyleChain) -> Styles {
629 let mut out = Styles::new();
630 out.set(TextElem::overhang, false);
631 out.set(TextElem::lang, Lang::ENGLISH);
632 out.set(TextElem::hyphenate, Smart::Custom(false));
633 out.set(TextElem::size, TextSize(Em::new(0.8).into()));
634 out.set(TextElem::font, FontList(vec![FontFamily::new("DejaVu Sans Mono")]));
635 out.set(TextElem::cjk_latin_spacing, Smart::Custom(None));
636 if self.block.get(styles) {
637 out.set(ParElem::justify, false);
638 }
639 out
640 }
641}
642
// NOTE(review): `KEY` is presumably the lookup key for the localized name of
// raw elements; confirm against the `LocalName` trait definition.
impl LocalName for Packed<RawElem> {
    const KEY: &'static str = "raw";
}
646
// Empty impl: nothing is overridden here. It backs the `Figurable` capability
// listed in the `#[elem(...)]` attribute of `RawElem`.
impl Figurable for Packed<RawElem> {}
648
649impl PlainText for Packed<RawElem> {
650 fn plain_text(&self, text: &mut EcoString) {
651 text.push_str(&self.text.get());
652 }
653}
654
// Lets a `RawElem` be cast from `Content`. The content is unpacked as a
// `RawElem`; if that fails, the cast errors with "expected raw text".
cast! {
    RawElem,
    v: Content => v.unpack::<Self>().map_err(|_| "expected raw text")?
}
659
/// The content of the raw text.
///
/// Both variants represent the same kind of data: `RawContent::get` joins the
/// lines of the `Lines` variant with `\n`, and the `PartialEq` implementation
/// compares variants by their text only, ignoring spans.
#[derive(Debug, Clone, Hash)]
pub enum RawContent {
    /// From a string.
    Text(EcoString),
    /// From lines of text, where each line is paired with a span.
    Lines(EcoVec<(EcoString, Span)>),
}
668
669impl RawContent {
670 /// Returns or synthesizes the text content of the raw text.
671 fn get(&self) -> EcoString {
672 match self.clone() {
673 RawContent::Text(text) => text,
674 RawContent::Lines(lines) => {
675 let mut lines = lines.into_iter().map(|(s, _)| s);
676 if lines.len() <= 1 {
677 lines.next().unwrap_or_default()
678 } else {
679 lines.collect::<Vec<_>>().join("\n").into()
680 }
681 }
682 }
683 }
684}
685
686impl PartialEq for RawContent {
687 fn eq(&self, other: &Self) -> bool {
688 match (self, other) {
689 (RawContent::Text(a), RawContent::Text(b)) => a == b,
690 (lines @ RawContent::Lines(_), RawContent::Text(text))
691 | (RawContent::Text(text), lines @ RawContent::Lines(_)) => {
692 *text == lines.get()
693 }
694 (RawContent::Lines(a), RawContent::Lines(b)) => Iterator::eq(
695 a.iter().map(|(line, _)| line),
696 b.iter().map(|(line, _)| line),
697 ),
698 }
699 }
700}
701
// Conversions for `RawContent`: it turns into a value through its joined text
// (`RawContent::get`), and an `EcoString` casts into the `Text` variant.
cast! {
    RawContent,
    self => self.get().into_value(),
    v: EcoString => Self::Text(v),
}
707
/// A loaded syntax.
///
/// Wraps a syntect `SyntaxSet` behind an `Arc`. The set is stored in a
/// `ManuallyHash` whose hash is computed from the bytes the syntax was decoded
/// from (see `RawSyntax::decode`).
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawSyntax(Arc<ManuallyHash<SyntaxSet>>);
711
712impl RawSyntax {
713 /// Load syntaxes from sources.
714 fn load(
715 world: Tracked<dyn World + '_>,
716 sources: Spanned<OneOrMultiple<DataSource>>,
717 ) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
718 let loaded = sources.load(world)?;
719 let list = loaded
720 .iter()
721 .map(|data| Self::decode(&data.data).within(data))
722 .collect::<SourceResult<_>>()?;
723 Ok(Derived::new(sources.v, list))
724 }
725
726 /// Decode a syntax from a loaded source.
727 #[comemo::memoize]
728 #[typst_macros::time(name = "load syntaxes")]
729 fn decode(bytes: &Bytes) -> LoadResult<RawSyntax> {
730 let str = bytes.as_str()?;
731
732 let syntax = SyntaxDefinition::load_from_str(str, false, None)
733 .map_err(format_syntax_error)?;
734
735 let mut builder = SyntaxSetBuilder::new();
736 builder.add(syntax);
737
738 Ok(RawSyntax(Arc::new(ManuallyHash::new(
739 builder.build(),
740 typst_utils::hash128(bytes),
741 ))))
742 }
743
744 /// Return the underlying syntax set.
745 fn get(&self) -> &SyntaxSet {
746 self.0.as_ref()
747 }
748}
749
750fn format_syntax_error(error: ParseSyntaxError) -> LoadError {
751 let pos = syntax_error_pos(&error);
752 LoadError::text(pos, "failed to parse syntax", error)
753}
754
755fn syntax_error_pos(error: &ParseSyntaxError) -> ReportTextPos {
756 match error {
757 ParseSyntaxError::InvalidYaml(scan_error) => {
758 let m = scan_error.marker();
759 ReportTextPos::full(
760 m.index()..m.index(),
761 LineCol::one_based(m.line(), m.col() + 1),
762 )
763 }
764 _ => ReportTextPos::None,
765 }
766}
767
/// A loaded syntect theme.
///
/// Wraps a syntect `Theme` behind an `Arc`. The theme is stored in a
/// `ManuallyHash` whose hash is computed from the bytes the theme was decoded
/// from (see `RawTheme::decode`).
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
771
772impl RawTheme {
773 /// Load a theme from a data source.
774 fn load(
775 world: Tracked<dyn World + '_>,
776 source: Spanned<DataSource>,
777 ) -> SourceResult<Derived<DataSource, Self>> {
778 let loaded = source.load(world)?;
779 let theme = Self::decode(&loaded.data).within(&loaded)?;
780 Ok(Derived::new(source.v, theme))
781 }
782
783 /// Decode a theme from bytes.
784 #[comemo::memoize]
785 fn decode(bytes: &Bytes) -> LoadResult<RawTheme> {
786 let mut cursor = std::io::Cursor::new(bytes.as_slice());
787 let theme =
788 synt::ThemeSet::load_from_reader(&mut cursor).map_err(format_theme_error)?;
789 Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(bytes)))))
790 }
791
792 /// Get the underlying syntect theme.
793 pub fn get(&self) -> &synt::Theme {
794 self.0.as_ref()
795 }
796}
797
798fn format_theme_error(error: syntect::LoadingError) -> LoadError {
799 let pos = match &error {
800 syntect::LoadingError::ParseSyntax(err, _) => syntax_error_pos(err),
801 _ => ReportTextPos::None,
802 };
803 LoadError::text(pos, "failed to parse theme", error)
804}
805
/// A highlighted line of raw text.
///
/// This is a helper element that is synthesized by @raw elements.
///
/// It allows you to access various properties of the line, such as the line
/// number, the raw non-highlighted text, the highlighted text, and whether it
/// is the first or last line of the raw block.
// Instances are created by `Packed<RawElem>::highlight`, which passes the
// one-based line index as `number` and the total number of lines as `count`.
#[elem(name = "line", title = "Raw Text / Code Line", Tagged, PlainText)]
pub struct RawLine {
    /// The line number of the raw line inside of the raw block, starts at 1.
    #[required]
    pub number: i64,

    /// The total number of lines in the raw block.
    #[required]
    pub count: i64,

    /// The line of raw text.
    // This is the plain text of the line; it is also what the `PlainText`
    // implementation for `Packed<RawLine>` appends.
    #[required]
    pub text: EcoString,

    /// The highlighted raw text.
    #[required]
    pub body: Content,
}
831
832impl PlainText for Packed<RawLine> {
833 fn plain_text(&self, text: &mut EcoString) {
834 text.push_str(&self.text);
835 }
836}
837
/// Wrapper struct for the state required to highlight Typst code.
///
/// The highlighter walks the syntax tree recursively. Every leaf node is
/// split at line breaks and styled piece by piece through `style_fn`, and
/// each completed line is handed to `line_fn`.
struct ThemedHighlighter<'a> {
    /// The code being highlighted.
    code: &'a str,
    /// The current node being highlighted.
    node: LinkedNode<'a>,
    /// The highlighter.
    highlighter: synt::Highlighter<'a>,
    /// The current scopes, i.e. the scope stack for the node being visited.
    scopes: Vec<syntect::parsing::Scope>,
    /// The styled pieces collected so far for the current line.
    current_line: Vec<Content>,
    /// Byte range into `code`: `start` is where the current line begins and
    /// `end` advances by the length of every leaf segment that has been
    /// processed.
    range: Range<usize>,
    /// The current line number (zero-based; starts at 0).
    line: usize,
    /// The function to style a piece of text.
    style_fn: StyleFn<'a>,
    /// The function to append a line.
    line_fn: LineFn<'a>,
}
859
// Shorthands for highlighter closures.
//
// `StyleFn` is called with the zero-based line index, the node being
// highlighted, the byte range of the piece within the code and the style to
// apply; it returns the styled piece.
//
// `LineFn` is called with the zero-based line index, the byte range of the
// finished line within the code and the pieces collected for that line.
type StyleFn<'a> =
    &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
864
impl<'a> ThemedHighlighter<'a> {
    /// Creates a highlighter for `code`, starting at the node `top`.
    ///
    /// The state starts out empty: no scopes, no collected pieces, an empty
    /// range at offset 0 and line index 0.
    pub fn new(
        code: &'a str,
        top: LinkedNode<'a>,
        highlighter: synt::Highlighter<'a>,
        style_fn: StyleFn<'a>,
        line_fn: LineFn<'a>,
    ) -> Self {
        Self {
            code,
            node: top,
            highlighter,
            range: 0..0,
            scopes: Vec::new(),
            current_line: Vec::new(),
            line: 0,
            style_fn,
            line_fn,
        }
    }

    /// Highlights the whole tree and emits the last line.
    ///
    /// Lines are normally emitted when a line break is encountered, so the
    /// pieces of the last line are still pending after the traversal. They
    /// are emitted here with a range that runs to the end of the code.
    pub fn highlight(&mut self) {
        self.highlight_inner();

        if !self.current_line.is_empty() {
            (self.line_fn)(
                self.line,
                self.range.start..self.code.len(),
                &mut self.current_line,
            );

            self.current_line.clear();
        }
    }

    /// Highlights the current node and, recursively, its children.
    fn highlight_inner(&mut self) {
        // Only leaf nodes carry text that needs styling.
        if self.node.children().len() == 0 {
            let style = self.highlighter.style_for_stack(&self.scopes);
            let segment = &self.code[self.node.range()];

            // Number of bytes of `segment` consumed so far, counting one byte
            // for the line break after each line.
            // NOTE(review): the `+ 1` below assumes every line break in the
            // segment is a single byte. The caller in this file joins the
            // lines with `\n`; confirm no other line break can reach here.
            let mut len = 0;
            for (i, line) in split_newlines(segment).into_iter().enumerate() {
                // Every piece after the first one starts a new line, so the
                // line collected so far is emitted first.
                if i != 0 {
                    (self.line_fn)(
                        self.line,
                        // Ends just before the line break.
                        self.range.start..self.range.end + len - 1,
                        &mut self.current_line,
                    );
                    // The next line starts right after the line break.
                    self.range.start = self.range.end + len;
                    self.line += 1;
                }

                // Byte range of this piece within the whole code.
                let offset = self.node.range().start + len;
                let token_range = offset..(offset + line.len());
                self.current_line.push((self.style_fn)(
                    self.line,
                    &self.node,
                    token_range,
                    style,
                ));

                len += line.len() + 1;
            }

            self.range.end += segment.len();
        }

        // `self.node` is overwritten with each child and not restored
        // afterwards; the loop runs on the iterator obtained from
        // `self.node.children()` before the first reassignment.
        for child in self.node.children() {
            let mut scopes = self.scopes.clone();
            if let Some(tag) = typst_syntax::highlight(&child) {
                // NOTE(review): the `unwrap` panics if `Scope::new` rejects
                // the tag's scope string; presumably all tag scopes are valid.
                scopes.push(syntect::parsing::Scope::new(tag.tm_scope()).unwrap())
            }

            // Install the extended scope stack for the child, then swap the
            // previous stack back in once the child is done.
            std::mem::swap(&mut scopes, &mut self.scopes);
            self.node = child;
            self.highlight_inner();
            std::mem::swap(&mut scopes, &mut self.scopes);
        }
    }
}
945
946fn preprocess(
947 text: &RawContent,
948 styles: StyleChain,
949 span: Span,
950) -> EcoVec<(EcoString, Span)> {
951 if let RawContent::Lines(lines) = text
952 && lines.iter().all(|(s, _)| !s.contains('\t'))
953 {
954 return lines.clone();
955 }
956
957 let mut text = text.get();
958 if text.contains('\t') {
959 let tab_size = styles.get(RawElem::tab_size);
960 text = align_tabs(&text, tab_size);
961 }
962 split_newlines(&text)
963 .into_iter()
964 .map(|line| (line.into(), span))
965 .collect()
966}
967
968/// Style a piece of text with a syntect style.
969fn styled(
970 routines: &Routines,
971 target: Target,
972 piece: &str,
973 foreground: synt::Color,
974 style: synt::Style,
975 span: Span,
976 span_offset: usize,
977) -> Content {
978 let mut body = TextElem::packed(piece).spanned(span);
979
980 if span_offset > 0 {
981 body = body.set(TextElem::span_offset, span_offset);
982 }
983
984 if style.foreground != foreground {
985 let color = to_typst(style.foreground);
986 body = match target {
987 Target::Html => (routines.html_span_filled)(body, color),
988 _ => body.set(TextElem::fill, color.into()),
989 };
990 }
991
992 if style.font_style.contains(synt::FontStyle::BOLD) {
993 body = body.strong().spanned(span);
994 }
995
996 if style.font_style.contains(synt::FontStyle::ITALIC) {
997 body = body.emph().spanned(span);
998 }
999
1000 if style.font_style.contains(synt::FontStyle::UNDERLINE) {
1001 body = body.underlined().spanned(span);
1002 }
1003
1004 body
1005}
1006
1007fn to_typst(synt::Color { r, g, b, a }: synt::Color) -> Color {
1008 Color::from_u8(r, g, b, a)
1009}
1010
1011fn to_syn(color: Color) -> synt::Color {
1012 let (r, g, b, a) = color.to_rgb().into_format::<u8, u8>().into_components();
1013 synt::Color { r, g, b, a }
1014}
1015
1016/// Create a syntect theme item.
1017fn item(
1018 scope: &str,
1019 color: Option<&str>,
1020 font_style: Option<synt::FontStyle>,
1021) -> synt::ThemeItem {
1022 synt::ThemeItem {
1023 scope: scope.parse().unwrap(),
1024 style: synt::StyleModifier {
1025 foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())),
1026 background: None,
1027 font_style,
1028 },
1029 }
1030}
1031
1032/// Replace tabs with spaces to align with multiples of `tab_size`.
1033fn align_tabs(text: &str, tab_size: usize) -> EcoString {
1034 let replacement = " ".repeat(tab_size);
1035 let divisor = tab_size.max(1);
1036 let amount = text.chars().filter(|&c| c == '\t').count();
1037
1038 let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size);
1039 let mut column = 0;
1040
1041 for grapheme in text.graphemes(true) {
1042 let c = grapheme.parse::<char>();
1043 if c == Ok('\t') {
1044 let required = tab_size - column % divisor;
1045 res.push_str(&replacement[..required]);
1046 column += required;
1047 } else if c.is_ok_and(typst_syntax::is_newline) || grapheme == "\r\n" {
1048 res.push_str(grapheme);
1049 column = 0;
1050 } else {
1051 res.push_str(grapheme);
1052 column += 1;
1053 }
1054 }
1055
1056 res
1057}
1058
1059/// The syntect syntax definitions.
1060///
1061/// Syntax set is generated from the syntaxes from the `bat` project
1062/// <https://github.com/sharkdp/bat/tree/master/assets/syntaxes>
1063pub static RAW_SYNTAXES: LazyLock<syntect::parsing::SyntaxSet> =
1064 LazyLock::new(two_face::syntax::extra_no_newlines);
1065
1066/// The default theme used for syntax highlighting.
1067pub static RAW_THEME: LazyLock<synt::Theme> = LazyLock::new(|| synt::Theme {
1068 name: Some("Typst Light".into()),
1069 author: Some("The Typst Project Developers".into()),
1070 settings: synt::ThemeSettings::default(),
1071 scopes: vec![
1072 item("comment", Some("#74747c"), None),
1073 item("constant.character.escape", Some("#1d6c76"), None),
1074 item("markup.bold", None, Some(synt::FontStyle::BOLD)),
1075 item("markup.italic", None, Some(synt::FontStyle::ITALIC)),
1076 item("markup.underline", None, Some(synt::FontStyle::UNDERLINE)),
1077 item("markup.raw", Some("#6b6b6f"), None),
1078 item("string.other.math.typst", None, None),
1079 item("punctuation.definition.math", Some("#198810"), None),
1080 item("keyword.operator.math, punctuation.math.typst", Some("#1d6c76"), None),
1081 item("markup.heading, entity.name.section", None, Some(synt::FontStyle::BOLD)),
1082 item(
1083 "markup.heading.typst",
1084 None,
1085 Some(synt::FontStyle::BOLD | synt::FontStyle::UNDERLINE),
1086 ),
1087 item("punctuation.definition.list", Some("#8b41b1"), None),
1088 item("markup.list.term", None, Some(synt::FontStyle::BOLD)),
1089 item("entity.name.label, markup.other.reference", Some("#1d6c76"), None),
1090 item("keyword, constant.language, variable.language", Some("#d73948"), None),
1091 item("storage.type, storage.modifier", Some("#d73948"), None),
1092 item("constant", Some("#b60157"), None),
1093 item("string", Some("#198810"), None),
1094 item("entity.name, variable.function, support", Some("#4b69c6"), None),
1095 item("support.macro", Some("#16718d"), None),
1096 item("meta.annotation", Some("#301414"), None),
1097 item("entity.other, meta.interpolation", Some("#8b41b1"), None),
1098 item("meta.diff.range", Some("#8b41b1"), None),
1099 item("markup.inserted, meta.diff.header.to-file", Some("#198810"), None),
1100 item("markup.deleted, meta.diff.header.from-file", Some("#d73948"), None),
1101 item("meta.mapping.key.json string.quoted.double.json", Some("#4b69c6"), None),
1102 item("meta.mapping.value.json string.quoted.double.json", Some("#198810"), None),
1103 ],
1104});