1use std::cell::LazyCell;
2use std::ops::Range;
3use std::sync::{Arc, LazyLock};
4
5use comemo::Tracked;
6use ecow::{EcoString, EcoVec};
7use syntect::highlighting::{self as synt};
8use syntect::parsing::{ParseSyntaxError, SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
9use typst_syntax::{LinkedNode, Span, Spanned, split_newlines};
10use typst_utils::ManuallyHash;
11use unicode_segmentation::UnicodeSegmentation;
12
13use super::Lang;
14use crate::World;
15use crate::diag::{
16 LineCol, LoadError, LoadResult, LoadedWithin, ReportPos, SourceResult,
17};
18use crate::engine::Engine;
19use crate::foundations::{
20 Bytes, Content, Derived, OneOrMultiple, Packed, PlainText, ShowSet, Smart,
21 StyleChain, Styles, Synthesize, Target, TargetElem, cast, elem, scope,
22};
23use crate::introspection::{Locatable, Tagged};
24use crate::layout::{Em, HAlignment};
25use crate::loading::{DataSource, Load};
26use crate::model::{Figurable, ParElem};
27use crate::routines::Routines;
28use crate::text::{FontFamily, FontList, LocalName, TextElem, TextSize};
29use crate::visualize::Color;
30
/// An element holding raw text that can be split into lines and
/// syntax-highlighted.
///
/// Besides its settable fields, the element carries a synthesized `lines`
/// field which is filled in by its `Synthesize` implementation.
#[elem(
    scope,
    title = "Raw Text / Code",
    Synthesize,
    Locatable,
    Tagged,
    ShowSet,
    LocalName,
    Figurable,
    PlainText
)]
pub struct RawElem {
    /// The raw text. This field is required.
    #[required]
    pub text: RawContent,

    /// Whether the raw text is a block. Defaults to `false`.
    ///
    /// When enabled, the show-set rule of this element turns off paragraph
    /// justification.
    #[default(false)]
    pub block: bool,

    /// The language tag used to select a syntax for highlighting.
    ///
    /// The tag is lowercased before the lookup; when it is unset, `"txt"` is
    /// used instead.
    pub lang: Option<EcoString>,

    /// The horizontal alignment setting. Defaults to `HAlignment::Start`.
    #[default(HAlignment::Start)]
    pub align: HAlignment,

    /// Additional syntax definitions.
    ///
    /// The named `syntaxes` argument is loaded through `RawSyntax::load`
    /// while the arguments are parsed. During highlighting, these syntaxes
    /// are searched before the built-in `RAW_SYNTAXES` set.
    #[parse(match args.named("syntaxes")? {
        Some(sources) => Some(RawSyntax::load(engine.world, sources)?),
        None => None,
    })]
    #[fold]
    pub syntaxes: Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>,

    /// The theme used for highlighting.
    ///
    /// - `Smart::Auto` selects the built-in `RAW_THEME`.
    /// - A custom data source is loaded through `RawTheme::load` while the
    ///   arguments are parsed.
    /// - An explicit `None` disables highlighting altogether.
    #[parse(match args.named::<Spanned<Smart<Option<DataSource>>>>("theme")? {
        Some(Spanned { v: Smart::Custom(Some(source)), span }) => Some(Smart::Custom(
            Some(RawTheme::load(engine.world, Spanned::new(source, span))?)
        )),
        Some(Spanned { v: Smart::Custom(None), .. }) => Some(Smart::Custom(None)),
        Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
        None => None,
    })]
    pub theme: Smart<Option<Derived<DataSource, RawTheme>>>,

    /// The width of a tab stop in spaces, used when tabs in the text are
    /// expanded. Defaults to `2`.
    #[default(2)]
    pub tab_size: usize,

    /// The (possibly highlighted) lines of the raw text.
    ///
    /// This field is synthesized and therefore not settable by the user.
    #[synthesized]
    pub lines: Vec<Packed<RawLine>>,
}
327
// Definitions that live in the scope of `RawElem`.
#[scope]
impl RawElem {
    // Exposes the `RawLine` element through the scope of `RawElem`.
    #[elem]
    type RawLine;
}
333
334impl RawElem {
335 pub fn languages() -> Vec<(&'static str, Vec<&'static str>)> {
337 RAW_SYNTAXES
338 .syntaxes()
339 .iter()
340 .map(|syntax| {
341 (
342 syntax.name.as_str(),
343 syntax.file_extensions.iter().map(|s| s.as_str()).collect(),
344 )
345 })
346 .chain([
347 ("Typst", vec!["typ"]),
348 ("Typst (code)", vec!["typc"]),
349 ("Typst (math)", vec!["typm"]),
350 ])
351 .collect()
352 }
353}
354
355impl Synthesize for Packed<RawElem> {
356 fn synthesize(
357 &mut self,
358 engine: &mut Engine,
359 styles: StyleChain,
360 ) -> SourceResult<()> {
361 let seq = self.highlight(engine.routines, styles);
362 self.lines = Some(seq);
363 Ok(())
364 }
365}
366
367impl Packed<RawElem> {
368 #[comemo::memoize]
369 fn highlight(&self, routines: &Routines, styles: StyleChain) -> Vec<Packed<RawLine>> {
370 let elem = self.as_ref();
371 let lines = preprocess(&elem.text, styles, self.span());
372
373 let count = lines.len() as i64;
374 let lang = elem
375 .lang
376 .get_ref(styles)
377 .as_ref()
378 .map(|s| s.to_lowercase())
379 .or(Some("txt".into()));
380
381 let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| {
382 lines.into_iter().enumerate().map(|(i, (line, line_span))| {
383 Packed::new(RawLine::new(
384 i as i64 + 1,
385 count,
386 line.clone(),
387 TextElem::packed(line).spanned(line_span),
388 ))
389 .spanned(line_span)
390 })
391 };
392
393 let syntaxes = LazyCell::new(|| elem.syntaxes.get_cloned(styles));
394 let theme: &synt::Theme = match elem.theme.get_ref(styles) {
395 Smart::Auto => &RAW_THEME,
396 Smart::Custom(Some(theme)) => theme.derived.get(),
397 Smart::Custom(None) => return non_highlighted_result(lines).collect(),
398 };
399
400 let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK);
401 let target = styles.get(TargetElem::target);
402
403 let mut seq = vec![];
404 if matches!(lang.as_deref(), Some("typ" | "typst" | "typc" | "typm")) {
405 let text =
406 lines.iter().map(|(s, _)| s.clone()).collect::<Vec<_>>().join("\n");
407 let root = match lang.as_deref() {
408 Some("typc") => typst_syntax::parse_code(&text),
409 Some("typm") => typst_syntax::parse_math(&text),
410 _ => typst_syntax::parse(&text),
411 };
412
413 ThemedHighlighter::new(
414 &text,
415 LinkedNode::new(&root),
416 synt::Highlighter::new(theme),
417 &mut |i, _, range, style| {
418 let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
421 let span_offset = text[..range.start]
422 .rfind('\n')
423 .map_or(0, |i| range.start - (i + 1));
424 styled(
425 routines,
426 target,
427 &text[range],
428 foreground,
429 style,
430 span,
431 span_offset,
432 )
433 },
434 &mut |i, range, line| {
435 let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
436 seq.push(
437 Packed::new(RawLine::new(
438 (i + 1) as i64,
439 count,
440 EcoString::from(&text[range]),
441 Content::sequence(line.drain(..)),
442 ))
443 .spanned(span),
444 );
445 },
446 )
447 .highlight();
448 } else if let Some((syntax_set, syntax)) = lang.and_then(|token| {
449 syntaxes
451 .derived
452 .iter()
453 .map(|syntax| syntax.get())
454 .chain(std::iter::once(&*RAW_SYNTAXES))
455 .find_map(|set| {
456 set.find_syntax_by_token(&token).map(|syntax| (set, syntax))
457 })
458 }) {
459 let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
460 for (i, (line, line_span)) in lines.into_iter().enumerate() {
461 let mut line_content = vec![];
462 let mut span_offset = 0;
463 for (style, piece) in highlighter
464 .highlight_line(line.as_str(), syntax_set)
465 .into_iter()
466 .flatten()
467 {
468 line_content.push(styled(
469 routines,
470 target,
471 piece,
472 foreground,
473 style,
474 line_span,
475 span_offset,
476 ));
477 span_offset += piece.len();
478 }
479
480 seq.push(
481 Packed::new(RawLine::new(
482 i as i64 + 1,
483 count,
484 line,
485 Content::sequence(line_content),
486 ))
487 .spanned(line_span),
488 );
489 }
490 } else {
491 seq.extend(non_highlighted_result(lines));
492 };
493
494 seq
495 }
496}
497
498impl ShowSet for Packed<RawElem> {
499 fn show_set(&self, styles: StyleChain) -> Styles {
500 let mut out = Styles::new();
501 out.set(TextElem::overhang, false);
502 out.set(TextElem::lang, Lang::ENGLISH);
503 out.set(TextElem::hyphenate, Smart::Custom(false));
504 out.set(TextElem::size, TextSize(Em::new(0.8).into()));
505 out.set(TextElem::font, FontList(vec![FontFamily::new("DejaVu Sans Mono")]));
506 out.set(TextElem::cjk_latin_spacing, Smart::Custom(None));
507 if self.block.get(styles) {
508 out.set(ParElem::justify, false);
509 }
510 out
511 }
512}
513
impl LocalName for Packed<RawElem> {
    // The key under which the localized name of this element is looked up.
    const KEY: &'static str = "raw";
}
517
// Marker implementation; `Figurable` is implemented without overriding any items.
impl Figurable for Packed<RawElem> {}
519
520impl PlainText for Packed<RawElem> {
521 fn plain_text(&self, text: &mut EcoString) {
522 text.push_str(&self.text.get());
523 }
524}
525
/// The content of a raw element.
///
/// `Hash` is derived while `PartialEq` is implemented by hand; the `allow`
/// below silences the corresponding clippy lint (see the linked discussion).
#[derive(Debug, Clone, Hash)]
#[allow(
    clippy::derived_hash_with_manual_eq,
    reason = "https://github.com/typst/typst/pull/6560#issuecomment-3045393640"
)]
pub enum RawContent {
    /// The text as a single string.
    Text(EcoString),
    /// The text as individual lines, each paired with a span.
    Lines(EcoVec<(EcoString, Span)>),
}
538
539impl RawContent {
540 fn get(&self) -> EcoString {
542 match self.clone() {
543 RawContent::Text(text) => text,
544 RawContent::Lines(lines) => {
545 let mut lines = lines.into_iter().map(|(s, _)| s);
546 if lines.len() <= 1 {
547 lines.next().unwrap_or_default()
548 } else {
549 lines.collect::<Vec<_>>().join("\n").into()
550 }
551 }
552 }
553 }
554}
555
556impl PartialEq for RawContent {
557 fn eq(&self, other: &Self) -> bool {
558 match (self, other) {
559 (RawContent::Text(a), RawContent::Text(b)) => a == b,
560 (lines @ RawContent::Lines(_), RawContent::Text(text))
561 | (RawContent::Text(text), lines @ RawContent::Lines(_)) => {
562 *text == lines.get()
563 }
564 (RawContent::Lines(a), RawContent::Lines(b)) => Iterator::eq(
565 a.iter().map(|(line, _)| line),
566 b.iter().map(|(line, _)| line),
567 ),
568 }
569 }
570}
571
// Value conversions for `RawContent`:
// - into a value: the joined text (see `RawContent::get`),
// - from a value: a string becomes the `Text` variant.
cast! {
    RawContent,
    self => self.get().into_value(),
    v: EcoString => Self::Text(v),
}
577
/// A loaded syntax definition.
///
/// Wraps a syntect `SyntaxSet` in a shared, manually hashed container; the
/// hash is computed from the bytes the syntax was decoded from.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawSyntax(Arc<ManuallyHash<SyntaxSet>>);
581
582impl RawSyntax {
583 fn load(
585 world: Tracked<dyn World + '_>,
586 sources: Spanned<OneOrMultiple<DataSource>>,
587 ) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
588 let loaded = sources.load(world)?;
589 let list = loaded
590 .iter()
591 .map(|data| Self::decode(&data.data).within(data))
592 .collect::<SourceResult<_>>()?;
593 Ok(Derived::new(sources.v, list))
594 }
595
596 #[comemo::memoize]
598 #[typst_macros::time(name = "load syntaxes")]
599 fn decode(bytes: &Bytes) -> LoadResult<RawSyntax> {
600 let str = bytes.as_str()?;
601
602 let syntax = SyntaxDefinition::load_from_str(str, false, None)
603 .map_err(format_syntax_error)?;
604
605 let mut builder = SyntaxSetBuilder::new();
606 builder.add(syntax);
607
608 Ok(RawSyntax(Arc::new(ManuallyHash::new(
609 builder.build(),
610 typst_utils::hash128(bytes),
611 ))))
612 }
613
614 fn get(&self) -> &SyntaxSet {
616 self.0.as_ref()
617 }
618}
619
620fn format_syntax_error(error: ParseSyntaxError) -> LoadError {
621 let pos = syntax_error_pos(&error);
622 LoadError::new(pos, "failed to parse syntax", error)
623}
624
625fn syntax_error_pos(error: &ParseSyntaxError) -> ReportPos {
626 match error {
627 ParseSyntaxError::InvalidYaml(scan_error) => {
628 let m = scan_error.marker();
629 ReportPos::full(
630 m.index()..m.index(),
631 LineCol::one_based(m.line(), m.col() + 1),
632 )
633 }
634 _ => ReportPos::None,
635 }
636}
637
/// A loaded syntect theme.
///
/// Wraps a syntect `Theme` in a shared, manually hashed container; the hash
/// is computed from the bytes the theme was decoded from.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
641
642impl RawTheme {
643 fn load(
645 world: Tracked<dyn World + '_>,
646 source: Spanned<DataSource>,
647 ) -> SourceResult<Derived<DataSource, Self>> {
648 let loaded = source.load(world)?;
649 let theme = Self::decode(&loaded.data).within(&loaded)?;
650 Ok(Derived::new(source.v, theme))
651 }
652
653 #[comemo::memoize]
655 fn decode(bytes: &Bytes) -> LoadResult<RawTheme> {
656 let mut cursor = std::io::Cursor::new(bytes.as_slice());
657 let theme =
658 synt::ThemeSet::load_from_reader(&mut cursor).map_err(format_theme_error)?;
659 Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(bytes)))))
660 }
661
662 pub fn get(&self) -> &synt::Theme {
664 self.0.as_ref()
665 }
666}
667
668fn format_theme_error(error: syntect::LoadingError) -> LoadError {
669 let pos = match &error {
670 syntect::LoadingError::ParseSyntax(err, _) => syntax_error_pos(err),
671 _ => ReportPos::None,
672 };
673 LoadError::new(pos, "failed to parse theme", error)
674}
675
/// A single line of a raw element, exposed under the name `line`.
///
/// All fields are required.
#[elem(name = "line", title = "Raw Text / Code Line", Tagged, PlainText)]
pub struct RawLine {
    /// The line number within the raw element, starting at one.
    #[required]
    pub number: i64,

    /// The total number of lines in the raw element.
    #[required]
    pub count: i64,

    /// The plain text of the line.
    #[required]
    pub text: EcoString,

    /// The (possibly highlighted) content of the line.
    #[required]
    pub body: Content,
}
701
702impl PlainText for Packed<RawLine> {
703 fn plain_text(&self, text: &mut EcoString) {
704 text.push_str(&self.text);
705 }
706}
707
/// Walks a Typst syntax tree and produces styled pieces line by line.
///
/// Styled pieces come from `style_fn`; every completed line is reported
/// through `line_fn`.
struct ThemedHighlighter<'a> {
    /// The code being highlighted.
    code: &'a str,
    /// The syntax node that is currently being visited.
    node: LinkedNode<'a>,
    /// The syntect highlighter that resolves scope stacks to styles.
    highlighter: synt::Highlighter<'a>,
    /// The scopes that apply to the current node.
    scopes: Vec<syntect::parsing::Scope>,
    /// The styled pieces collected for the current line.
    current_line: Vec<Content>,
    /// The byte range of the current line within `code`.
    range: Range<usize>,
    /// The zero-based index of the current line.
    line: usize,
    /// The callback that styles a piece of text.
    style_fn: StyleFn<'a>,
    /// The callback that receives each completed line.
    line_fn: LineFn<'a>,
}
729
// Styles one piece of text. Receives the zero-based line index, the syntax
// node the piece belongs to, the byte range of the piece within the code, and
// the resolved style.
type StyleFn<'a> =
    &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
// Receives a completed line: its zero-based index, its byte range within the
// code, and the styled pieces collected for it.
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
734
impl<'a> ThemedHighlighter<'a> {
    /// Creates a highlighter for `code`, starting at the node `top`.
    ///
    /// The line range, scope stack, and line buffer start out empty and the
    /// line index starts at zero.
    pub fn new(
        code: &'a str,
        top: LinkedNode<'a>,
        highlighter: synt::Highlighter<'a>,
        style_fn: StyleFn<'a>,
        line_fn: LineFn<'a>,
    ) -> Self {
        Self {
            code,
            node: top,
            highlighter,
            range: 0..0,
            scopes: Vec::new(),
            current_line: Vec::new(),
            line: 0,
            style_fn,
            line_fn,
        }
    }

    /// Highlights the whole tree and reports every line through `line_fn`.
    pub fn highlight(&mut self) {
        self.highlight_inner();

        // The last line is not terminated by a newline, so it is still
        // pending after the traversal and has to be reported here. It extends
        // to the end of the code.
        if !self.current_line.is_empty() {
            (self.line_fn)(
                self.line,
                self.range.start..self.code.len(),
                &mut self.current_line,
            );

            self.current_line.clear();
        }
    }

    /// Recursively highlights the current node and its descendants.
    fn highlight_inner(&mut self) {
        // Only leaf nodes contribute text.
        if self.node.children().len() == 0 {
            let style = self.highlighter.style_for_stack(&self.scopes);
            let segment = &self.code[self.node.range()];

            // Number of segment bytes consumed so far, counting one byte for
            // the line break after each piece.
            let mut len = 0;
            for (i, line) in split_newlines(segment).into_iter().enumerate() {
                // Every piece after the first one starts a new line, so the
                // previous line is complete and gets reported.
                if i != 0 {
                    (self.line_fn)(
                        self.line,
                        // `- 1` excludes the line break from the line range.
                        self.range.start..self.range.end + len - 1,
                        &mut self.current_line,
                    );
                    self.range.start = self.range.end + len;
                    self.line += 1;
                }

                let offset = self.node.range().start + len;
                let token_range = offset..(offset + line.len());
                self.current_line.push((self.style_fn)(
                    self.line,
                    &self.node,
                    token_range,
                    style,
                ));

                len += line.len() + 1;
            }

            self.range.end += segment.len();
        }

        // The iterator expression is evaluated once, before `self.node` is
        // reassigned in the loop body. `self.node` is not restored after the
        // recursion.
        for child in self.node.children() {
            let mut scopes = self.scopes.clone();
            if let Some(tag) = typst_syntax::highlight(&child) {
                scopes.push(syntect::parsing::Scope::new(tag.tm_scope()).unwrap())
            }

            // Install the child's scope stack for the recursion and restore
            // the previous one afterwards.
            std::mem::swap(&mut scopes, &mut self.scopes);
            self.node = child;
            self.highlight_inner();
            std::mem::swap(&mut scopes, &mut self.scopes);
        }
    }
}
815
816fn preprocess(
817 text: &RawContent,
818 styles: StyleChain,
819 span: Span,
820) -> EcoVec<(EcoString, Span)> {
821 if let RawContent::Lines(lines) = text
822 && lines.iter().all(|(s, _)| !s.contains('\t'))
823 {
824 return lines.clone();
825 }
826
827 let mut text = text.get();
828 if text.contains('\t') {
829 let tab_size = styles.get(RawElem::tab_size);
830 text = align_tabs(&text, tab_size);
831 }
832 split_newlines(&text)
833 .into_iter()
834 .map(|line| (line.into(), span))
835 .collect()
836}
837
838fn styled(
840 routines: &Routines,
841 target: Target,
842 piece: &str,
843 foreground: synt::Color,
844 style: synt::Style,
845 span: Span,
846 span_offset: usize,
847) -> Content {
848 let mut body = TextElem::packed(piece).spanned(span);
849
850 if span_offset > 0 {
851 body = body.set(TextElem::span_offset, span_offset);
852 }
853
854 if style.foreground != foreground {
855 let color = to_typst(style.foreground);
856 body = match target {
857 Target::Html => (routines.html_span_filled)(body, color),
858 Target::Paged => body.set(TextElem::fill, color.into()),
859 };
860 }
861
862 if style.font_style.contains(synt::FontStyle::BOLD) {
863 body = body.strong().spanned(span);
864 }
865
866 if style.font_style.contains(synt::FontStyle::ITALIC) {
867 body = body.emph().spanned(span);
868 }
869
870 if style.font_style.contains(synt::FontStyle::UNDERLINE) {
871 body = body.underlined().spanned(span);
872 }
873
874 body
875}
876
877fn to_typst(synt::Color { r, g, b, a }: synt::Color) -> Color {
878 Color::from_u8(r, g, b, a)
879}
880
881fn to_syn(color: Color) -> synt::Color {
882 let (r, g, b, a) = color.to_rgb().into_format::<u8, u8>().into_components();
883 synt::Color { r, g, b, a }
884}
885
886fn item(
888 scope: &str,
889 color: Option<&str>,
890 font_style: Option<synt::FontStyle>,
891) -> synt::ThemeItem {
892 synt::ThemeItem {
893 scope: scope.parse().unwrap(),
894 style: synt::StyleModifier {
895 foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())),
896 background: None,
897 font_style,
898 },
899 }
900}
901
902fn align_tabs(text: &str, tab_size: usize) -> EcoString {
904 let replacement = " ".repeat(tab_size);
905 let divisor = tab_size.max(1);
906 let amount = text.chars().filter(|&c| c == '\t').count();
907
908 let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size);
909 let mut column = 0;
910
911 for grapheme in text.graphemes(true) {
912 let c = grapheme.parse::<char>();
913 if c == Ok('\t') {
914 let required = tab_size - column % divisor;
915 res.push_str(&replacement[..required]);
916 column += required;
917 } else if c.is_ok_and(typst_syntax::is_newline) || grapheme == "\r\n" {
918 res.push_str(grapheme);
919 column = 0;
920 } else {
921 res.push_str(grapheme);
922 column += 1;
923 }
924 }
925
926 res
927}
928
/// The built-in syntax set for highlighting.
///
/// Lazily initialized from `two_face::syntax::extra_no_newlines` on first
/// access.
pub static RAW_SYNTAXES: LazyLock<syntect::parsing::SyntaxSet> =
    LazyLock::new(two_face::syntax::extra_no_newlines);
935
/// The built-in theme ("Typst Light"), used when the theme is `Smart::Auto`.
///
/// Lazily initialized on first access. The theme settings are left at their
/// defaults; all styling comes from the scope items below.
pub static RAW_THEME: LazyLock<synt::Theme> = LazyLock::new(|| synt::Theme {
    name: Some("Typst Light".into()),
    author: Some("The Typst Project Developers".into()),
    settings: synt::ThemeSettings::default(),
    // Each item maps a scope selector to an optional foreground color and an
    // optional font style.
    scopes: vec![
        item("comment", Some("#74747c"), None),
        item("constant.character.escape", Some("#1d6c76"), None),
        item("markup.bold", None, Some(synt::FontStyle::BOLD)),
        item("markup.italic", None, Some(synt::FontStyle::ITALIC)),
        item("markup.underline", None, Some(synt::FontStyle::UNDERLINE)),
        item("markup.raw", Some("#6b6b6f"), None),
        item("string.other.math.typst", None, None),
        item("punctuation.definition.math", Some("#198810"), None),
        item("keyword.operator.math", Some("#1d6c76"), None),
        item("markup.heading, entity.name.section", None, Some(synt::FontStyle::BOLD)),
        item(
            "markup.heading.typst",
            None,
            Some(synt::FontStyle::BOLD | synt::FontStyle::UNDERLINE),
        ),
        item("punctuation.definition.list", Some("#8b41b1"), None),
        item("markup.list.term", None, Some(synt::FontStyle::BOLD)),
        item("entity.name.label, markup.other.reference", Some("#1d6c76"), None),
        item("keyword, constant.language, variable.language", Some("#d73948"), None),
        item("storage.type, storage.modifier", Some("#d73948"), None),
        item("constant", Some("#b60157"), None),
        item("string", Some("#198810"), None),
        item("entity.name, variable.function, support", Some("#4b69c6"), None),
        item("support.macro", Some("#16718d"), None),
        item("meta.annotation", Some("#301414"), None),
        item("entity.other, meta.interpolation", Some("#8b41b1"), None),
        item("meta.diff.range", Some("#8b41b1"), None),
        item("markup.inserted, meta.diff.header.to-file", Some("#198810"), None),
        item("markup.deleted, meta.diff.header.from-file", Some("#d73948"), None),
        item("meta.mapping.key.json string.quoted.double.json", Some("#4b69c6"), None),
        item("meta.mapping.value.json string.quoted.double.json", Some("#198810"), None),
    ],
});