1use std::cell::LazyCell;
2use std::ops::Range;
3use std::sync::{Arc, LazyLock};
4
5use comemo::Tracked;
6use ecow::{eco_format, EcoString, EcoVec};
7use syntect::highlighting as synt;
8use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
9use typst_syntax::{split_newlines, LinkedNode, Span, Spanned};
10use typst_utils::ManuallyHash;
11use unicode_segmentation::UnicodeSegmentation;
12
13use super::Lang;
14use crate::diag::{At, FileError, SourceResult, StrResult};
15use crate::engine::Engine;
16use crate::foundations::{
17 cast, elem, scope, Bytes, Content, Derived, NativeElement, OneOrMultiple, Packed,
18 PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem,
19};
20use crate::html::{tag, HtmlElem};
21use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
22use crate::loading::{DataSource, Load};
23use crate::model::{Figurable, ParElem};
24use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
25use crate::visualize::Color;
26use crate::World;
27
/// Raw text with optional syntax highlighting.
///
/// The text is kept verbatim. During synthesis it is split into lines, which
/// are highlighted depending on `lang`, `syntaxes`, and `theme` and stored in
/// the `lines` field.
#[elem(
    scope,
    title = "Raw Text / Code",
    Synthesize,
    Show,
    ShowSet,
    LocalName,
    Figurable,
    PlainText
)]
pub struct RawElem {
    /// The raw text.
    #[required]
    pub text: RawContent,

    /// Whether the raw text is displayed as a separate block.
    ///
    /// Block-level raw text is aligned according to `align` and wrapped in a
    /// block. When the target is HTML, a block becomes a `pre` element and
    /// non-block raw text becomes a `code` element.
    #[default(false)]
    pub block: bool,

    /// The language to syntax-highlight in.
    ///
    /// The tag is lowercased before lookup. The tags `typ`, `typst`, `typc`,
    /// and `typm` select the Typst highlighter; any other tag is looked up in
    /// the loaded syntaxes. When unset, `txt` is used as the tag.
    #[borrowed]
    pub lang: Option<EcoString>,

    /// The horizontal alignment of the lines in a raw block.
    ///
    /// Only applied when `block` is `true`.
    #[default(HAlignment::Start)]
    pub align: HAlignment,

    /// Additional syntax definitions to load.
    ///
    /// Each source is loaded and parsed as a syntect syntax definition when
    /// the argument is parsed. During lookup, these syntaxes are consulted
    /// before the built-in ones.
    #[parse(match args.named("syntaxes")? {
        Some(sources) => Some(RawSyntax::load(engine.world, sources)?),
        None => None,
    })]
    #[fold]
    pub syntaxes: Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>,

    /// The theme to use for syntax highlighting.
    ///
    /// - `auto` uses the built-in default theme.
    /// - `none` disables highlighting entirely.
    /// - A data source is loaded and parsed as a theme when the argument is
    ///   parsed.
    #[parse(match args.named::<Spanned<Smart<Option<DataSource>>>>("theme")? {
        Some(Spanned { v: Smart::Custom(Some(source)), span }) => Some(Smart::Custom(
            Some(RawTheme::load(engine.world, Spanned::new(source, span))?)
        )),
        Some(Spanned { v: Smart::Custom(None), .. }) => Some(Smart::Custom(None)),
        Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
        None => None,
    })]
    #[borrowed]
    pub theme: Smart<Option<Derived<DataSource, RawTheme>>>,

    /// The size of a tab stop in spaces.
    ///
    /// A tab is replaced with enough spaces to align with the next multiple
    /// of this size.
    #[default(2)]
    pub tab_size: usize,

    /// The lines of the raw text, filled in during synthesis.
    #[synthesized]
    pub lines: Vec<Packed<RawLine>>,
}
276
/// Definitions exposed in the scope of `RawElem`.
#[scope]
impl RawElem {
    /// The element representing a single line of raw text.
    #[elem]
    type RawLine;
}
282
283impl RawElem {
284 pub fn languages() -> Vec<(&'static str, Vec<&'static str>)> {
286 RAW_SYNTAXES
287 .syntaxes()
288 .iter()
289 .map(|syntax| {
290 (
291 syntax.name.as_str(),
292 syntax.file_extensions.iter().map(|s| s.as_str()).collect(),
293 )
294 })
295 .chain([
296 ("Typst", vec!["typ"]),
297 ("Typst (code)", vec!["typc"]),
298 ("Typst (math)", vec!["typm"]),
299 ])
300 .collect()
301 }
302}
303
304impl Synthesize for Packed<RawElem> {
305 fn synthesize(&mut self, _: &mut Engine, styles: StyleChain) -> SourceResult<()> {
306 let seq = self.highlight(styles);
307 self.push_lines(seq);
308 Ok(())
309 }
310}
311
312impl Packed<RawElem> {
313 #[comemo::memoize]
314 fn highlight(&self, styles: StyleChain) -> Vec<Packed<RawLine>> {
315 let elem = self.as_ref();
316 let lines = preprocess(&elem.text, styles, self.span());
317
318 let count = lines.len() as i64;
319 let lang = elem
320 .lang(styles)
321 .as_ref()
322 .as_ref()
323 .map(|s| s.to_lowercase())
324 .or(Some("txt".into()));
325
326 let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| {
327 lines.into_iter().enumerate().map(|(i, (line, line_span))| {
328 Packed::new(RawLine::new(
329 i as i64 + 1,
330 count,
331 line.clone(),
332 TextElem::packed(line).spanned(line_span),
333 ))
334 .spanned(line_span)
335 })
336 };
337
338 let syntaxes = LazyCell::new(|| elem.syntaxes(styles));
339 let theme: &synt::Theme = match elem.theme(styles) {
340 Smart::Auto => &RAW_THEME,
341 Smart::Custom(Some(theme)) => theme.derived.get(),
342 Smart::Custom(None) => return non_highlighted_result(lines).collect(),
343 };
344
345 let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK);
346
347 let mut seq = vec![];
348 if matches!(lang.as_deref(), Some("typ" | "typst" | "typc" | "typm")) {
349 let text =
350 lines.iter().map(|(s, _)| s.clone()).collect::<Vec<_>>().join("\n");
351 let root = match lang.as_deref() {
352 Some("typc") => typst_syntax::parse_code(&text),
353 Some("typm") => typst_syntax::parse_math(&text),
354 _ => typst_syntax::parse(&text),
355 };
356
357 ThemedHighlighter::new(
358 &text,
359 LinkedNode::new(&root),
360 synt::Highlighter::new(theme),
361 &mut |i, _, range, style| {
362 let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
365 let span_offset = text[..range.start]
366 .rfind('\n')
367 .map_or(0, |i| range.start - (i + 1));
368 styled(&text[range], foreground, style, span, span_offset)
369 },
370 &mut |i, range, line| {
371 let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
372 seq.push(
373 Packed::new(RawLine::new(
374 (i + 1) as i64,
375 count,
376 EcoString::from(&text[range]),
377 Content::sequence(line.drain(..)),
378 ))
379 .spanned(span),
380 );
381 },
382 )
383 .highlight();
384 } else if let Some((syntax_set, syntax)) = lang.and_then(|token| {
385 syntaxes
387 .derived
388 .iter()
389 .map(|syntax| syntax.get())
390 .chain(std::iter::once(&*RAW_SYNTAXES))
391 .find_map(|set| {
392 set.find_syntax_by_token(&token).map(|syntax| (set, syntax))
393 })
394 }) {
395 let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
396 for (i, (line, line_span)) in lines.into_iter().enumerate() {
397 let mut line_content = vec![];
398 let mut span_offset = 0;
399 for (style, piece) in highlighter
400 .highlight_line(line.as_str(), syntax_set)
401 .into_iter()
402 .flatten()
403 {
404 line_content.push(styled(
405 piece,
406 foreground,
407 style,
408 line_span,
409 span_offset,
410 ));
411 span_offset += piece.len();
412 }
413
414 seq.push(
415 Packed::new(RawLine::new(
416 i as i64 + 1,
417 count,
418 line,
419 Content::sequence(line_content),
420 ))
421 .spanned(line_span),
422 );
423 }
424 } else {
425 seq.extend(non_highlighted_result(lines));
426 };
427
428 seq
429 }
430}
431
432impl Show for Packed<RawElem> {
433 #[typst_macros::time(name = "raw", span = self.span())]
434 fn show(&self, _: &mut Engine, styles: StyleChain) -> SourceResult<Content> {
435 let lines = self.lines().map(|v| v.as_slice()).unwrap_or_default();
436
437 let mut seq = EcoVec::with_capacity((2 * lines.len()).saturating_sub(1));
438 for (i, line) in lines.iter().enumerate() {
439 if i != 0 {
440 seq.push(LinebreakElem::shared().clone());
441 }
442
443 seq.push(line.clone().pack());
444 }
445
446 let mut realized = Content::sequence(seq);
447
448 if TargetElem::target_in(styles).is_html() {
449 return Ok(HtmlElem::new(if self.block(styles) {
450 tag::pre
451 } else {
452 tag::code
453 })
454 .with_body(Some(realized))
455 .pack()
456 .spanned(self.span()));
457 }
458
459 if self.block(styles) {
460 realized = realized.aligned(self.align(styles).into());
462 realized = BlockElem::new()
463 .with_body(Some(BlockBody::Content(realized)))
464 .pack()
465 .spanned(self.span());
466 }
467
468 Ok(realized)
469 }
470}
471
472impl ShowSet for Packed<RawElem> {
473 fn show_set(&self, styles: StyleChain) -> Styles {
474 let mut out = Styles::new();
475 out.set(TextElem::set_overhang(false));
476 out.set(TextElem::set_lang(Lang::ENGLISH));
477 out.set(TextElem::set_hyphenate(Smart::Custom(false)));
478 out.set(TextElem::set_size(TextSize(Em::new(0.8).into())));
479 out.set(TextElem::set_font(FontList(vec![FontFamily::new("DejaVu Sans Mono")])));
480 out.set(TextElem::set_cjk_latin_spacing(Smart::Custom(None)));
481 if self.block(styles) {
482 out.set(ParElem::set_justify(false));
483 }
484 out
485 }
486}
487
impl LocalName for Packed<RawElem> {
    /// The key under which the local name of raw elements is registered.
    const KEY: &'static str = "raw";
}
491
// Marker impl with no items of its own.
impl Figurable for Packed<RawElem> {}
493
494impl PlainText for Packed<RawElem> {
495 fn plain_text(&self, text: &mut EcoString) {
496 text.push_str(&self.text.get());
497 }
498}
499
/// The content of a raw element.
#[derive(Debug, Clone, Hash, PartialEq)]
pub enum RawContent {
    /// The text as a single string.
    Text(EcoString),
    /// The text as individual lines, each paired with its own span.
    Lines(EcoVec<(EcoString, Span)>),
}
508
509impl RawContent {
510 fn get(&self) -> EcoString {
512 match self.clone() {
513 RawContent::Text(text) => text,
514 RawContent::Lines(lines) => {
515 let mut lines = lines.into_iter().map(|(s, _)| s);
516 if lines.len() <= 1 {
517 lines.next().unwrap_or_default()
518 } else {
519 lines.collect::<Vec<_>>().join("\n").into()
520 }
521 }
522 }
523 }
524}
525
// Casting rules for `RawContent`: it converts into a value via its joined
// text (`get`), and it is built from a string as `RawContent::Text`.
cast! {
    RawContent,
    self => self.get().into_value(),
    v: EcoString => Self::Text(v),
}
531
/// A loaded syntax definition, stored as a shared syntect `SyntaxSet`.
///
/// The set is wrapped in `ManuallyHash`; the hash supplied on construction
/// (see `decode`) is computed from the raw bytes the set was parsed from.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawSyntax(Arc<ManuallyHash<SyntaxSet>>);
535
536impl RawSyntax {
537 fn load(
539 world: Tracked<dyn World + '_>,
540 sources: Spanned<OneOrMultiple<DataSource>>,
541 ) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
542 let data = sources.load(world)?;
543 let list = sources
544 .v
545 .0
546 .iter()
547 .zip(&data)
548 .map(|(source, data)| Self::decode(source, data))
549 .collect::<StrResult<_>>()
550 .at(sources.span)?;
551 Ok(Derived::new(sources.v, list))
552 }
553
554 #[comemo::memoize]
556 #[typst_macros::time(name = "load syntaxes")]
557 fn decode(source: &DataSource, data: &Bytes) -> StrResult<RawSyntax> {
558 let src = data.as_str().map_err(FileError::from)?;
559 let syntax = SyntaxDefinition::load_from_str(src, false, None).map_err(
560 |err| match source {
561 DataSource::Path(path) => {
562 eco_format!("failed to parse syntax file `{path}` ({err})")
563 }
564 DataSource::Bytes(_) => {
565 eco_format!("failed to parse syntax ({err})")
566 }
567 },
568 )?;
569
570 let mut builder = SyntaxSetBuilder::new();
571 builder.add(syntax);
572
573 Ok(RawSyntax(Arc::new(ManuallyHash::new(
574 builder.build(),
575 typst_utils::hash128(data),
576 ))))
577 }
578
579 fn get(&self) -> &SyntaxSet {
581 self.0.as_ref()
582 }
583}
584
/// A loaded syntect theme, stored behind a shared pointer.
///
/// The theme is wrapped in `ManuallyHash`; the hash supplied on construction
/// (see `decode`) is computed from the raw bytes the theme was parsed from.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
588
589impl RawTheme {
590 fn load(
592 world: Tracked<dyn World + '_>,
593 source: Spanned<DataSource>,
594 ) -> SourceResult<Derived<DataSource, Self>> {
595 let data = source.load(world)?;
596 let theme = Self::decode(&data).at(source.span)?;
597 Ok(Derived::new(source.v, theme))
598 }
599
600 #[comemo::memoize]
602 fn decode(data: &Bytes) -> StrResult<RawTheme> {
603 let mut cursor = std::io::Cursor::new(data.as_slice());
604 let theme = synt::ThemeSet::load_from_reader(&mut cursor)
605 .map_err(|err| eco_format!("failed to parse theme ({err})"))?;
606 Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data)))))
607 }
608
609 pub fn get(&self) -> &synt::Theme {
611 self.0.as_ref()
612 }
613}
614
/// A single line of raw text, possibly highlighted.
///
/// Instances are created while a raw element's lines are synthesized.
#[elem(name = "line", title = "Raw Text / Code Line", Show, PlainText)]
pub struct RawLine {
    /// The line number within the raw element, starting at 1.
    #[required]
    pub number: i64,

    /// The total number of lines in the raw element.
    #[required]
    pub count: i64,

    /// The plain text of the line.
    #[required]
    pub text: EcoString,

    /// The content of the line, including any highlighting styles.
    #[required]
    pub body: Content,
}
640
impl Show for Packed<RawLine> {
    /// Shows the line by returning a clone of its `body`.
    #[typst_macros::time(name = "raw.line", span = self.span())]
    fn show(&self, _: &mut Engine, _styles: StyleChain) -> SourceResult<Content> {
        Ok(self.body.clone())
    }
}
647
impl PlainText for Packed<RawLine> {
    /// Appends the line's plain text to `text`, without a trailing newline.
    fn plain_text(&self, text: &mut EcoString) {
        text.push_str(&self.text);
    }
}
653
/// Walks a Typst syntax tree and emits styled pieces line by line.
struct ThemedHighlighter<'a> {
    /// The code being highlighted.
    code: &'a str,
    /// The node currently being visited.
    node: LinkedNode<'a>,
    /// The syntect highlighter used to resolve styles for scope stacks.
    highlighter: synt::Highlighter<'a>,
    /// The scope stack that applies to the current node.
    scopes: Vec<syntect::parsing::Scope>,
    /// The styled pieces collected for the line in progress.
    current_line: Vec<Content>,
    /// The byte range in `code` tracked for the line in progress.
    range: Range<usize>,
    /// The zero-based index of the line in progress.
    line: usize,
    /// Callback that styles one piece of text.
    style_fn: StyleFn<'a>,
    /// Callback that receives each completed line.
    line_fn: LineFn<'a>,
}
675
/// Callback that styles one piece of text.
///
/// Receives the zero-based line index, the syntax node, the byte range of the
/// piece within the code, and the resolved style, and returns the styled
/// content.
type StyleFn<'a> =
    &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
/// Callback invoked for each completed line.
///
/// Receives the zero-based line index, a byte range within the code for the
/// line, and the pieces collected for it.
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
680
impl<'a> ThemedHighlighter<'a> {
    /// Creates a highlighter for `code`, starting at the syntax node `top`.
    ///
    /// The line index starts at zero, with an empty range, scope stack, and
    /// current line.
    pub fn new(
        code: &'a str,
        top: LinkedNode<'a>,
        highlighter: synt::Highlighter<'a>,
        style_fn: StyleFn<'a>,
        line_fn: LineFn<'a>,
    ) -> Self {
        Self {
            code,
            node: top,
            highlighter,
            range: 0..0,
            scopes: Vec::new(),
            current_line: Vec::new(),
            line: 0,
            style_fn,
            line_fn,
        }
    }

    /// Highlights the whole tree, then flushes the final line.
    ///
    /// The final line is only emitted if pieces were collected for it. Its
    /// range runs from the tracked line start to the end of the code.
    pub fn highlight(&mut self) {
        self.highlight_inner();

        if !self.current_line.is_empty() {
            (self.line_fn)(
                self.line,
                self.range.start..self.code.len(),
                &mut self.current_line,
            );

            self.current_line.clear();
        }
    }

    /// Recursively highlights the current node and its descendants.
    ///
    /// A node without children is styled with the current scope stack and
    /// split at newlines; each newline completes a line via `line_fn`. For
    /// every child, the scope stack is extended by the child's highlight tag
    /// (if it has one) for the duration of the recursive call.
    fn highlight_inner(&mut self) {
        if self.node.children().len() == 0 {
            let style = self.highlighter.style_for_stack(&self.scopes);
            let segment = &self.code[self.node.range()];

            // Bytes of `segment` consumed so far; each line is counted
            // together with one byte for the newline that follows it.
            let mut len = 0;
            for (i, line) in split_newlines(segment).into_iter().enumerate() {
                // Every part after the first begins a new line, so flush the
                // previous one first.
                if i != 0 {
                    // The end excludes the newline that was just counted.
                    // NOTE(review): this assumes the newline is one byte
                    // long — confirm for multi-byte line terminators.
                    (self.line_fn)(
                        self.line,
                        self.range.start..self.range.end + len - 1,
                        &mut self.current_line,
                    );
                    self.range.start = self.range.end + len;
                    self.line += 1;
                }

                // Byte range of this part within the whole code.
                let offset = self.node.range().start + len;
                let token_range = offset..(offset + line.len());
                self.current_line.push((self.style_fn)(
                    self.line,
                    &self.node,
                    token_range,
                    style,
                ));

                len += line.len() + 1;
            }

            self.range.end += segment.len();
        }

        for child in self.node.children() {
            let mut scopes = self.scopes.clone();
            if let Some(tag) = typst_syntax::highlight(&child) {
                scopes.push(syntect::parsing::Scope::new(tag.tm_scope()).unwrap())
            }

            // Install the extended stack for the child, then swap the
            // previous stack back in afterwards.
            std::mem::swap(&mut scopes, &mut self.scopes);
            self.node = child;
            self.highlight_inner();
            std::mem::swap(&mut scopes, &mut self.scopes);
        }
    }
}
761
762fn preprocess(
763 text: &RawContent,
764 styles: StyleChain,
765 span: Span,
766) -> EcoVec<(EcoString, Span)> {
767 if let RawContent::Lines(lines) = text {
768 if lines.iter().all(|(s, _)| !s.contains('\t')) {
769 return lines.clone();
770 }
771 }
772
773 let mut text = text.get();
774 if text.contains('\t') {
775 let tab_size = RawElem::tab_size_in(styles);
776 text = align_tabs(&text, tab_size);
777 }
778 split_newlines(&text)
779 .into_iter()
780 .map(|line| (line.into(), span))
781 .collect()
782}
783
784fn styled(
786 piece: &str,
787 foreground: synt::Color,
788 style: synt::Style,
789 span: Span,
790 span_offset: usize,
791) -> Content {
792 let mut body = TextElem::packed(piece).spanned(span);
793
794 if span_offset > 0 {
795 body = body.styled(TextElem::set_span_offset(span_offset));
796 }
797
798 if style.foreground != foreground {
799 body = body.styled(TextElem::set_fill(to_typst(style.foreground).into()));
800 }
801
802 if style.font_style.contains(synt::FontStyle::BOLD) {
803 body = body.strong().spanned(span);
804 }
805
806 if style.font_style.contains(synt::FontStyle::ITALIC) {
807 body = body.emph().spanned(span);
808 }
809
810 if style.font_style.contains(synt::FontStyle::UNDERLINE) {
811 body = body.underlined().spanned(span);
812 }
813
814 body
815}
816
817fn to_typst(synt::Color { r, g, b, a }: synt::Color) -> Color {
818 Color::from_u8(r, g, b, a)
819}
820
821fn to_syn(color: Color) -> synt::Color {
822 let [r, g, b, a] = color.to_rgb().to_vec4_u8();
823 synt::Color { r, g, b, a }
824}
825
826fn item(
828 scope: &str,
829 color: Option<&str>,
830 font_style: Option<synt::FontStyle>,
831) -> synt::ThemeItem {
832 synt::ThemeItem {
833 scope: scope.parse().unwrap(),
834 style: synt::StyleModifier {
835 foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())),
836 background: None,
837 font_style,
838 },
839 }
840}
841
842fn align_tabs(text: &str, tab_size: usize) -> EcoString {
844 let replacement = " ".repeat(tab_size);
845 let divisor = tab_size.max(1);
846 let amount = text.chars().filter(|&c| c == '\t').count();
847
848 let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size);
849 let mut column = 0;
850
851 for grapheme in text.graphemes(true) {
852 match grapheme {
853 "\t" => {
854 let required = tab_size - column % divisor;
855 res.push_str(&replacement[..required]);
856 column += required;
857 }
858 "\n" => {
859 res.push_str(grapheme);
860 column = 0;
861 }
862 _ => {
863 res.push_str(grapheme);
864 column += 1;
865 }
866 }
867 }
868
869 res
870}
871
/// The built-in syntax set used for highlighting.
///
/// Lazily initialized from `two_face::syntax::extra_no_newlines` on first
/// access.
pub static RAW_SYNTAXES: LazyLock<syntect::parsing::SyntaxSet> =
    LazyLock::new(two_face::syntax::extra_no_newlines);
878
/// The default theme, used for highlighting when the raw element's theme is
/// `auto`.
///
/// Lazily built on first access. The theme uses default settings and only
/// defines per-scope foreground colors and font styles.
pub static RAW_THEME: LazyLock<synt::Theme> = LazyLock::new(|| synt::Theme {
    name: Some("Typst Light".into()),
    author: Some("The Typst Project Developers".into()),
    settings: synt::ThemeSettings::default(),
    scopes: vec![
        item("comment", Some("#8a8a8a"), None),
        item("constant.character.escape", Some("#1d6c76"), None),
        item("markup.bold", None, Some(synt::FontStyle::BOLD)),
        item("markup.italic", None, Some(synt::FontStyle::ITALIC)),
        item("markup.underline", None, Some(synt::FontStyle::UNDERLINE)),
        item("markup.raw", Some("#818181"), None),
        item("string.other.math.typst", None, None),
        item("punctuation.definition.math", Some("#298e0d"), None),
        item("keyword.operator.math", Some("#1d6c76"), None),
        item("markup.heading, entity.name.section", None, Some(synt::FontStyle::BOLD)),
        item(
            "markup.heading.typst",
            None,
            Some(synt::FontStyle::BOLD | synt::FontStyle::UNDERLINE),
        ),
        item("punctuation.definition.list", Some("#8b41b1"), None),
        item("markup.list.term", None, Some(synt::FontStyle::BOLD)),
        item("entity.name.label, markup.other.reference", Some("#1d6c76"), None),
        item("keyword, constant.language, variable.language", Some("#d73a49"), None),
        item("storage.type, storage.modifier", Some("#d73a49"), None),
        item("constant", Some("#b60157"), None),
        item("string", Some("#298e0d"), None),
        item("entity.name, variable.function, support", Some("#4b69c6"), None),
        item("support.macro", Some("#16718d"), None),
        item("meta.annotation", Some("#301414"), None),
        item("entity.other, meta.interpolation", Some("#8b41b1"), None),
        item("meta.diff.range", Some("#8b41b1"), None),
        item("markup.inserted, meta.diff.header.to-file", Some("#298e0d"), None),
        item("markup.deleted, meta.diff.header.from-file", Some("#d73a49"), None),
    ],
});