typstyle_core/pretty/
markup.rs

1use prettyless::Doc;
2use smallvec::SmallVec;
3use typst_syntax::{ast::*, SyntaxKind, SyntaxNode};
4
5use super::{
6    layout::flow::FlowItem, prelude::*, text::is_enum_marker, util::is_comment_node, Context, Mode,
7    PrettyPrinter,
8};
9use crate::{ext::StrExt, pretty::util::is_only_one_and};
10
/// The syntactic position a piece of markup is formatted in.
#[derive(Debug, PartialEq, Eq)]
enum MarkupScope {
    /// The top-level markup.
    Document,
    /// Markup enclosed by `[]`.
    ContentBlock,
    /// Body of a `Strong` or an `Emph` node.
    Strong,
    /// ListItem, EnumItem, or the description of a TermItem.
    /// Spaces without linebreaks can be stripped.
    Item,
    /// Heading, or the term of a TermItem. Like `Item`, but linebreaks are not allowed.
    InlineItem,
}

impl MarkupScope {
    /// Returns `true` for the item-like scopes (`Item` and `InlineItem`).
    fn can_trim(&self) -> bool {
        // Exhaustive on purpose: a new variant must decide explicitly.
        match self {
            Self::Item | Self::InlineItem => true,
            Self::Document | Self::ContentBlock | Self::Strong => false,
        }
    }
}
30
31impl<'a> PrettyPrinter<'a> {
32    pub fn convert_markup(&'a self, ctx: Context, markup: Markup<'a>) -> ArenaDoc<'a> {
33        self.convert_markup_impl(ctx, markup, MarkupScope::Document)
34    }
35
36    pub(super) fn convert_content_block(
37        &'a self,
38        ctx: Context,
39        content_block: ContentBlock<'a>,
40    ) -> ArenaDoc<'a> {
41        let content =
42            self.convert_markup_impl(ctx, content_block.body(), MarkupScope::ContentBlock);
43        content.group().brackets()
44    }
45
46    pub(super) fn convert_strong(&'a self, ctx: Context, strong: Strong<'a>) -> ArenaDoc<'a> {
47        let body = self.convert_markup_impl(ctx, strong.body(), MarkupScope::Strong);
48        body.enclose("*", "*")
49    }
50
51    pub(super) fn convert_emph(&'a self, ctx: Context, emph: Emph<'a>) -> ArenaDoc<'a> {
52        let body = self.convert_markup_impl(ctx, emph.body(), MarkupScope::Strong);
53        body.enclose("_", "_")
54    }
55
56    pub(super) fn convert_raw(&'a self, ctx: Context, raw: Raw<'a>) -> ArenaDoc<'a> {
57        // no format multiline single backtick raw block
58        if !raw.block() && raw.lines().nth(1).is_some() {
59            return self.convert_verbatim(raw);
60        }
61
62        let mut doc = self.arena.nil();
63        for child in raw.to_untyped().children() {
64            if let Some(delim) = child.cast::<RawDelim>() {
65                doc += self.convert_trivia(delim);
66            } else if let Some(lang) = child.cast::<RawLang>() {
67                doc += self.convert_trivia(lang);
68            } else if let Some(text) = child.cast::<Text>() {
69                doc += self.convert_text(text);
70            } else if child.kind() == SyntaxKind::RawTrimmed {
71                doc += self.convert_space_untyped(ctx, child);
72            }
73        }
74        doc
75    }
76
77    pub(super) fn convert_ref(&'a self, ctx: Context, reference: Ref<'a>) -> ArenaDoc<'a> {
78        let mut doc = self.arena.text("@") + self.arena.text(reference.target());
79        if let Some(supplement) = reference.supplement() {
80            doc += self.convert_content_block(ctx, supplement);
81        }
82        doc
83    }
84
85    pub(super) fn convert_heading(&'a self, ctx: Context, heading: Heading<'a>) -> ArenaDoc<'a> {
86        self.convert_flow_like(ctx, heading.to_untyped(), |ctx, child, _| {
87            if child.kind() == SyntaxKind::HeadingMarker {
88                FlowItem::spaced(self.arena.text(child.text().as_str()))
89            } else if let Some(markup) = child.cast::<Markup>() {
90                if !child.is_empty() {
91                    FlowItem::spaced(self.convert_markup_impl(ctx, markup, MarkupScope::InlineItem))
92                } else {
93                    FlowItem::none()
94                }
95            } else {
96                FlowItem::none()
97            }
98        })
99    }
100
101    pub(super) fn convert_list_item(
102        &'a self,
103        ctx: Context,
104        list_item: ListItem<'a>,
105    ) -> ArenaDoc<'a> {
106        self.convert_list_item_like(ctx, list_item.to_untyped())
107    }
108
109    pub(super) fn convert_enum_item(
110        &'a self,
111        ctx: Context,
112        enum_item: EnumItem<'a>,
113    ) -> ArenaDoc<'a> {
114        self.convert_list_item_like(ctx, enum_item.to_untyped())
115    }
116
117    pub(super) fn convert_term_item(
118        &'a self,
119        ctx: Context,
120        term_item: TermItem<'a>,
121    ) -> ArenaDoc<'a> {
122        let node = term_item.to_untyped();
123        let mut seen_term = false;
124        let body = self.convert_flow_like(ctx, node, |ctx, child, _| match child.kind() {
125            SyntaxKind::TermMarker => FlowItem::spaced(self.arena.text(child.text().as_str())),
126            SyntaxKind::Colon => {
127                seen_term = true;
128                FlowItem::tight_spaced(self.arena.text(child.text().as_str()))
129            }
130            SyntaxKind::Space if child.text().has_linebreak() => {
131                FlowItem::tight(self.arena.hardline())
132            }
133            SyntaxKind::Parbreak => FlowItem::tight(
134                self.arena
135                    .hardline()
136                    .repeat(child.text().count_linebreaks()),
137            ),
138            SyntaxKind::Markup => {
139                if !seen_term || !child.is_empty() {
140                    // empty markup is ignored here
141                    FlowItem::spaced(self.convert_markup_impl(
142                        ctx,
143                        child.cast().expect("markup"),
144                        if !seen_term {
145                            MarkupScope::InlineItem
146                        } else {
147                            MarkupScope::Item
148                        },
149                    ))
150                } else {
151                    FlowItem::none()
152                }
153            }
154            _ => FlowItem::none(),
155        });
156        self.indent(body)
157    }
158
159    fn convert_list_item_like(&'a self, ctx: Context, item: &'a SyntaxNode) -> ArenaDoc<'a> {
160        let body = self.convert_flow_like(ctx, item, |ctx, child, _| match child.kind() {
161            SyntaxKind::ListMarker | SyntaxKind::EnumMarker | SyntaxKind::TermMarker => {
162                FlowItem::spaced(self.arena.text(child.text().as_str()))
163            }
164            SyntaxKind::Space if child.text().has_linebreak() => {
165                FlowItem::tight(self.arena.hardline())
166            }
167            SyntaxKind::Parbreak => FlowItem::tight(
168                self.arena
169                    .hardline()
170                    .repeat(child.text().count_linebreaks()),
171            ),
172            SyntaxKind::Markup if !child.is_empty() => {
173                // empty markup is ignored here
174                FlowItem::spaced(self.convert_markup_impl(
175                    ctx,
176                    child.cast().expect("markup"),
177                    MarkupScope::Item,
178                ))
179            }
180            _ => FlowItem::none(),
181        });
182        self.indent(body)
183    }
184
185    fn convert_markup_impl(
186        &'a self,
187        ctx: Context,
188        markup: Markup<'a>,
189        scope: MarkupScope,
190    ) -> ArenaDoc<'a> {
191        let ctx = ctx.with_mode(Mode::Markup);
192
193        // If the markup only contains one space, simply convert it.
194        let children = markup.to_untyped().children().as_slice();
195        if children.len() == 1 {
196            if let Some(space) = children[0].cast::<Space>() {
197                return self.convert_space(ctx, space);
198            }
199        }
200
201        let repr = collect_markup_repr(markup);
202        let body = if self.config.wrap_text && scope != MarkupScope::InlineItem {
203            self.convert_markup_body_reflow(ctx, &repr)
204        } else {
205            self.convert_markup_body(ctx, &repr)
206        };
207
208        // Add line or space (if any) to both sides.
209        // Only turn space into, not the other way around.
210        let get_delim = |bound: Boundary| {
211            if scope == MarkupScope::Document || scope.can_trim() {
212                // should not add extra lines to the document
213                return if bound == Boundary::Break {
214                    self.arena.hardline()
215                } else {
216                    self.arena.nil()
217                };
218            }
219            match bound {
220                Boundary::Nil => self.arena.nil(),
221                Boundary::NilOrBreak => {
222                    if (scope.can_trim() || ctx.break_suppressed) && !self.config.wrap_text {
223                        self.arena.nil()
224                    } else {
225                        self.arena.line_()
226                    }
227                }
228                Boundary::WeakNilOrBreak => {
229                    if self.config.wrap_text {
230                        self.arena.line_()
231                    } else {
232                        self.arena.nil()
233                    }
234                }
235                Boundary::Space(n) => {
236                    if scope.can_trim() {
237                        // the space can be safely eaten
238                        self.arena.nil()
239                    } else if self.config.wrap_text {
240                        self.arena.line()
241                    } else if self.config.collapse_markup_spaces {
242                        self.arena.space()
243                    } else {
244                        self.arena.spaces(n)
245                    }
246                }
247                Boundary::Break | Boundary::WeakBreak => self.arena.hardline(),
248            }
249        };
250
251        let open = get_delim(repr.start_bound);
252        let close = get_delim(repr.end_bound);
253        // Do not indent (compact), if the opening will not break.
254        let needs_indent = matches!(scope, MarkupScope::ContentBlock)
255            && !(matches!(*open, Doc::Nil | Doc::Text(_))
256                && contains_exactly_one_primary_expr(markup));
257        let body_with_before = open + body;
258        let body_with_before = if needs_indent {
259            self.indent(body_with_before)
260        } else {
261            // Use compact layout.
262            body_with_before
263        };
264        (body_with_before + close).group()
265    }
266
267    fn convert_markup_body(&'a self, ctx: Context, repr: &MarkupRepr<'a>) -> ArenaDoc<'a> {
268        let mut doc = self.arena.nil();
269        for &MarkupLine {
270            ref nodes,
271            breaks,
272            mixed_text,
273        } in repr.lines.iter()
274        {
275            for node in nodes.iter() {
276                doc += if node.kind() == SyntaxKind::Space {
277                    self.convert_space_untyped(ctx, node)
278                } else if let Some(text) = node.cast::<Text>() {
279                    self.convert_text(text)
280                } else if let Some(expr) = node.cast::<Expr>() {
281                    let ctx = if mixed_text {
282                        ctx.suppress_breaks()
283                    } else {
284                        ctx
285                    };
286                    self.convert_expr(ctx, expr)
287                } else if is_comment_node(node) {
288                    self.convert_comment(ctx, node)
289                } else {
290                    // can be Hash, Semicolon, Shebang
291                    self.convert_trivia_untyped(node)
292                };
293            }
294            if breaks > 0 {
295                doc += self.arena.hardline().repeat(breaks);
296            }
297        }
298        doc
299    }
300
301    /// With text-wrapping enabled, spaces may turn to linebreaks, and linebreaks may turn to spaces, if safe.
302    fn convert_markup_body_reflow(&'a self, ctx: Context, repr: &MarkupRepr<'a>) -> ArenaDoc<'a> {
303        /// For NOT space -> soft-line: \
304        /// Ensure they are not misinterpreted as markup markers after reflow.
305        ///
306        /// Besides, reflowing labels to the next line is not desired.
307        fn cannot_break_before(node: &&SyntaxNode) -> bool {
308            let text = node.text();
309            matches!(text.as_str(), "=" | "+" | "-" | "/")
310                || matches!(node.kind(), SyntaxKind::Label)
311                || is_enum_marker(text)
312        }
313
314        /// For space -> hard-line: \
315        /// Prefers block equations exclusive to a single line.
316        fn prefer_exclusive(node: &&SyntaxNode) -> bool {
317            is_block_equation(node) || is_block_raw(node)
318        }
319
320        /// For NOT hard-line -> soft-line: \
321        /// Should always break after block elements or line comments.
322        fn should_break_after(node: &SyntaxNode) -> bool {
323            matches!(
324                node.kind(),
325                SyntaxKind::Heading
326                    | SyntaxKind::ListItem
327                    | SyntaxKind::EnumItem
328                    | SyntaxKind::TermItem
329                    | SyntaxKind::LineComment
330            )
331        }
332
333        /// For NOT hard-line -> soft-line: \
334        /// Breaking after them is visually better.
335        fn preserve_break_after(node: &SyntaxNode) -> bool {
336            matches!(
337                node.kind(),
338                SyntaxKind::BlockComment
339                    | SyntaxKind::Linebreak
340                    | SyntaxKind::Label
341                    | SyntaxKind::CodeBlock
342                    | SyntaxKind::ContentBlock
343                    | SyntaxKind::Conditional
344                    | SyntaxKind::WhileLoop
345                    | SyntaxKind::ForLoop
346                    | SyntaxKind::Contextual
347            ) || is_block_equation(node)
348                || is_block_raw(node)
349        }
350
351        /// For NOT hard-line -> soft-line: \
352        /// Keeps the line exclusive (prevents soft breaks) when:
353        /// - It contains only one non-text node, or
354        /// - It contains exactly two nodes where the first is a Hash, such as `#figure()`.
355        fn preserve_exclusive(line: &MarkupLine) -> bool {
356            let nodes = &line.nodes;
357            let len = nodes.len();
358            len == 1 && nodes[0].kind() != SyntaxKind::Text
359                || len == 2 && nodes[0].kind() == SyntaxKind::Hash
360                || len > 0 && prefer_exclusive(&nodes[0])
361        }
362
363        let mut doc = self.arena.nil();
364        for (i, line) in repr.lines.iter().enumerate() {
365            let &MarkupLine {
366                ref nodes, breaks, ..
367            } = line;
368            for (j, node) in nodes.iter().enumerate() {
369                doc += if node.kind() == SyntaxKind::Space {
370                    if nodes.get(j + 1).is_some_and(cannot_break_before) {
371                        self.arena.space()
372                    } else if nodes.get(j + 1).is_some_and(prefer_exclusive)
373                        || nodes.get(j - 1).is_some_and(prefer_exclusive)
374                    {
375                        self.arena.hardline()
376                    } else {
377                        self.arena.softline()
378                    }
379                } else if let Some(text) = node.cast::<Text>() {
380                    self.convert_text_wrapped(text)
381                } else if let Some(expr) = node.cast::<Expr>() {
382                    self.convert_expr(ctx, expr)
383                } else if is_comment_node(node) {
384                    self.convert_comment(ctx, node)
385                } else {
386                    // can be Hash, Semicolon, Shebang
387                    self.convert_trivia_untyped(node)
388                };
389            }
390            // Should not eat trailing parbreaks.
391            if breaks == 1
392                && i + 1 != repr.lines.len()
393                && !nodes
394                    .last()
395                    .is_some_and(|last| should_break_after(last) || preserve_break_after(last))
396                && !preserve_exclusive(line)
397                && !preserve_exclusive(&repr.lines[i + 1])
398            {
399                doc += self.arena.softline();
400            } else if breaks > 0 {
401                doc += self.arena.hardline().repeat(breaks);
402            }
403        }
404        doc
405    }
406}
407
/// One line of markup as collected by `collect_markup_repr`.
#[derive(Default)]
struct MarkupLine<'a> {
    /// The nodes on this line, in source order.
    nodes: SmallVec<[&'a SyntaxNode; 4]>,
    /// Number of linebreaks that follow this line (0 if the line was not ended by a break).
    breaks: usize,
    /// Set when the line holds a Text, Strong, Emph, or Raw node.
    mixed_text: bool,
}
414
/// Markup broken into lines, together with the boundaries at both ends.
struct MarkupRepr<'a> {
    /// The collected lines, in source order.
    lines: Vec<MarkupLine<'a>>,
    /// Boundary before the first node.
    start_bound: Boundary,
    /// Boundary after the last node.
    end_bound: Boundary,
}
420
421/// Markup boundary, deciding whether can break.
422#[derive(Debug, Clone, Copy, PartialEq, Eq)]
423enum Boundary {
424    /// Should add no blank.
425    Nil,
426    /// Beside blocky elements. Can turn to a linebreak when multiline.
427    NilOrBreak,
428    /// Can turn to a linebreak if not in document scope and text-wrapping enabled,
429    /// as there are already spaces after comments.
430    WeakNilOrBreak,
431    /// n spaces.
432    Space(usize),
433    /// Always breaks.
434    Break,
435    /// Always breaks if not in document scope.
436    WeakBreak,
437}
438
439impl Boundary {
440    pub fn from_space(space: &str) -> Self {
441        if space.has_linebreak() {
442            Self::Break
443        } else {
444            Self::Space(space.len())
445        }
446    }
447
448    pub fn strip_space(self) -> Self {
449        match self {
450            Self::Space(_) => Self::NilOrBreak,
451            _ => self,
452        }
453    }
454}
455
/// Breaks the children of `markup` into lines and determines the boundaries at both ends.
///
/// A line ends at a parbreak or at a space that contains a linebreak. A line holding
/// a Text, Strong, Emph, or Raw node is flagged as `mixed_text`. A leading space and
/// trailing spaces are not kept as nodes; they are recorded as `start_bound` and
/// `end_bound` instead.
fn collect_markup_repr(markup: Markup<'_>) -> MarkupRepr<'_> {
    /// A subset of "blocky" elements that we cannot safely handle currently.
    /// By default show rule, these elements seem to have weak spaces on both sides.
    /// But this behavior can be changed by wrapping them in a box.
    fn is_special_block_elem(it: &SyntaxNode) -> bool {
        matches!(
            it.kind(),
            SyntaxKind::ListItem | SyntaxKind::EnumItem | SyntaxKind::TermItem
        )
    }

    let mut repr = MarkupRepr {
        lines: vec![],
        start_bound: Boundary::Nil,
        end_bound: Boundary::Nil,
    };
    let mut current_line = MarkupLine::default();
    for node in markup.to_untyped().children() {
        // `true` when this node terminates the current line.
        let break_line = match node.kind() {
            SyntaxKind::Parbreak => {
                current_line.breaks = node.text().count_linebreaks(); // This is >= 2
                true
            }
            SyntaxKind::Space if current_line.nodes.is_empty() => {
                // Due to the logic of line-splitting, it must also be the first node in the markup.
                debug_assert!(repr.lines.is_empty());
                repr.start_bound = Boundary::from_space(node.text());
                continue;
            }
            SyntaxKind::Space if node.text().has_linebreak() => {
                current_line.breaks = 1; // Exactly one linebreak
                true
            }
            _ => {
                if matches!(
                    node.kind(),
                    SyntaxKind::Text | SyntaxKind::Strong | SyntaxKind::Emph | SyntaxKind::Raw
                ) {
                    current_line.mixed_text = true;
                }
                // A leading space before a special block element is not kept as a space.
                if current_line.nodes.is_empty() && is_special_block_elem(node) {
                    repr.start_bound = repr.start_bound.strip_space();
                }
                current_line.nodes.push(node);
                false
            }
        };
        if break_line {
            repr.lines.push(current_line);
            current_line = MarkupLine::default();
        }
    }
    // Flush the last line if it was not terminated by a break.
    if !current_line.nodes.is_empty() {
        repr.lines.push(current_line);
    }

    // Remove trailing spaces
    if let Some(last_line) = repr.lines.last_mut() {
        // One trailing break of the last line moves into the end boundary.
        if last_line.breaks > 0 {
            last_line.breaks -= 1;
            repr.end_bound = Boundary::Break;
        }
        // Pop trailing space nodes, recording each as the end boundary.
        while let Some(last) = last_line.nodes.last() {
            if last.kind() == SyntaxKind::Space {
                repr.end_bound = Boundary::from_space(last.text());
                last_line.nodes.pop();
            } else {
                if is_special_block_elem(last) {
                    repr.end_bound = repr.end_bound.strip_space();
                }
                break;
            }
        }
    }

    // Check boundary through comments:
    // when a boundary is still `Nil`, look at the nearest non-comment node of the
    // first (resp. last) line to refine it.
    if repr.start_bound == Boundary::Nil {
        if let Some(first_line) = repr.lines.first() {
            match first_line.nodes.iter().find(|it| !is_comment_node(it)) {
                Some(it) if is_special_block_elem(it) => {
                    repr.start_bound = Boundary::NilOrBreak;
                }
                Some(it) if it.kind() == SyntaxKind::Space => {
                    repr.start_bound = Boundary::WeakNilOrBreak;
                }
                // The line consists of comments only.
                None if !first_line.nodes.is_empty() => repr.start_bound = Boundary::WeakBreak,
                _ => {}
            }
        }
    }
    if repr.end_bound == Boundary::Nil {
        if let Some(last_line) = repr.lines.last() {
            match last_line.nodes.iter().rfind(|it| !is_comment_node(it)) {
                Some(it) if is_special_block_elem(it) => {
                    repr.end_bound = Boundary::NilOrBreak;
                }
                Some(it) if it.kind() == SyntaxKind::Space => {
                    repr.end_bound = Boundary::WeakNilOrBreak;
                }
                // The line consists of comments only.
                None if !last_line.nodes.is_empty() => repr.end_bound = Boundary::WeakBreak,
                _ => {}
            }
        }
    }

    repr
}
566
567fn is_block_equation(it: &SyntaxNode) -> bool {
568    it.cast::<Equation>()
569        .is_some_and(|equation| equation.block())
570}
571
572fn is_block_raw(it: &SyntaxNode) -> bool {
573    it.cast::<Raw>().is_some_and(|raw| raw.block())
574}
575
576/// Returns true if the given markup contains exactly one primary (non-text, non-block) expression,
577/// ignoring spaces, linebreaks, and labels, and no linebreak or parbreak presented.
578fn contains_exactly_one_primary_expr(markup: Markup) -> bool {
579    // Fast fail: if any linebreak or parbreak is present, not a single primary expr.
580    if markup.exprs().any(|expr| {
581        matches!(expr, Expr::Space(_)) && expr.to_untyped().text().has_linebreak()
582            || matches!(expr, Expr::Parbreak(_))
583    }) {
584        return false;
585    }
586    is_only_one_and(
587        markup
588            .exprs()
589            .filter(|it| !matches!(it, Expr::Space(_) | Expr::Linebreak(_) | Expr::Label(_))),
590        |it| {
591            // Blocky expressions may produce new breaks.
592            // Other markup expressions are safe, as they must span only one line,
593            // or can be covered in boundary check.
594            !matches!(it, Expr::Text(_))
595        },
596    )
597}