harper_typst/
lib.rs

1mod offset_cursor;
2mod typst_translator;
3
4use offset_cursor::OffsetCursor;
5use typst_translator::TypstTranslator;
6
7use harper_core::{Token, parsers::Parser};
8use itertools::Itertools;
9use typst_syntax::{
10    Source, SyntaxNode,
11    ast::{AstNode, Expr, Markup},
12};
13
/// A parser that wraps Harper's `PlainEnglish` parser allowing one to ingest Typst files.
///
/// The type is a stateless unit struct, so it is trivially copyable and can be
/// constructed either directly (`Typst`) or through [`Default::default`].
#[derive(Debug, Default, Clone, Copy)]
pub struct Typst;
16
17impl Parser for Typst {
18    fn parse(&self, source: &[char]) -> Vec<Token> {
19        let source_str: String = source.iter().collect();
20
21        // Transform the source into an AST through the `typst_syntax` crate
22        let typst_document = Source::detached(source_str);
23        let typst_tree = Markup::from_untyped(typst_document.root())
24            .expect("Unable to create typst document from parsed tree!");
25
26        // Recurse through AST to create tokens
27        let parse_helper = TypstTranslator::new(&typst_document);
28        let mut buf = Vec::new();
29        let exprs = typst_tree.exprs().collect_vec();
30        let exprs = convert_parbreaks(&mut buf, &exprs);
31        exprs
32            .into_iter()
33            .filter_map(|ex| parse_helper.parse_expr(ex, OffsetCursor::new(&typst_document)))
34            .flatten()
35            .collect_vec()
36    }
37}
38
39/// Converts newlines after certain elements to paragraph breaks
40/// This is accomplished here instead of in the translating module because at this point there is
41/// still semantic information associated with the elements.
42///
43/// Newlines are separate expressions in the parse tree (as the Space variant)
44fn convert_parbreaks<'a>(buf: &'a mut Vec<SyntaxNode>, exprs: &'a [Expr]) -> Vec<Expr<'a>> {
45    // Owned collection of nodes forcibly casted to paragraph breaks
46    *buf = exprs
47        .iter()
48        .map(|e| {
49            let mut node = SyntaxNode::placeholder(typst_syntax::SyntaxKind::Parbreak);
50            node.synthesize(e.span());
51            node
52        })
53        .collect_vec();
54
55    let should_parbreak = |e1, e2, e3| {
56        matches!(e2, Expr::Space(_))
57            && (matches!(e1, Expr::Heading(_) | Expr::List(_))
58                || matches!(e3, Expr::Heading(_) | Expr::List(_)))
59    };
60
61    let mut res: Vec<Expr> = Vec::new();
62    let mut last_element: Option<Expr> = None;
63    for ((i, expr), (_, next_expr)) in exprs.iter().enumerate().tuple_windows() {
64        let mut current_expr = *expr;
65        if let Some(last_element) = last_element {
66            if should_parbreak(last_element, *expr, *next_expr) {
67                let pbreak = typst_syntax::ast::Parbreak::from_untyped(&buf[i])
68                    .expect("Unable to convert expression to Parbreak");
69                current_expr = Expr::Parbreak(pbreak);
70            }
71        }
72        res.push(current_expr);
73        last_element = Some(*expr)
74    }
75    // Push last element because it will be excluded by tuple_windows() above
76    if let Some(last) = exprs.iter().last() {
77        res.push(*last);
78    }
79
80    res
81}