mitex_parser/
syntax.rs

1//! Syntax kinds and typed syntax nodes
2
3use mitex_lexer::{BraceKind, CommandName, Token};
4use rowan::ast::AstNode;
5
/// Builds the `match` expression behind a generated `from_repr` function.
///
/// The macro walks the variant list recursively, one variant per step:
///
/// * `$scrut` is the identifier of the integer value being matched;
/// * the first bracket accumulates the match arms emitted so far;
/// * the second bracket holds the discriminant of the next variant, spelled
///   as a constant expression (`0`, `0 + 1`, `0 + 1 + 1`, ...);
/// * the third bracket holds the variants that still have to be processed.
///
/// Each discriminant is routed through `ConstValue::<{..}>::VALUE` so that it
/// can be used as a pattern; a `ConstValue` type with an associated `VALUE`
/// constant must therefore be in scope at the invocation site.
///
/// The arms produce `Self::Variant`, so the macro has to be expanded inside an
/// `impl` of the enum, but the variants need not be imported into scope.
macro_rules! arms {
    // Recursive step: emit one arm for the head variant, bump the counter.
    ($scrut:ident [$($tokens:tt)*] [$($repr:tt)*] [$variant:tt $($rest:tt)*]) => {
        arms!($scrut [$($tokens)* ConstValue::<{$($repr)*}>::VALUE => Some(Self::$variant),] [$($repr)* + 1] [$($rest)*])
    };

    // Base case: no variants left, emit the finished `match`.
    ($scrut:ident [$($tokens:tt)*] [$($repr:tt)*] []) => {
        match $scrut {
            $($tokens)*
            _ => None,
        }
    };
}
18
/// Declares a field-less enum and generates a private `from_repr` function
/// that converts a raw integer back into a variant.
///
/// The enum must start with a `#[repr(..)]` attribute naming the integer
/// type; any further attributes and the per-variant attributes are forwarded
/// unchanged. Every variant has to be followed by a comma and must not carry
/// an explicit discriminant: `from_repr` numbers the variants `0, 1, 2, ...`
/// in declaration order.
macro_rules! enum_from_repr {
    (
        #[repr($int:tt)]
        $(#[$attr:meta])*
        $vis:vis enum $enum_name:ident {
            $($(#[$case_attr:meta])* $case:ident,)*
        }
    ) => {
        #[repr($int)]
        $(#[$attr])*
        $vis enum $enum_name {
            $($(#[$case_attr])* $case,)*
        }

        impl $enum_name {
            /// Returns the variant whose position in the declaration equals
            /// `raw`, or `None` when `raw` is out of range.
            fn from_repr(raw: $int) -> Option<Self> {
                // Lifts a const-generic integer into an associated constant so
                // that `arms!` can use it in pattern position.
                struct ConstValue<const N: $int>;

                impl<const N: $int> ConstValue<N> {
                    const VALUE: $int = N;
                }

                arms!(raw [] [0] [$($case)*])
            }
        }
    };
}
46
// NOTE: `enum_from_repr!` requires `#[repr(..)]` to be the very first
// attribute, so the enum documentation is placed after it.
enum_from_repr! {
#[repr(u16)]
/// All syntax kinds used in the syntax tree.
///
/// The position of a variant in this list is its `u16` discriminant, which
/// is the value exchanged with `rowan::SyntaxKind`; reordering, inserting or
/// removing variants therefore changes those raw values.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
#[allow(missing_docs)]
pub enum SyntaxKind {
    // Tokens
    TokenError,
    TokenLineBreak,
    TokenWhiteSpace,
    TokenComment,
    TokenLBrace,
    TokenRBrace,
    TokenLBracket,
    TokenRBracket,
    TokenLParen,
    TokenRParen,
    TokenComma,
    TokenTilde,
    TokenSlash,
    TokenWord,
    // Formula delimiter; `FormulaItem` compares its text against `$` and `$$`.
    TokenDollar,
    // Opening math delimiter; `FormulaItem` compares its text against `\(` and `\[`.
    TokenBeginMath,
    // Closing math delimiter; `FormulaItem` compares its text against `\)` and `\]`.
    TokenEndMath,
    TokenAmpersand,
    TokenHash,
    TokenAsterisk,
    TokenAtSign,
    TokenUnderscore,
    TokenCaret,
    TokenApostrophe,
    TokenDitto,
    TokenSemicolon,
    // Produced for the lexer's begin-/end-environment command names.
    TokenCommandSym,

    // Clauses
    // Also the kind given to every ordinary command-name token of the lexer.
    ClauseCommandName,
    ClauseArgument,
    ClauseLR,

    // Items
    // Also the kind given to the lexer's `NewLine` token; counted as trivia.
    ItemNewLine,
    ItemText,
    ItemCurly,
    ItemBracket,
    ItemParen,
    ItemCmd,
    ItemEnv,
    ItemLR,
    ItemBegin,
    ItemEnd,
    ItemBlockComment,
    ItemTypstCode,
    ItemAttachComponent,
    ItemFormula,

    // Scopes
    ScopeRoot,
}
}
106
107impl From<Token> for SyntaxKind {
108    fn from(kind: Token) -> Self {
109        match kind {
110            Token::LineBreak => SyntaxKind::TokenLineBreak,
111            Token::Whitespace => SyntaxKind::TokenWhiteSpace,
112            Token::LineComment => SyntaxKind::TokenComment,
113            Token::Left(BraceKind::Curly) => SyntaxKind::TokenLBrace,
114            Token::Right(BraceKind::Curly) => SyntaxKind::TokenRBrace,
115            Token::Left(BraceKind::Bracket) => SyntaxKind::TokenLBracket,
116            Token::Right(BraceKind::Bracket) => SyntaxKind::TokenRBracket,
117            Token::Left(BraceKind::Paren) => SyntaxKind::TokenLParen,
118            Token::Right(BraceKind::Paren) => SyntaxKind::TokenRParen,
119            Token::Comma => SyntaxKind::TokenComma,
120            Token::Tilde => SyntaxKind::TokenTilde,
121            Token::Slash => SyntaxKind::TokenSlash,
122            Token::Underscore => SyntaxKind::TokenUnderscore,
123            Token::Apostrophe => SyntaxKind::TokenApostrophe,
124            Token::Ditto => SyntaxKind::TokenDitto,
125            Token::Semicolon => SyntaxKind::TokenSemicolon,
126            Token::Caret => SyntaxKind::TokenCaret,
127            Token::Word => SyntaxKind::TokenWord,
128            Token::Dollar => SyntaxKind::TokenDollar,
129            Token::Ampersand => SyntaxKind::TokenAmpersand,
130            Token::Hash => SyntaxKind::TokenHash,
131            Token::Asterisk => SyntaxKind::TokenAsterisk,
132            Token::AtSign => SyntaxKind::TokenAtSign,
133            Token::NewLine => SyntaxKind::ItemNewLine,
134            Token::MacroArg(_) => SyntaxKind::TokenWord,
135            Token::CommandName(
136                CommandName::ErrorBeginEnvironment | CommandName::ErrorEndEnvironment,
137            )
138            | Token::Error => SyntaxKind::TokenError,
139            Token::CommandName(CommandName::BeginEnvironment | CommandName::EndEnvironment) => {
140                SyntaxKind::TokenCommandSym
141            }
142            Token::CommandName(CommandName::BeginMath) => SyntaxKind::TokenBeginMath,
143            Token::CommandName(CommandName::EndMath) => SyntaxKind::TokenEndMath,
144            Token::CommandName(_) => SyntaxKind::ClauseCommandName,
145        }
146    }
147}
148
149impl SyntaxKind {
150    /// Checks whether the syntax kind is trivia
151    pub fn is_trivia(self) -> bool {
152        matches!(
153            self,
154            SyntaxKind::TokenLineBreak
155                | SyntaxKind::TokenWhiteSpace
156                | SyntaxKind::TokenComment
157                | SyntaxKind::ItemNewLine
158        )
159    }
160}
161
162use SyntaxKind::*;
163
164impl From<SyntaxKind> for rowan::SyntaxKind {
165    fn from(kind: SyntaxKind) -> Self {
166        Self(kind as u16)
167    }
168}
169
170impl From<rowan::SyntaxKind> for SyntaxKind {
171    fn from(kind: rowan::SyntaxKind) -> Self {
172        Self::from_repr(kind.0).expect("invalid rowan::SyntaxKind")
173    }
174}
175
/// Provides a TeX language for rowan
///
/// The enum has no variants and therefore no values; it is only used at the
/// type level, as the language parameter of the rowan syntax tree types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TexLang {}
179
180impl rowan::Language for TexLang {
181    type Kind = SyntaxKind;
182
183    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
184        raw.into()
185    }
186
187    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
188        kind.into()
189    }
190}
191
/// A node of the TeX syntax tree (rowan's `SyntaxNode` specialized to [`TexLang`]).
pub type SyntaxNode = rowan::SyntaxNode<TexLang>;
/// A token of the TeX syntax tree (rowan's `SyntaxToken` specialized to [`TexLang`]).
pub type SyntaxToken = rowan::SyntaxToken<TexLang>;
/// An element of the TeX syntax tree (rowan's `SyntaxElement` specialized to [`TexLang`]).
pub type SyntaxElement = rowan::SyntaxElement<TexLang>;
198
/// Declares a typed wrapper around [`SyntaxNode`] and implements `AstNode`
/// for it.
///
/// Arguments: optional attributes (usually the doc comment) for the generated
/// struct, the struct name, and one or more comma-separated patterns naming
/// the syntax kinds the wrapper accepts.
macro_rules! syntax_tree_node {
    ($(#[$doc:meta])* $node:ident, $($accepted:pat),+) => {
        #[derive(Clone)]
        #[repr(transparent)]
        $(#[$doc])*
        pub struct $node(SyntaxNode);

        impl AstNode for $node {
            type Language = TexLang;

            fn can_cast(kind: SyntaxKind) -> bool {
                matches!(kind, $($accepted)|+)
            }

            fn cast(node: SyntaxNode) -> Option<Self>
            where
                Self: Sized,
            {
                // Wrap the node only when its kind is one of the accepted ones.
                if <Self as AstNode>::can_cast(node.kind()) {
                    Some(Self(node))
                } else {
                    None
                }
            }

            fn syntax(&self) -> &SyntaxNode {
                &self.0
            }
        }
    };
}
232
233syntax_tree_node!(
234    /// An inline formula or a display formula
235    FormulaItem,
236    ItemFormula
237);
238
239impl FormulaItem {
240    /// Checks whether it is a display formula
241    pub fn is_display(&self) -> bool {
242        self.syntax().first_token().map_or(false, |node| {
243            (node.kind() == TokenDollar && node.text() == "$$")
244                || (node.kind() == TokenBeginMath && node.text() == "\\[")
245        })
246    }
247
248    /// Checks whether it is an inline formula
249    pub fn is_inline(&self) -> bool {
250        self.syntax().first_token().map_or(false, |node| {
251            (node.kind() == TokenDollar && node.text() == "$")
252                || (node.kind() == TokenBeginMath && node.text() == "\\(")
253        })
254    }
255
256    /// Checks whether the formula is valid
257    pub fn is_valid(&self) -> bool {
258        self.syntax().first_token().map_or(false, |first_node| {
259            self.syntax().last_token().map_or(false, |last_node| {
260                if first_node.kind() == TokenDollar && last_node.kind() == TokenDollar {
261                    return (first_node.text() == "$" && last_node.text() == "$")
262                        || (first_node.text() == "$$" && last_node.text() == "$$");
263                } else if first_node.kind() == TokenBeginMath && last_node.kind() == TokenEndMath {
264                    return (first_node.text() == "\\(" && last_node.text() == "\\)")
265                        || (first_node.text() == "\\[" && last_node.text() == "\\]");
266                }
267                false
268            })
269        })
270    }
271}
272
273syntax_tree_node!(
274    /// Command item in latex document
275    ///
276    /// In short it is in shape of
277    /// ```coffeescript
278    /// ItemCmd(
279    ///   ClauseArgument(rev-arg1)?
280    ///   ClauseCommandName(name),
281    ///   ClauseArgument(arg1), ...
282    /// )
283    /// ```
284    ///
285    /// Exmaple:
286    /// ```latex
287    /// \documentclass{article}
288    /// ```
289    ///
290    /// Which will be parsed as:
291    /// ```coffeescript
292    /// ItemCmd(
293    ///   ClauseCommandName(
294    ///    TokenWord("documentclass")
295    ///  ),
296    ///   ClauseArgument(
297    ///     ItemCurly(
298    ///      TokenLBrace,
299    ///     ItemText(
300    ///      TokenWord("article")
301    ///    ),
302    ///     TokenRBrace
303    ///   )
304    /// )
305    /// ```
306    CmdItem,
307    ItemCmd
308);
309
310impl CmdItem {
311    /// Get the token corresponding to command name
312    pub fn name_tok(&self) -> Option<SyntaxToken> {
313        self.syntax()
314            .children_with_tokens()
315            .filter_map(|node| node.into_token())
316            .find(|node| node.kind() == ClauseCommandName)
317    }
318
319    /// Get the command arguments
320    pub fn arguments(&self) -> impl Iterator<Item = SyntaxNode> {
321        self.syntax()
322            .children()
323            .filter(|node| node.kind() == ClauseArgument)
324    }
325}
326
327syntax_tree_node!(
328    /// Environment item in latex document
329    /// ```coffeescript
330    /// ItemBegin(
331    ///   ClauseCommandName(name),
332    ///   ClauseArgument(arg1), ...
333    /// )
334    /// ...
335    /// ItemEnd(
336    ///   ClauseCommandName(name),
337    /// )
338    EnvItem,
339    ItemEnv
340);
341
342impl EnvItem {
343    /// Get the begin clause of the environment
344    pub fn begin(&self) -> Option<BeginItem> {
345        self.syntax().children().find_map(BeginItem::cast)
346    }
347
348    /// Get the end clause of the environment
349    pub fn end(&self) -> Option<EndItem> {
350        self.syntax().children().find_map(EndItem::cast)
351    }
352
353    /// Get the name of the environment
354    pub fn name_tok(&self) -> Option<SyntaxToken> {
355        self.begin().and_then(|begin| begin.name())
356    }
357
358    /// Get the arguments of the environment
359    pub fn arguments(&self) -> impl Iterator<Item = SyntaxNode> {
360        self.begin().into_iter().flat_map(|begin| begin.arguments())
361    }
362}
363
364syntax_tree_node!(
365    /// A paired `\left` and `\right` command with nodes in between them.
366    LRItem,
367    ItemLR
368);
369
370impl LRItem {
371    /// Get the left clause
372    pub fn left(&self) -> Option<LRClause> {
373        self.syntax().first_child().and_then(LRClause::cast)
374    }
375    /// Get the right clause
376    pub fn right(&self) -> Option<LRClause> {
377        self.syntax().last_child().and_then(LRClause::cast)
378    }
379
380    /// Get the left symbol wrapped in the clause
381    pub fn left_sym(&self) -> Option<SyntaxToken> {
382        self.left().and_then(|clause| clause.sym())
383    }
384
385    /// Get the right symbol wrapped in the clause
386    pub fn right_sym(&self) -> Option<SyntaxToken> {
387        self.right().and_then(|clause| clause.sym())
388    }
389}
390
391syntax_tree_node!(
392    /// A `\left` or `\right` command
393    LRClause,
394    ClauseLR
395);
396
397impl LRClause {
398    /// Get the command kind
399    pub fn is_left(&self) -> bool {
400        self.syntax()
401            .first_token()
402            .map(|node| node.kind() == ClauseCommandName && node.text() == "\\left")
403            .unwrap_or(false)
404    }
405
406    /// Get the symbol wrapped in the clause
407    pub fn sym(&self) -> Option<SyntaxToken> {
408        self.syntax()
409            .last_token()
410            .filter(|node| !matches!(node.kind(), ClauseCommandName))
411    }
412}
413
414syntax_tree_node!(
415    /// A `\begin{name}` command with arguments
416    BeginItem,
417    ItemBegin
418);
419
420impl BeginItem {
421    /// Get the name in the begin clause
422    pub fn name(&self) -> Option<SyntaxToken> {
423        self.syntax()
424            .first_token()
425            .filter(|node| node.kind() == TokenCommandSym)
426    }
427
428    /// Get the arguments of the environment
429    pub fn arguments(&self) -> impl Iterator<Item = SyntaxNode> {
430        self.syntax()
431            .children()
432            .filter(|node| node.kind() == ClauseArgument)
433    }
434}
435
436syntax_tree_node!(
437    /// A `\end{name}` command
438    EndItem,
439    ItemEnd
440);
441
442impl EndItem {
443    /// Get the name in the end clause
444    pub fn name(&self) -> Option<SyntaxToken> {
445        self.syntax()
446            .first_token()
447            .filter(|node| node.kind() == TokenCommandSym)
448    }
449}