// math_core/token.rs

1use strum_macros::IntoStaticStr;
2
3use mathml_renderer::attribute::{
4    FracAttr, HtmlTextStyle, MathVariant, Notation, OpAttr, Size, Style,
5};
6use mathml_renderer::length::Length;
7use mathml_renderer::symbol::{Bin, MathMLOperator, Op, OrdLike, Punct, Rel};
8
9use crate::character_class::Class;
10use crate::environments::Env;
11
12#[derive(Debug, Clone, Copy, IntoStaticStr)]
13pub enum Token<'config> {
14    #[strum(serialize = "end of input")]
15    Eof,
16    #[strum(serialize = r"\begin")]
17    Begin(Env),
18    #[strum(serialize = r"\end")]
19    End(Env),
20    #[strum(serialize = "&")]
21    NewColumn,
22    #[strum(serialize = r"\\")]
23    NewLine,
24    #[strum(serialize = r"\nonumber/\notag")]
25    NoNumber,
26    #[strum(serialize = r"\tag")]
27    Tag,
28    #[strum(serialize = r"\left")]
29    Left,
30    #[strum(serialize = r"\right")]
31    Right,
32    #[strum(serialize = r"\middle")]
33    Middle,
34    /// The opening square bracket has its own token because we need to
35    /// distinguish it from `\lbrack` after `\sqrt`.
36    #[strum(serialize = "[")]
37    SquareBracketOpen,
38    /// The closing square bracket has its own token because we often
39    /// need to search for it.
40    /// Additionally, it's useful to distinguish this from `\rbrack`.
41    #[strum(serialize = "]")]
42    SquareBracketClose,
43    #[strum(serialize = "{")]
44    GroupBegin,
45    #[strum(serialize = "}")]
46    GroupEnd,
47    Frac(Option<FracAttr>),
48    #[strum(serialize = r"\genfrac")]
49    Genfrac,
50    #[strum(serialize = "_")]
51    Underscore,
52    #[strum(serialize = "^")]
53    Circumflex,
54    Binom(Option<FracAttr>),
55    #[strum(serialize = r"\overset")]
56    Overset,
57    #[strum(serialize = r"\underset")]
58    Underset,
59    OverUnderBrace(OrdLike, bool),
60    #[strum(serialize = r"\sqrt")]
61    Sqrt,
62    Integral(Op),
63    #[strum(serialize = r"\limits")]
64    Limits,
65    // For `\lim`, `\sup`, `\inf`, `\max`, `\min`, etc.
66    PseudoOperatorLimits(&'static str),
67    Space(Length),
68    CustomSpace,
69    #[strum(serialize = "~")]
70    NonBreakingSpace,
71    Whitespace,
72    Transform(MathVariant),
73    Big(Size, Option<Class>),
74    OverUnder(Rel, bool, Option<OpAttr>),
75    /// A token corresponding to LaTeX's "mathord" character class (class 0).
76    Ord(OrdLike),
77    /// A token corresponding to LaTeX's "mathop" character class (class 1).
78    Op(Op),
79    /// A token corresponding to LaTeX's "mathbin" character class (class 2).
80    #[strum(serialize = "binary operator")]
81    BinaryOp(Bin),
82    /// A token corresponding to LaTeX's "mathrel" character class (class 3).
83    Relation(Rel),
84    /// A token corresponding to LaTeX's "mathopen" character class (class 4).
85    Open(OrdLike),
86    /// A token corresponding to LaTeX's "mathclose" character class (class 5).
87    Close(OrdLike),
88    /// A token corresponding to LaTeX's "mathpunct" character class (class 6).
89    Punctuation(Punct),
90    /// A token corresponding to LaTeX's "mathinner" character class (class I).
91    Inner(Op),
92    #[strum(serialize = "'")]
93    Prime,
94    #[strum(serialize = ">")]
95    OpGreaterThan,
96    #[strum(serialize = "<")]
97    OpLessThan,
98    #[strum(serialize = r"\&")]
99    OpAmpersand,
100    #[strum(serialize = ":")]
101    /// A token to force an operator to behave like a relation (mathrel).
102    /// This is, for example, needed for `:`, which in LaTeX is a relation,
103    /// but in MathML Core is a separator (punctuation).
104    ForceRelation(MathMLOperator),
105    /// A token to force an operator to behave like a closing symbol (mathclose).
106    /// This is, for example, needed for `!`, which in LaTeX is a closing symbol,
107    /// but in MathML Core is an ordinary operator.
108    ForceClose(MathMLOperator),
109    /// A token to force an operator to behave like a binary operator (mathbin).
110    /// This is, for example, needed for `×`, which in LaTeX is a binary operator,
111    /// but in MathML Core is a "big operator" (mathop).
112    ForceBinaryOp(MathMLOperator),
113    Letter(char, FromAscii),
114    UprightLetter(char), // letter for which we need `mathvariant="normal"`
115    Digit(char),
116    // For `\log`, `\exp`, `\sin`, `\cos`, `\tan`, etc.
117    PseudoOperator(&'static str),
118    Enclose(Notation),
119    #[strum(serialize = r"\operatorname")]
120    OperatorName(bool),
121    Slashed,
122    #[strum(serialize = r"\not")]
123    Not,
124    #[strum(serialize = r"\text*")]
125    Text(Option<HtmlTextStyle>),
126    Style(Style),
127    Color,
128    CustomCmdArg(u8),
129    CustomCmd(u8, &'config [Token<'static>]),
130    HardcodedMathML(&'static str),
131    TextModeAccent(char),
132    /// This token is intended to be used in predefined token streams.
133    /// It is equivalent to `{abc}`, but has a much more compact representation.
134    InternalStringLiteral(&'static str),
135}
136
137impl Token<'_> {
138    /// Returns the character class of this token.
139    pub(super) fn class(&self, in_sequence: bool, ignore_end_tokens: bool) -> Class {
140        if !in_sequence {
141            return Class::Default;
142        }
143        match self {
144            Token::Relation(_) | Token::ForceRelation(_) => Class::Relation,
145            Token::Punctuation(_) => Class::Punctuation,
146            Token::Open(_) | Token::Left | Token::SquareBracketOpen => Class::Open,
147            Token::Close(_)
148            | Token::SquareBracketClose
149            | Token::NewColumn
150            | Token::ForceClose(_) => Class::Close,
151            Token::BinaryOp(_) | Token::ForceBinaryOp(_) => Class::BinaryOp,
152            Token::Op(_) | Token::Integral(_) => Class::Operator,
153            Token::End(_) | Token::Right | Token::GroupEnd | Token::Eof if !ignore_end_tokens => {
154                Class::Close
155            }
156            Token::Inner(_) => Class::Inner,
157            // `\big` commands without the "l" or "r" really produce `Class::Default`.
158            Token::Big(_, Some(cls)) => *cls,
159            // TODO: This needs to skip spaces and other non-class tokens in the token sequence.
160            Token::CustomCmd(_, [head, ..]) => head.class(in_sequence, ignore_end_tokens),
161            _ => Class::Default,
162        }
163    }
164}
165
/// Two-state flag carried by `Token::Letter`.
///
/// NOTE(review): presumably records whether the letter was written as an
/// ASCII character in the input — confirm against the lexer.
///
/// The default value is [`FromAscii::False`]. The type is comparable with
/// `==`/`!=`, so callers do not need `matches!` to inspect the flag.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FromAscii {
    #[default]
    False,
    True,
}
172
173#[derive(Debug, Clone, Copy)]
174pub struct TokLoc<'config>(pub usize, pub Token<'config>);
175
176impl<'config> TokLoc<'config> {
177    #[inline]
178    pub fn token(&self) -> &Token<'config> {
179        &self.1
180    }
181
182    #[inline]
183    pub fn into_token(self) -> Token<'config> {
184        self.1
185    }
186
187    // #[inline]
188    // pub fn token_mut(&mut self) -> &mut Token<'config> {
189    //     &mut self.1
190    // }
191
192    #[inline]
193    pub fn location(&self) -> usize {
194        self.0
195    }
196
197    #[inline]
198    pub(super) fn class(&self, in_sequence: bool, ignore_end_tokens: bool) -> Class {
199        self.1.class(in_sequence, ignore_end_tokens)
200    }
201}
202
203impl<'config> From<Token<'config>> for TokLoc<'config> {
204    #[inline]
205    fn from(token: Token<'config>) -> Self {
206        TokLoc(0, token)
207    }
208}
209
210#[derive(Debug, Clone, Copy, PartialEq, IntoStaticStr)]
211pub enum EndToken {
212    #[strum(serialize = r"\end{...}")]
213    End,
214    #[strum(serialize = r"}")]
215    GroupClose,
216    #[strum(serialize = r"\right")]
217    Right,
218    #[strum(serialize = r"]")]
219    SquareBracketClose,
220    #[strum(serialize = r"end of input")]
221    Eof,
222}
223
224impl EndToken {
225    pub fn matches(&self, other: &Token) -> bool {
226        matches!(
227            (self, other),
228            (EndToken::End, Token::End(_))
229                | (EndToken::GroupClose, Token::GroupEnd)
230                | (EndToken::Right, Token::Right)
231                | (EndToken::SquareBracketClose, Token::SquareBracketClose)
232                | (EndToken::Eof, Token::Eof)
233        )
234    }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Size of one machine word in bytes.
    const WORD: usize = std::mem::size_of::<usize>();

    /// Guards against accidental growth of the token types: each checked type
    /// has to stay within its budget, expressed in machine words.
    #[test]
    fn test_struct_sizes() {
        assert!(std::mem::size_of::<Token>() <= 3 * WORD, "size of Token");
        // The message names the type that is actually measured (`TokLoc`).
        assert!(
            std::mem::size_of::<TokLoc>() <= 4 * WORD,
            "size of TokLoc"
        );
        // Wrapping a token in a `Result` with a pointer-sized error must not
        // exceed the budget of the token itself.
        assert!(
            std::mem::size_of::<Result<Token, &'static i32>>() <= 3 * WORD,
            "size of Result<Token, pointer>"
        );
    }
}