Skip to main content

math_core/
token.rs

1use std::ops::Range;
2
3use strum_macros::IntoStaticStr;
4
5use mathml_renderer::attribute::{
6    FracAttr, HtmlTextStyle, MathVariant, Notation, OpAttr, ParenType, Size, Style,
7};
8use mathml_renderer::length::Length;
9use mathml_renderer::symbol::{Bin, MathMLOperator, Op, OrdLike, Punct, Rel};
10
11use crate::character_class::Class;
12use crate::environments::Env;
13
14#[derive(Debug, Clone, Copy)]
15pub enum Token<'source> {
16    Eof,
17    Begin(Env),
18    End(Env),
19    NewColumn,
20    NewLine,
21    NoNumber,
22    Tag,
23    Left,
24    Right,
25    Middle,
26    /// The opening square bracket has its own token because we need to
27    /// distinguish it from `\lbrack` after `\sqrt`.
28    SquareBracketOpen,
29    /// The closing square bracket has its own token because we often
30    /// need to search for it.
31    /// Additionally, it's useful to distinguish this from `\rbrack`.
32    SquareBracketClose,
33    GroupBegin,
34    GroupEnd,
35    Frac(Option<FracAttr>),
36    Genfrac,
37    Underscore,
38    Circumflex,
39    Binom(Option<FracAttr>),
40    Overset,
41    Underset,
42    OverUnderBrace(OrdLike, bool),
43    Sqrt,
44    Limits,
45    // For `\lim`, `\sup`, `\inf`, `\max`, `\min`, etc.
46    PseudoOperatorLimits(&'static str),
47    Space(Length),
48    CustomSpace,
49    NonBreakingSpace,
50    Whitespace,
51    Transform(MathVariant),
52    Big(Size, Option<ParenType>),
53    OverUnder(Rel, bool, Option<OpAttr>),
54    /// A token corresponding to LaTeX's "mathord" character class (class 0).
55    Ord(OrdLike),
56    /// A token corresponding to LaTeX's "mathop" character class (class 1).
57    Op(Op),
58    /// A token corresponding to LaTeX's "mathbin" character class (class 2).
59    BinaryOp(Bin),
60    /// A token corresponding to LaTeX's "mathrel" character class (class 3).
61    Relation(Rel),
62    /// A token corresponding to LaTeX's "mathopen" character class (class 4).
63    Open(OrdLike),
64    /// A token corresponding to LaTeX's "mathclose" character class (class 5).
65    Close(OrdLike),
66    /// A token corresponding to LaTeX's "mathpunct" character class (class 6).
67    Punctuation(Punct),
68    /// A token corresponding to LaTeX's "mathinner" character class (class I).
69    Inner(Op),
70    Prime,
71    OpGreaterThan,
72    OpLessThan,
73    OpAmpersand,
74    /// A token to force an operator to behave like a relation (mathrel).
75    /// This is, for example, needed for `:`, which in LaTeX is a relation,
76    /// but in MathML Core is a separator (punctuation).
77    ForceRelation(MathMLOperator),
78    /// A token to force an operator to behave like a closing symbol (mathclose).
79    /// This is, for example, needed for `!`, which in LaTeX is a closing symbol,
80    /// but in MathML Core is an ordinary operator.
81    ForceClose(MathMLOperator),
82    /// A token to force an operator to behave like a binary operator (mathbin).
83    /// This is, for example, needed for `×`, which in LaTeX is a binary operator,
84    /// but in MathML Core is a "big operator" (mathop).
85    ForceBinaryOp(MathMLOperator),
86    Letter(char, FromAscii),
87    UprightLetter(char), // letter for which we need `mathvariant="normal"`
88    Digit(char),
89    // For `\log`, `\exp`, `\sin`, `\cos`, `\tan`, etc.
90    PseudoOperator(&'static str),
91    Enclose(Notation),
92    OperatorName(bool),
93    Slashed,
94    Not,
95    Text(Option<HtmlTextStyle>),
96    Style(Style),
97    Color,
98    CustomCmdArg(u8),
99    CustomCmd(u8, &'source [Token<'static>]),
100    HardcodedMathML(&'static str),
101    TextModeAccent(char),
102    UnknownCommand(&'source str),
103    /// This token is intended to be used in predefined token streams.
104    /// It is equivalent to `{abc}`, but has a much more compact representation.
105    InternalStringLiteral(&'static str),
106}
107
108impl Token<'_> {
109    /// Returns the character class of this token.
110    pub(super) fn class(&self, in_sequence: bool, ignore_end_tokens: bool) -> Class {
111        if !in_sequence {
112            return Class::Default;
113        }
114        match self {
115            Token::Relation(_) | Token::ForceRelation(_) => Class::Relation,
116            Token::Punctuation(_) => Class::Punctuation,
117            Token::Open(_) | Token::Left | Token::SquareBracketOpen => Class::Open,
118            Token::Close(_)
119            | Token::SquareBracketClose
120            | Token::NewColumn
121            | Token::ForceClose(_) => Class::Close,
122            Token::BinaryOp(_) | Token::ForceBinaryOp(_) => Class::BinaryOp,
123            Token::Op(_) => Class::Operator,
124            Token::End(_) | Token::Right | Token::GroupEnd | Token::Eof if !ignore_end_tokens => {
125                Class::Close
126            }
127            Token::Inner(_) => Class::Inner,
128            // `\big` commands without the "l" or "r" really produce `Class::Default`.
129            Token::Big(_, Some(paren_type)) => {
130                if matches!(paren_type, ParenType::Open) {
131                    Class::Open
132                } else {
133                    Class::Close
134                }
135            }
136            // TODO: This needs to skip spaces and other non-class tokens in the token sequence.
137            Token::CustomCmd(_, [head, ..]) => head.class(in_sequence, ignore_end_tokens),
138            _ => Class::Default,
139        }
140    }
141}
142
/// Two-state flag carried as the second field of `Token::Letter`.
///
/// NOTE(review): presumably records whether the letter originated from an
/// ASCII character in the input; confirm against the code that builds
/// `Token::Letter`.
///
/// The default value is [`FromAscii::False`]. `PartialEq`/`Eq` are derived so
/// the flag can be compared with `==` and used in `assert_eq!`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FromAscii {
    #[default]
    False,
    True,
}
149
/// A region of the input, stored as a start offset plus a 16-bit length.
///
/// Field `0` is the start offset and field `1` is the length; the end offset
/// is computed on demand as `start + length`.
///
/// `PartialEq`/`Eq` are derived so spans (and `Result<Span, ()>`) can be
/// compared directly, e.g. in `assert_eq!`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span(pub usize, pub u16);

impl Span {
    /// Creates an empty span (length 0) located at offset `at`.
    #[inline]
    pub const fn zero_width(at: usize) -> Self {
        Span(at, 0)
    }

    /// Returns the offset at which the span begins.
    #[inline]
    pub const fn start(&self) -> usize {
        self.0
    }

    /// Returns the offset one past the last position covered by the span.
    #[inline]
    pub const fn end(&self) -> usize {
        // `u16 -> usize` is a lossless widening conversion.
        self.0 + self.1 as usize
    }
}

impl From<Span> for Range<usize> {
    /// Converts the span into the half-open range `start..end`.
    #[inline]
    fn from(span: Span) -> Self {
        // Reuse the accessors so the end computation lives in one place.
        span.start()..span.end()
    }
}

impl TryFrom<Range<usize>> for Span {
    type Error = ();

    /// Converts a range into a span.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `range.end < range.start`, or when the length
    /// of the range does not fit in a `u16`.
    #[inline]
    fn try_from(range: Range<usize>) -> Result<Self, ()> {
        range
            .end
            .checked_sub(range.start)
            .and_then(|length| u16::try_from(length).ok())
            .map(|length| Span(range.start, length))
            .ok_or(())
    }
}
186
187/// A token together with its span in the input string.
188#[derive(Debug, Clone, Copy)]
189pub struct TokSpan<'config>(Token<'config>, usize, u16);
190
191impl<'config> TokSpan<'config> {
192    #[inline]
193    pub const fn new(token: Token<'config>, span: Span) -> Self {
194        TokSpan(token, span.0, span.1)
195    }
196
197    #[inline]
198    pub fn token(&self) -> &Token<'config> {
199        &self.0
200    }
201
202    #[inline]
203    pub fn into_token(self) -> Token<'config> {
204        self.0
205    }
206
207    #[inline]
208    pub fn into_parts(self) -> (Token<'config>, Span) {
209        (self.0, Span(self.1, self.2))
210    }
211
212    // #[inline]
213    // pub fn token_mut(&mut self) -> &mut Token<'config> {
214    //     &mut self.0
215    // }
216
217    #[inline]
218    pub fn span(&self) -> Span {
219        Span(self.1, self.2)
220    }
221
222    #[inline]
223    pub(super) fn class(&self, in_sequence: bool, ignore_end_tokens: bool) -> Class {
224        self.0.class(in_sequence, ignore_end_tokens)
225    }
226}
227
228impl<'config> From<Token<'config>> for TokSpan<'config> {
229    #[inline]
230    fn from(token: Token<'config>) -> Self {
231        TokSpan(token, 0, 0)
232    }
233}
234
235#[derive(Debug, Clone, Copy, PartialEq, IntoStaticStr)]
236pub enum EndToken {
237    #[strum(serialize = r"\end{...}")]
238    End,
239    #[strum(serialize = r"}")]
240    GroupClose,
241    #[strum(serialize = r"\right")]
242    Right,
243    #[strum(serialize = r"]")]
244    SquareBracketClose,
245    #[strum(serialize = r"end of input")]
246    Eof,
247}
248
249impl EndToken {
250    pub fn matches(&self, other: &Token) -> bool {
251        matches!(
252            (self, other),
253            (EndToken::End, Token::End(_))
254                | (EndToken::GroupClose, Token::GroupEnd)
255                | (EndToken::Right, Token::Right)
256                | (EndToken::SquareBracketClose, Token::SquareBracketClose)
257                | (EndToken::Eof, Token::Eof)
258        )
259    }
260}
261
#[cfg(test)]
mod tests {
    use std::mem::size_of;

    use super::*;

    /// Size of one machine word (`usize`) in bytes.
    const WORD: usize = size_of::<usize>();

    /// Guards against accidental growth of the token types by checking each
    /// size against an upper bound expressed in machine words.
    #[test]
    fn test_struct_sizes() {
        // (actual size, upper bound, failure message), checked in order.
        let limits = [
            (size_of::<Token>(), 3 * WORD, "size of Token"),
            (size_of::<TokSpan>(), 5 * WORD, "size of TokSpan"),
            (
                size_of::<Result<Token, &'static i32>>(),
                3 * WORD,
                "size of Result<Token, pointer>",
            ),
        ];
        for &(actual, limit, label) in &limits {
            assert!(actual <= limit, "{}", label);
        }
    }
}