Skip to main content

math_core/
token.rs

1use std::ops::Range;
2
3use strum_macros::IntoStaticStr;
4
5use mathml_renderer::attribute::{
6    FracAttr, HtmlTextStyle, MathVariant, Notation, OpAttr, Size, Style,
7};
8use mathml_renderer::length::Length;
9use mathml_renderer::symbol::{Bin, MathMLOperator, Op, OrdLike, Punct, Rel};
10
11use crate::character_class::Class;
12use crate::environments::Env;
13
14#[derive(Debug, Clone, Copy)]
15pub enum Token<'source> {
16    Eof,
17    Begin(Env),
18    End(Env),
19    NewColumn,
20    NewLine,
21    NoNumber,
22    Tag,
23    Left,
24    Right,
25    Middle,
26    /// The opening square bracket has its own token because we need to
27    /// distinguish it from `\lbrack` after `\sqrt`.
28    SquareBracketOpen,
29    /// The closing square bracket has its own token because we often
30    /// need to search for it.
31    /// Additionally, it's useful to distinguish this from `\rbrack`.
32    SquareBracketClose,
33    GroupBegin,
34    GroupEnd,
35    Frac(Option<FracAttr>),
36    Genfrac,
37    Underscore,
38    Circumflex,
39    Binom(Option<FracAttr>),
40    Overset,
41    Underset,
42    OverUnderBrace(OrdLike, bool),
43    Sqrt,
44    Integral(Op),
45    Limits,
46    // For `\lim`, `\sup`, `\inf`, `\max`, `\min`, etc.
47    PseudoOperatorLimits(&'static str),
48    Space(Length),
49    CustomSpace,
50    NonBreakingSpace,
51    Whitespace,
52    Transform(MathVariant),
53    Big(Size, Option<Class>),
54    OverUnder(Rel, bool, Option<OpAttr>),
55    /// A token corresponding to LaTeX's "mathord" character class (class 0).
56    Ord(OrdLike),
57    /// A token corresponding to LaTeX's "mathop" character class (class 1).
58    Op(Op),
59    /// A token corresponding to LaTeX's "mathbin" character class (class 2).
60    BinaryOp(Bin),
61    /// A token corresponding to LaTeX's "mathrel" character class (class 3).
62    Relation(Rel),
63    /// A token corresponding to LaTeX's "mathopen" character class (class 4).
64    Open(OrdLike),
65    /// A token corresponding to LaTeX's "mathclose" character class (class 5).
66    Close(OrdLike),
67    /// A token corresponding to LaTeX's "mathpunct" character class (class 6).
68    Punctuation(Punct),
69    /// A token corresponding to LaTeX's "mathinner" character class (class I).
70    Inner(Op),
71    Prime,
72    OpGreaterThan,
73    OpLessThan,
74    OpAmpersand,
75    /// A token to force an operator to behave like a relation (mathrel).
76    /// This is, for example, needed for `:`, which in LaTeX is a relation,
77    /// but in MathML Core is a separator (punctuation).
78    ForceRelation(MathMLOperator),
79    /// A token to force an operator to behave like a closing symbol (mathclose).
80    /// This is, for example, needed for `!`, which in LaTeX is a closing symbol,
81    /// but in MathML Core is an ordinary operator.
82    ForceClose(MathMLOperator),
83    /// A token to force an operator to behave like a binary operator (mathbin).
84    /// This is, for example, needed for `×`, which in LaTeX is a binary operator,
85    /// but in MathML Core is a "big operator" (mathop).
86    ForceBinaryOp(MathMLOperator),
87    Letter(char, FromAscii),
88    UprightLetter(char), // letter for which we need `mathvariant="normal"`
89    Digit(char),
90    // For `\log`, `\exp`, `\sin`, `\cos`, `\tan`, etc.
91    PseudoOperator(&'static str),
92    Enclose(Notation),
93    OperatorName(bool),
94    Slashed,
95    Not,
96    Text(Option<HtmlTextStyle>),
97    Style(Style),
98    Color,
99    CustomCmdArg(u8),
100    CustomCmd(u8, &'source [Token<'static>]),
101    HardcodedMathML(&'static str),
102    TextModeAccent(char),
103    UnknownCommand(&'source str),
104    /// This token is intended to be used in predefined token streams.
105    /// It is equivalent to `{abc}`, but has a much more compact representation.
106    InternalStringLiteral(&'static str),
107}
108
109impl Token<'_> {
110    /// Returns the character class of this token.
111    pub(super) fn class(&self, in_sequence: bool, ignore_end_tokens: bool) -> Class {
112        if !in_sequence {
113            return Class::Default;
114        }
115        match self {
116            Token::Relation(_) | Token::ForceRelation(_) => Class::Relation,
117            Token::Punctuation(_) => Class::Punctuation,
118            Token::Open(_) | Token::Left | Token::SquareBracketOpen => Class::Open,
119            Token::Close(_)
120            | Token::SquareBracketClose
121            | Token::NewColumn
122            | Token::ForceClose(_) => Class::Close,
123            Token::BinaryOp(_) | Token::ForceBinaryOp(_) => Class::BinaryOp,
124            Token::Op(_) | Token::Integral(_) => Class::Operator,
125            Token::End(_) | Token::Right | Token::GroupEnd | Token::Eof if !ignore_end_tokens => {
126                Class::Close
127            }
128            Token::Inner(_) => Class::Inner,
129            // `\big` commands without the "l" or "r" really produce `Class::Default`.
130            Token::Big(_, Some(cls)) => *cls,
131            // TODO: This needs to skip spaces and other non-class tokens in the token sequence.
132            Token::CustomCmd(_, [head, ..]) => head.class(in_sequence, ignore_end_tokens),
133            _ => Class::Default,
134        }
135    }
136}
137
/// Two-state flag carried by `Token::Letter`; defaults to [`FromAscii::False`].
///
/// NOTE(review): presumably records whether the letter was read from an ASCII
/// character in the input - confirm against the code that produces
/// `Token::Letter`.
#[derive(Clone, Copy, Debug, Default)]
pub enum FromAscii {
    /// The default state.
    #[default]
    False,
    True,
}
144
/// A compact region: a start offset (field `0`) and a length (field `1`).
///
/// The length is stored as a `u16`, so a span covers at most `u16::MAX`
/// units past its start.
#[derive(Clone, Copy, Debug, Default)]
pub struct Span(pub usize, pub u16);

impl Span {
    /// Creates an empty span located at `at`.
    #[inline]
    pub const fn zero_width(at: usize) -> Self {
        Self(at, 0)
    }

    /// Returns the offset at which the span begins.
    #[inline]
    pub const fn start(&self) -> usize {
        let Self(begin, _) = *self;
        begin
    }

    /// Returns the exclusive end offset, i.e. start plus length.
    #[inline]
    pub const fn end(&self) -> usize {
        let Self(begin, len) = *self;
        begin + len as usize
    }
}

impl From<Span> for Range<usize> {
    /// Expands the span into the half-open range `start..end`.
    #[inline]
    fn from(span: Span) -> Self {
        Range {
            start: span.start(),
            end: span.end(),
        }
    }
}

impl TryFrom<Range<usize>> for Span {
    type Error = ();

    /// Converts a range into a span.
    ///
    /// Fails with `Err(())` when the range ends before it starts or when its
    /// length does not fit into a `u16`.
    #[inline]
    fn try_from(range: Range<usize>) -> Result<Self, ()> {
        let Range { start, end } = range;
        // Reject reversed ranges before subtracting.
        if end < start {
            return Err(());
        }
        match u16::try_from(end - start) {
            Ok(len) => Ok(Span(start, len)),
            Err(_) => Err(()),
        }
    }
}
181
182/// A token together with its span in the input string.
183#[derive(Debug, Clone, Copy)]
184pub struct TokSpan<'config>(Token<'config>, usize, u16);
185
186impl<'config> TokSpan<'config> {
187    #[inline]
188    pub const fn new(token: Token<'config>, span: Span) -> Self {
189        TokSpan(token, span.0, span.1)
190    }
191
192    #[inline]
193    pub fn token(&self) -> &Token<'config> {
194        &self.0
195    }
196
197    #[inline]
198    pub fn into_token(self) -> Token<'config> {
199        self.0
200    }
201
202    #[inline]
203    pub fn into_parts(self) -> (Token<'config>, Span) {
204        (self.0, Span(self.1, self.2))
205    }
206
207    // #[inline]
208    // pub fn token_mut(&mut self) -> &mut Token<'config> {
209    //     &mut self.0
210    // }
211
212    #[inline]
213    pub fn span(&self) -> Span {
214        Span(self.1, self.2)
215    }
216
217    #[inline]
218    pub(super) fn class(&self, in_sequence: bool, ignore_end_tokens: bool) -> Class {
219        self.0.class(in_sequence, ignore_end_tokens)
220    }
221}
222
223impl<'config> From<Token<'config>> for TokSpan<'config> {
224    #[inline]
225    fn from(token: Token<'config>) -> Self {
226        TokSpan(token, 0, 0)
227    }
228}
229
230#[derive(Debug, Clone, Copy, PartialEq, IntoStaticStr)]
231pub enum EndToken {
232    #[strum(serialize = r"\end{...}")]
233    End,
234    #[strum(serialize = r"}")]
235    GroupClose,
236    #[strum(serialize = r"\right")]
237    Right,
238    #[strum(serialize = r"]")]
239    SquareBracketClose,
240    #[strum(serialize = r"end of input")]
241    Eof,
242}
243
244impl EndToken {
245    pub fn matches(&self, other: &Token) -> bool {
246        matches!(
247            (self, other),
248            (EndToken::End, Token::End(_))
249                | (EndToken::GroupClose, Token::GroupEnd)
250                | (EndToken::Right, Token::Right)
251                | (EndToken::SquareBracketClose, Token::SquareBracketClose)
252                | (EndToken::Eof, Token::Eof)
253        )
254    }
255}
256
#[cfg(test)]
mod tests {
    use std::mem::size_of;

    use super::*;

    /// Size of one machine word in bytes.
    const WORD: usize = size_of::<usize>();

    /// Checks that the token types stay within their size budgets.
    #[test]
    fn test_struct_sizes() {
        // `Token` and a `Result` wrapping it share the same three-word budget.
        let token_budget = 3 * WORD;
        let tok_span_budget = 5 * WORD;

        assert!(size_of::<Token>() <= token_budget, "size of Token");
        assert!(size_of::<TokSpan>() <= tok_span_budget, "size of TokSpan");
        assert!(
            size_of::<Result<Token, &'static i32>>() <= token_budget,
            "size of Result<Token, pointer>"
        );
    }
}