mitex_lexer/
lib.rs

//! Given source strings, MiTeX Lexer provides a sequence of tokens
//!
//! The core of the lexer is [`Lexer<'a, S>`], which receives a string `&'a str`
//! and a bumper `S` implementing the [`TokenStream`] trait, and then provides
//! public methods to peek and bump the token stream.
//!
//! It has two main lexer implementations:
//! - [`Lexer<()>`]: provides plain tokens
//!   - See [`TokenStream`] for implementation
//! - [`Lexer<MacroEngine>`]: provides tokens with macro expansion
//!   - See [`MacroEngine`] for implementation
12
13mod macro_engine;
14pub mod snapshot_map;
15mod stream;
16mod token;
17
18pub use macro_engine::MacroEngine;
19pub use token::{BraceKind, CommandName, IfCommandName, Token};
20
21use logos::Logos;
22use mitex_spec::CommandSpec;
23
24use macro_engine::Macro;
25use stream::{LexCache, StreamContext};
26
/// MiTeX's token representation
///
/// A token is a pair of a token kind and its text; the text is borrowed for
/// the lifetime `'a`
type Tok<'a> = (Token, &'a str);
30
31/// A trait for bumping the token stream
32/// Its bumping is less frequently called than token peeking
33pub trait TokenStream<'a>: MacroifyStream<'a> {
34    /// Bump the token stream with at least one token if possible
35    ///
36    /// By default, it fills the peek cache with a page of tokens at the same
37    /// time
38    fn bump(&mut self, ctx: &mut StreamContext<'a>) {
39        ctx.peek_outer.bump(std::iter::from_fn(|| {
40            StreamContext::lex_one(&mut ctx.inner)
41        }));
42    }
43}
44
/// Trait for querying macro state of a stream
pub trait MacroifyStream<'a> {
    /// Get a macro by name (if it has been encountered in the stream)
    ///
    /// The default implementation ignores the name and always returns `None`
    fn get_macro(&self, _name: &str) -> Option<Macro<'a>> {
        None
    }
}
52
/// The default implementation of [`TokenStream`]
///
/// The impl body is empty, so the trait's default `bump` is used unchanged.
///
/// See [`LexCache<'a>`] for implementation
impl TokenStream<'_> for () {}
57
/// The default implementation of [`MacroifyStream`]
///
/// The impl body is empty, so the trait's default `get_macro` is used
/// unchanged.
impl MacroifyStream<'_> for () {}
60
/// Small memory-efficient lexer for TeX
///
/// The type parameter `S` is the bumper used to refill the token stream; it
/// defaults to `()`.
///
/// NOTE(review): the original remark here read "It gets improved performance
/// on x86_64 but not wasm through" and stopped mid-sentence. The mechanism it
/// meant to name is not recorded here; confirm and complete it.
#[derive(Debug, Clone)]
pub struct Lexer<'a, S: TokenStream<'a> = ()> {
    /// A stream context shared with the bumper
    ctx: StreamContext<'a>,
    /// Implementations to bump the token stream into [`Self::ctx`]
    bumper: S,
}
71
72impl<'a, S: TokenStream<'a>> Lexer<'a, S> {
73    /// Create a new lexer on a main input source
74    ///
75    /// Note that since we have a bumper, the returning string is not always
76    /// sliced from the input
77    pub fn new(input: &'a str, spec: CommandSpec) -> Self
78    where
79        S: Default,
80    {
81        Self::new_with_bumper(input, spec, S::default())
82    }
83
84    /// Create a new lexer on a main input source with a bumper
85    ///
86    /// Note that since we have a bumper, the returning string is not always
87    /// sliced from the input
88    pub fn new_with_bumper(input: &'a str, spec: CommandSpec, bumper: S) -> Self {
89        let inner = Token::lexer_with_extras(input, (spec, 0..0));
90        let mut n = Self {
91            ctx: StreamContext {
92                inner,
93                peek_outer: LexCache::default(),
94                peek_inner: LexCache::default(),
95            },
96            bumper,
97        };
98        n.next();
99
100        n
101    }
102
103    /// Private method to advance the lexer by one token
104    #[inline]
105    fn next(&mut self) {
106        if let Some(peeked) = self.ctx.peek_outer.buf.pop() {
107            self.ctx.peek_outer.peeked = Some(peeked);
108            return;
109        }
110
111        // it is not likely to be inlined
112        self.bumper.bump(&mut self.ctx);
113    }
114
115    /// Peek the next token
116    pub fn peek(&self) -> Option<Token> {
117        self.ctx.peek_outer.peeked.map(|(kind, _)| kind)
118    }
119
120    /// Peek the next token's text
121    pub fn peek_text(&self) -> Option<&'a str> {
122        self.ctx.peek_outer.peeked.map(|(_, text)| text)
123    }
124
125    /// Peek the next token's first char
126    pub fn peek_char(&self) -> Option<char> {
127        self.peek_text().map(str::chars).and_then(|mut e| e.next())
128    }
129
130    /// Update the text part of the peeked token
131    pub fn consume_utf8_bytes(&mut self, cnt: usize) {
132        let Some(peek_mut) = &mut self.ctx.peek_outer.peeked else {
133            return;
134        };
135        if peek_mut.1.len() <= cnt {
136            self.next();
137        } else {
138            peek_mut.1 = &peek_mut.1[cnt..];
139        }
140    }
141
142    /// Update the peeked token and return the old one
143    pub fn eat(&mut self) -> Option<(Token, &'a str)> {
144        let peeked = self.ctx.peek_outer.peeked.take()?;
145        self.next();
146        Some(peeked)
147    }
148
149    /// Find a **currently** defined macro by name
150    pub fn get_macro(&mut self, name: &str) -> Option<Macro<'a>> {
151        self.bumper.get_macro(name)
152    }
153}