mitex_lexer/lib.rs
//! Given source strings, the MiTeX lexer provides a sequence of tokens.
//!
//! The core of the lexer is [`Lexer<'a, S>`], which receives a string
//! `&'a str` and a [`TokenStream`] implementation `S`, and then provides
//! public methods to peek and bump the token stream.
//!
//! It has two main lexer implementations:
//! - [`Lexer<()>`]: provides plain tokens
//!   - See [`TokenStream`] for implementation
//! - [`Lexer<MacroEngine>`]: provides tokens with macro expansion
//!   - See [`MacroEngine`] for implementation
12
13mod macro_engine;
14pub mod snapshot_map;
15mod stream;
16mod token;
17
18pub use macro_engine::MacroEngine;
19pub use token::{BraceKind, CommandName, IfCommandName, Token};
20
21use logos::Logos;
22use mitex_spec::CommandSpec;
23
24use macro_engine::Macro;
25use stream::{LexCache, StreamContext};
26
27/// MiTeX's token representation
28/// A token is a pair of a token kind and its text
29type Tok<'a> = (Token, &'a str);
30
31/// A trait for bumping the token stream
32/// Its bumping is less frequently called than token peeking
33pub trait TokenStream<'a>: MacroifyStream<'a> {
34 /// Bump the token stream with at least one token if possible
35 ///
36 /// By default, it fills the peek cache with a page of tokens at the same
37 /// time
38 fn bump(&mut self, ctx: &mut StreamContext<'a>) {
39 ctx.peek_outer.bump(std::iter::from_fn(|| {
40 StreamContext::lex_one(&mut ctx.inner)
41 }));
42 }
43}
44
45/// Trait for querying macro state of a stream
46pub trait MacroifyStream<'a> {
47 /// Get a macro by name (if meeted in the stream)
48 fn get_macro(&self, _name: &str) -> Option<Macro<'a>> {
49 None
50 }
51}
52
53/// The default implementation of [`TokenStream`]
54///
55/// See [`LexCache<'a>`] for implementation
56impl TokenStream<'_> for () {}
57
58/// The default implementation of [`MacroifyStream`]
59impl MacroifyStream<'_> for () {}
60
61/// Small memory-efficient lexer for TeX
62///
63/// It gets improved performance on x86_64 but not wasm through
64#[derive(Debug, Clone)]
65pub struct Lexer<'a, S: TokenStream<'a> = ()> {
66 /// A stream context shared with the bumper
67 ctx: StreamContext<'a>,
68 /// Implementations to bump the token stream into [`Self::ctx`]
69 bumper: S,
70}
71
72impl<'a, S: TokenStream<'a>> Lexer<'a, S> {
73 /// Create a new lexer on a main input source
74 ///
75 /// Note that since we have a bumper, the returning string is not always
76 /// sliced from the input
77 pub fn new(input: &'a str, spec: CommandSpec) -> Self
78 where
79 S: Default,
80 {
81 Self::new_with_bumper(input, spec, S::default())
82 }
83
84 /// Create a new lexer on a main input source with a bumper
85 ///
86 /// Note that since we have a bumper, the returning string is not always
87 /// sliced from the input
88 pub fn new_with_bumper(input: &'a str, spec: CommandSpec, bumper: S) -> Self {
89 let inner = Token::lexer_with_extras(input, (spec, 0..0));
90 let mut n = Self {
91 ctx: StreamContext {
92 inner,
93 peek_outer: LexCache::default(),
94 peek_inner: LexCache::default(),
95 },
96 bumper,
97 };
98 n.next();
99
100 n
101 }
102
103 /// Private method to advance the lexer by one token
104 #[inline]
105 fn next(&mut self) {
106 if let Some(peeked) = self.ctx.peek_outer.buf.pop() {
107 self.ctx.peek_outer.peeked = Some(peeked);
108 return;
109 }
110
111 // it is not likely to be inlined
112 self.bumper.bump(&mut self.ctx);
113 }
114
115 /// Peek the next token
116 pub fn peek(&self) -> Option<Token> {
117 self.ctx.peek_outer.peeked.map(|(kind, _)| kind)
118 }
119
120 /// Peek the next token's text
121 pub fn peek_text(&self) -> Option<&'a str> {
122 self.ctx.peek_outer.peeked.map(|(_, text)| text)
123 }
124
125 /// Peek the next token's first char
126 pub fn peek_char(&self) -> Option<char> {
127 self.peek_text().map(str::chars).and_then(|mut e| e.next())
128 }
129
130 /// Update the text part of the peeked token
131 pub fn consume_utf8_bytes(&mut self, cnt: usize) {
132 let Some(peek_mut) = &mut self.ctx.peek_outer.peeked else {
133 return;
134 };
135 if peek_mut.1.len() <= cnt {
136 self.next();
137 } else {
138 peek_mut.1 = &peek_mut.1[cnt..];
139 }
140 }
141
142 /// Update the peeked token and return the old one
143 pub fn eat(&mut self) -> Option<(Token, &'a str)> {
144 let peeked = self.ctx.peek_outer.peeked.take()?;
145 self.next();
146 Some(peeked)
147 }
148
149 /// Find a **currently** defined macro by name
150 pub fn get_macro(&mut self, name: &str) -> Option<Macro<'a>> {
151 self.bumper.get_macro(name)
152 }
153}