Skip to main content

oak_tex/parser/
mod.rs

1/// Element types for the TeX parser.
2pub mod element_type;
3
4use crate::{
5    language::TexLanguage,
6    lexer::{TexLexer, token_type::TexTokenType},
7    parser::element_type::TexElementType,
8};
9use oak_core::{
10    GreenNode, OakError,
11    parser::{ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer},
12    source::{Source, TextEdit},
13};
14
15/// TeX parser state.
16pub(crate) type State<'a, S> = ParserState<'a, TexLanguage, S>;
17
18/// A parser for TeX source files.
19pub struct TexParser<'config> {
20    /// The language configuration.
21    pub(crate) config: &'config TexLanguage,
22}
23
24impl<'config> TexParser<'config> {
25    /// Creates a new TeX parser.
26    pub fn new(config: &'config TexLanguage) -> Self {
27        Self { config }
28    }
29
30    /// Parses a single TeX item (command, environment, group, etc.).
31    fn parse_item<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
32        match state.peek_kind() {
33            Some(TexTokenType::BeginKeyword) => self.parse_environment(state),
34            Some(kind) if self.is_command_like(kind) => self.parse_command(state),
35            Some(TexTokenType::LeftBrace) => self.parse_group(state),
36            Some(TexTokenType::Dollar) | Some(TexTokenType::DoubleDollar) => self.parse_math(state),
37            Some(TexTokenType::Caret) => self.parse_superscript(state),
38            Some(TexTokenType::Underscore) => self.parse_subscript(state),
39            _ => {
40                state.bump();
41                Ok(())
42            }
43        }
44    }
45
46    /// Returns true if the token kind is considered command-like.
47    fn is_command_like(&self, kind: TexTokenType) -> bool {
48        match kind {
49            TexTokenType::Backslash |
50            TexTokenType::Command |
51            // TexTokenType::BeginKeyword | // Handled separately
52            TexTokenType::EndKeyword |
53            TexTokenType::DocumentclassKeyword |
54            TexTokenType::UsepackageKeyword |
55            TexTokenType::SectionKeyword |
56            TexTokenType::SubsectionKeyword |
57            TexTokenType::SubsubsectionKeyword |
58            TexTokenType::ChapterKeyword |
59            TexTokenType::PartKeyword |
60            TexTokenType::TitleKeyword |
61            TexTokenType::AuthorKeyword |
62            TexTokenType::DateKeyword |
63            TexTokenType::MaketitleKeyword |
64            TexTokenType::TableofcontentsKeyword |
65            TexTokenType::ItemKeyword |
66            TexTokenType::LabelKeyword |
67            TexTokenType::RefKeyword |
68            TexTokenType::CiteKeyword |
69            TexTokenType::IncludegraphicsKeyword |
70            TexTokenType::TextbfKeyword |
71            TexTokenType::TextitKeyword |
72            TexTokenType::EmphKeyword |
73            TexTokenType::Frac |
74            TexTokenType::Sqrt |
75            TexTokenType::Sum |
76            TexTokenType::Int |
77            TexTokenType::Lim |
78            TexTokenType::Alpha |
79            TexTokenType::Beta |
80            TexTokenType::Gamma |
81            TexTokenType::Delta |
82            TexTokenType::Epsilon => true,
83            _ => false,
84        }
85    }
86
87    /// Parses a TeX environment (e.g., `\begin{...} ... \end{...}`).
88    fn parse_environment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
89        let checkpoint = state.checkpoint();
90
91        // Parse \begin{name}
92        let begin_checkpoint = state.checkpoint();
93        state.expect(TexTokenType::BeginKeyword)?;
94
95        let mut env_name = String::new();
96        if state.at(TexTokenType::LeftBrace) {
97            state.bump(); // {
98            if let Some(text) = state.peek_text() {
99                env_name = text.to_string();
100            }
101            while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
102                state.bump();
103            }
104            state.expect(TexTokenType::RightBrace)?;
105        }
106        state.finish_at(begin_checkpoint, TexElementType::BeginEnvironment);
107
108        let env_type = match env_name.as_str() {
109            "itemize" | "enumerate" | "description" => TexElementType::List,
110            "tabular" | "array" => TexElementType::Table,
111            "figure" => TexElementType::Figure,
112            "document" => TexElementType::Document,
113            "equation" | "align" | "gather" | "multline" | "eqnarray" => TexElementType::DisplayMath,
114            _ => TexElementType::Environment,
115        };
116
117        // Parse content until \end{name}
118        while state.not_at_end() && !state.at(TexTokenType::EndKeyword) {
119            match env_type {
120                TexElementType::List if state.at(TexTokenType::ItemKeyword) => {
121                    let item_checkpoint = state.checkpoint();
122                    state.bump(); // \item
123                    while state.not_at_end() && !state.at(TexTokenType::ItemKeyword) && !state.at(TexTokenType::EndKeyword) {
124                        self.parse_item(state)?;
125                    }
126                    state.finish_at(item_checkpoint, TexElementType::Item);
127                }
128                TexElementType::Table if state.at(TexTokenType::Ampersand) || state.at(TexTokenType::Command) => {
129                    // Very basic table row/cell handling
130                    if state.at(TexTokenType::Ampersand) {
131                        state.bump();
132                    }
133                    else {
134                        self.parse_item(state)?;
135                    }
136                }
137                _ => self.parse_item(state)?,
138            }
139        }
140
141        // Parse \end{name}
142        if state.at(TexTokenType::EndKeyword) {
143            let end_checkpoint = state.checkpoint();
144            state.bump();
145            if state.at(TexTokenType::LeftBrace) {
146                self.parse_mandatory_argument(state)?
147            }
148            state.finish_at(end_checkpoint, TexElementType::EndEnvironment);
149        }
150
151        state.finish_at(checkpoint, env_type);
152        Ok(())
153    }
154
155    fn parse_superscript<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
156        let checkpoint = state.checkpoint();
157        state.expect(TexTokenType::Caret)?;
158
159        if state.at(TexTokenType::LeftBrace) {
160            self.parse_group(state)?;
161        }
162        else {
163            state.bump();
164        }
165
166        state.finish_at(checkpoint, TexElementType::Superscript);
167        Ok(())
168    }
169
170    fn parse_subscript<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
171        let checkpoint = state.checkpoint();
172        state.expect(TexTokenType::Underscore)?;
173
174        if state.at(TexTokenType::LeftBrace) {
175            self.parse_group(state)?;
176        }
177        else {
178            state.bump();
179        }
180
181        state.finish_at(checkpoint, TexElementType::Subscript);
182        Ok(())
183    }
184
185    fn parse_command<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
186        let checkpoint = state.checkpoint();
187        let kind = state.peek_kind().unwrap_or(TexTokenType::Command);
188
189        let should_parse_args = state.peek_text().map_or(true, |name| {
190            let name_str: &str = &name;
191            let name_str = name_str.strip_prefix('\\').unwrap_or(name_str);
192            name_str != "left" && name_str != "right"
193        });
194
195        state.bump(); // consume \ or command keyword
196
197        let node_kind = if should_parse_args {
198            while state.at(TexTokenType::LeftBracket) || state.at(TexTokenType::LeftBrace) {
199                if state.at(TexTokenType::LeftBracket) {
200                    self.parse_optional_argument(state)?;
201                }
202                else {
203                    self.parse_mandatory_argument(state)?;
204                }
205            }
206            match kind {
207                TexTokenType::Frac | TexTokenType::Sqrt | TexTokenType::Sum | TexTokenType::Int | TexTokenType::Lim | TexTokenType::Alpha | TexTokenType::Beta | TexTokenType::Gamma | TexTokenType::Delta | TexTokenType::Epsilon => kind,
208                _ => TexTokenType::Command,
209            }
210        }
211        else {
212            kind
213        };
214
215        state.finish_at(checkpoint, node_kind.into());
216        Ok(())
217    }
218
219    fn parse_group<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
220        let checkpoint = state.checkpoint();
221        state.expect(TexTokenType::LeftBrace)?;
222        while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
223            self.parse_item(state)?;
224        }
225        state.expect(TexTokenType::RightBrace)?;
226        state.finish_at(checkpoint, TexElementType::Group);
227        Ok(())
228    }
229
230    fn parse_math<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
231        let checkpoint = state.checkpoint();
232        let kind = state.peek_kind().unwrap();
233        state.bump();
234        while state.not_at_end() && !state.at(kind) {
235            self.parse_item(state)?;
236        }
237        state.expect(kind)?;
238        let element_kind = if kind == TexTokenType::DoubleDollar { TexElementType::DisplayMath } else { TexElementType::InlineMath };
239        state.finish_at(checkpoint, element_kind);
240        Ok(())
241    }
242
243    fn parse_optional_argument<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
244        let checkpoint = state.checkpoint();
245        state.expect(TexTokenType::LeftBracket)?;
246        while state.not_at_end() && !state.at(TexTokenType::RightBracket) {
247            self.parse_item(state)?;
248        }
249        state.expect(TexTokenType::RightBracket)?;
250        state.finish_at(checkpoint, TexElementType::OptionalArgument);
251        Ok(())
252    }
253
254    fn parse_mandatory_argument<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
255        let checkpoint = state.checkpoint();
256        state.expect(TexTokenType::LeftBrace)?;
257        while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
258            self.parse_item(state)?;
259        }
260        state.expect(TexTokenType::RightBrace)?;
261        state.finish_at(checkpoint, TexElementType::MandatoryArgument);
262        Ok(())
263    }
264}
265
266impl<'config> Parser<TexLanguage> for TexParser<'config> {
267    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<TexLanguage>) -> ParseOutput<'a, TexLanguage> {
268        let lexer = TexLexer::new(self.config);
269        parse_with_lexer(&lexer, text, edits, cache, |state| {
270            let checkpoint = state.checkpoint();
271
272            while state.not_at_end() {
273                self.parse_item(state)?
274            }
275
276            Ok(state.finish_at(checkpoint, TexElementType::Root))
277        })
278    }
279}