1pub mod element_type;
3
4use crate::{
5 language::TexLanguage,
6 lexer::{TexLexer, token_type::TexTokenType},
7 parser::element_type::TexElementType,
8};
9use oak_core::{
10 GreenNode, OakError,
11 parser::{ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer},
12 source::{Source, TextEdit},
13};
14
15pub(crate) type State<'a, S> = ParserState<'a, TexLanguage, S>;
17
18pub struct TexParser<'config> {
20 pub(crate) config: &'config TexLanguage,
22}
23
24impl<'config> TexParser<'config> {
25 pub fn new(config: &'config TexLanguage) -> Self {
27 Self { config }
28 }
29
30 fn parse_item<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
32 match state.peek_kind() {
33 Some(TexTokenType::BeginKeyword) => self.parse_environment(state),
34 Some(kind) if self.is_command_like(kind) => self.parse_command(state),
35 Some(TexTokenType::LeftBrace) => self.parse_group(state),
36 Some(TexTokenType::Dollar) | Some(TexTokenType::DoubleDollar) => self.parse_math(state),
37 Some(TexTokenType::Caret) => self.parse_superscript(state),
38 Some(TexTokenType::Underscore) => self.parse_subscript(state),
39 _ => {
40 state.bump();
41 Ok(())
42 }
43 }
44 }
45
46 fn is_command_like(&self, kind: TexTokenType) -> bool {
48 match kind {
49 TexTokenType::Backslash |
50 TexTokenType::Command |
51 TexTokenType::EndKeyword |
53 TexTokenType::DocumentclassKeyword |
54 TexTokenType::UsepackageKeyword |
55 TexTokenType::SectionKeyword |
56 TexTokenType::SubsectionKeyword |
57 TexTokenType::SubsubsectionKeyword |
58 TexTokenType::ChapterKeyword |
59 TexTokenType::PartKeyword |
60 TexTokenType::TitleKeyword |
61 TexTokenType::AuthorKeyword |
62 TexTokenType::DateKeyword |
63 TexTokenType::MaketitleKeyword |
64 TexTokenType::TableofcontentsKeyword |
65 TexTokenType::ItemKeyword |
66 TexTokenType::LabelKeyword |
67 TexTokenType::RefKeyword |
68 TexTokenType::CiteKeyword |
69 TexTokenType::IncludegraphicsKeyword |
70 TexTokenType::TextbfKeyword |
71 TexTokenType::TextitKeyword |
72 TexTokenType::EmphKeyword |
73 TexTokenType::Frac |
74 TexTokenType::Sqrt |
75 TexTokenType::Sum |
76 TexTokenType::Int |
77 TexTokenType::Lim |
78 TexTokenType::Alpha |
79 TexTokenType::Beta |
80 TexTokenType::Gamma |
81 TexTokenType::Delta |
82 TexTokenType::Epsilon => true,
83 _ => false,
84 }
85 }
86
87 fn parse_environment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
89 let checkpoint = state.checkpoint();
90
91 let begin_checkpoint = state.checkpoint();
93 state.expect(TexTokenType::BeginKeyword)?;
94
95 let mut env_name = String::new();
96 if state.at(TexTokenType::LeftBrace) {
97 state.bump(); if let Some(text) = state.peek_text() {
99 env_name = text.to_string();
100 }
101 while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
102 state.bump();
103 }
104 state.expect(TexTokenType::RightBrace)?;
105 }
106 state.finish_at(begin_checkpoint, TexElementType::BeginEnvironment);
107
108 let env_type = match env_name.as_str() {
109 "itemize" | "enumerate" | "description" => TexElementType::List,
110 "tabular" | "array" => TexElementType::Table,
111 "figure" => TexElementType::Figure,
112 "document" => TexElementType::Document,
113 "equation" | "align" | "gather" | "multline" | "eqnarray" => TexElementType::DisplayMath,
114 _ => TexElementType::Environment,
115 };
116
117 while state.not_at_end() && !state.at(TexTokenType::EndKeyword) {
119 match env_type {
120 TexElementType::List if state.at(TexTokenType::ItemKeyword) => {
121 let item_checkpoint = state.checkpoint();
122 state.bump(); while state.not_at_end() && !state.at(TexTokenType::ItemKeyword) && !state.at(TexTokenType::EndKeyword) {
124 self.parse_item(state)?;
125 }
126 state.finish_at(item_checkpoint, TexElementType::Item);
127 }
128 TexElementType::Table if state.at(TexTokenType::Ampersand) || state.at(TexTokenType::Command) => {
129 if state.at(TexTokenType::Ampersand) {
131 state.bump();
132 }
133 else {
134 self.parse_item(state)?;
135 }
136 }
137 _ => self.parse_item(state)?,
138 }
139 }
140
141 if state.at(TexTokenType::EndKeyword) {
143 let end_checkpoint = state.checkpoint();
144 state.bump();
145 if state.at(TexTokenType::LeftBrace) {
146 self.parse_mandatory_argument(state)?
147 }
148 state.finish_at(end_checkpoint, TexElementType::EndEnvironment);
149 }
150
151 state.finish_at(checkpoint, env_type);
152 Ok(())
153 }
154
155 fn parse_superscript<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
156 let checkpoint = state.checkpoint();
157 state.expect(TexTokenType::Caret)?;
158
159 if state.at(TexTokenType::LeftBrace) {
160 self.parse_group(state)?;
161 }
162 else {
163 state.bump();
164 }
165
166 state.finish_at(checkpoint, TexElementType::Superscript);
167 Ok(())
168 }
169
170 fn parse_subscript<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
171 let checkpoint = state.checkpoint();
172 state.expect(TexTokenType::Underscore)?;
173
174 if state.at(TexTokenType::LeftBrace) {
175 self.parse_group(state)?;
176 }
177 else {
178 state.bump();
179 }
180
181 state.finish_at(checkpoint, TexElementType::Subscript);
182 Ok(())
183 }
184
185 fn parse_command<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
186 let checkpoint = state.checkpoint();
187 let kind = state.peek_kind().unwrap_or(TexTokenType::Command);
188
189 let should_parse_args = state.peek_text().map_or(true, |name| {
190 let name_str: &str = &name;
191 let name_str = name_str.strip_prefix('\\').unwrap_or(name_str);
192 name_str != "left" && name_str != "right"
193 });
194
195 state.bump(); let node_kind = if should_parse_args {
198 while state.at(TexTokenType::LeftBracket) || state.at(TexTokenType::LeftBrace) {
199 if state.at(TexTokenType::LeftBracket) {
200 self.parse_optional_argument(state)?;
201 }
202 else {
203 self.parse_mandatory_argument(state)?;
204 }
205 }
206 match kind {
207 TexTokenType::Frac | TexTokenType::Sqrt | TexTokenType::Sum | TexTokenType::Int | TexTokenType::Lim | TexTokenType::Alpha | TexTokenType::Beta | TexTokenType::Gamma | TexTokenType::Delta | TexTokenType::Epsilon => kind,
208 _ => TexTokenType::Command,
209 }
210 }
211 else {
212 kind
213 };
214
215 state.finish_at(checkpoint, node_kind.into());
216 Ok(())
217 }
218
219 fn parse_group<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
220 let checkpoint = state.checkpoint();
221 state.expect(TexTokenType::LeftBrace)?;
222 while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
223 self.parse_item(state)?;
224 }
225 state.expect(TexTokenType::RightBrace)?;
226 state.finish_at(checkpoint, TexElementType::Group);
227 Ok(())
228 }
229
230 fn parse_math<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
231 let checkpoint = state.checkpoint();
232 let kind = state.peek_kind().unwrap();
233 state.bump();
234 while state.not_at_end() && !state.at(kind) {
235 self.parse_item(state)?;
236 }
237 state.expect(kind)?;
238 let element_kind = if kind == TexTokenType::DoubleDollar { TexElementType::DisplayMath } else { TexElementType::InlineMath };
239 state.finish_at(checkpoint, element_kind);
240 Ok(())
241 }
242
243 fn parse_optional_argument<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
244 let checkpoint = state.checkpoint();
245 state.expect(TexTokenType::LeftBracket)?;
246 while state.not_at_end() && !state.at(TexTokenType::RightBracket) {
247 self.parse_item(state)?;
248 }
249 state.expect(TexTokenType::RightBracket)?;
250 state.finish_at(checkpoint, TexElementType::OptionalArgument);
251 Ok(())
252 }
253
254 fn parse_mandatory_argument<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
255 let checkpoint = state.checkpoint();
256 state.expect(TexTokenType::LeftBrace)?;
257 while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
258 self.parse_item(state)?;
259 }
260 state.expect(TexTokenType::RightBrace)?;
261 state.finish_at(checkpoint, TexElementType::MandatoryArgument);
262 Ok(())
263 }
264}
265
266impl<'config> Parser<TexLanguage> for TexParser<'config> {
267 fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<TexLanguage>) -> ParseOutput<'a, TexLanguage> {
268 let lexer = TexLexer::new(self.config);
269 parse_with_lexer(&lexer, text, edits, cache, |state| {
270 let checkpoint = state.checkpoint();
271
272 while state.not_at_end() {
273 self.parse_item(state)?
274 }
275
276 Ok(state.finish_at(checkpoint, TexElementType::Root))
277 })
278 }
279}