Skip to main content

oak_tex/builder/
mod.rs

1use crate::{
2    ast::*,
3    language::TexLanguage,
4    lexer::token_type::TexTokenType,
5    parser::{TexParser, element_type::TexElementType},
6};
7use oak_core::{Builder, BuilderCache, GreenNode, OakDiagnostics, OakError, Parser, RedNode, RedTree, TextEdit, source::Source};
8
9/// TeX 语言的 AST 构建器
10#[derive(Clone)]
11pub struct TexBuilder<'config> {
12    /// 语言配置
13    config: &'config TexLanguage,
14}
15
16impl<'config> TexBuilder<'config> {
17    /// 创建新的 TeX 构建器
18    pub fn new(config: &'config TexLanguage) -> Self {
19        Self { config }
20    }
21}
22
23impl<'config> Builder<TexLanguage> for TexBuilder<'config> {
24    fn build<'a, S: Source + ?Sized>(&self, source: &'a S, edits: &[TextEdit], cache: &'a mut impl BuilderCache<TexLanguage>) -> oak_core::builder::BuildOutput<TexLanguage> {
25        let parser = TexParser::new(self.config);
26
27        let parse_result = parser.parse(source, edits, cache);
28
29        match parse_result.result {
30            Ok(green_tree) => match self.build_root_internal(green_tree, source) {
31                Ok(ast_root) => OakDiagnostics { result: Ok(ast_root), diagnostics: parse_result.diagnostics },
32                Err(build_error) => {
33                    let mut diagnostics = parse_result.diagnostics;
34                    diagnostics.push(build_error.clone());
35                    OakDiagnostics { result: Err(build_error), diagnostics }
36                }
37            },
38            Err(parse_error) => OakDiagnostics { result: Err(parse_error), diagnostics: parse_result.diagnostics },
39        }
40    }
41}
42
43impl<'config> TexBuilder<'config> {
44    /// 构建根节点
45    fn build_root_internal<S: Source + ?Sized>(&self, green_tree: &GreenNode<TexLanguage>, source: &S) -> Result<TexRoot, OakError> {
46        let red_root = RedNode::new(green_tree, 0);
47        self.build_content(red_root, source)
48    }
49
50    fn build_content<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexRoot, OakError> {
51        let mut items = Vec::new();
52        let children: Vec<_> = node.children().collect();
53
54        if children.is_empty() {
55            return Ok(TexRoot { span: node.span().into(), items });
56        }
57
58        let first_kind: TexTokenType = children.first().unwrap().kind();
59        let last_kind: TexTokenType = children.last().unwrap().kind();
60
61        let start = if is_delimiter(first_kind) { 1 } else { 0 };
62        let end = if children.len() > start && is_delimiter(last_kind) { children.len() - 1 } else { children.len() };
63
64        for i in start..end {
65            if let Some(item) = self.build_item(children[i], source)? {
66                items.push(item)
67            }
68        }
69
70        Ok(TexRoot { span: node.span().into(), items })
71    }
72
73    fn build_item<S: Source + ?Sized>(&self, tree: RedTree<TexLanguage>, source: &S) -> Result<Option<TexItem>, OakError> {
74        let kind: TexTokenType = tree.kind();
75        match kind {
76            TexTokenType::Command
77            | TexTokenType::BeginKeyword
78            | TexTokenType::EndKeyword
79            | TexTokenType::Frac
80            | TexTokenType::Sqrt
81            | TexTokenType::Sum
82            | TexTokenType::Int
83            | TexTokenType::Lim
84            | TexTokenType::SectionKeyword
85            | TexTokenType::SubsectionKeyword
86            | TexTokenType::SubsubsectionKeyword
87            | TexTokenType::ChapterKeyword
88            | TexTokenType::PartKeyword
89            | TexTokenType::TitleKeyword
90            | TexTokenType::AuthorKeyword
91            | TexTokenType::DateKeyword
92            | TexTokenType::MaketitleKeyword
93            | TexTokenType::TableofcontentsKeyword
94            | TexTokenType::ItemKeyword
95            | TexTokenType::LabelKeyword
96            | TexTokenType::RefKeyword
97            | TexTokenType::CiteKeyword
98            | TexTokenType::IncludegraphicsKeyword
99            | TexTokenType::TextbfKeyword
100            | TexTokenType::TextitKeyword
101            | TexTokenType::EmphKeyword
102            | TexTokenType::Alpha
103            | TexTokenType::Beta
104            | TexTokenType::Gamma
105            | TexTokenType::Delta
106            | TexTokenType::Epsilon
107            | TexTokenType::Zeta
108            | TexTokenType::Eta
109            | TexTokenType::Theta
110            | TexTokenType::Iota
111            | TexTokenType::Kappa
112            | TexTokenType::Lambda
113            | TexTokenType::Mu
114            | TexTokenType::Nu
115            | TexTokenType::Xi
116            | TexTokenType::Omicron
117            | TexTokenType::Pi
118            | TexTokenType::Rho
119            | TexTokenType::Sigma
120            | TexTokenType::Tau
121            | TexTokenType::Upsilon
122            | TexTokenType::Phi
123            | TexTokenType::Chi
124            | TexTokenType::Psi
125            | TexTokenType::Omega
126            | TexTokenType::VarEpsilon
127            | TexTokenType::VarTheta
128            | TexTokenType::VarKappa
129            | TexTokenType::VarPi
130            | TexTokenType::VarRho
131            | TexTokenType::VarSigma
132            | TexTokenType::VarPhi
133            | TexTokenType::UpperGamma
134            | TexTokenType::UpperDelta
135            | TexTokenType::UpperTheta
136            | TexTokenType::UpperLambda
137            | TexTokenType::UpperXi
138            | TexTokenType::UpperPi
139            | TexTokenType::UpperSigma
140            | TexTokenType::UpperUpsilon
141            | TexTokenType::UpperPhi
142            | TexTokenType::UpperPsi
143            | TexTokenType::UpperOmega => {
144                if let Some(node) = tree.as_node() {
145                    Ok(Some(TexItem::Command(self.build_command(node, source)?)))
146                }
147                else {
148                    let name = match kind {
149                        TexTokenType::Sum => "sum".to_string(),
150                        TexTokenType::Int => "int".to_string(),
151                        TexTokenType::Lim => "lim".to_string(),
152                        TexTokenType::Frac => "frac".to_string(),
153                        TexTokenType::Sqrt => "sqrt".to_string(),
154                        TexTokenType::Alpha => "alpha".to_string(),
155                        TexTokenType::Beta => "beta".to_string(),
156                        TexTokenType::Gamma => "gamma".to_string(),
157                        TexTokenType::Delta => "delta".to_string(),
158                        TexTokenType::Epsilon => "epsilon".to_string(),
159                        TexTokenType::Zeta => "zeta".to_string(),
160                        TexTokenType::Eta => "eta".to_string(),
161                        TexTokenType::Theta => "theta".to_string(),
162                        TexTokenType::Iota => "iota".to_string(),
163                        TexTokenType::Kappa => "kappa".to_string(),
164                        TexTokenType::Lambda => "lambda".to_string(),
165                        TexTokenType::Mu => "mu".to_string(),
166                        TexTokenType::Nu => "nu".to_string(),
167                        TexTokenType::Xi => "xi".to_string(),
168                        TexTokenType::Omicron => "omicron".to_string(),
169                        TexTokenType::Pi => "pi".to_string(),
170                        TexTokenType::Rho => "rho".to_string(),
171                        TexTokenType::Sigma => "sigma".to_string(),
172                        TexTokenType::Tau => "tau".to_string(),
173                        TexTokenType::Upsilon => "upsilon".to_string(),
174                        TexTokenType::Phi => "phi".to_string(),
175                        TexTokenType::Chi => "chi".to_string(),
176                        TexTokenType::Psi => "psi".to_string(),
177                        TexTokenType::Omega => "omega".to_string(),
178                        TexTokenType::VarEpsilon => "varepsilon".to_string(),
179                        TexTokenType::VarTheta => "vartheta".to_string(),
180                        TexTokenType::VarKappa => "varkappa".to_string(),
181                        TexTokenType::VarPi => "varpi".to_string(),
182                        TexTokenType::VarRho => "varrho".to_string(),
183                        TexTokenType::VarSigma => "varsigma".to_string(),
184                        TexTokenType::VarPhi => "varphi".to_string(),
185                        TexTokenType::UpperGamma => "Gamma".to_string(),
186                        TexTokenType::UpperDelta => "Delta".to_string(),
187                        TexTokenType::UpperTheta => "Theta".to_string(),
188                        TexTokenType::UpperLambda => "Lambda".to_string(),
189                        TexTokenType::UpperXi => "Xi".to_string(),
190                        TexTokenType::UpperPi => "Pi".to_string(),
191                        TexTokenType::UpperSigma => "Sigma".to_string(),
192                        TexTokenType::UpperUpsilon => "Upsilon".to_string(),
193                        TexTokenType::UpperPhi => "Phi".to_string(),
194                        TexTokenType::UpperPsi => "Psi".to_string(),
195                        TexTokenType::UpperOmega => "Omega".to_string(),
196                        _ => tree.text(source).trim_start_matches('\\').to_string(),
197                    };
198                    Ok(Some(TexItem::Command(TexCommand { span: tree.span().into(), name, arguments: Vec::new() })))
199                }
200            }
201            TexTokenType::Environment => Ok(Some(TexItem::Environment(self.build_environment(tree.as_node().unwrap(), source)?))),
202            TexTokenType::Group => Ok(Some(TexItem::Group(self.build_group(tree.as_node().unwrap(), source)?))),
203            TexTokenType::InlineMath | TexTokenType::DisplayMath => Ok(Some(TexItem::Math(self.build_math(tree.as_node().unwrap(), source)?))),
204            TexTokenType::Superscript => Ok(Some(TexItem::Superscript(self.build_superscript(tree.as_node().unwrap(), source)?))),
205            TexTokenType::Subscript => Ok(Some(TexItem::Subscript(self.build_subscript(tree.as_node().unwrap(), source)?))),
206            TexTokenType::Identifier | TexTokenType::Number | TexTokenType::Text => Ok(Some(TexItem::Text { span: tree.span().into(), content: tree.text(source).to_string() })),
207            TexTokenType::Comment => Ok(Some(TexItem::Comment { span: tree.span().into(), content: tree.text(source).to_string() })),
208            _ => {
209                if tree.as_leaf().is_some() {
210                    Ok(Some(TexItem::Text { span: tree.span().into(), content: tree.text(source).to_string() }))
211                }
212                else {
213                    Ok(None)
214                }
215            }
216        }
217    }
218
219    fn build_superscript<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexSuperscript, OakError> {
220        let mut content = TexRoot::new(node.span().into());
221        for child in node.children() {
222            let kind: TexTokenType = child.kind();
223            if kind == TexTokenType::Caret {
224                continue;
225            }
226            if let Some(item) = self.build_item(child, source)? {
227                content.items.push(item)
228            }
229        }
230        Ok(TexSuperscript {
231            span: node.span().into(),
232            target: None, // Will be filled later by a post-processor if needed
233            content: Box::new(content),
234        })
235    }
236
237    fn build_subscript<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexSubscript, OakError> {
238        let mut content = TexRoot::new(node.span().into());
239        for child in node.children() {
240            let kind: TexTokenType = child.kind();
241            if kind == TexTokenType::Underscore {
242                continue;
243            }
244            if let Some(item) = self.build_item(child, source)? {
245                content.items.push(item)
246            }
247        }
248        Ok(TexSubscript {
249            span: node.span().into(),
250            target: None, // Will be filled later by a post-processor if needed
251            content: Box::new(content),
252        })
253    }
254
255    fn build_command<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexCommand, OakError> {
256        let mut name = String::new();
257        let mut arguments = Vec::new();
258
259        for child in node.children() {
260            let kind: TexTokenType = child.kind();
261            match kind {
262                TexTokenType::Backslash
263                | TexTokenType::Command
264                | TexTokenType::BeginKeyword
265                | TexTokenType::EndKeyword
266                | TexTokenType::Frac
267                | TexTokenType::Sqrt
268                | TexTokenType::Sum
269                | TexTokenType::Int
270                | TexTokenType::Lim
271                | TexTokenType::SectionKeyword
272                | TexTokenType::SubsectionKeyword
273                | TexTokenType::SubsubsectionKeyword
274                | TexTokenType::ChapterKeyword
275                | TexTokenType::PartKeyword
276                | TexTokenType::TitleKeyword
277                | TexTokenType::AuthorKeyword
278                | TexTokenType::DateKeyword
279                | TexTokenType::MaketitleKeyword
280                | TexTokenType::TableofcontentsKeyword
281                | TexTokenType::ItemKeyword
282                | TexTokenType::LabelKeyword
283                | TexTokenType::RefKeyword
284                | TexTokenType::CiteKeyword
285                | TexTokenType::IncludegraphicsKeyword
286                | TexTokenType::TextbfKeyword
287                | TexTokenType::TextitKeyword
288                | TexTokenType::EmphKeyword
289                | TexTokenType::Alpha
290                | TexTokenType::Beta
291                | TexTokenType::Gamma
292                | TexTokenType::Delta
293                | TexTokenType::Epsilon
294                | TexTokenType::Zeta
295                | TexTokenType::Eta
296                | TexTokenType::Theta
297                | TexTokenType::Iota
298                | TexTokenType::Kappa
299                | TexTokenType::Lambda
300                | TexTokenType::Mu
301                | TexTokenType::Nu
302                | TexTokenType::Xi
303                | TexTokenType::Omicron
304                | TexTokenType::Pi
305                | TexTokenType::Rho
306                | TexTokenType::Sigma
307                | TexTokenType::Tau
308                | TexTokenType::Upsilon
309                | TexTokenType::Phi
310                | TexTokenType::Chi
311                | TexTokenType::Psi
312                | TexTokenType::Omega
313                | TexTokenType::VarEpsilon
314                | TexTokenType::VarTheta
315                | TexTokenType::VarKappa
316                | TexTokenType::VarPi
317                | TexTokenType::VarRho
318                | TexTokenType::VarSigma
319                | TexTokenType::VarPhi
320                | TexTokenType::UpperGamma
321                | TexTokenType::UpperDelta
322                | TexTokenType::UpperTheta
323                | TexTokenType::UpperLambda
324                | TexTokenType::UpperXi
325                | TexTokenType::UpperPi
326                | TexTokenType::UpperSigma
327                | TexTokenType::UpperUpsilon
328                | TexTokenType::UpperPhi
329                | TexTokenType::UpperPsi
330                | TexTokenType::UpperOmega
331                | TexTokenType::TextBf
332                | TexTokenType::TextIt
333                | TexTokenType::TextSc
334                | TexTokenType::TextTt
335                | TexTokenType::Emph
336                | TexTokenType::Underline => {
337                    let text = child.text(source);
338                    if text.starts_with('\\') { name = text[1..].to_string() } else { name = text.to_string() }
339                }
340                TexTokenType::OptionalArgument => arguments.push(TexArgument::Optional(self.build_content(child.as_node().unwrap(), source)?)),
341                TexTokenType::MandatoryArgument => arguments.push(TexArgument::Required(self.build_content(child.as_node().unwrap(), source)?)),
342                _ => {}
343            }
344        }
345
346        Ok(TexCommand { span: node.span().into(), name, arguments })
347    }
348
349    fn build_group<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexGroup, OakError> {
350        Ok(TexGroup { span: node.span().into(), content: self.build_content(node, source)? })
351    }
352
353    fn build_math<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexMath, OakError> {
354        let kind: TexTokenType = node.kind();
355        Ok(TexMath { span: node.span().into(), content: self.build_content(node, source)?, is_display: kind == TexTokenType::DoubleDollar })
356    }
357
358    fn build_environment<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexEnvironment, OakError> {
359        let mut name = String::new();
360        let mut arguments = Vec::new();
361        let mut content = TexRoot::new(node.span().into());
362
363        for child in node.children() {
364            let kind: TexTokenType = child.kind();
365            match kind {
366                TexTokenType::BeginEnvironment => {
367                    for sub_child in child.as_node().unwrap().children() {
368                        let sub_kind: TexTokenType = sub_child.kind();
369                        match sub_kind {
370                            TexTokenType::MandatoryArgument => {
371                                // The first mandatory argument is the environment name
372                                if name.is_empty() {
373                                    let arg_root = self.build_content(sub_child.as_node().unwrap(), source)?;
374                                    for item in arg_root.items {
375                                        if let TexItem::Text { content, .. } = item {
376                                            name.push_str(&content)
377                                        }
378                                    }
379                                }
380                                else {
381                                    arguments.push(TexArgument::Required(self.build_content(sub_child.as_node().unwrap(), source)?))
382                                }
383                            }
384                            TexTokenType::OptionalArgument => arguments.push(TexArgument::Optional(self.build_content(sub_child.as_node().unwrap(), source)?)),
385                            _ => {}
386                        }
387                    }
388                }
389                TexTokenType::EndEnvironment => {}
390                _ => {
391                    if let Some(item) = self.build_item(child, source)? {
392                        content.items.push(item)
393                    }
394                }
395            }
396        }
397
398        Ok(TexEnvironment { span: node.span().into(), name, arguments, content })
399    }
400}
401
402fn is_delimiter(kind: TexTokenType) -> bool {
403    matches!(kind, TexTokenType::LeftBrace | TexTokenType::RightBrace | TexTokenType::LeftBracket | TexTokenType::RightBracket | TexTokenType::Dollar | TexTokenType::DoubleDollar)
404}