Skip to main content

oak_tex/builder/
mod.rs

1use crate::{ast::*, kind::TexSyntaxKind, language::TexLanguage, parser::TexParser};
2use oak_core::{Builder, BuilderCache, GreenNode, OakDiagnostics, OakError, Parser, RedNode, RedTree, TextEdit, source::Source};
3
4/// TeX 语言的 AST 构建器
5#[derive(Clone)]
6pub struct TexBuilder<'config> {
7    /// 语言配置
8    config: &'config TexLanguage,
9}
10
11impl<'config> TexBuilder<'config> {
12    /// 创建新的 TeX 构建器
13    pub fn new(config: &'config TexLanguage) -> Self {
14        Self { config }
15    }
16}
17
18impl<'config> Builder<TexLanguage> for TexBuilder<'config> {
19    fn build<'a, S: Source + ?Sized>(&self, source: &'a S, edits: &[TextEdit], cache: &'a mut impl BuilderCache<TexLanguage>) -> oak_core::builder::BuildOutput<TexLanguage> {
20        let parser = TexParser::new(self.config);
21
22        let parse_result = parser.parse(source, edits, cache);
23
24        match parse_result.result {
25            Ok(green_tree) => match self.build_root_internal(green_tree, source) {
26                Ok(ast_root) => OakDiagnostics { result: Ok(ast_root), diagnostics: parse_result.diagnostics },
27                Err(build_error) => {
28                    let mut diagnostics = parse_result.diagnostics;
29                    diagnostics.push(build_error.clone());
30                    OakDiagnostics { result: Err(build_error), diagnostics }
31                }
32            },
33            Err(parse_error) => OakDiagnostics { result: Err(parse_error), diagnostics: parse_result.diagnostics },
34        }
35    }
36}
37
38impl<'config> TexBuilder<'config> {
39    /// 构建根节点
40    fn build_root_internal<S: Source + ?Sized>(&self, green_tree: &GreenNode<TexLanguage>, source: &S) -> Result<TexRoot, OakError> {
41        let red_root = RedNode::new(green_tree, 0);
42        self.build_content(red_root, source)
43    }
44
45    fn build_content<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexRoot, OakError> {
46        let mut items = Vec::new();
47        let children: Vec<_> = node.children().collect();
48
49        if children.is_empty() {
50            return Ok(TexRoot { span: node.span().into(), items });
51        }
52
53        let first_kind = children.first().unwrap().kind::<TexSyntaxKind>();
54        let last_kind = children.last().unwrap().kind::<TexSyntaxKind>();
55
56        let start = if is_delimiter(first_kind) { 1 } else { 0 };
57        let end = if children.len() > start && is_delimiter(last_kind) { children.len() - 1 } else { children.len() };
58
59        for i in start..end {
60            if let Some(item) = self.build_item(children[i], source)? {
61                items.push(item);
62            }
63        }
64
65        Ok(TexRoot { span: node.span().into(), items })
66    }
67
68    fn build_item<S: Source + ?Sized>(&self, tree: RedTree<TexLanguage>, source: &S) -> Result<Option<TexItem>, OakError> {
69        let kind: TexSyntaxKind = tree.kind();
70        match kind {
71            TexSyntaxKind::Command
72            | TexSyntaxKind::BeginKeyword
73            | TexSyntaxKind::EndKeyword
74            | TexSyntaxKind::Frac
75            | TexSyntaxKind::Sqrt
76            | TexSyntaxKind::Sum
77            | TexSyntaxKind::Int
78            | TexSyntaxKind::Lim
79            | TexSyntaxKind::SectionKeyword
80            | TexSyntaxKind::SubsectionKeyword
81            | TexSyntaxKind::SubsubsectionKeyword
82            | TexSyntaxKind::ChapterKeyword
83            | TexSyntaxKind::PartKeyword
84            | TexSyntaxKind::TitleKeyword
85            | TexSyntaxKind::AuthorKeyword
86            | TexSyntaxKind::DateKeyword
87            | TexSyntaxKind::MaketitleKeyword
88            | TexSyntaxKind::TableofcontentsKeyword
89            | TexSyntaxKind::ItemKeyword
90            | TexSyntaxKind::LabelKeyword
91            | TexSyntaxKind::RefKeyword
92            | TexSyntaxKind::CiteKeyword
93            | TexSyntaxKind::IncludegraphicsKeyword
94            | TexSyntaxKind::TextbfKeyword
95            | TexSyntaxKind::TextitKeyword
96            | TexSyntaxKind::EmphKeyword
97            | TexSyntaxKind::Alpha
98            | TexSyntaxKind::Beta
99            | TexSyntaxKind::Gamma
100            | TexSyntaxKind::Delta
101            | TexSyntaxKind::Epsilon
102            | TexSyntaxKind::Zeta
103            | TexSyntaxKind::Eta
104            | TexSyntaxKind::Theta
105            | TexSyntaxKind::Iota
106            | TexSyntaxKind::Kappa
107            | TexSyntaxKind::Lambda
108            | TexSyntaxKind::Mu
109            | TexSyntaxKind::Nu
110            | TexSyntaxKind::Xi
111            | TexSyntaxKind::Omicron
112            | TexSyntaxKind::Pi
113            | TexSyntaxKind::Rho
114            | TexSyntaxKind::Sigma
115            | TexSyntaxKind::Tau
116            | TexSyntaxKind::Upsilon
117            | TexSyntaxKind::Phi
118            | TexSyntaxKind::Chi
119            | TexSyntaxKind::Psi
120            | TexSyntaxKind::Omega
121            | TexSyntaxKind::VarEpsilon
122            | TexSyntaxKind::VarTheta
123            | TexSyntaxKind::VarKappa
124            | TexSyntaxKind::VarPi
125            | TexSyntaxKind::VarRho
126            | TexSyntaxKind::VarSigma
127            | TexSyntaxKind::VarPhi
128            | TexSyntaxKind::UpperGamma
129            | TexSyntaxKind::UpperDelta
130            | TexSyntaxKind::UpperTheta
131            | TexSyntaxKind::UpperLambda
132            | TexSyntaxKind::UpperXi
133            | TexSyntaxKind::UpperPi
134            | TexSyntaxKind::UpperSigma
135            | TexSyntaxKind::UpperUpsilon
136            | TexSyntaxKind::UpperPhi
137            | TexSyntaxKind::UpperPsi
138            | TexSyntaxKind::UpperOmega => {
139                if let Some(node) = tree.as_node() {
140                    Ok(Some(TexItem::Command(self.build_command(node, source)?)))
141                }
142                else {
143                    let name = match kind {
144                        TexSyntaxKind::Sum => "sum".to_string(),
145                        TexSyntaxKind::Int => "int".to_string(),
146                        TexSyntaxKind::Lim => "lim".to_string(),
147                        TexSyntaxKind::Frac => "frac".to_string(),
148                        TexSyntaxKind::Sqrt => "sqrt".to_string(),
149                        TexSyntaxKind::Alpha => "alpha".to_string(),
150                        TexSyntaxKind::Beta => "beta".to_string(),
151                        TexSyntaxKind::Gamma => "gamma".to_string(),
152                        TexSyntaxKind::Delta => "delta".to_string(),
153                        TexSyntaxKind::Epsilon => "epsilon".to_string(),
154                        TexSyntaxKind::Zeta => "zeta".to_string(),
155                        TexSyntaxKind::Eta => "eta".to_string(),
156                        TexSyntaxKind::Theta => "theta".to_string(),
157                        TexSyntaxKind::Iota => "iota".to_string(),
158                        TexSyntaxKind::Kappa => "kappa".to_string(),
159                        TexSyntaxKind::Lambda => "lambda".to_string(),
160                        TexSyntaxKind::Mu => "mu".to_string(),
161                        TexSyntaxKind::Nu => "nu".to_string(),
162                        TexSyntaxKind::Xi => "xi".to_string(),
163                        TexSyntaxKind::Omicron => "omicron".to_string(),
164                        TexSyntaxKind::Pi => "pi".to_string(),
165                        TexSyntaxKind::Rho => "rho".to_string(),
166                        TexSyntaxKind::Sigma => "sigma".to_string(),
167                        TexSyntaxKind::Tau => "tau".to_string(),
168                        TexSyntaxKind::Upsilon => "upsilon".to_string(),
169                        TexSyntaxKind::Phi => "phi".to_string(),
170                        TexSyntaxKind::Chi => "chi".to_string(),
171                        TexSyntaxKind::Psi => "psi".to_string(),
172                        TexSyntaxKind::Omega => "omega".to_string(),
173                        TexSyntaxKind::VarEpsilon => "varepsilon".to_string(),
174                        TexSyntaxKind::VarTheta => "vartheta".to_string(),
175                        TexSyntaxKind::VarKappa => "varkappa".to_string(),
176                        TexSyntaxKind::VarPi => "varpi".to_string(),
177                        TexSyntaxKind::VarRho => "varrho".to_string(),
178                        TexSyntaxKind::VarSigma => "varsigma".to_string(),
179                        TexSyntaxKind::VarPhi => "varphi".to_string(),
180                        TexSyntaxKind::UpperGamma => "Gamma".to_string(),
181                        TexSyntaxKind::UpperDelta => "Delta".to_string(),
182                        TexSyntaxKind::UpperTheta => "Theta".to_string(),
183                        TexSyntaxKind::UpperLambda => "Lambda".to_string(),
184                        TexSyntaxKind::UpperXi => "Xi".to_string(),
185                        TexSyntaxKind::UpperPi => "Pi".to_string(),
186                        TexSyntaxKind::UpperSigma => "Sigma".to_string(),
187                        TexSyntaxKind::UpperUpsilon => "Upsilon".to_string(),
188                        TexSyntaxKind::UpperPhi => "Phi".to_string(),
189                        TexSyntaxKind::UpperPsi => "Psi".to_string(),
190                        TexSyntaxKind::UpperOmega => "Omega".to_string(),
191                        _ => tree.text(source).trim_start_matches('\\').to_string(),
192                    };
193                    Ok(Some(TexItem::Command(TexCommand { span: tree.span().into(), name, arguments: Vec::new() })))
194                }
195            }
196            TexSyntaxKind::Environment => Ok(Some(TexItem::Environment(self.build_environment(tree.as_node().unwrap(), source)?))),
197            TexSyntaxKind::Group => Ok(Some(TexItem::Group(self.build_group(tree.as_node().unwrap(), source)?))),
198            TexSyntaxKind::InlineMath | TexSyntaxKind::DisplayMath => Ok(Some(TexItem::Math(self.build_math(tree.as_node().unwrap(), source)?))),
199            TexSyntaxKind::Superscript => Ok(Some(TexItem::Superscript(self.build_superscript(tree.as_node().unwrap(), source)?))),
200            TexSyntaxKind::Subscript => Ok(Some(TexItem::Subscript(self.build_subscript(tree.as_node().unwrap(), source)?))),
201            TexSyntaxKind::Identifier | TexSyntaxKind::Number | TexSyntaxKind::Text => Ok(Some(TexItem::Text { span: tree.span().into(), content: tree.text(source).to_string() })),
202            TexSyntaxKind::Comment => Ok(Some(TexItem::Comment { span: tree.span().into(), content: tree.text(source).to_string() })),
203            _ => {
204                if tree.as_leaf().is_some() {
205                    Ok(Some(TexItem::Text { span: tree.span().into(), content: tree.text(source).to_string() }))
206                }
207                else {
208                    Ok(None)
209                }
210            }
211        }
212    }
213
214    fn build_superscript<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexSuperscript, OakError> {
215        let mut content = TexRoot::new(node.span().into());
216        for child in node.children() {
217            if child.kind::<TexSyntaxKind>() == TexSyntaxKind::Caret {
218                continue;
219            }
220            if let Some(item) = self.build_item(child, source)? {
221                content.items.push(item);
222            }
223        }
224        Ok(TexSuperscript {
225            span: node.span().into(),
226            target: None, // Will be filled later by a post-processor if needed
227            content: Box::new(content),
228        })
229    }
230
231    fn build_subscript<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexSubscript, OakError> {
232        let mut content = TexRoot::new(node.span().into());
233        for child in node.children() {
234            if child.kind::<TexSyntaxKind>() == TexSyntaxKind::Underscore {
235                continue;
236            }
237            if let Some(item) = self.build_item(child, source)? {
238                content.items.push(item);
239            }
240        }
241        Ok(TexSubscript {
242            span: node.span().into(),
243            target: None, // Will be filled later by a post-processor if needed
244            content: Box::new(content),
245        })
246    }
247
248    fn build_command<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexCommand, OakError> {
249        let mut name = String::new();
250        let mut arguments = Vec::new();
251
252        for child in node.children() {
253            let kind: TexSyntaxKind = child.kind();
254            match kind {
255                TexSyntaxKind::Backslash
256                | TexSyntaxKind::Command
257                | TexSyntaxKind::BeginKeyword
258                | TexSyntaxKind::EndKeyword
259                | TexSyntaxKind::Frac
260                | TexSyntaxKind::Sqrt
261                | TexSyntaxKind::Sum
262                | TexSyntaxKind::Int
263                | TexSyntaxKind::Lim
264                | TexSyntaxKind::SectionKeyword
265                | TexSyntaxKind::SubsectionKeyword
266                | TexSyntaxKind::SubsubsectionKeyword
267                | TexSyntaxKind::ChapterKeyword
268                | TexSyntaxKind::PartKeyword
269                | TexSyntaxKind::TitleKeyword
270                | TexSyntaxKind::AuthorKeyword
271                | TexSyntaxKind::DateKeyword
272                | TexSyntaxKind::MaketitleKeyword
273                | TexSyntaxKind::TableofcontentsKeyword
274                | TexSyntaxKind::ItemKeyword
275                | TexSyntaxKind::LabelKeyword
276                | TexSyntaxKind::RefKeyword
277                | TexSyntaxKind::CiteKeyword
278                | TexSyntaxKind::IncludegraphicsKeyword
279                | TexSyntaxKind::TextbfKeyword
280                | TexSyntaxKind::TextitKeyword
281                | TexSyntaxKind::EmphKeyword
282                | TexSyntaxKind::Alpha
283                | TexSyntaxKind::Beta
284                | TexSyntaxKind::Gamma
285                | TexSyntaxKind::Delta
286                | TexSyntaxKind::Epsilon
287                | TexSyntaxKind::Zeta
288                | TexSyntaxKind::Eta
289                | TexSyntaxKind::Theta
290                | TexSyntaxKind::Iota
291                | TexSyntaxKind::Kappa
292                | TexSyntaxKind::Lambda
293                | TexSyntaxKind::Mu
294                | TexSyntaxKind::Nu
295                | TexSyntaxKind::Xi
296                | TexSyntaxKind::Omicron
297                | TexSyntaxKind::Pi
298                | TexSyntaxKind::Rho
299                | TexSyntaxKind::Sigma
300                | TexSyntaxKind::Tau
301                | TexSyntaxKind::Upsilon
302                | TexSyntaxKind::Phi
303                | TexSyntaxKind::Chi
304                | TexSyntaxKind::Psi
305                | TexSyntaxKind::Omega
306                | TexSyntaxKind::VarEpsilon
307                | TexSyntaxKind::VarTheta
308                | TexSyntaxKind::VarKappa
309                | TexSyntaxKind::VarPi
310                | TexSyntaxKind::VarRho
311                | TexSyntaxKind::VarSigma
312                | TexSyntaxKind::VarPhi
313                | TexSyntaxKind::UpperGamma
314                | TexSyntaxKind::UpperDelta
315                | TexSyntaxKind::UpperTheta
316                | TexSyntaxKind::UpperLambda
317                | TexSyntaxKind::UpperXi
318                | TexSyntaxKind::UpperPi
319                | TexSyntaxKind::UpperSigma
320                | TexSyntaxKind::UpperUpsilon
321                | TexSyntaxKind::UpperPhi
322                | TexSyntaxKind::UpperPsi
323                | TexSyntaxKind::UpperOmega
324                | TexSyntaxKind::TextBf
325                | TexSyntaxKind::TextIt
326                | TexSyntaxKind::TextSc
327                | TexSyntaxKind::TextTt
328                | TexSyntaxKind::Emph
329                | TexSyntaxKind::Underline => {
330                    let text = child.text(source);
331                    if text.starts_with('\\') {
332                        name = text[1..].to_string();
333                    }
334                    else {
335                        name = text.to_string();
336                    }
337                }
338                TexSyntaxKind::OptionalArgument => {
339                    arguments.push(TexArgument::Optional(self.build_content(child.as_node().unwrap(), source)?));
340                }
341                TexSyntaxKind::MandatoryArgument => {
342                    arguments.push(TexArgument::Required(self.build_content(child.as_node().unwrap(), source)?));
343                }
344                _ => {}
345            }
346        }
347
348        Ok(TexCommand { span: node.span().into(), name, arguments })
349    }
350
351    fn build_group<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexGroup, OakError> {
352        Ok(TexGroup { span: node.span().into(), content: self.build_content(node, source)? })
353    }
354
355    fn build_math<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexMath, OakError> {
356        let kind: TexSyntaxKind = node.kind();
357        Ok(TexMath { span: node.span().into(), content: self.build_content(node, source)?, is_display: kind == TexSyntaxKind::DoubleDollar })
358    }
359
360    fn build_environment<S: Source + ?Sized>(&self, node: RedNode<TexLanguage>, source: &S) -> Result<TexEnvironment, OakError> {
361        let mut name = String::new();
362        let mut arguments = Vec::new();
363        let mut content = TexRoot::new(node.span().into());
364
365        for child in node.children() {
366            let kind: TexSyntaxKind = child.kind();
367            match kind {
368                TexSyntaxKind::BeginEnvironment => {
369                    for sub_child in child.as_node().unwrap().children() {
370                        match sub_child.kind::<TexSyntaxKind>() {
371                            TexSyntaxKind::MandatoryArgument => {
372                                // The first mandatory argument is the environment name
373                                if name.is_empty() {
374                                    let arg_root = self.build_content(sub_child.as_node().unwrap(), source)?;
375                                    for item in arg_root.items {
376                                        if let TexItem::Text { content, .. } = item {
377                                            name.push_str(&content);
378                                        }
379                                    }
380                                }
381                                else {
382                                    arguments.push(TexArgument::Required(self.build_content(sub_child.as_node().unwrap(), source)?));
383                                }
384                            }
385                            TexSyntaxKind::OptionalArgument => {
386                                arguments.push(TexArgument::Optional(self.build_content(sub_child.as_node().unwrap(), source)?));
387                            }
388                            _ => {}
389                        }
390                    }
391                }
392                TexSyntaxKind::EndEnvironment => {}
393                _ => {
394                    if let Some(item) = self.build_item(child, source)? {
395                        content.items.push(item);
396                    }
397                }
398            }
399        }
400
401        Ok(TexEnvironment { span: node.span().into(), name, arguments, content })
402    }
403}
404
405fn is_delimiter(kind: TexSyntaxKind) -> bool {
406    matches!(kind, TexSyntaxKind::LeftBrace | TexSyntaxKind::RightBrace | TexSyntaxKind::LeftBracket | TexSyntaxKind::RightBracket | TexSyntaxKind::Dollar | TexSyntaxKind::DoubleDollar)
407}