Skip to main content

lex_core/lex/building/
ast_tree.rs

1//! AST Builder from ParseNode IR
2//!
3//!     This module contains the `AstTreeBuilder`, which walks the `ParseNode` tree produced
4//!     by the parser and constructs the final AST.
5//!
6//!     From the IR nodes, we build the actual AST nodes. During this step:
7//!         1. We unroll source tokens so that ast nodes have access to token values.
8//!         2. The location from tokens is used to calculate the location for the ast node.
9//!         3. The location is transformed from byte range to a dual byte range + line:column
10//!            position.
11//!
12//!     At this stage we create the root session tree that will later be attached to the
13//!     `Document` node during assembling.
14//!
15//!     See [location](super::location) for location calculation utilities.
16
17use crate::lex::ast::elements::document::DocumentTitle;
18use crate::lex::ast::elements::typed_content::{self, ContentElement, SessionContent};
19use crate::lex::ast::error::{format_source_context, ParserError, ParserResult};
20use crate::lex::ast::range::SourceLocation;
21use crate::lex::ast::text_content::TextContent;
22use crate::lex::ast::{AstNode, ContentItem, ListItem, Range, Session};
23use crate::lex::building::api as ast_api;
24use crate::lex::building::location::compute_location_from_locations;
25use crate::lex::parsing::ir::{NodeType, ParseNode, ParseNodePayload, TokenLocation};
26
27/// Output of the AST tree builder: a document title (if present) and the root session.
28pub struct BuildOutput {
29    pub title: Option<DocumentTitle>,
30    pub root: Session,
31}
32
33/// A builder that constructs an AST from a `ParseNode` tree.
34pub struct AstTreeBuilder<'a> {
35    source: &'a str,
36    source_location: SourceLocation,
37}
38
39impl<'a> AstTreeBuilder<'a> {
40    /// Creates a new `AstTreeBuilder`.
41    pub fn new(source: &'a str) -> Self {
42        Self {
43            source,
44            source_location: SourceLocation::new(source),
45        }
46    }
47
48    /// Builds the document's root Session and optional DocumentTitle from a root `ParseNode`.
49    pub fn build(&self, root_node: ParseNode) -> ParserResult<BuildOutput> {
50        if root_node.node_type != NodeType::Document {
51            panic!("Expected a Document node at the root");
52        }
53
54        // Extract DocumentTitle node if present, and filter structural markers
55        let mut document_title: Option<DocumentTitle> = None;
56        let filtered_children: Vec<ParseNode> = root_node
57            .children
58            .into_iter()
59            .filter(|node| {
60                match node.node_type {
61                    NodeType::DocumentStart => false,
62                    NodeType::DocumentTitle => {
63                        // Build DocumentTitle from the parse node's tokens
64                        let title_text = ast_api::text_content_from_tokens(
65                            node.tokens.clone(),
66                            self.source,
67                            &self.source_location,
68                        );
69                        let title_location = title_text.location.clone().unwrap_or_default();
70
71                        // Check for subtitle child node
72                        let subtitle = node.children.iter().find_map(|child| {
73                            if child.node_type == NodeType::DocumentSubtitle {
74                                let sub_text = ast_api::text_content_from_tokens(
75                                    child.tokens.clone(),
76                                    self.source,
77                                    &self.source_location,
78                                );
79                                Some(sub_text)
80                            } else {
81                                None
82                            }
83                        });
84
85                        let location = if let Some(ref sub) = subtitle {
86                            // Extend location to cover both title and subtitle
87                            let sub_loc = sub.location.clone().unwrap_or_default();
88                            Range::new(
89                                title_location.span.start..sub_loc.span.end,
90                                title_location.start,
91                                sub_loc.end,
92                            )
93                        } else {
94                            title_location
95                        };
96
97                        document_title = Some(match subtitle {
98                            Some(sub) => DocumentTitle::with_subtitle(title_text, sub, location),
99                            None => DocumentTitle::new(title_text, location),
100                        });
101                        false
102                    }
103                    _ => true,
104                }
105            })
106            .collect();
107
108        let content = self.build_content_items(filtered_children)?;
109        let content_locations: Vec<Range> =
110            content.iter().map(|item| item.range().clone()).collect();
111        let root_location = compute_location_from_locations(&content_locations);
112        let session_content = typed_content::into_session_contents(content);
113        let root = Session::new(
114            TextContent::from_string(String::new(), None::<Range>),
115            session_content,
116        );
117
118        Ok(BuildOutput {
119            title: document_title,
120            root: root.at(root_location),
121        })
122    }
123
124    /// Builds a vector of `ContentItem`s from a vector of `ParseNode`s.
125    fn build_content_items(&self, nodes: Vec<ParseNode>) -> ParserResult<Vec<ContentItem>> {
126        nodes
127            .into_iter()
128            .map(|node| self.build_content_item(node))
129            .collect()
130    }
131
132    /// Builds a single `ContentItem` from a `ParseNode`.
133    fn build_content_item(&self, node: ParseNode) -> ParserResult<ContentItem> {
134        match node.node_type {
135            NodeType::Paragraph => Ok(self.build_paragraph(node)),
136            NodeType::Session => self.build_session(node),
137            NodeType::List => self.build_list(node),
138            NodeType::Definition => self.build_definition(node),
139            NodeType::Annotation => self.build_annotation(node),
140            NodeType::VerbatimBlock => Ok(self.build_verbatim_block(node)),
141            NodeType::Table => Ok(self.build_table(node)),
142            NodeType::BlankLineGroup => Ok(self.build_blank_line_group(node)),
143            _ => panic!("Unexpected node type"),
144        }
145    }
146
147    fn build_paragraph(&self, node: ParseNode) -> ContentItem {
148        let token_lines = group_tokens_by_line(node.tokens);
149        ast_api::paragraph_from_token_lines(token_lines, self.source, &self.source_location)
150    }
151
152    fn build_session(&self, node: ParseNode) -> ParserResult<ContentItem> {
153        let title_tokens = node.tokens;
154        let content = self.build_session_content(node.children)?;
155        Ok(ast_api::session_from_tokens(
156            title_tokens,
157            content,
158            self.source,
159            &self.source_location,
160        ))
161    }
162
163    fn build_definition(&self, node: ParseNode) -> ParserResult<ContentItem> {
164        let subject_tokens = node.tokens;
165        let content = self.build_general_content(node.children, "Definition")?;
166        Ok(ast_api::definition_from_tokens(
167            subject_tokens,
168            content,
169            self.source,
170            &self.source_location,
171        ))
172    }
173
174    fn build_list(&self, node: ParseNode) -> ParserResult<ContentItem> {
175        let list_items: Result<Vec<_>, _> = node
176            .children
177            .into_iter()
178            .map(|child_node| self.build_list_item(child_node))
179            .collect();
180        Ok(ast_api::list_from_items(list_items?))
181    }
182
183    fn build_list_item(&self, node: ParseNode) -> ParserResult<ListItem> {
184        let marker_tokens = node.tokens;
185        let content = self.build_general_content(node.children, "ListItem")?;
186        Ok(ast_api::list_item_from_tokens(
187            marker_tokens,
188            content,
189            self.source,
190            &self.source_location,
191        ))
192    }
193
194    fn build_annotation(&self, node: ParseNode) -> ParserResult<ContentItem> {
195        let header_tokens = node.tokens;
196        let content = self.build_general_content(node.children, "Annotation")?;
197        Ok(ast_api::annotation_from_tokens(
198            header_tokens,
199            content,
200            self.source,
201            &self.source_location,
202        ))
203    }
204
205    fn build_verbatim_block(&self, mut node: ParseNode) -> ContentItem {
206        let payload = node
207            .payload
208            .take()
209            .expect("Parser must attach verbatim payload");
210        let ParseNodePayload::VerbatimBlock {
211            subject,
212            content_lines,
213            closing_data_tokens,
214        } = payload
215        else {
216            panic!("Expected VerbatimBlock payload for VerbatimBlock node");
217        };
218
219        let closing_data =
220            ast_api::data_from_tokens(closing_data_tokens, self.source, &self.source_location);
221
222        ast_api::verbatim_block_from_lines(
223            &subject,
224            &content_lines,
225            closing_data,
226            self.source,
227            &self.source_location,
228        )
229    }
230
231    fn build_table(&self, mut node: ParseNode) -> ContentItem {
232        let payload = node
233            .payload
234            .take()
235            .expect("Parser must attach table payload");
236        let ParseNodePayload::Table {
237            subject,
238            content_lines,
239            config_annotation_tokens,
240        } = payload
241        else {
242            panic!("Expected Table payload for Table node");
243        };
244
245        // Convert config annotation tokens to an Annotation, if present
246        let config_annotation = config_annotation_tokens.map(|tokens| {
247            ast_api::annotation_from_tokens(tokens, vec![], self.source, &self.source_location)
248        });
249
250        ast_api::table_from_lines(
251            &subject,
252            &content_lines,
253            config_annotation,
254            self.source,
255            &self.source_location,
256        )
257    }
258
259    fn build_blank_line_group(&self, node: ParseNode) -> ContentItem {
260        ast_api::blank_line_group_from_tokens(node.tokens, self.source, &self.source_location)
261    }
262
263    fn build_session_content(&self, nodes: Vec<ParseNode>) -> ParserResult<Vec<SessionContent>> {
264        nodes
265            .into_iter()
266            .map(|node| self.build_content_item(node).map(SessionContent::from))
267            .collect()
268    }
269
270    fn build_general_content(
271        &self,
272        nodes: Vec<ParseNode>,
273        context: &str,
274    ) -> ParserResult<Vec<ContentElement>> {
275        nodes
276            .into_iter()
277            .map(|node| {
278                self.build_content_item(node).and_then(|item| {
279                    let location = item.range().clone();
280
281                    // Extract text snippet from source for the invalid item (Session title)
282                    // Get the line at the start of the error location
283                    let source_lines: Vec<&str> = self.source.lines().collect();
284                    let error_line_num = location.start.line;
285                    let session_title = if error_line_num < source_lines.len() {
286                        source_lines[error_line_num]
287                    } else {
288                        ""
289                    };
290
291                    ContentElement::try_from(item).map_err(|_| {
292                        Box::new(ParserError::InvalidNesting {
293                            container: context.to_string(),
294                            invalid_child: "Session".to_string(),
295                            invalid_child_text: session_title.to_string(),
296                            location: location.clone(),
297                            source_context: format_source_context(self.source, &location),
298                        })
299                    })
300                })
301            })
302            .collect()
303    }
304}
305
306/// Group a flat vector of tokens into lines (split by Newline tokens).
307fn group_tokens_by_line(tokens: Vec<TokenLocation>) -> Vec<Vec<TokenLocation>> {
308    if tokens.is_empty() {
309        return vec![];
310    }
311
312    let mut lines: Vec<Vec<TokenLocation>> = vec![];
313    let mut current_line: Vec<TokenLocation> = vec![];
314
315    for token_location in tokens {
316        if matches!(token_location.0, crate::lex::lexing::Token::BlankLine(_)) {
317            lines.push(current_line);
318            current_line = vec![];
319        } else {
320            current_line.push(token_location);
321        }
322    }
323
324    if !current_line.is_empty() {
325        lines.push(current_line);
326    }
327
328    lines
329}
330
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::token::{LineToken, LineType, Token};

    /// Creates a `ParseNode` without payload.
    fn parse_node(
        node_type: NodeType,
        tokens: Vec<TokenLocation>,
        children: Vec<ParseNode>,
    ) -> ParseNode {
        ParseNode {
            node_type,
            tokens,
            children,
            payload: None,
        }
    }

    /// A session below a definition must be reported as invalid nesting, pointing at the
    /// session's title line.
    #[test]
    fn build_general_content_rejects_nested_session() {
        let source = "Term\nchild\n";
        let builder = AstTreeBuilder::new(source);

        let session_node = parse_node(
            NodeType::Session,
            vec![(Token::Text("child".into()), 5..10)],
            vec![],
        );

        let err = builder
            .build_general_content(vec![session_node], "Definition")
            .expect_err("sessions should not be allowed in general content");

        match &*err {
            ParserError::InvalidNesting {
                container,
                invalid_child,
                invalid_child_text,
                location,
                ..
            } => {
                assert_eq!(container, "Definition");
                assert_eq!(invalid_child, "Session");
                assert_eq!(invalid_child_text.trim(), "child");
                assert_eq!(location.start.line, 1);
            }
        }
    }

    /// Consecutive blank-line tokens each close a line, leaving an empty bucket between.
    #[test]
    fn group_tokens_by_line_handles_blank_boundaries() {
        let blank = || Token::BlankLine(Some("\n".into()));
        let tokens = vec![
            (Token::Text("a".into()), 0..1),
            (blank(), 1..2),
            (blank(), 2..3),
            (Token::Text("b".into()), 3..4),
        ];

        let bucket_sizes: Vec<usize> = group_tokens_by_line(tokens)
            .iter()
            .map(|line| line.len())
            .collect();

        // One token before the blanks, an empty bucket for the second blank, one after.
        assert_eq!(bucket_sizes, vec![1, 0, 1]);
    }

    /// Subject, content lines and closing data from the payload all reach the AST node.
    #[test]
    #[allow(clippy::single_range_in_vec_init)]
    fn build_verbatim_block_preserves_payload_data() {
        let source = "subject\ncontent\nclose\n";
        let builder = AstTreeBuilder::new(source);

        let payload = ParseNodePayload::VerbatimBlock {
            subject: LineToken {
                source_tokens: vec![Token::Text("subject".into())],
                token_spans: vec![0..7],
                line_type: LineType::SubjectLine,
            },
            content_lines: vec![LineToken {
                source_tokens: vec![Token::Text("content".into())],
                token_spans: vec![8..15],
                line_type: LineType::ParagraphLine,
            }],
            closing_data_tokens: vec![(Token::Text("close".into()), 16..21)],
        };

        let node = ParseNode {
            node_type: NodeType::VerbatimBlock,
            tokens: vec![],
            children: vec![],
            payload: Some(payload),
        };

        let ContentItem::VerbatimBlock(verbatim) = builder.build_verbatim_block(node) else {
            panic!("expected verbatim block");
        };

        assert_eq!(verbatim.subject.as_string(), "subject");
        assert_eq!(verbatim.children.len(), 1);
        assert_eq!(verbatim.closing_data.label.value, "close");
    }

    /// A `DocumentTitle` node becomes the output title and is not part of the root content.
    #[test]
    fn test_document_title_parsing() {
        let source = "My Document Title\n\nContent paragraph.\n";
        let builder = AstTreeBuilder::new(source);

        let title_node = parse_node(
            NodeType::DocumentTitle,
            vec![(Token::Text("My Document Title".to_string()), 0..17)],
            vec![],
        );
        let paragraph_node = parse_node(
            NodeType::Paragraph,
            vec![
                (Token::Text("Content paragraph.".into()), 19..37),
                (Token::BlankLine(Some("\n".into())), 37..38),
            ],
            vec![],
        );
        let root_node = parse_node(NodeType::Document, vec![], vec![title_node, paragraph_node]);

        let output = builder.build(root_node).expect("Builder failed");

        assert!(output.title.is_some());
        assert_eq!(output.title.unwrap().as_str(), "My Document Title");
        assert_eq!(output.root.children.len(), 1);

        let ContentItem::Paragraph(paragraph) = &output.root.children[0] else {
            panic!("Expected paragraph");
        };
        assert_eq!(paragraph.text(), "Content paragraph.");
    }

    /// Without a `DocumentTitle` node the output has no title and keeps all content.
    #[test]
    fn test_document_title_parsing_no_title() {
        let source = "# Section 1\n\nContent.\n";
        let builder = AstTreeBuilder::new(source);

        let session_node = parse_node(
            NodeType::Session,
            vec![(Token::Text("Section 1".into()), 2..11)],
            vec![
                parse_node(NodeType::BlankLineGroup, vec![], vec![]),
                parse_node(
                    NodeType::Paragraph,
                    vec![(Token::Text("Content.".into()), 13..21)],
                    vec![],
                ),
            ],
        );
        let root_node = parse_node(NodeType::Document, vec![], vec![session_node]);

        let output = builder.build(root_node).expect("Builder failed");

        assert!(output.title.is_none());
        assert_eq!(output.root.children.len(), 1); // 1 session
    }
}