// oak_markdown/builder/mod.rs

1use crate::{
2    ast::*,
3    language::MarkdownLanguage,
4    parser::{MarkdownParser, element_type::MarkdownElementType},
5};
6use oak_core::{Builder, BuilderCache, GreenNode, OakError, Parser, RedNode, RedTree, SourceText, TextEdit, source::Source};
7
8/// AST builder for the Markdown language.
9#[derive(Clone)]
10pub struct MarkdownBuilder<'config> {
11    /// Language configuration.
12    config: &'config MarkdownLanguage,
13}
14
15impl<'config> MarkdownBuilder<'config> {
16    /// Creates a new MarkdownBuilder with the given configuration.
17    pub fn new(config: &'config MarkdownLanguage) -> Self {
18        Self { config }
19    }
20
21    /// Builds the AST root node from the green tree.
22    fn build_root(&self, green_tree: &GreenNode<MarkdownLanguage>, source: &SourceText) -> Result<MarkdownRoot, OakError> {
23        let red_root = RedNode::new(green_tree, 0);
24
25        let mut blocks = Vec::new();
26        for child in red_root.children() {
27            if let RedTree::Node(node) = child {
28                if let Some(block) = self.build_block(node, source) {
29                    blocks.push(block)
30                }
31            }
32        }
33
34        Ok(MarkdownRoot { blocks })
35    }
36
37    /// Builds block-level elements.
38    fn build_block(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> Option<Block> {
39        let kind = node.element_type();
40
41        match kind {
42            MarkdownElementType::Heading1 | MarkdownElementType::Heading2 | MarkdownElementType::Heading3 | MarkdownElementType::Heading4 | MarkdownElementType::Heading5 | MarkdownElementType::Heading6 => {
43                let level = match kind {
44                    MarkdownElementType::Heading1 => 1,
45                    MarkdownElementType::Heading2 => 2,
46                    MarkdownElementType::Heading3 => 3,
47                    MarkdownElementType::Heading4 => 4,
48                    MarkdownElementType::Heading5 => 5,
49                    MarkdownElementType::Heading6 => 6,
50                    _ => 1,
51                };
52                let content = self.collect_text(node, source);
53                Some(Block::Heading(Heading { level, content, span: node.span() }))
54            }
55            MarkdownElementType::Paragraph => {
56                let content = self.collect_text(node, source);
57                Some(Block::Paragraph(Paragraph { content, span: node.span() }))
58            }
59            MarkdownElementType::CodeBlock => {
60                let (language, content) = self.extract_code_block(node, source);
61                Some(Block::CodeBlock(CodeBlock { language, content, span: node.span() }))
62            }
63            MarkdownElementType::List => {
64                let (is_ordered, items) = self.extract_list(node, source);
65                Some(Block::List(List { is_ordered, items, span: node.span() }))
66            }
67            MarkdownElementType::Blockquote => {
68                let content = self.extract_blockquote_content(node, source);
69                Some(Block::Blockquote(Blockquote { content, span: node.span() }))
70            }
71            MarkdownElementType::HorizontalRule => Some(Block::HorizontalRule(HorizontalRule { span: node.span() })),
72            MarkdownElementType::Table => {
73                let (header, rows) = self.extract_table(node, source);
74                Some(Block::Table(Table { header, rows, span: node.span() }))
75            }
76            _ => None,
77        }
78    }
79
80    /// Collects text content from a node and its children.
81    fn collect_text(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> String {
82        let mut text = String::new();
83        for child in node.children() {
84            match child {
85                RedTree::Node(child_node) => {
86                    text.push_str(&self.collect_text(child_node, source));
87                }
88                RedTree::Leaf(_) => {
89                    text.push_str(&child.text(source));
90                }
91            }
92        }
93        text
94    }
95
96    /// Extracts code block content and language.
97    fn extract_code_block(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> (Option<String>, String) {
98        let mut language = None;
99        let mut content = String::new();
100        let mut in_content = false;
101
102        for child in node.children() {
103            if let RedTree::Node(child_node) = child {
104                let kind = child_node.element_type();
105                match kind {
106                    MarkdownElementType::CodeLanguage => {
107                        language = Some(self.collect_text(child_node, source).trim().to_string());
108                    }
109                    MarkdownElementType::Text | MarkdownElementType::Whitespace | MarkdownElementType::Newline => {
110                        if in_content {
111                            content.push_str(&child_node.text(source));
112                        }
113                    }
114                    MarkdownElementType::CodeFence => {
115                        in_content = !in_content;
116                    }
117                    _ => {}
118                }
119            }
120        }
121
122        (language.filter(|s| !s.is_empty()), content.trim().to_string())
123    }
124
125    /// Extracts list items.
126    fn extract_list(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> (bool, Vec<ListItem>) {
127        let mut is_ordered = false;
128        let mut items = Vec::new();
129
130        for child in node.children() {
131            if let RedTree::Node(child_node) = child {
132                let kind = child_node.element_type();
133                if kind == MarkdownElementType::ListItem {
134                    let list_item = self.build_list_item(child_node, source);
135
136                    if items.is_empty() {
137                        let text = child_node.text(source);
138                        is_ordered = text.trim_start().chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false);
139                    }
140
141                    items.push(list_item);
142                }
143            }
144        }
145
146        (is_ordered, items)
147    }
148
149    /// Extracts blockquote content.
150    fn extract_blockquote_content(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> Vec<Block> {
151        let mut content = Vec::new();
152
153        for child in node.children() {
154            if let RedTree::Node(child_node) = child {
155                if let Some(block) = self.build_block(child_node, source) {
156                    content.push(block);
157                }
158            }
159        }
160
161        if content.is_empty() {
162            let text = self.collect_text(node, source);
163            if !text.trim().is_empty() {
164                content.push(Block::Paragraph(Paragraph { content: text.trim().to_string(), span: node.span() }));
165            }
166        }
167
168        content
169    }
170
171    /// Extracts table content.
172    fn extract_table(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> (TableRow, Vec<TableRow>) {
173        let mut header = TableRow { cells: Vec::new(), span: node.span() };
174        let mut rows = Vec::new();
175        let mut is_header = true;
176
177        for child in node.children() {
178            if let RedTree::Node(child_node) = child {
179                let kind = child_node.element_type();
180                match kind {
181                    MarkdownElementType::TableRow => {
182                        let cells = self.extract_table_cells(child_node, source);
183                        let row = TableRow { cells, span: child_node.span() };
184                        if is_header {
185                            header = row;
186                            is_header = false;
187                        }
188                        else {
189                            rows.push(row);
190                        }
191                    }
192                    MarkdownElementType::TableSeparator => {}
193                    _ => {}
194                }
195            }
196        }
197
198        (header, rows)
199    }
200
201    /// Extracts table cells from a row.
202    fn extract_table_cells(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> Vec<TableCell> {
203        let mut cells = Vec::new();
204
205        for child in node.children() {
206            if let RedTree::Node(child_node) = child {
207                let kind = child_node.element_type();
208                if kind == MarkdownElementType::TableCell {
209                    let content = self.collect_text(child_node, source);
210                    cells.push(TableCell { content: content.trim().to_string(), span: child_node.span() });
211                }
212            }
213        }
214
215        cells
216    }
217
218    /// Builds inline-level elements.
219    fn build_inline(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> Option<Inline> {
220        let kind = node.element_type();
221        let text = self.collect_text(node, source);
222
223        match kind {
224            MarkdownElementType::Text => Some(Inline::Text(text)),
225            MarkdownElementType::Emphasis => Some(Inline::Italic(text)),
226            MarkdownElementType::Strong => Some(Inline::Bold(text)),
227            MarkdownElementType::Strikethrough => Some(Inline::Text(text)),
228            MarkdownElementType::InlineCode => Some(Inline::Code(text)),
229            MarkdownElementType::Link => Some(Inline::Link { text: text, url: String::new(), title: None }),
230            MarkdownElementType::Image => Some(Inline::Image { alt: text, url: String::new(), title: None }),
231            _ => None,
232        }
233    }
234
235    fn build_list_item(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> crate::ast::ListItem {
236        let mut content = Vec::new();
237        let mut is_task = false;
238        let mut is_checked = None;
239
240        for child in node.children() {
241            if let RedTree::Node(child_node) = child {
242                let kind = child_node.element_type();
243                if kind == MarkdownElementType::TaskMarker {
244                    is_task = true;
245                    let marker_text = child_node.text(source);
246                    is_checked = Some(marker_text.contains('x') || marker_text.contains('X'));
247                }
248                else if let Some(block) = self.build_block(child_node, source) {
249                    content.push(block);
250                }
251            }
252        }
253
254        if content.is_empty() {
255            let text = node.text(source).to_string();
256            if !text.trim().is_empty() {
257                let display_text = if text.starts_with("- ") || text.starts_with("* ") {
258                    text[2..].to_string()
259                }
260                else if text.len() > 3 && text.chars().next().unwrap().is_ascii_digit() && text.contains(". ") {
261                    if let Some(pos) = text.find(". ") { text[pos + 2..].to_string() } else { text }
262                }
263                else {
264                    text
265                };
266
267                content.push(crate::ast::Block::Paragraph(crate::ast::Paragraph { content: display_text.trim().to_string(), span: node.span() }))
268            }
269        }
270
271        crate::ast::ListItem { content, is_task, is_checked, span: node.span() }
272    }
273}
274
275impl<'config> Builder<MarkdownLanguage> for MarkdownBuilder<'config> {
276    fn build<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], _cache: &'a mut impl BuilderCache<MarkdownLanguage>) -> oak_core::builder::BuildOutput<MarkdownLanguage> {
277        let parser = MarkdownParser::new(self.config);
278        let mut parse_session = oak_core::parser::session::ParseSession::<MarkdownLanguage>::default();
279        let parse_result = parser.parse(source, edits, &mut parse_session);
280
281        match parse_result.result {
282            Ok(green_tree) => {
283                let source_text = SourceText::new(source.get_text_in((0..source.length()).into()).into_owned());
284                match self.build_root(green_tree, &source_text) {
285                    Ok(ast_root) => oak_core::OakDiagnostics { result: Ok(ast_root), diagnostics: parse_result.diagnostics },
286                    Err(build_error) => {
287                        let mut diagnostics = parse_result.diagnostics;
288                        diagnostics.push(build_error.clone());
289                        oak_core::OakDiagnostics { result: Err(build_error), diagnostics }
290                    }
291                }
292            }
293            Err(parse_error) => oak_core::OakDiagnostics { result: Err(parse_error), diagnostics: parse_result.diagnostics },
294        }
295    }
296}