Skip to main content

oak_markdown/builder/
mod.rs

1use crate::{ast::*, language::MarkdownLanguage, parser::MarkdownParser};
2use oak_core::{Builder, BuilderCache, GreenNode, OakError, Parser, RedNode, RedTree, SourceText, TextEdit, source::Source};
3
4/// AST builder for the Markdown language.
5#[derive(Clone)]
6pub struct MarkdownBuilder<'config> {
7    /// Language configuration.
8    config: &'config MarkdownLanguage,
9}
10
11impl<'config> MarkdownBuilder<'config> {
12    /// Creates a new MarkdownBuilder with the given configuration.
13    pub fn new(config: &'config MarkdownLanguage) -> Self {
14        Self { config }
15    }
16
17    /// Builds the AST root node from the green tree.
18    fn build_root(&self, green_tree: &GreenNode<MarkdownLanguage>, source: &SourceText) -> Result<MarkdownRoot, OakError> {
19        let red_root = RedNode::new(green_tree, 0);
20
21        let mut blocks = Vec::new();
22        for child in red_root.children() {
23            if let RedTree::Node(node) = child {
24                if let Some(block) = self.build_block(node, source) {
25                    blocks.push(block)
26                }
27            }
28        }
29
30        Ok(MarkdownRoot { blocks })
31    }
32
33    /// 构建块级元素
34    fn build_block(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> Option<Block> {
35        use crate::{lexer::token_type::MarkdownTokenType as TT, parser::element_type::MarkdownElementType as ET};
36
37        let kind = node.green.kind;
38        match kind {
39            ET::Heading1 | ET::Heading2 | ET::Heading3 | ET::Heading4 | ET::Heading5 | ET::Heading6 => {
40                let level = match kind {
41                    ET::Heading1 => 1,
42                    ET::Heading2 => 2,
43                    ET::Heading3 => 3,
44                    ET::Heading4 => 4,
45                    ET::Heading5 => 5,
46                    ET::Heading6 => 6,
47                    _ => unreachable!(),
48                };
49                let text = source.get_text_in(node.span());
50                let content = text.trim_start_matches('#').trim_start().to_string();
51                Some(Block::Heading(crate::ast::Heading { level, content, span: node.span() }))
52            }
53            ET::Paragraph => Some(Block::Paragraph(crate::ast::Paragraph { content: source.get_text_in(node.span()).to_string(), span: node.span() })),
54            ET::CodeBlock => {
55                let mut language = None;
56                let mut content = String::new();
57
58                for child in node.children() {
59                    match child {
60                        RedTree::Leaf(leaf) => {
61                            if leaf.kind == TT::CodeLanguage {
62                                language = Some(source.get_text_in(leaf.span).trim().to_string());
63                            }
64                            else if leaf.kind != TT::CodeFence {
65                                content.push_str(&source.get_text_in(leaf.span));
66                            }
67                        }
68                        RedTree::Node(child_node) => {
69                            // 检查子节点是否包含语言标识
70                            for sub_child in child_node.children() {
71                                if let RedTree::Leaf(sub_leaf) = sub_child {
72                                    if sub_leaf.kind == TT::CodeLanguage {
73                                        language = Some(source.get_text_in(sub_leaf.span).trim().to_string());
74                                    }
75                                    else if sub_leaf.kind != TT::CodeFence {
76                                        content.push_str(&source.get_text_in(sub_leaf.span));
77                                    }
78                                }
79                                else if let RedTree::Node(sub_node) = sub_child {
80                                    content.push_str(&source.get_text_in(sub_node.span()));
81                                }
82                            }
83                        }
84                    }
85                }
86
87                Some(Block::CodeBlock(crate::ast::CodeBlock { language, content: content.trim().to_string(), span: node.span() }))
88            }
89            ET::UnorderedList | ET::OrderedList => {
90                let mut items = Vec::new();
91                for child in node.children() {
92                    if let RedTree::Node(child_node) = child {
93                        if child_node.green.kind == ET::ListItem {
94                            items.push(self.build_list_item(child_node, source));
95                        }
96                    }
97                }
98                Some(Block::List(crate::ast::List { is_ordered: kind == ET::OrderedList, items, span: node.span() }))
99            }
100            ET::Blockquote => {
101                let mut content_text = String::new();
102                for child in node.children() {
103                    match child {
104                        RedTree::Leaf(leaf) => {
105                            if leaf.kind != TT::BlockquoteMarker {
106                                content_text.push_str(&source.get_text_in(leaf.span))
107                            }
108                        }
109                        RedTree::Node(child_node) => content_text.push_str(&source.get_text_in(child_node.span())),
110                    }
111                }
112
113                // 简单的引用处理:将其内容作为段落
114                Some(Block::Blockquote(crate::ast::Blockquote { content: vec![Block::Paragraph(crate::ast::Paragraph { content: content_text.trim().to_string(), span: node.span() })], span: node.span() }))
115            }
116            ET::HorizontalRule => Some(Block::HorizontalRule(crate::ast::HorizontalRule { span: node.span() })),
117            ET::Table => {
118                let text = source.get_text_in(node.span());
119                let lines: Vec<&str> = text.lines().collect();
120                if lines.is_empty() {
121                    return None;
122                }
123
124                let parse_row = |line: &str| -> crate::ast::TableRow {
125                    let cells = line
126                        .split('|')
127                        .filter(|s| !s.trim().is_empty())
128                        .map(|s| crate::ast::TableCell {
129                            content: s.trim().to_string(),
130                            span: node.span(), // 简化处理
131                        })
132                        .collect();
133                    crate::ast::TableRow { cells, span: node.span() }
134                };
135
136                let header = parse_row(lines[0]);
137                let mut rows = Vec::new();
138                for line in lines.iter().skip(1) {
139                    if line.contains("---") {
140                        continue;
141                    }
142                    if line.trim().is_empty() {
143                        continue;
144                    }
145                    rows.push(parse_row(line))
146                }
147
148                Some(Block::Table(crate::ast::Table { header, rows, span: node.span() }))
149            }
150            ET::HtmlTag => {
151                // TODO: 实现 HTML 构建
152                None
153            }
154            _ => None,
155        }
156    }
157
158    fn build_list_item(&self, node: RedNode<MarkdownLanguage>, source: &SourceText) -> crate::ast::ListItem {
159        let mut content = Vec::new();
160        for child in node.children() {
161            if let RedTree::Node(child_node) = child {
162                if let Some(block) = self.build_block(child_node, source) {
163                    content.push(block)
164                }
165            }
166        }
167
168        // 如果没有嵌套块,但有文本内容,将其包装为段落
169        if content.is_empty() {
170            let text = source.get_text_in(node.span()).to_string();
171            if !text.trim().is_empty() {
172                // 简单的清理:移除可能的列表标记前缀
173                let display_text = if text.starts_with("- ") || text.starts_with("* ") {
174                    text[2..].to_string()
175                }
176                else if text.len() > 3 && text.chars().next().unwrap().is_ascii_digit() && text.contains(". ") {
177                    // 处理有序列表标记,如 "1. "
178                    if let Some(pos) = text.find(". ") { text[pos + 2..].to_string() } else { text }
179                }
180                else {
181                    text
182                };
183
184                content.push(crate::ast::Block::Paragraph(crate::ast::Paragraph { content: display_text.trim().to_string(), span: node.span() }))
185            }
186        }
187
188        crate::ast::ListItem { content, is_task: false, is_checked: None, span: node.span() }
189    }
190}
191
192impl<'config> Builder<MarkdownLanguage> for MarkdownBuilder<'config> {
193    fn build<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], _cache: &'a mut impl BuilderCache<MarkdownLanguage>) -> oak_core::builder::BuildOutput<MarkdownLanguage> {
194        let parser = MarkdownParser::new(self.config);
195        let mut parse_session = oak_core::parser::session::ParseSession::<MarkdownLanguage>::default();
196        let parse_result = parser.parse(source, edits, &mut parse_session);
197
198        match parse_result.result {
199            Ok(green_tree) => {
200                let source_text = SourceText::new(source.get_text_in((0..source.length()).into()).into_owned());
201                match self.build_root(green_tree, &source_text) {
202                    Ok(ast_root) => oak_core::OakDiagnostics { result: Ok(ast_root), diagnostics: parse_result.diagnostics },
203                    Err(build_error) => {
204                        let mut diagnostics = parse_result.diagnostics;
205                        diagnostics.push(build_error.clone());
206                        oak_core::OakDiagnostics { result: Err(build_error), diagnostics }
207                    }
208                }
209            }
210            Err(parse_error) => oak_core::OakDiagnostics { result: Err(parse_error), diagnostics: parse_result.diagnostics },
211        }
212    }
213}