Skip to main content

oak_markdown/parser/
mod.rs

1use crate::{kind::MarkdownSyntaxKind, language::MarkdownLanguage};
2use oak_core::{GreenNode, OakError, Parser, ParserState, source::Source};
3
4pub(crate) type State<'a, S> = ParserState<'a, MarkdownLanguage, S>;
5
6pub struct MarkdownParser<'config> {
7    pub(crate) config: &'config MarkdownLanguage,
8}
9
10impl<'config> MarkdownParser<'config> {
11    pub fn new(config: &'config MarkdownLanguage) -> Self {
12        Self { config }
13    }
14
15    pub(crate) fn parse_root_internal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<&'a GreenNode<'a, MarkdownLanguage>, OakError> {
16        let checkpoint = state.checkpoint();
17
18        while state.not_at_end() {
19            let item_checkpoint = state.checkpoint();
20            if let Some(kind) = state.peek_kind() {
21                match kind {
22                    MarkdownSyntaxKind::Heading1 | MarkdownSyntaxKind::Heading2 | MarkdownSyntaxKind::Heading3 | MarkdownSyntaxKind::Heading4 | MarkdownSyntaxKind::Heading5 | MarkdownSyntaxKind::Heading6 => {
23                        // 消耗标记和后续所有内容直到换行
24                        state.bump();
25                        while state.not_at_end() {
26                            if let Some(next_kind) = state.peek_kind() {
27                                if next_kind == MarkdownSyntaxKind::Newline {
28                                    break;
29                                }
30                            }
31                            state.bump();
32                        }
33                        state.finish_at(item_checkpoint, kind.into());
34                    }
35                    MarkdownSyntaxKind::ListMarker => {
36                        // 列表聚合逻辑:收集连续的列表项
37                        let mut is_ordered = false;
38                        if let Some(text) = state.peek_text() {
39                            if text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) {
40                                is_ordered = true;
41                            }
42                        }
43
44                        let list_checkpoint = item_checkpoint;
45                        while state.not_at_end() {
46                            if let Some(MarkdownSyntaxKind::ListMarker) = state.peek_kind() {
47                                // 检查当前项是否与列表类型一致
48                                let current_is_ordered = if let Some(text) = state.peek_text() { text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) } else { false };
49
50                                if current_is_ordered != is_ordered && state.checkpoint() != list_checkpoint {
51                                    // 类型不一致且不是第一项,结束当前列表
52                                    break;
53                                }
54
55                                let li_checkpoint = state.checkpoint();
56                                state.bump(); // 消耗标记并存入树
57                                while state.not_at_end() {
58                                    if let Some(next_kind) = state.peek_kind() {
59                                        if next_kind == MarkdownSyntaxKind::Newline {
60                                            break;
61                                        }
62                                    }
63                                    state.bump();
64                                }
65                                state.finish_at(li_checkpoint, MarkdownSyntaxKind::ListItem.into());
66
67                                // 消耗可能的换行,准备看下一个是否还是列表项
68                                if let Some(MarkdownSyntaxKind::Newline) = state.peek_kind() {
69                                    let nl_checkpoint = state.checkpoint();
70                                    state.bump();
71                                    if !matches!(state.peek_kind(), Some(MarkdownSyntaxKind::ListMarker)) {
72                                        // 如果下一行不是列表项,或者我们要结束列表,回退换行(除非它是列表的一部分)
73                                        // 这里简单处理:如果下一行不是列表项,就结束
74                                        break;
75                                    }
76                                    // 检查下一行列表项类型是否一致
77                                    let next_is_ordered = if let Some(text) = state.peek_text() { text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) } else { false };
78                                    if next_is_ordered != is_ordered {
79                                        // 下一项类型不一致,不消耗这个换行,留给下一个列表
80                                        state.restore(nl_checkpoint);
81                                        break;
82                                    }
83                                }
84                                else {
85                                    break;
86                                }
87                            }
88                            else {
89                                break;
90                            }
91                        }
92
93                        let list_kind = if is_ordered { MarkdownSyntaxKind::OrderedList } else { MarkdownSyntaxKind::UnorderedList };
94                        state.finish_at(list_checkpoint, list_kind.into());
95                    }
96                    MarkdownSyntaxKind::BlockquoteMarker => {
97                        // 消耗 > 标记
98                        state.bump();
99                        // 收集引用内容直到遇到非引用的新行
100                        while state.not_at_end() {
101                            if let Some(next_kind) = state.peek_kind() {
102                                if next_kind == MarkdownSyntaxKind::Newline {
103                                    state.bump();
104                                    if let Some(after_nl) = state.peek_kind() {
105                                        if after_nl != MarkdownSyntaxKind::BlockquoteMarker && after_nl != MarkdownSyntaxKind::Whitespace {
106                                            break;
107                                        }
108                                    }
109                                    else {
110                                        break;
111                                    }
112                                }
113                                else if next_kind == MarkdownSyntaxKind::Heading1
114                                    || next_kind == MarkdownSyntaxKind::Heading2
115                                    || next_kind == MarkdownSyntaxKind::Heading3
116                                    || next_kind == MarkdownSyntaxKind::Heading4
117                                    || next_kind == MarkdownSyntaxKind::Heading5
118                                    || next_kind == MarkdownSyntaxKind::Heading6
119                                    || next_kind == MarkdownSyntaxKind::HorizontalRule
120                                    || next_kind == MarkdownSyntaxKind::CodeFence
121                                {
122                                    break;
123                                }
124                            }
125                            state.bump();
126                        }
127                        state.finish_at(item_checkpoint, MarkdownSyntaxKind::Blockquote.into());
128                    }
129                    MarkdownSyntaxKind::CodeFence => {
130                        // 消耗开始围栏
131                        state.bump();
132                        // 消耗可能的语言标识
133                        if let Some(MarkdownSyntaxKind::CodeLanguage) = state.peek_kind() {
134                            state.bump();
135                        }
136                        // 收集代码内容直到遇到结束围栏
137                        while state.not_at_end() {
138                            if let Some(next_kind) = state.peek_kind() {
139                                if next_kind == MarkdownSyntaxKind::CodeFence {
140                                    state.bump();
141                                    break;
142                                }
143                            }
144                            state.bump();
145                        }
146                        state.finish_at(item_checkpoint, MarkdownSyntaxKind::CodeBlock.into());
147                    }
148                    MarkdownSyntaxKind::HorizontalRule => {
149                        state.bump();
150                        state.finish_at(item_checkpoint, MarkdownSyntaxKind::HorizontalRule.into());
151                    }
152                    MarkdownSyntaxKind::Pipe => {
153                        // 表格聚合:消耗连续的包含 | 的行
154                        while state.not_at_end() {
155                            // 消耗当前行直到换行
156                            while state.not_at_end() {
157                                if let Some(next_kind) = state.peek_kind() {
158                                    if next_kind == MarkdownSyntaxKind::Newline {
159                                        break;
160                                    }
161                                }
162                                state.bump();
163                            }
164
165                            // 消耗换行并检查下一行
166                            if let Some(MarkdownSyntaxKind::Newline) = state.peek_kind() {
167                                let checkpoint_before_nl = state.checkpoint();
168                                state.bump();
169
170                                // 检查下一行是否以 | 开头
171                                let mut is_table_line = false;
172                                while state.not_at_end() {
173                                    if let Some(kind) = state.peek_kind() {
174                                        if kind == MarkdownSyntaxKind::Whitespace {
175                                            state.bump();
176                                        }
177                                        else if kind == MarkdownSyntaxKind::Pipe {
178                                            is_table_line = true;
179                                            break;
180                                        }
181                                        else {
182                                            break;
183                                        }
184                                    }
185                                    else {
186                                        break;
187                                    }
188                                }
189
190                                if is_table_line {
191                                    // 是表格行,继续循环
192                                    continue;
193                                }
194                                else {
195                                    // 不是表格行,回退到换行前并退出
196                                    state.restore(checkpoint_before_nl);
197                                    break;
198                                }
199                            }
200                            else {
201                                break;
202                            }
203                        }
204                        state.finish_at(item_checkpoint, MarkdownSyntaxKind::Table.into());
205                    }
206                    MarkdownSyntaxKind::Newline | MarkdownSyntaxKind::Whitespace => {
207                        state.bump();
208                    }
209                    _ => {
210                        // 收集段落内容:直到遇到两个换行或另一个块级元素
211                        while state.not_at_end() {
212                            if let Some(next_kind) = state.peek_kind() {
213                                if next_kind == MarkdownSyntaxKind::Newline {
214                                    let _cp = state.checkpoint();
215                                    state.bump();
216                                    // 检查是否是连续换行
217                                    if let Some(after_nl) = state.peek_kind() {
218                                        if after_nl == MarkdownSyntaxKind::Newline {
219                                            state.bump();
220                                            break;
221                                        }
222                                        // 或者是块级元素
223                                        if matches!(
224                                            after_nl,
225                                            MarkdownSyntaxKind::Heading1
226                                                | MarkdownSyntaxKind::Heading2
227                                                | MarkdownSyntaxKind::Heading3
228                                                | MarkdownSyntaxKind::Heading4
229                                                | MarkdownSyntaxKind::Heading5
230                                                | MarkdownSyntaxKind::Heading6
231                                                | MarkdownSyntaxKind::BlockquoteMarker
232                                                | MarkdownSyntaxKind::CodeFence
233                                                | MarkdownSyntaxKind::ListMarker
234                                                | MarkdownSyntaxKind::HorizontalRule
235                                        ) {
236                                            break;
237                                        }
238                                    }
239                                    else {
240                                        break;
241                                    }
242                                }
243                                else if matches!(
244                                    next_kind,
245                                    MarkdownSyntaxKind::Heading1
246                                        | MarkdownSyntaxKind::Heading2
247                                        | MarkdownSyntaxKind::Heading3
248                                        | MarkdownSyntaxKind::Heading4
249                                        | MarkdownSyntaxKind::Heading5
250                                        | MarkdownSyntaxKind::Heading6
251                                        | MarkdownSyntaxKind::BlockquoteMarker
252                                        | MarkdownSyntaxKind::CodeFence
253                                        | MarkdownSyntaxKind::ListMarker
254                                        | MarkdownSyntaxKind::HorizontalRule
255                                ) {
256                                    break;
257                                }
258                            }
259                            state.bump();
260                        }
261                        state.finish_at(item_checkpoint, MarkdownSyntaxKind::Paragraph.into());
262                    }
263                }
264            }
265            else {
266                state.advance();
267            }
268        }
269
270        let root = state.finish_at(checkpoint, MarkdownSyntaxKind::Root.into());
271        Ok(root)
272    }
273}
274
275impl<'config> Parser<MarkdownLanguage> for MarkdownParser<'config> {
276    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[oak_core::TextEdit], cache: &'a mut impl oak_core::ParseCache<MarkdownLanguage>) -> oak_core::ParseOutput<'a, MarkdownLanguage> {
277        let lexer = crate::lexer::MarkdownLexer::new(&self.config);
278        oak_core::parser::parse_with_lexer(&lexer, text, edits, cache, |state| self.parse_root_internal(state))
279    }
280}