oak_markdown/parser/mod.rs
1use crate::{kind::MarkdownSyntaxKind, language::MarkdownLanguage};
2use oak_core::{GreenNode, OakError, Parser, ParserState, source::Source};
3
4pub(crate) type State<'a, S> = ParserState<'a, MarkdownLanguage, S>;
5
6pub struct MarkdownParser<'config> {
7 pub(crate) config: &'config MarkdownLanguage,
8}
9
10impl<'config> MarkdownParser<'config> {
11 pub fn new(config: &'config MarkdownLanguage) -> Self {
12 Self { config }
13 }
14
15 pub(crate) fn parse_root_internal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<&'a GreenNode<'a, MarkdownLanguage>, OakError> {
16 let checkpoint = state.checkpoint();
17
18 while state.not_at_end() {
19 let item_checkpoint = state.checkpoint();
20 if let Some(kind) = state.peek_kind() {
21 match kind {
22 MarkdownSyntaxKind::Heading1 | MarkdownSyntaxKind::Heading2 | MarkdownSyntaxKind::Heading3 | MarkdownSyntaxKind::Heading4 | MarkdownSyntaxKind::Heading5 | MarkdownSyntaxKind::Heading6 => {
23 // 消耗标记和后续所有内容直到换行
24 state.bump();
25 while state.not_at_end() {
26 if let Some(next_kind) = state.peek_kind() {
27 if next_kind == MarkdownSyntaxKind::Newline {
28 break;
29 }
30 }
31 state.bump();
32 }
33 state.finish_at(item_checkpoint, kind.into());
34 }
35 MarkdownSyntaxKind::ListMarker => {
36 // 列表聚合逻辑:收集连续的列表项
37 let mut is_ordered = false;
38 if let Some(text) = state.peek_text() {
39 if text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) {
40 is_ordered = true;
41 }
42 }
43
44 let list_checkpoint = item_checkpoint;
45 while state.not_at_end() {
46 if let Some(MarkdownSyntaxKind::ListMarker) = state.peek_kind() {
47 // 检查当前项是否与列表类型一致
48 let current_is_ordered = if let Some(text) = state.peek_text() { text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) } else { false };
49
50 if current_is_ordered != is_ordered && state.checkpoint() != list_checkpoint {
51 // 类型不一致且不是第一项,结束当前列表
52 break;
53 }
54
55 let li_checkpoint = state.checkpoint();
56 state.bump(); // 消耗标记并存入树
57 while state.not_at_end() {
58 if let Some(next_kind) = state.peek_kind() {
59 if next_kind == MarkdownSyntaxKind::Newline {
60 break;
61 }
62 }
63 state.bump();
64 }
65 state.finish_at(li_checkpoint, MarkdownSyntaxKind::ListItem.into());
66
67 // 消耗可能的换行,准备看下一个是否还是列表项
68 if let Some(MarkdownSyntaxKind::Newline) = state.peek_kind() {
69 let nl_checkpoint = state.checkpoint();
70 state.bump();
71 if !matches!(state.peek_kind(), Some(MarkdownSyntaxKind::ListMarker)) {
72 // 如果下一行不是列表项,或者我们要结束列表,回退换行(除非它是列表的一部分)
73 // 这里简单处理:如果下一行不是列表项,就结束
74 break;
75 }
76 // 检查下一行列表项类型是否一致
77 let next_is_ordered = if let Some(text) = state.peek_text() { text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) } else { false };
78 if next_is_ordered != is_ordered {
79 // 下一项类型不一致,不消耗这个换行,留给下一个列表
80 state.restore(nl_checkpoint);
81 break;
82 }
83 }
84 else {
85 break;
86 }
87 }
88 else {
89 break;
90 }
91 }
92
93 let list_kind = if is_ordered { MarkdownSyntaxKind::OrderedList } else { MarkdownSyntaxKind::UnorderedList };
94 state.finish_at(list_checkpoint, list_kind.into());
95 }
96 MarkdownSyntaxKind::BlockquoteMarker => {
97 // 消耗 > 标记
98 state.bump();
99 // 收集引用内容直到遇到非引用的新行
100 while state.not_at_end() {
101 if let Some(next_kind) = state.peek_kind() {
102 if next_kind == MarkdownSyntaxKind::Newline {
103 state.bump();
104 if let Some(after_nl) = state.peek_kind() {
105 if after_nl != MarkdownSyntaxKind::BlockquoteMarker && after_nl != MarkdownSyntaxKind::Whitespace {
106 break;
107 }
108 }
109 else {
110 break;
111 }
112 }
113 else if next_kind == MarkdownSyntaxKind::Heading1
114 || next_kind == MarkdownSyntaxKind::Heading2
115 || next_kind == MarkdownSyntaxKind::Heading3
116 || next_kind == MarkdownSyntaxKind::Heading4
117 || next_kind == MarkdownSyntaxKind::Heading5
118 || next_kind == MarkdownSyntaxKind::Heading6
119 || next_kind == MarkdownSyntaxKind::HorizontalRule
120 || next_kind == MarkdownSyntaxKind::CodeFence
121 {
122 break;
123 }
124 }
125 state.bump();
126 }
127 state.finish_at(item_checkpoint, MarkdownSyntaxKind::Blockquote.into());
128 }
129 MarkdownSyntaxKind::CodeFence => {
130 // 消耗开始围栏
131 state.bump();
132 // 消耗可能的语言标识
133 if let Some(MarkdownSyntaxKind::CodeLanguage) = state.peek_kind() {
134 state.bump();
135 }
136 // 收集代码内容直到遇到结束围栏
137 while state.not_at_end() {
138 if let Some(next_kind) = state.peek_kind() {
139 if next_kind == MarkdownSyntaxKind::CodeFence {
140 state.bump();
141 break;
142 }
143 }
144 state.bump();
145 }
146 state.finish_at(item_checkpoint, MarkdownSyntaxKind::CodeBlock.into());
147 }
148 MarkdownSyntaxKind::HorizontalRule => {
149 state.bump();
150 state.finish_at(item_checkpoint, MarkdownSyntaxKind::HorizontalRule.into());
151 }
152 MarkdownSyntaxKind::Pipe => {
153 // 表格聚合:消耗连续的包含 | 的行
154 while state.not_at_end() {
155 // 消耗当前行直到换行
156 while state.not_at_end() {
157 if let Some(next_kind) = state.peek_kind() {
158 if next_kind == MarkdownSyntaxKind::Newline {
159 break;
160 }
161 }
162 state.bump();
163 }
164
165 // 消耗换行并检查下一行
166 if let Some(MarkdownSyntaxKind::Newline) = state.peek_kind() {
167 let checkpoint_before_nl = state.checkpoint();
168 state.bump();
169
170 // 检查下一行是否以 | 开头
171 let mut is_table_line = false;
172 while state.not_at_end() {
173 if let Some(kind) = state.peek_kind() {
174 if kind == MarkdownSyntaxKind::Whitespace {
175 state.bump();
176 }
177 else if kind == MarkdownSyntaxKind::Pipe {
178 is_table_line = true;
179 break;
180 }
181 else {
182 break;
183 }
184 }
185 else {
186 break;
187 }
188 }
189
190 if is_table_line {
191 // 是表格行,继续循环
192 continue;
193 }
194 else {
195 // 不是表格行,回退到换行前并退出
196 state.restore(checkpoint_before_nl);
197 break;
198 }
199 }
200 else {
201 break;
202 }
203 }
204 state.finish_at(item_checkpoint, MarkdownSyntaxKind::Table.into());
205 }
206 MarkdownSyntaxKind::Newline | MarkdownSyntaxKind::Whitespace => {
207 state.bump();
208 }
209 _ => {
210 // 收集段落内容:直到遇到两个换行或另一个块级元素
211 while state.not_at_end() {
212 if let Some(next_kind) = state.peek_kind() {
213 if next_kind == MarkdownSyntaxKind::Newline {
214 let _cp = state.checkpoint();
215 state.bump();
216 // 检查是否是连续换行
217 if let Some(after_nl) = state.peek_kind() {
218 if after_nl == MarkdownSyntaxKind::Newline {
219 state.bump();
220 break;
221 }
222 // 或者是块级元素
223 if matches!(
224 after_nl,
225 MarkdownSyntaxKind::Heading1
226 | MarkdownSyntaxKind::Heading2
227 | MarkdownSyntaxKind::Heading3
228 | MarkdownSyntaxKind::Heading4
229 | MarkdownSyntaxKind::Heading5
230 | MarkdownSyntaxKind::Heading6
231 | MarkdownSyntaxKind::BlockquoteMarker
232 | MarkdownSyntaxKind::CodeFence
233 | MarkdownSyntaxKind::ListMarker
234 | MarkdownSyntaxKind::HorizontalRule
235 ) {
236 break;
237 }
238 }
239 else {
240 break;
241 }
242 }
243 else if matches!(
244 next_kind,
245 MarkdownSyntaxKind::Heading1
246 | MarkdownSyntaxKind::Heading2
247 | MarkdownSyntaxKind::Heading3
248 | MarkdownSyntaxKind::Heading4
249 | MarkdownSyntaxKind::Heading5
250 | MarkdownSyntaxKind::Heading6
251 | MarkdownSyntaxKind::BlockquoteMarker
252 | MarkdownSyntaxKind::CodeFence
253 | MarkdownSyntaxKind::ListMarker
254 | MarkdownSyntaxKind::HorizontalRule
255 ) {
256 break;
257 }
258 }
259 state.bump();
260 }
261 state.finish_at(item_checkpoint, MarkdownSyntaxKind::Paragraph.into());
262 }
263 }
264 }
265 else {
266 state.advance();
267 }
268 }
269
270 let root = state.finish_at(checkpoint, MarkdownSyntaxKind::Root.into());
271 Ok(root)
272 }
273}
274
275impl<'config> Parser<MarkdownLanguage> for MarkdownParser<'config> {
276 fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[oak_core::TextEdit], cache: &'a mut impl oak_core::ParseCache<MarkdownLanguage>) -> oak_core::ParseOutput<'a, MarkdownLanguage> {
277 let lexer = crate::lexer::MarkdownLexer::new(&self.config);
278 oak_core::parser::parse_with_lexer(&lexer, text, edits, cache, |state| self.parse_root_internal(state))
279 }
280}