oak_rst/parser/mod.rs
1/// Element types for the reStructuredText language.
2pub mod element_type;
3
4use crate::{language::RstLanguage, lexer::token_type::RstTokenType, parser::element_type::RstElementType as ET};
5use oak_core::{GreenNode, OakError, Parser, ParserState, source::Source};
6
7/// Parser for reStructuredText language.
8pub struct RstParser<'config> {
9 pub(crate) config: &'config RstLanguage,
10}
11
12impl<'config> RstParser<'config> {
13 /// Creates a new RstParser with the given configuration.
14 pub fn new(config: &'config RstLanguage) -> Self {
15 Self { config }
16 }
17}
18
19impl<'config> Parser<RstLanguage> for RstParser<'config> {
20 fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[oak_core::TextEdit], cache: &'a mut impl oak_core::ParseCache<RstLanguage>) -> oak_core::ParseOutput<'a, RstLanguage> {
21 let lexer = crate::lexer::RstLexer::new(&self.config);
22 oak_core::parser::parse_with_lexer(&lexer, text, edits, cache, |state| {
23 let checkpoint = state.checkpoint();
24
25 while state.not_at_end() {
26 let item_checkpoint = state.checkpoint();
27 if let Some(kind) = state.peek_kind() {
28 match kind {
29 RstTokenType::Comment => {
30 state.bump();
31 state.finish_at(item_checkpoint, ET::Comment);
32 }
33 RstTokenType::Heading1 => {
34 state.bump();
35 // 解析标题文本
36 self.parse_inlines_until_newline(state);
37 state.finish_at(item_checkpoint, ET::Heading1);
38 }
39 RstTokenType::Heading2 => {
40 state.bump();
41 // 解析标题文本
42 self.parse_inlines_until_newline(state);
43 state.finish_at(item_checkpoint, ET::Heading2);
44 }
45 RstTokenType::Heading3 => {
46 state.bump();
47 // 解析标题文本
48 self.parse_inlines_until_newline(state);
49 state.finish_at(item_checkpoint, ET::Heading3);
50 }
51 RstTokenType::Heading4 => {
52 state.bump();
53 // 解析标题文本
54 self.parse_inlines_until_newline(state);
55 state.finish_at(item_checkpoint, ET::Heading4);
56 }
57 RstTokenType::Heading5 => {
58 state.bump();
59 // 解析标题文本
60 self.parse_inlines_until_newline(state);
61 state.finish_at(item_checkpoint, ET::Heading5);
62 }
63 RstTokenType::Heading6 => {
64 state.bump();
65 // 解析标题文本
66 self.parse_inlines_until_newline(state);
67 state.finish_at(item_checkpoint, ET::Heading6);
68 }
69 RstTokenType::Directive => {
70 state.bump();
71 // 解析指令参数和选项
72 while state.not_at_end() {
73 if let Some(kind) = state.peek_kind() {
74 match kind {
75 RstTokenType::DirectiveArgument => {
76 state.bump();
77 }
78 RstTokenType::DirectiveOption => {
79 state.bump();
80 }
81 RstTokenType::Newline => {
82 state.bump();
83 // 检查是否有指令内容
84 let mut indent_level = 0;
85 while state.not_at_end() {
86 if let Some(RstTokenType::Whitespace) = state.peek_kind() {
87 state.bump();
88 indent_level += 1;
89 }
90 else if indent_level > 0 {
91 // 指令内容
92 self.parse_inlines_until_newline(state);
93 }
94 else {
95 break;
96 }
97 }
98 break;
99 }
100 _ => {
101 break;
102 }
103 }
104 }
105 else {
106 break;
107 }
108 }
109 state.finish_at(item_checkpoint, ET::Directive);
110 }
111 RstTokenType::BulletListMarker => {
112 let list_checkpoint = item_checkpoint;
113 while state.not_at_end() {
114 if let Some(RstTokenType::BulletListMarker) = state.peek_kind() {
115 let li_checkpoint = state.checkpoint();
116 state.bump(); // Marker
117 self.parse_inlines_until_newline(state);
118 state.finish_at(li_checkpoint, ET::ListItem);
119
120 if let Some(RstTokenType::Newline) = state.peek_kind() {
121 let nl_checkpoint = state.checkpoint();
122 state.bump();
123 // 检查是否有嵌套列表
124 let mut indent_level = 0;
125 while state.not_at_end() {
126 if let Some(RstTokenType::Whitespace) = state.peek_kind() {
127 state.bump();
128 indent_level += 1;
129 }
130 else if let Some(RstTokenType::BulletListMarker) = state.peek_kind() {
131 // 递归解析嵌套列表
132 let nested_list_checkpoint = state.checkpoint();
133 while state.not_at_end() {
134 if let Some(RstTokenType::BulletListMarker) = state.peek_kind() {
135 let nested_li_checkpoint = state.checkpoint();
136 state.bump(); // Marker
137 self.parse_inlines_until_newline(state);
138 state.finish_at(nested_li_checkpoint, ET::ListItem);
139
140 if let Some(RstTokenType::Newline) = state.peek_kind() {
141 let nested_nl_checkpoint = state.checkpoint();
142 state.bump();
143 let mut nested_indent_level = 0;
144 while state.not_at_end() {
145 if let Some(RstTokenType::Whitespace) = state.peek_kind() {
146 state.bump();
147 nested_indent_level += 1;
148 }
149 else {
150 break;
151 }
152 }
153 if nested_indent_level <= indent_level || !matches!(state.peek_kind(), Some(RstTokenType::BulletListMarker)) {
154 state.restore(nested_nl_checkpoint);
155 break;
156 }
157 }
158 else {
159 break;
160 }
161 }
162 else {
163 break;
164 }
165 }
166 state.finish_at(nested_list_checkpoint, ET::BulletList);
167 break;
168 }
169 else {
170 state.restore(nl_checkpoint);
171 break;
172 }
173 }
174 }
175
176 if let Some(RstTokenType::Newline) = state.peek_kind() {
177 let nl_checkpoint = state.checkpoint();
178 state.bump();
179 if !matches!(state.peek_kind(), Some(RstTokenType::BulletListMarker)) {
180 state.restore(nl_checkpoint);
181 break;
182 }
183 }
184 else {
185 break;
186 }
187 }
188 else {
189 break;
190 }
191 }
192 state.finish_at(list_checkpoint, ET::BulletList);
193 }
194 RstTokenType::EnumeratedListMarker => {
195 let list_checkpoint = item_checkpoint;
196 while state.not_at_end() {
197 if let Some(RstTokenType::EnumeratedListMarker) = state.peek_kind() {
198 let li_checkpoint = state.checkpoint();
199 state.bump(); // Marker
200 self.parse_inlines_until_newline(state);
201 state.finish_at(li_checkpoint, ET::ListItem);
202
203 if let Some(RstTokenType::Newline) = state.peek_kind() {
204 let nl_checkpoint = state.checkpoint();
205 state.bump();
206 // 检查是否有嵌套列表
207 let mut indent_level = 0;
208 while state.not_at_end() {
209 if let Some(RstTokenType::Whitespace) = state.peek_kind() {
210 state.bump();
211 indent_level += 1;
212 }
213 else if let Some(RstTokenType::EnumeratedListMarker) = state.peek_kind() {
214 // 递归解析嵌套列表
215 let nested_list_checkpoint = state.checkpoint();
216 while state.not_at_end() {
217 if let Some(RstTokenType::EnumeratedListMarker) = state.peek_kind() {
218 let nested_li_checkpoint = state.checkpoint();
219 state.bump(); // Marker
220 self.parse_inlines_until_newline(state);
221 state.finish_at(nested_li_checkpoint, ET::ListItem);
222
223 if let Some(RstTokenType::Newline) = state.peek_kind() {
224 let nested_nl_checkpoint = state.checkpoint();
225 state.bump();
226 let mut nested_indent_level = 0;
227 while state.not_at_end() {
228 if let Some(RstTokenType::Whitespace) = state.peek_kind() {
229 state.bump();
230 nested_indent_level += 1;
231 }
232 else {
233 break;
234 }
235 }
236 if nested_indent_level <= indent_level || !matches!(state.peek_kind(), Some(RstTokenType::EnumeratedListMarker)) {
237 state.restore(nested_nl_checkpoint);
238 break;
239 }
240 }
241 else {
242 break;
243 }
244 }
245 else {
246 break;
247 }
248 }
249 state.finish_at(nested_list_checkpoint, ET::EnumeratedList);
250 break;
251 }
252 else {
253 state.restore(nl_checkpoint);
254 break;
255 }
256 }
257 }
258
259 if let Some(RstTokenType::Newline) = state.peek_kind() {
260 let nl_checkpoint = state.checkpoint();
261 state.bump();
262 if !matches!(state.peek_kind(), Some(RstTokenType::EnumeratedListMarker)) {
263 state.restore(nl_checkpoint);
264 break;
265 }
266 }
267 else {
268 break;
269 }
270 }
271 else {
272 break;
273 }
274 }
275 state.finish_at(list_checkpoint, ET::EnumeratedList);
276 }
277 RstTokenType::DefinitionDefinition => {
278 state.bump();
279 self.parse_inlines_until_newline(state);
280 state.finish_at(item_checkpoint, ET::DefinitionDefinition);
281 }
282 RstTokenType::Table => {
283 let table_checkpoint = item_checkpoint;
284 state.bump();
285 // 解析表格行和单元格
286 self.parse_table(state);
287 state.finish_at(table_checkpoint, ET::Table);
288 }
289 RstTokenType::CodeBlock => {
290 state.bump();
291 // 解析代码块内容
292 while state.not_at_end() {
293 if let Some(RstTokenType::Newline) = state.peek_kind() {
294 state.bump();
295 // 检查代码块结束
296 let mut is_end = true;
297 for _ in 0..3 {
298 if let Some(RstTokenType::Text) = state.peek_kind() {
299 state.bump();
300 }
301 else {
302 is_end = false;
303 break;
304 }
305 }
306 if is_end {
307 break;
308 }
309 }
310 else {
311 state.bump();
312 }
313 }
314 state.finish_at(item_checkpoint, ET::CodeBlock);
315 }
316 RstTokenType::FootnoteDefinition => {
317 state.bump();
318 // 解析脚注定义内容
319 self.parse_inlines_until_newline(state);
320 state.finish_at(item_checkpoint, ET::FootnoteDefinition);
321 }
322 RstTokenType::HorizontalRule => {
323 state.bump();
324 state.finish_at(item_checkpoint, ET::HorizontalRule);
325 }
326 RstTokenType::Newline | RstTokenType::Whitespace => {
327 state.bump();
328 }
329 _ => {
330 self.parse_paragraph(state);
331 }
332 }
333 }
334 else {
335 state.advance();
336 }
337 }
338
339 let root = state.finish_at(checkpoint, ET::Root);
340 Ok(root)
341 })
342 }
343}
344
345impl<'config> RstParser<'config> {
346 fn is_block_start(&self, kind: RstTokenType) -> bool {
347 matches!(
348 kind,
349 RstTokenType::Comment
350 | RstTokenType::Heading1
351 | RstTokenType::Heading2
352 | RstTokenType::Heading3
353 | RstTokenType::Heading4
354 | RstTokenType::Heading5
355 | RstTokenType::Heading6
356 | RstTokenType::Directive
357 | RstTokenType::BulletListMarker
358 | RstTokenType::EnumeratedListMarker
359 | RstTokenType::DefinitionDefinition
360 | RstTokenType::Table
361 | RstTokenType::CodeBlock
362 | RstTokenType::HorizontalRule
363 )
364 }
365
366 fn parse_paragraph<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
367 let checkpoint = state.checkpoint();
368 while state.not_at_end() {
369 if let Some(next_kind) = state.peek_kind() {
370 if next_kind == RstTokenType::Newline {
371 let cp = state.checkpoint();
372 state.bump();
373 if let Some(after_nl) = state.peek_kind() {
374 if after_nl == RstTokenType::Newline || self.is_block_start(after_nl) {
375 state.restore(cp);
376 break;
377 }
378 }
379 else {
380 break;
381 }
382 }
383 else if self.is_block_start(next_kind) {
384 break;
385 }
386 }
387 self.parse_inline(state);
388 }
389 state.finish_at(checkpoint, ET::Paragraph);
390 }
391
392 fn parse_inlines_until_newline<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
393 while state.not_at_end() {
394 if let Some(kind) = state.peek_kind() {
395 if kind == RstTokenType::Newline {
396 break;
397 }
398 }
399 self.parse_inline(state);
400 }
401 }
402
403 fn parse_inline<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
404 let checkpoint = state.checkpoint();
405 if let Some(kind) = state.peek_kind() {
406 match kind {
407 RstTokenType::Emphasis => {
408 state.bump(); // Start marker
409 while state.not_at_end() && state.peek_kind() != Some(RstTokenType::Emphasis) && state.peek_kind() != Some(RstTokenType::Newline) {
410 self.parse_inline(state);
411 }
412 if state.peek_kind() == Some(RstTokenType::Emphasis) {
413 state.bump(); // End marker
414 }
415 state.finish_at(checkpoint, ET::Emphasis);
416 }
417 RstTokenType::Strong => {
418 state.bump(); // Start marker
419 while state.not_at_end() && state.peek_kind() != Some(RstTokenType::Strong) && state.peek_kind() != Some(RstTokenType::Newline) {
420 self.parse_inline(state);
421 }
422 if state.peek_kind() == Some(RstTokenType::Strong) {
423 state.bump(); // End marker
424 }
425 state.finish_at(checkpoint, ET::Strong);
426 }
427 RstTokenType::Literal => {
428 state.bump();
429 // 解析字面量内容
430 while state.not_at_end() && state.peek_kind() != Some(RstTokenType::Literal) && state.peek_kind() != Some(RstTokenType::Newline) {
431 state.bump();
432 }
433 if state.peek_kind() == Some(RstTokenType::Literal) {
434 state.bump(); // End marker
435 }
436 state.finish_at(checkpoint, ET::Literal);
437 }
438 RstTokenType::Link => {
439 state.bump();
440 // 解析链接内容
441 while state.not_at_end() && state.peek_kind() != Some(RstTokenType::Link) && state.peek_kind() != Some(RstTokenType::Newline) {
442 self.parse_inline(state);
443 }
444 if state.peek_kind() == Some(RstTokenType::Link) {
445 state.bump(); // End marker
446 }
447 state.finish_at(checkpoint, ET::Link);
448 }
449 RstTokenType::FootnoteReference => {
450 state.bump();
451 state.finish_at(checkpoint, ET::FootnoteReference);
452 }
453 RstTokenType::SubstitutionReference => {
454 state.bump();
455 state.finish_at(checkpoint, ET::SubstitutionReference);
456 }
457 RstTokenType::Role => {
458 state.bump();
459 state.finish_at(checkpoint, ET::Role);
460 }
461 _ => {
462 state.bump();
463 }
464 }
465 }
466 else {
467 state.advance();
468 }
469 }
470
471 /// Parses complex table structures
472 fn parse_table<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
473 while state.not_at_end() {
474 if let Some(RstTokenType::Newline) = state.peek_kind() {
475 state.bump();
476 if let Some(RstTokenType::Table) = state.peek_kind() {
477 state.bump();
478 // Parse table row
479 let row_checkpoint = state.checkpoint();
480 // Process table row content
481 self.parse_table_row(state);
482 state.finish_at(row_checkpoint, ET::TableRow);
483 }
484 else {
485 break;
486 }
487 }
488 else {
489 break;
490 }
491 }
492 }
493
494 /// Parses a single table row
495 fn parse_table_row<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
496 // Skip any leading whitespace
497 while state.not_at_end() {
498 if let Some(RstTokenType::Whitespace) = state.peek_kind() {
499 state.bump();
500 }
501 else {
502 break;
503 }
504 }
505
506 // Process table cells
507 while state.not_at_end() {
508 if let Some(RstTokenType::Text) = state.peek_kind() {
509 let cell_checkpoint = state.checkpoint();
510 // Parse cell content until next | or end of line
511 while state.not_at_end() {
512 if let Some(kind) = state.peek_kind() {
513 if kind == RstTokenType::Newline {
514 break;
515 }
516 state.bump();
517 }
518 else {
519 break;
520 }
521 }
522 state.finish_at(cell_checkpoint, ET::TableCell);
523 }
524 else if let Some(RstTokenType::Newline) = state.peek_kind() {
525 break;
526 }
527 else {
528 state.bump();
529 }
530 }
531 }
532}