Skip to main content

oak_rst/parser/
mod.rs

1/// Element types for the reStructuredText language.
2pub mod element_type;
3
4use crate::{language::RstLanguage, lexer::token_type::RstTokenType, parser::element_type::RstElementType as ET};
5use oak_core::{GreenNode, OakError, Parser, ParserState, source::Source};
6
/// Parser for reStructuredText language.
///
/// The parser does not own its configuration; it borrows it for the lifetime `'config`.
pub struct RstParser<'config> {
    /// Language configuration; passed to `RstLexer::new` when `parse` builds its lexer.
    pub(crate) config: &'config RstLanguage,
}
11
12impl<'config> RstParser<'config> {
13    /// Creates a new RstParser with the given configuration.
14    pub fn new(config: &'config RstLanguage) -> Self {
15        Self { config }
16    }
17}
18
19impl<'config> Parser<RstLanguage> for RstParser<'config> {
20    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[oak_core::TextEdit], cache: &'a mut impl oak_core::ParseCache<RstLanguage>) -> oak_core::ParseOutput<'a, RstLanguage> {
21        let lexer = crate::lexer::RstLexer::new(&self.config);
22        oak_core::parser::parse_with_lexer(&lexer, text, edits, cache, |state| {
23            let checkpoint = state.checkpoint();
24
25            while state.not_at_end() {
26                let item_checkpoint = state.checkpoint();
27                if let Some(kind) = state.peek_kind() {
28                    match kind {
29                        RstTokenType::Comment => {
30                            state.bump();
31                            state.finish_at(item_checkpoint, ET::Comment);
32                        }
33                        RstTokenType::Heading1 => {
34                            state.bump();
35                            // 解析标题文本
36                            self.parse_inlines_until_newline(state);
37                            state.finish_at(item_checkpoint, ET::Heading1);
38                        }
39                        RstTokenType::Heading2 => {
40                            state.bump();
41                            // 解析标题文本
42                            self.parse_inlines_until_newline(state);
43                            state.finish_at(item_checkpoint, ET::Heading2);
44                        }
45                        RstTokenType::Heading3 => {
46                            state.bump();
47                            // 解析标题文本
48                            self.parse_inlines_until_newline(state);
49                            state.finish_at(item_checkpoint, ET::Heading3);
50                        }
51                        RstTokenType::Heading4 => {
52                            state.bump();
53                            // 解析标题文本
54                            self.parse_inlines_until_newline(state);
55                            state.finish_at(item_checkpoint, ET::Heading4);
56                        }
57                        RstTokenType::Heading5 => {
58                            state.bump();
59                            // 解析标题文本
60                            self.parse_inlines_until_newline(state);
61                            state.finish_at(item_checkpoint, ET::Heading5);
62                        }
63                        RstTokenType::Heading6 => {
64                            state.bump();
65                            // 解析标题文本
66                            self.parse_inlines_until_newline(state);
67                            state.finish_at(item_checkpoint, ET::Heading6);
68                        }
69                        RstTokenType::Directive => {
70                            state.bump();
71                            // 解析指令参数和选项
72                            while state.not_at_end() {
73                                if let Some(kind) = state.peek_kind() {
74                                    match kind {
75                                        RstTokenType::DirectiveArgument => {
76                                            state.bump();
77                                        }
78                                        RstTokenType::DirectiveOption => {
79                                            state.bump();
80                                        }
81                                        RstTokenType::Newline => {
82                                            state.bump();
83                                            // 检查是否有指令内容
84                                            let mut indent_level = 0;
85                                            while state.not_at_end() {
86                                                if let Some(RstTokenType::Whitespace) = state.peek_kind() {
87                                                    state.bump();
88                                                    indent_level += 1;
89                                                }
90                                                else if indent_level > 0 {
91                                                    // 指令内容
92                                                    self.parse_inlines_until_newline(state);
93                                                }
94                                                else {
95                                                    break;
96                                                }
97                                            }
98                                            break;
99                                        }
100                                        _ => {
101                                            break;
102                                        }
103                                    }
104                                }
105                                else {
106                                    break;
107                                }
108                            }
109                            state.finish_at(item_checkpoint, ET::Directive);
110                        }
111                        RstTokenType::BulletListMarker => {
112                            let list_checkpoint = item_checkpoint;
113                            while state.not_at_end() {
114                                if let Some(RstTokenType::BulletListMarker) = state.peek_kind() {
115                                    let li_checkpoint = state.checkpoint();
116                                    state.bump(); // Marker
117                                    self.parse_inlines_until_newline(state);
118                                    state.finish_at(li_checkpoint, ET::ListItem);
119
120                                    if let Some(RstTokenType::Newline) = state.peek_kind() {
121                                        let nl_checkpoint = state.checkpoint();
122                                        state.bump();
123                                        // 检查是否有嵌套列表
124                                        let mut indent_level = 0;
125                                        while state.not_at_end() {
126                                            if let Some(RstTokenType::Whitespace) = state.peek_kind() {
127                                                state.bump();
128                                                indent_level += 1;
129                                            }
130                                            else if let Some(RstTokenType::BulletListMarker) = state.peek_kind() {
131                                                // 递归解析嵌套列表
132                                                let nested_list_checkpoint = state.checkpoint();
133                                                while state.not_at_end() {
134                                                    if let Some(RstTokenType::BulletListMarker) = state.peek_kind() {
135                                                        let nested_li_checkpoint = state.checkpoint();
136                                                        state.bump(); // Marker
137                                                        self.parse_inlines_until_newline(state);
138                                                        state.finish_at(nested_li_checkpoint, ET::ListItem);
139
140                                                        if let Some(RstTokenType::Newline) = state.peek_kind() {
141                                                            let nested_nl_checkpoint = state.checkpoint();
142                                                            state.bump();
143                                                            let mut nested_indent_level = 0;
144                                                            while state.not_at_end() {
145                                                                if let Some(RstTokenType::Whitespace) = state.peek_kind() {
146                                                                    state.bump();
147                                                                    nested_indent_level += 1;
148                                                                }
149                                                                else {
150                                                                    break;
151                                                                }
152                                                            }
153                                                            if nested_indent_level <= indent_level || !matches!(state.peek_kind(), Some(RstTokenType::BulletListMarker)) {
154                                                                state.restore(nested_nl_checkpoint);
155                                                                break;
156                                                            }
157                                                        }
158                                                        else {
159                                                            break;
160                                                        }
161                                                    }
162                                                    else {
163                                                        break;
164                                                    }
165                                                }
166                                                state.finish_at(nested_list_checkpoint, ET::BulletList);
167                                                break;
168                                            }
169                                            else {
170                                                state.restore(nl_checkpoint);
171                                                break;
172                                            }
173                                        }
174                                    }
175
176                                    if let Some(RstTokenType::Newline) = state.peek_kind() {
177                                        let nl_checkpoint = state.checkpoint();
178                                        state.bump();
179                                        if !matches!(state.peek_kind(), Some(RstTokenType::BulletListMarker)) {
180                                            state.restore(nl_checkpoint);
181                                            break;
182                                        }
183                                    }
184                                    else {
185                                        break;
186                                    }
187                                }
188                                else {
189                                    break;
190                                }
191                            }
192                            state.finish_at(list_checkpoint, ET::BulletList);
193                        }
194                        RstTokenType::EnumeratedListMarker => {
195                            let list_checkpoint = item_checkpoint;
196                            while state.not_at_end() {
197                                if let Some(RstTokenType::EnumeratedListMarker) = state.peek_kind() {
198                                    let li_checkpoint = state.checkpoint();
199                                    state.bump(); // Marker
200                                    self.parse_inlines_until_newline(state);
201                                    state.finish_at(li_checkpoint, ET::ListItem);
202
203                                    if let Some(RstTokenType::Newline) = state.peek_kind() {
204                                        let nl_checkpoint = state.checkpoint();
205                                        state.bump();
206                                        // 检查是否有嵌套列表
207                                        let mut indent_level = 0;
208                                        while state.not_at_end() {
209                                            if let Some(RstTokenType::Whitespace) = state.peek_kind() {
210                                                state.bump();
211                                                indent_level += 1;
212                                            }
213                                            else if let Some(RstTokenType::EnumeratedListMarker) = state.peek_kind() {
214                                                // 递归解析嵌套列表
215                                                let nested_list_checkpoint = state.checkpoint();
216                                                while state.not_at_end() {
217                                                    if let Some(RstTokenType::EnumeratedListMarker) = state.peek_kind() {
218                                                        let nested_li_checkpoint = state.checkpoint();
219                                                        state.bump(); // Marker
220                                                        self.parse_inlines_until_newline(state);
221                                                        state.finish_at(nested_li_checkpoint, ET::ListItem);
222
223                                                        if let Some(RstTokenType::Newline) = state.peek_kind() {
224                                                            let nested_nl_checkpoint = state.checkpoint();
225                                                            state.bump();
226                                                            let mut nested_indent_level = 0;
227                                                            while state.not_at_end() {
228                                                                if let Some(RstTokenType::Whitespace) = state.peek_kind() {
229                                                                    state.bump();
230                                                                    nested_indent_level += 1;
231                                                                }
232                                                                else {
233                                                                    break;
234                                                                }
235                                                            }
236                                                            if nested_indent_level <= indent_level || !matches!(state.peek_kind(), Some(RstTokenType::EnumeratedListMarker)) {
237                                                                state.restore(nested_nl_checkpoint);
238                                                                break;
239                                                            }
240                                                        }
241                                                        else {
242                                                            break;
243                                                        }
244                                                    }
245                                                    else {
246                                                        break;
247                                                    }
248                                                }
249                                                state.finish_at(nested_list_checkpoint, ET::EnumeratedList);
250                                                break;
251                                            }
252                                            else {
253                                                state.restore(nl_checkpoint);
254                                                break;
255                                            }
256                                        }
257                                    }
258
259                                    if let Some(RstTokenType::Newline) = state.peek_kind() {
260                                        let nl_checkpoint = state.checkpoint();
261                                        state.bump();
262                                        if !matches!(state.peek_kind(), Some(RstTokenType::EnumeratedListMarker)) {
263                                            state.restore(nl_checkpoint);
264                                            break;
265                                        }
266                                    }
267                                    else {
268                                        break;
269                                    }
270                                }
271                                else {
272                                    break;
273                                }
274                            }
275                            state.finish_at(list_checkpoint, ET::EnumeratedList);
276                        }
277                        RstTokenType::DefinitionDefinition => {
278                            state.bump();
279                            self.parse_inlines_until_newline(state);
280                            state.finish_at(item_checkpoint, ET::DefinitionDefinition);
281                        }
282                        RstTokenType::Table => {
283                            let table_checkpoint = item_checkpoint;
284                            state.bump();
285                            // 解析表格行和单元格
286                            self.parse_table(state);
287                            state.finish_at(table_checkpoint, ET::Table);
288                        }
289                        RstTokenType::CodeBlock => {
290                            state.bump();
291                            // 解析代码块内容
292                            while state.not_at_end() {
293                                if let Some(RstTokenType::Newline) = state.peek_kind() {
294                                    state.bump();
295                                    // 检查代码块结束
296                                    let mut is_end = true;
297                                    for _ in 0..3 {
298                                        if let Some(RstTokenType::Text) = state.peek_kind() {
299                                            state.bump();
300                                        }
301                                        else {
302                                            is_end = false;
303                                            break;
304                                        }
305                                    }
306                                    if is_end {
307                                        break;
308                                    }
309                                }
310                                else {
311                                    state.bump();
312                                }
313                            }
314                            state.finish_at(item_checkpoint, ET::CodeBlock);
315                        }
316                        RstTokenType::FootnoteDefinition => {
317                            state.bump();
318                            // 解析脚注定义内容
319                            self.parse_inlines_until_newline(state);
320                            state.finish_at(item_checkpoint, ET::FootnoteDefinition);
321                        }
322                        RstTokenType::HorizontalRule => {
323                            state.bump();
324                            state.finish_at(item_checkpoint, ET::HorizontalRule);
325                        }
326                        RstTokenType::Newline | RstTokenType::Whitespace => {
327                            state.bump();
328                        }
329                        _ => {
330                            self.parse_paragraph(state);
331                        }
332                    }
333                }
334                else {
335                    state.advance();
336                }
337            }
338
339            let root = state.finish_at(checkpoint, ET::Root);
340            Ok(root)
341        })
342    }
343}
344
345impl<'config> RstParser<'config> {
346    fn is_block_start(&self, kind: RstTokenType) -> bool {
347        matches!(
348            kind,
349            RstTokenType::Comment
350                | RstTokenType::Heading1
351                | RstTokenType::Heading2
352                | RstTokenType::Heading3
353                | RstTokenType::Heading4
354                | RstTokenType::Heading5
355                | RstTokenType::Heading6
356                | RstTokenType::Directive
357                | RstTokenType::BulletListMarker
358                | RstTokenType::EnumeratedListMarker
359                | RstTokenType::DefinitionDefinition
360                | RstTokenType::Table
361                | RstTokenType::CodeBlock
362                | RstTokenType::HorizontalRule
363        )
364    }
365
366    fn parse_paragraph<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
367        let checkpoint = state.checkpoint();
368        while state.not_at_end() {
369            if let Some(next_kind) = state.peek_kind() {
370                if next_kind == RstTokenType::Newline {
371                    let cp = state.checkpoint();
372                    state.bump();
373                    if let Some(after_nl) = state.peek_kind() {
374                        if after_nl == RstTokenType::Newline || self.is_block_start(after_nl) {
375                            state.restore(cp);
376                            break;
377                        }
378                    }
379                    else {
380                        break;
381                    }
382                }
383                else if self.is_block_start(next_kind) {
384                    break;
385                }
386            }
387            self.parse_inline(state);
388        }
389        state.finish_at(checkpoint, ET::Paragraph);
390    }
391
392    fn parse_inlines_until_newline<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
393        while state.not_at_end() {
394            if let Some(kind) = state.peek_kind() {
395                if kind == RstTokenType::Newline {
396                    break;
397                }
398            }
399            self.parse_inline(state);
400        }
401    }
402
403    fn parse_inline<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
404        let checkpoint = state.checkpoint();
405        if let Some(kind) = state.peek_kind() {
406            match kind {
407                RstTokenType::Emphasis => {
408                    state.bump(); // Start marker
409                    while state.not_at_end() && state.peek_kind() != Some(RstTokenType::Emphasis) && state.peek_kind() != Some(RstTokenType::Newline) {
410                        self.parse_inline(state);
411                    }
412                    if state.peek_kind() == Some(RstTokenType::Emphasis) {
413                        state.bump(); // End marker
414                    }
415                    state.finish_at(checkpoint, ET::Emphasis);
416                }
417                RstTokenType::Strong => {
418                    state.bump(); // Start marker
419                    while state.not_at_end() && state.peek_kind() != Some(RstTokenType::Strong) && state.peek_kind() != Some(RstTokenType::Newline) {
420                        self.parse_inline(state);
421                    }
422                    if state.peek_kind() == Some(RstTokenType::Strong) {
423                        state.bump(); // End marker
424                    }
425                    state.finish_at(checkpoint, ET::Strong);
426                }
427                RstTokenType::Literal => {
428                    state.bump();
429                    // 解析字面量内容
430                    while state.not_at_end() && state.peek_kind() != Some(RstTokenType::Literal) && state.peek_kind() != Some(RstTokenType::Newline) {
431                        state.bump();
432                    }
433                    if state.peek_kind() == Some(RstTokenType::Literal) {
434                        state.bump(); // End marker
435                    }
436                    state.finish_at(checkpoint, ET::Literal);
437                }
438                RstTokenType::Link => {
439                    state.bump();
440                    // 解析链接内容
441                    while state.not_at_end() && state.peek_kind() != Some(RstTokenType::Link) && state.peek_kind() != Some(RstTokenType::Newline) {
442                        self.parse_inline(state);
443                    }
444                    if state.peek_kind() == Some(RstTokenType::Link) {
445                        state.bump(); // End marker
446                    }
447                    state.finish_at(checkpoint, ET::Link);
448                }
449                RstTokenType::FootnoteReference => {
450                    state.bump();
451                    state.finish_at(checkpoint, ET::FootnoteReference);
452                }
453                RstTokenType::SubstitutionReference => {
454                    state.bump();
455                    state.finish_at(checkpoint, ET::SubstitutionReference);
456                }
457                RstTokenType::Role => {
458                    state.bump();
459                    state.finish_at(checkpoint, ET::Role);
460                }
461                _ => {
462                    state.bump();
463                }
464            }
465        }
466        else {
467            state.advance();
468        }
469    }
470
471    /// Parses complex table structures
472    fn parse_table<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
473        while state.not_at_end() {
474            if let Some(RstTokenType::Newline) = state.peek_kind() {
475                state.bump();
476                if let Some(RstTokenType::Table) = state.peek_kind() {
477                    state.bump();
478                    // Parse table row
479                    let row_checkpoint = state.checkpoint();
480                    // Process table row content
481                    self.parse_table_row(state);
482                    state.finish_at(row_checkpoint, ET::TableRow);
483                }
484                else {
485                    break;
486                }
487            }
488            else {
489                break;
490            }
491        }
492    }
493
494    /// Parses a single table row
495    fn parse_table_row<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, RstLanguage, S>) {
496        // Skip any leading whitespace
497        while state.not_at_end() {
498            if let Some(RstTokenType::Whitespace) = state.peek_kind() {
499                state.bump();
500            }
501            else {
502                break;
503            }
504        }
505
506        // Process table cells
507        while state.not_at_end() {
508            if let Some(RstTokenType::Text) = state.peek_kind() {
509                let cell_checkpoint = state.checkpoint();
510                // Parse cell content until next | or end of line
511                while state.not_at_end() {
512                    if let Some(kind) = state.peek_kind() {
513                        if kind == RstTokenType::Newline {
514                            break;
515                        }
516                        state.bump();
517                    }
518                    else {
519                        break;
520                    }
521                }
522                state.finish_at(cell_checkpoint, ET::TableCell);
523            }
524            else if let Some(RstTokenType::Newline) = state.peek_kind() {
525                break;
526            }
527            else {
528                state.bump();
529            }
530        }
531    }
532}