Skip to main content

oak_org_mode/parser/
mod.rs

1use crate::{kind::OrgModeSyntaxKind, language::OrgModeLanguage, lexer::OrgModeLexer};
2use oak_core::{
3    parser::{ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer},
4    source::{Source, TextEdit},
5};
6
/// Parser state specialised to the Org-mode language; `S` is the source type being parsed.
pub(crate) type State<'a, S> = ParserState<'a, OrgModeLanguage, S>;
8
9pub struct OrgModeParser<'a> {
10    pub language: &'a OrgModeLanguage,
11}
12
13impl<'a> OrgModeParser<'a> {
14    pub fn new(language: &'a OrgModeLanguage) -> Self {
15        Self { language }
16    }
17
18    fn is_at_start_of_line<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) -> bool {
19        let pos = state.current_offset();
20        if pos == 0 {
21            return true;
22        }
23        let prev_text = state.source.get_text_in((pos - 1..pos).into());
24        prev_text.as_ref() == "\n"
25    }
26
27    fn parse_item<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
28        let kind = state.peek_kind();
29        match kind {
30            Some(OrgModeSyntaxKind::Star) => {
31                if self.is_at_start_of_line(state) {
32                    // 只有 * 后面跟着空格或者是行尾,才认为是标题
33                    let mut is_heading = false;
34                    let next_kind = state.peek_kind_at(1);
35                    if next_kind == Some(OrgModeSyntaxKind::Whitespace) || next_kind == Some(OrgModeSyntaxKind::Newline) || next_kind.is_none() {
36                        is_heading = true;
37                    }
38
39                    if is_heading {
40                        self.parse_heading(state);
41                    }
42                    else {
43                        self.parse_paragraph(state);
44                    }
45                }
46                else {
47                    self.parse_paragraph(state);
48                }
49            }
50            Some(OrgModeSyntaxKind::Hash) => {
51                if self.is_at_start_of_line(state) {
52                    self.parse_block(state);
53                }
54                else {
55                    self.parse_paragraph(state);
56                }
57            }
58            Some(OrgModeSyntaxKind::Minus) | Some(OrgModeSyntaxKind::Plus) => {
59                if self.is_at_start_of_line(state) {
60                    self.parse_list(state);
61                }
62                else {
63                    self.parse_paragraph(state);
64                }
65            }
66            Some(OrgModeSyntaxKind::Newline) | Some(OrgModeSyntaxKind::Whitespace) => {
67                state.bump();
68            }
69            _ => self.parse_paragraph(state),
70        }
71    }
72
73    fn parse_list<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
74        let checkpoint = state.checkpoint();
75        while state.at(OrgModeSyntaxKind::Minus) || state.at(OrgModeSyntaxKind::Plus) {
76            self.parse_list_item(state);
77            // Handle optional newline/whitespace between items
78            while state.at(OrgModeSyntaxKind::Newline) || state.at(OrgModeSyntaxKind::Whitespace) {
79                state.bump();
80            }
81        }
82        state.finish_at(checkpoint, OrgModeSyntaxKind::List.into());
83    }
84
85    fn parse_list_item<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
86        let checkpoint = state.checkpoint();
87        state.bump(); // - or +
88        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Newline) {
89            state.bump();
90        }
91        state.finish_at(checkpoint, OrgModeSyntaxKind::ListItem.into());
92    }
93
94    fn parse_heading<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
95        let checkpoint = state.checkpoint();
96        while state.at(OrgModeSyntaxKind::Star) {
97            state.bump();
98        }
99
100        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Newline) {
101            state.bump();
102        }
103        state.finish_at(checkpoint, OrgModeSyntaxKind::Heading.into());
104    }
105
106    fn parse_block<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
107        let checkpoint = state.checkpoint();
108        state.bump(); // #
109        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Newline) {
110            state.bump();
111        }
112        state.finish_at(checkpoint, OrgModeSyntaxKind::Block.into());
113    }
114
115    fn parse_paragraph<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
116        let checkpoint = state.checkpoint();
117        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Newline) {
118            let next_kind = state.peek_kind();
119            match next_kind {
120                Some(OrgModeSyntaxKind::Star) => self.parse_bold(state),
121                Some(OrgModeSyntaxKind::Slash) => self.parse_italic(state),
122                Some(OrgModeSyntaxKind::Underscore) => self.parse_underline(state),
123                Some(OrgModeSyntaxKind::LeftBracket) => self.parse_link(state),
124                Some(OrgModeSyntaxKind::Tilde) => self.parse_inline_code(state),
125                Some(OrgModeSyntaxKind::Equal) => self.parse_verbatim(state),
126                Some(OrgModeSyntaxKind::Plus) => self.parse_strikethrough(state),
127                _ => {
128                    state.bump();
129                }
130            }
131        }
132        if state.at(OrgModeSyntaxKind::Newline) {
133            state.bump();
134        }
135        state.finish_at(checkpoint, OrgModeSyntaxKind::Paragraph.into());
136    }
137
138    fn parse_bold<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
139        let checkpoint = state.checkpoint();
140        state.bump(); // *
141        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Star) && !state.at(OrgModeSyntaxKind::Newline) {
142            state.bump();
143        }
144        if state.at(OrgModeSyntaxKind::Star) {
145            state.bump();
146        }
147        state.finish_at(checkpoint, OrgModeSyntaxKind::Bold.into());
148    }
149
150    fn parse_italic<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
151        let checkpoint = state.checkpoint();
152        state.bump(); // /
153        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Slash) && !state.at(OrgModeSyntaxKind::Newline) {
154            state.bump();
155        }
156        if state.at(OrgModeSyntaxKind::Slash) {
157            state.bump();
158        }
159        state.finish_at(checkpoint, OrgModeSyntaxKind::Italic.into());
160    }
161
162    fn parse_underline<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
163        let checkpoint = state.checkpoint();
164        state.bump(); // _
165        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Underscore) && !state.at(OrgModeSyntaxKind::Newline) {
166            state.bump();
167        }
168        if state.at(OrgModeSyntaxKind::Underscore) {
169            state.bump();
170        }
171        state.finish_at(checkpoint, OrgModeSyntaxKind::Underline.into());
172    }
173
174    fn parse_link<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
175        let checkpoint = state.checkpoint();
176        state.bump(); // [
177        if state.at(OrgModeSyntaxKind::LeftBracket) {
178            state.bump(); // [[
179            while state.not_at_end() && !state.at(OrgModeSyntaxKind::RightBracket) && !state.at(OrgModeSyntaxKind::Newline) {
180                state.bump();
181            }
182            if state.at(OrgModeSyntaxKind::RightBracket) {
183                state.bump(); // ]
184                if state.at(OrgModeSyntaxKind::RightBracket) {
185                    state.bump(); // ]]
186                }
187                else if state.at(OrgModeSyntaxKind::LeftBracket) {
188                    state.bump(); // ][
189                    while state.not_at_end() && !state.at(OrgModeSyntaxKind::RightBracket) && !state.at(OrgModeSyntaxKind::Newline) {
190                        state.bump();
191                    }
192                    if state.at(OrgModeSyntaxKind::RightBracket) {
193                        state.bump(); // ]
194                        if state.at(OrgModeSyntaxKind::RightBracket) {
195                            state.bump(); // ]]
196                        }
197                    }
198                }
199            }
200        }
201        state.finish_at(checkpoint, OrgModeSyntaxKind::Link.into());
202    }
203
204    fn parse_inline_code<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
205        let checkpoint = state.checkpoint();
206        state.bump(); // ~
207        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Tilde) && !state.at(OrgModeSyntaxKind::Newline) {
208            state.bump();
209        }
210        if state.at(OrgModeSyntaxKind::Tilde) {
211            state.bump();
212        }
213        state.finish_at(checkpoint, OrgModeSyntaxKind::InlineCode.into());
214    }
215
216    fn parse_verbatim<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
217        let checkpoint = state.checkpoint();
218        state.bump(); // =
219        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Equal) && !state.at(OrgModeSyntaxKind::Newline) {
220            state.bump();
221        }
222        if state.at(OrgModeSyntaxKind::Equal) {
223            state.bump();
224        }
225        state.finish_at(checkpoint, OrgModeSyntaxKind::Verbatim.into());
226    }
227
228    fn parse_strikethrough<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
229        let checkpoint = state.checkpoint();
230        state.bump(); // +
231        while state.not_at_end() && !state.at(OrgModeSyntaxKind::Plus) && !state.at(OrgModeSyntaxKind::Newline) {
232            state.bump();
233        }
234        if state.at(OrgModeSyntaxKind::Plus) {
235            state.bump();
236        }
237        state.finish_at(checkpoint, OrgModeSyntaxKind::Strikethrough.into());
238    }
239}
240
241impl<'a> Parser<OrgModeLanguage> for OrgModeParser<'a> {
242    fn parse<'b, S: Source + ?Sized>(&self, text: &'b S, edits: &[TextEdit], cache: &'b mut impl ParseCache<OrgModeLanguage>) -> ParseOutput<'b, OrgModeLanguage> {
243        let lexer = OrgModeLexer::new(self.language);
244        parse_with_lexer(&lexer, text, edits, cache, |state| {
245            let checkpoint = state.checkpoint();
246            while state.not_at_end() {
247                self.parse_item(state);
248            }
249
250            Ok(state.finish_at(checkpoint, OrgModeSyntaxKind::Document.into()))
251        })
252    }
253}