Skip to main content

oak_org_mode/parser/
mod.rs

1pub mod element_type;
2
3use crate::{
4    language::OrgModeLanguage,
5    lexer::{OrgModeLexer, token_type::OrgModeTokenType},
6    parser::element_type::OrgModeElementType,
7};
8use oak_core::{
9    parser::{ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer},
10    source::{Source, TextEdit},
11};
12
/// Shorthand for the `oak_core` parser state specialised to [`OrgModeLanguage`].
///
/// `S` is the source type the state reads from; every parsing method in this
/// module takes a `&mut State<'_, S>`.
pub(crate) type State<'a, S> = ParserState<'a, OrgModeLanguage, S>;
14
/// Org-mode parser.
///
/// Holds a borrowed [`OrgModeLanguage`] definition, which is handed to the
/// lexer each time a parse is run.
pub struct OrgModeParser<'a> {
    /// Language definition.
    pub language: &'a OrgModeLanguage,
}
20
21impl<'a> OrgModeParser<'a> {
22    /// Creates a new `OrgModeParser`.
23    pub fn new(language: &'a OrgModeLanguage) -> Self {
24        Self { language }
25    }
26
27    fn is_at_start_of_line<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) -> bool {
28        let pos = state.current_offset();
29        if pos == 0 {
30            return true;
31        }
32        let prev_text = state.source.get_text_in((pos - 1..pos).into());
33        prev_text.as_ref() == "\n"
34    }
35
36    fn parse_item<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
37        let kind = state.peek_kind();
38        match kind {
39            Some(OrgModeTokenType::Star) => {
40                if self.is_at_start_of_line(state) {
41                    // Only * followed by whitespace or end of line is considered a heading
42                    let mut is_heading = false;
43                    let next_kind = state.peek_kind_at(1);
44                    if next_kind == Some(OrgModeTokenType::Whitespace) || next_kind == Some(OrgModeTokenType::Newline) || next_kind.is_none() {
45                        is_heading = true;
46                    }
47
48                    if is_heading {
49                        self.parse_heading(state);
50                    }
51                    else {
52                        self.parse_paragraph(state);
53                    }
54                }
55                else {
56                    self.parse_paragraph(state);
57                }
58            }
59            Some(OrgModeTokenType::Hash) => {
60                if self.is_at_start_of_line(state) {
61                    self.parse_block(state);
62                }
63                else {
64                    self.parse_paragraph(state);
65                }
66            }
67            Some(OrgModeTokenType::Minus) | Some(OrgModeTokenType::Plus) => {
68                if self.is_at_start_of_line(state) {
69                    self.parse_list(state);
70                }
71                else {
72                    self.parse_paragraph(state);
73                }
74            }
75            Some(OrgModeTokenType::Pipe) => {
76                if self.is_at_start_of_line(state) {
77                    self.parse_table(state);
78                }
79                else {
80                    self.parse_paragraph(state);
81                }
82            }
83            Some(OrgModeTokenType::Colon) => {
84                if self.is_at_start_of_line(state) {
85                    self.parse_drawer(state);
86                }
87                else {
88                    self.parse_paragraph(state);
89                }
90            }
91            Some(OrgModeTokenType::Newline) | Some(OrgModeTokenType::Whitespace) => {
92                state.bump();
93            }
94            _ => {
95                self.parse_paragraph(state);
96            }
97        }
98    }
99
100    fn parse_heading<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
101        let checkpoint = state.checkpoint();
102
103        // Parse stars
104        while state.at(OrgModeTokenType::Star) {
105            state.bump();
106        }
107
108        // Parse whitespace
109        if state.at(OrgModeTokenType::Whitespace) {
110            state.bump();
111        }
112
113        // Parse title (inline content)
114        self.parse_inline_content(state);
115
116        // Consume newline
117        if state.at(OrgModeTokenType::Newline) {
118            state.bump();
119        }
120
121        state.finish_at(checkpoint, OrgModeElementType::Heading);
122    }
123
124    fn parse_block<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
125        let checkpoint = state.checkpoint();
126        state.bump(); // #
127        self.parse_inline_content(state);
128        if state.at(OrgModeTokenType::Newline) {
129            state.bump();
130        }
131        state.finish_at(checkpoint, OrgModeElementType::Block);
132    }
133
134    fn parse_list<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
135        let checkpoint = state.checkpoint();
136        state.bump(); // - or +
137        self.parse_inline_content(state);
138        if state.at(OrgModeTokenType::Newline) {
139            state.bump();
140        }
141        state.finish_at(checkpoint, OrgModeElementType::List);
142    }
143
144    fn parse_paragraph<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
145        let checkpoint = state.checkpoint();
146        self.parse_inline_content(state);
147        if state.at(OrgModeTokenType::Newline) {
148            state.bump();
149        }
150        state.finish_at(checkpoint, OrgModeElementType::Paragraph);
151    }
152
153    fn parse_table<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
154        let checkpoint = state.checkpoint();
155        while state.at(OrgModeTokenType::Pipe) {
156            self.parse_table_row(state);
157            // Check if next line is also a table row
158            while state.at(OrgModeTokenType::Newline) || state.at(OrgModeTokenType::Whitespace) {
159                state.bump();
160            }
161        }
162        state.finish_at(checkpoint, OrgModeElementType::Table);
163    }
164
165    fn parse_table_row<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
166        let checkpoint = state.checkpoint();
167        while state.at(OrgModeTokenType::Pipe) {
168            self.parse_table_cell(state);
169        }
170        if state.at(OrgModeTokenType::Newline) {
171            state.bump();
172        }
173        state.finish_at(checkpoint, OrgModeElementType::TableRow);
174    }
175
176    fn parse_table_cell<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
177        let checkpoint = state.checkpoint();
178        state.bump(); // |
179        while state.not_at_end() && !state.at(OrgModeTokenType::Pipe) && !state.at(OrgModeTokenType::Newline) {
180            self.parse_inline_content(state);
181        }
182        state.finish_at(checkpoint, OrgModeElementType::TableCell);
183    }
184
185    fn parse_drawer<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
186        let checkpoint = state.checkpoint();
187        state.bump(); // :
188        while state.not_at_end() && !state.at(OrgModeTokenType::Newline) {
189            state.bump();
190        }
191        if state.at(OrgModeTokenType::Newline) {
192            state.bump();
193        }
194        state.finish_at(checkpoint, OrgModeElementType::DrawerBlock);
195    }
196
197    fn parse_inline_content<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
198        while state.not_at_end() && !state.at(OrgModeTokenType::Newline) {
199            let next_kind = state.peek_kind();
200            match next_kind {
201                Some(OrgModeTokenType::Star) => self.parse_bold(state),
202                Some(OrgModeTokenType::Slash) => self.parse_italic(state),
203                Some(OrgModeTokenType::Underscore) => self.parse_underline(state),
204                Some(OrgModeTokenType::LeftBracket) => self.parse_link(state),
205                Some(OrgModeTokenType::Tilde) => self.parse_inline_code(state),
206                Some(OrgModeTokenType::Equal) => self.parse_verbatim(state),
207                Some(OrgModeTokenType::Plus) => self.parse_strikethrough(state),
208                _ => {
209                    state.bump();
210                }
211            }
212        }
213    }
214
215    fn parse_bold<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
216        let checkpoint = state.checkpoint();
217        let marker = OrgModeTokenType::Star;
218        state.bump(); // *
219        while state.not_at_end() && !state.at(marker) && !state.at(OrgModeTokenType::Newline) {
220            self.parse_inline_content(state);
221        }
222        if state.at(marker) {
223            state.bump();
224        }
225        state.finish_at(checkpoint, OrgModeElementType::Bold);
226    }
227
228    fn parse_italic<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
229        let checkpoint = state.checkpoint();
230        let marker = OrgModeTokenType::Slash;
231        state.bump(); // /
232        while state.not_at_end() && !state.at(marker) && !state.at(OrgModeTokenType::Newline) {
233            self.parse_inline_content(state);
234        }
235        if state.at(marker) {
236            state.bump();
237        }
238        state.finish_at(checkpoint, OrgModeElementType::Italic);
239    }
240
241    fn parse_underline<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
242        let checkpoint = state.checkpoint();
243        let marker = OrgModeTokenType::Underscore;
244        state.bump(); // _
245        while state.not_at_end() && !state.at(marker) && !state.at(OrgModeTokenType::Newline) {
246            self.parse_inline_content(state);
247        }
248        if state.at(marker) {
249            state.bump();
250        }
251        state.finish_at(checkpoint, OrgModeElementType::Underline);
252    }
253
254    fn parse_link<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
255        let checkpoint = state.checkpoint();
256        state.bump(); // [
257        if state.at(OrgModeTokenType::LeftBracket) {
258            state.bump(); // [[
259            while state.not_at_end() && !state.at(OrgModeTokenType::RightBracket) && !state.at(OrgModeTokenType::Newline) {
260                state.bump();
261            }
262            if state.at(OrgModeTokenType::RightBracket) {
263                state.bump(); // ]
264                if state.at(OrgModeTokenType::RightBracket) {
265                    state.bump(); // ]]
266                }
267                else if state.at(OrgModeTokenType::LeftBracket) {
268                    state.bump(); // ][
269                    while state.not_at_end() && !state.at(OrgModeTokenType::RightBracket) && !state.at(OrgModeTokenType::Newline) {
270                        state.bump();
271                    }
272                    if state.at(OrgModeTokenType::RightBracket) {
273                        state.bump(); // ]
274                        if state.at(OrgModeTokenType::RightBracket) {
275                            state.bump(); // ]]
276                        }
277                    }
278                }
279            }
280        }
281        state.finish_at(checkpoint, crate::parser::element_type::OrgModeElementType::Link);
282    }
283
284    fn parse_inline_code<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
285        let checkpoint = state.checkpoint();
286        state.bump(); // ~
287        while state.not_at_end() && !state.at(OrgModeTokenType::Tilde) && !state.at(OrgModeTokenType::Newline) {
288            state.bump();
289        }
290        if state.at(OrgModeTokenType::Tilde) {
291            state.bump();
292        }
293        state.finish_at(checkpoint, crate::parser::element_type::OrgModeElementType::InlineCode);
294    }
295
296    fn parse_verbatim<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
297        let checkpoint = state.checkpoint();
298        state.bump(); // =
299        while state.not_at_end() && !state.at(OrgModeTokenType::Equal) && !state.at(OrgModeTokenType::Newline) {
300            state.bump();
301        }
302        if state.at(OrgModeTokenType::Equal) {
303            state.bump();
304        }
305        state.finish_at(checkpoint, crate::parser::element_type::OrgModeElementType::Verbatim);
306    }
307
308    fn parse_strikethrough<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
309        let checkpoint = state.checkpoint();
310        state.bump(); // +
311        while state.not_at_end() && !state.at(OrgModeTokenType::Plus) && !state.at(OrgModeTokenType::Newline) {
312            state.bump();
313        }
314        if state.at(OrgModeTokenType::Plus) {
315            state.bump();
316        }
317        state.finish_at(checkpoint, crate::parser::element_type::OrgModeElementType::Strikethrough);
318    }
319}
320
321impl<'a> Parser<OrgModeLanguage> for OrgModeParser<'a> {
322    fn parse<'b, S: Source + ?Sized>(&self, text: &'b S, edits: &[TextEdit], cache: &'b mut impl ParseCache<OrgModeLanguage>) -> ParseOutput<'b, OrgModeLanguage> {
323        let lexer = OrgModeLexer::new(self.language);
324        parse_with_lexer(&lexer, text, edits, cache, |state| {
325            let checkpoint = state.checkpoint();
326            while state.not_at_end() {
327                self.parse_item(state);
328            }
329
330            Ok(state.finish_at(checkpoint, crate::parser::element_type::OrgModeElementType::Document))
331        })
332    }
333}