oak_org_mode/parser/
mod.rs1use crate::{kind::OrgModeSyntaxKind, language::OrgModeLanguage, lexer::OrgModeLexer};
2use oak_core::{
3 parser::{ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer},
4 source::{Source, TextEdit},
5};
6
/// Shorthand for the `oak_core` [`ParserState`] specialised to [`OrgModeLanguage`].
///
/// `'a` is the lifetime carried by the underlying state and `S` is the source
/// type being parsed; every parsing routine in this module takes a
/// `&mut State<'_, S>`.
pub(crate) type State<'a, S> = ParserState<'a, OrgModeLanguage, S>;
8
/// Parser that turns Org-mode source text into a syntax tree.
///
/// The parser owns no state of its own; it only borrows the
/// [`OrgModeLanguage`] value that is handed to [`OrgModeLexer::new`] each time
/// a parse is run.
pub struct OrgModeParser<'a> {
    /// Borrowed language value used to construct the lexer for every parse.
    pub language: &'a OrgModeLanguage,
}
12
13impl<'a> OrgModeParser<'a> {
14 pub fn new(language: &'a OrgModeLanguage) -> Self {
15 Self { language }
16 }
17
18 fn is_at_start_of_line<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) -> bool {
19 let pos = state.current_offset();
20 if pos == 0 {
21 return true;
22 }
23 let prev_text = state.source.get_text_in((pos - 1..pos).into());
24 prev_text.as_ref() == "\n"
25 }
26
27 fn parse_item<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
28 let kind = state.peek_kind();
29 match kind {
30 Some(OrgModeSyntaxKind::Star) => {
31 if self.is_at_start_of_line(state) {
32 let mut is_heading = false;
34 let next_kind = state.peek_kind_at(1);
35 if next_kind == Some(OrgModeSyntaxKind::Whitespace) || next_kind == Some(OrgModeSyntaxKind::Newline) || next_kind.is_none() {
36 is_heading = true;
37 }
38
39 if is_heading {
40 self.parse_heading(state);
41 }
42 else {
43 self.parse_paragraph(state);
44 }
45 }
46 else {
47 self.parse_paragraph(state);
48 }
49 }
50 Some(OrgModeSyntaxKind::Hash) => {
51 if self.is_at_start_of_line(state) {
52 self.parse_block(state);
53 }
54 else {
55 self.parse_paragraph(state);
56 }
57 }
58 Some(OrgModeSyntaxKind::Minus) | Some(OrgModeSyntaxKind::Plus) => {
59 if self.is_at_start_of_line(state) {
60 self.parse_list(state);
61 }
62 else {
63 self.parse_paragraph(state);
64 }
65 }
66 Some(OrgModeSyntaxKind::Newline) | Some(OrgModeSyntaxKind::Whitespace) => {
67 state.bump();
68 }
69 _ => self.parse_paragraph(state),
70 }
71 }
72
73 fn parse_list<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
74 let checkpoint = state.checkpoint();
75 while state.at(OrgModeSyntaxKind::Minus) || state.at(OrgModeSyntaxKind::Plus) {
76 self.parse_list_item(state);
77 while state.at(OrgModeSyntaxKind::Newline) || state.at(OrgModeSyntaxKind::Whitespace) {
79 state.bump();
80 }
81 }
82 state.finish_at(checkpoint, OrgModeSyntaxKind::List.into());
83 }
84
85 fn parse_list_item<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
86 let checkpoint = state.checkpoint();
87 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::Newline) {
89 state.bump();
90 }
91 state.finish_at(checkpoint, OrgModeSyntaxKind::ListItem.into());
92 }
93
94 fn parse_heading<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
95 let checkpoint = state.checkpoint();
96 while state.at(OrgModeSyntaxKind::Star) {
97 state.bump();
98 }
99
100 while state.not_at_end() && !state.at(OrgModeSyntaxKind::Newline) {
101 state.bump();
102 }
103 state.finish_at(checkpoint, OrgModeSyntaxKind::Heading.into());
104 }
105
106 fn parse_block<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
107 let checkpoint = state.checkpoint();
108 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::Newline) {
110 state.bump();
111 }
112 state.finish_at(checkpoint, OrgModeSyntaxKind::Block.into());
113 }
114
115 fn parse_paragraph<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
116 let checkpoint = state.checkpoint();
117 while state.not_at_end() && !state.at(OrgModeSyntaxKind::Newline) {
118 let next_kind = state.peek_kind();
119 match next_kind {
120 Some(OrgModeSyntaxKind::Star) => self.parse_bold(state),
121 Some(OrgModeSyntaxKind::Slash) => self.parse_italic(state),
122 Some(OrgModeSyntaxKind::Underscore) => self.parse_underline(state),
123 Some(OrgModeSyntaxKind::LeftBracket) => self.parse_link(state),
124 Some(OrgModeSyntaxKind::Tilde) => self.parse_inline_code(state),
125 Some(OrgModeSyntaxKind::Equal) => self.parse_verbatim(state),
126 Some(OrgModeSyntaxKind::Plus) => self.parse_strikethrough(state),
127 _ => {
128 state.bump();
129 }
130 }
131 }
132 if state.at(OrgModeSyntaxKind::Newline) {
133 state.bump();
134 }
135 state.finish_at(checkpoint, OrgModeSyntaxKind::Paragraph.into());
136 }
137
138 fn parse_bold<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
139 let checkpoint = state.checkpoint();
140 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::Star) && !state.at(OrgModeSyntaxKind::Newline) {
142 state.bump();
143 }
144 if state.at(OrgModeSyntaxKind::Star) {
145 state.bump();
146 }
147 state.finish_at(checkpoint, OrgModeSyntaxKind::Bold.into());
148 }
149
150 fn parse_italic<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
151 let checkpoint = state.checkpoint();
152 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::Slash) && !state.at(OrgModeSyntaxKind::Newline) {
154 state.bump();
155 }
156 if state.at(OrgModeSyntaxKind::Slash) {
157 state.bump();
158 }
159 state.finish_at(checkpoint, OrgModeSyntaxKind::Italic.into());
160 }
161
162 fn parse_underline<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
163 let checkpoint = state.checkpoint();
164 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::Underscore) && !state.at(OrgModeSyntaxKind::Newline) {
166 state.bump();
167 }
168 if state.at(OrgModeSyntaxKind::Underscore) {
169 state.bump();
170 }
171 state.finish_at(checkpoint, OrgModeSyntaxKind::Underline.into());
172 }
173
174 fn parse_link<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
175 let checkpoint = state.checkpoint();
176 state.bump(); if state.at(OrgModeSyntaxKind::LeftBracket) {
178 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::RightBracket) && !state.at(OrgModeSyntaxKind::Newline) {
180 state.bump();
181 }
182 if state.at(OrgModeSyntaxKind::RightBracket) {
183 state.bump(); if state.at(OrgModeSyntaxKind::RightBracket) {
185 state.bump(); }
187 else if state.at(OrgModeSyntaxKind::LeftBracket) {
188 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::RightBracket) && !state.at(OrgModeSyntaxKind::Newline) {
190 state.bump();
191 }
192 if state.at(OrgModeSyntaxKind::RightBracket) {
193 state.bump(); if state.at(OrgModeSyntaxKind::RightBracket) {
195 state.bump(); }
197 }
198 }
199 }
200 }
201 state.finish_at(checkpoint, OrgModeSyntaxKind::Link.into());
202 }
203
204 fn parse_inline_code<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
205 let checkpoint = state.checkpoint();
206 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::Tilde) && !state.at(OrgModeSyntaxKind::Newline) {
208 state.bump();
209 }
210 if state.at(OrgModeSyntaxKind::Tilde) {
211 state.bump();
212 }
213 state.finish_at(checkpoint, OrgModeSyntaxKind::InlineCode.into());
214 }
215
216 fn parse_verbatim<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
217 let checkpoint = state.checkpoint();
218 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::Equal) && !state.at(OrgModeSyntaxKind::Newline) {
220 state.bump();
221 }
222 if state.at(OrgModeSyntaxKind::Equal) {
223 state.bump();
224 }
225 state.finish_at(checkpoint, OrgModeSyntaxKind::Verbatim.into());
226 }
227
228 fn parse_strikethrough<'b, S: Source + ?Sized>(&self, state: &mut State<'b, S>) {
229 let checkpoint = state.checkpoint();
230 state.bump(); while state.not_at_end() && !state.at(OrgModeSyntaxKind::Plus) && !state.at(OrgModeSyntaxKind::Newline) {
232 state.bump();
233 }
234 if state.at(OrgModeSyntaxKind::Plus) {
235 state.bump();
236 }
237 state.finish_at(checkpoint, OrgModeSyntaxKind::Strikethrough.into());
238 }
239}
240
241impl<'a> Parser<OrgModeLanguage> for OrgModeParser<'a> {
242 fn parse<'b, S: Source + ?Sized>(&self, text: &'b S, edits: &[TextEdit], cache: &'b mut impl ParseCache<OrgModeLanguage>) -> ParseOutput<'b, OrgModeLanguage> {
243 let lexer = OrgModeLexer::new(self.language);
244 parse_with_lexer(&lexer, text, edits, cache, |state| {
245 let checkpoint = state.checkpoint();
246 while state.not_at_end() {
247 self.parse_item(state);
248 }
249
250 Ok(state.finish_at(checkpoint, OrgModeSyntaxKind::Document.into()))
251 })
252 }
253}