oak_markdown/parser/
mod.rs1pub mod element_type;
3
4use crate::{language::MarkdownLanguage, lexer::token_type::MarkdownTokenType, parser::element_type::MarkdownElementType as ET};
5use oak_core::{Parser, ParserState, source::Source};
6
7pub struct MarkdownParser<'config> {
9 pub(crate) config: &'config MarkdownLanguage,
10}
11
12impl<'config> MarkdownParser<'config> {
13 pub fn new(config: &'config MarkdownLanguage) -> Self {
15 Self { config }
16 }
17}
18
19impl<'config> Parser<MarkdownLanguage> for MarkdownParser<'config> {
20 fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[oak_core::TextEdit], cache: &'a mut impl oak_core::ParseCache<MarkdownLanguage>) -> oak_core::ParseOutput<'a, MarkdownLanguage> {
21 let lexer = crate::lexer::MarkdownLexer::new(&self.config);
22 oak_core::parser::parse_with_lexer(&lexer, text, edits, cache, |state| {
23 let checkpoint = state.checkpoint();
24
25 while state.not_at_end() {
26 let item_checkpoint = state.checkpoint();
27 if let Some(kind) = state.peek_kind() {
28 match kind {
29 MarkdownTokenType::FrontMatter => {
30 if self.config.allow_front_matter {
31 state.bump();
32 state.finish_at(item_checkpoint, ET::FrontMatter);
33 }
34 else {
35 self.parse_paragraph(state);
36 }
37 }
38 MarkdownTokenType::MathBlock => {
39 if self.config.allow_math {
40 state.bump();
41 state.finish_at(item_checkpoint, ET::MathBlock);
42 }
43 else {
44 self.parse_paragraph(state);
45 }
46 }
47 MarkdownTokenType::HtmlTag | MarkdownTokenType::HtmlComment => {
48 if self.config.allow_html {
49 state.bump();
50 state.finish_at(item_checkpoint, ET::from(kind));
51 }
52 else {
53 self.parse_paragraph(state);
54 }
55 }
56 MarkdownTokenType::XmlTag | MarkdownTokenType::XmlComment => {
57 if self.config.allow_xml {
58 state.bump();
59 state.finish_at(item_checkpoint, ET::from(kind));
60 }
61 else {
62 self.parse_paragraph(state);
63 }
64 }
65 MarkdownTokenType::FootnoteDefinition => {
66 state.bump();
67 self.parse_inlines_until_newline(state);
68 state.finish_at(item_checkpoint, ET::FootnoteDefinition);
69 }
70 MarkdownTokenType::Heading1 | MarkdownTokenType::Heading2 | MarkdownTokenType::Heading3 | MarkdownTokenType::Heading4 | MarkdownTokenType::Heading5 | MarkdownTokenType::Heading6 => {
71 state.bump();
72 self.parse_inlines_until_newline(state);
73 state.finish_at(item_checkpoint, ET::from(kind));
74 }
75 MarkdownTokenType::ListMarker => {
76 let list_checkpoint = item_checkpoint;
77 let mut is_ordered = false;
78 if let Some(text) = state.peek_text() {
79 if text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) {
80 is_ordered = true;
81 }
82 }
83
84 while state.not_at_end() {
85 if let Some(MarkdownTokenType::ListMarker) = state.peek_kind() {
86 let current_is_ordered = if let Some(text) = state.peek_text() { text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) } else { false };
87
88 if current_is_ordered != is_ordered && state.checkpoint() != list_checkpoint {
89 break;
90 }
91
92 let li_checkpoint = state.checkpoint();
93 state.bump(); self.parse_inlines_until_newline(state);
95 state.finish_at(li_checkpoint, ET::ListItem);
96
97 if let Some(MarkdownTokenType::Newline) = state.peek_kind() {
98 let nl_checkpoint = state.checkpoint();
99 state.bump();
100 if !matches!(state.peek_kind(), Some(MarkdownTokenType::ListMarker)) {
101 state.restore(nl_checkpoint);
102 break;
103 }
104 let next_is_ordered = if let Some(text) = state.peek_text() { text.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) } else { false };
105 if next_is_ordered != is_ordered {
106 state.restore(nl_checkpoint);
107 break;
108 }
109 }
110 else {
111 break;
112 }
113 }
114 else {
115 break;
116 }
117 }
118 state.finish_at(list_checkpoint, ET::List);
119 }
120 MarkdownTokenType::BlockquoteMarker => {
121 state.bump();
122 while state.not_at_end() {
123 if let Some(next_kind) = state.peek_kind() {
124 if next_kind == MarkdownTokenType::Newline {
125 state.bump();
126 if let Some(after_nl) = state.peek_kind() {
127 if after_nl == MarkdownTokenType::BlockquoteMarker {
128 state.bump();
129 continue;
130 }
131 if after_nl != MarkdownTokenType::Whitespace && after_nl != MarkdownTokenType::Text {
132 break;
133 }
134 }
135 else {
136 break;
137 }
138 }
139 else if self.is_block_start(next_kind) && next_kind != MarkdownTokenType::BlockquoteMarker {
140 break;
141 }
142 }
143 self.parse_inline(state);
144 }
145 state.finish_at(item_checkpoint, ET::Blockquote);
146 }
147 MarkdownTokenType::CodeFence => {
148 state.bump();
149 if let Some(MarkdownTokenType::CodeLanguage) = state.peek_kind() {
150 state.bump();
151 }
152 while state.not_at_end() {
153 if let Some(next_kind) = state.peek_kind() {
154 if next_kind == MarkdownTokenType::CodeFence {
155 state.bump();
156 break;
157 }
158 }
159 state.bump();
160 }
161 state.finish_at(item_checkpoint, ET::CodeBlock);
162 }
163 MarkdownTokenType::HorizontalRule => {
164 state.bump();
165 state.finish_at(item_checkpoint, ET::HorizontalRule);
166 }
167 MarkdownTokenType::Pipe => {
168 while state.not_at_end() {
169 while state.not_at_end() {
170 if let Some(next_kind) = state.peek_kind() {
171 if next_kind == MarkdownTokenType::Newline {
172 break;
173 }
174 }
175 state.bump();
176 }
177 if let Some(MarkdownTokenType::Newline) = state.peek_kind() {
178 let checkpoint_before_nl = state.checkpoint();
179 state.bump();
180 let mut is_table_line = false;
181 while state.not_at_end() {
182 if let Some(kind) = state.peek_kind() {
183 if kind == MarkdownTokenType::Whitespace {
184 state.bump();
185 }
186 else if kind == MarkdownTokenType::Pipe {
187 is_table_line = true;
188 break;
189 }
190 else {
191 break;
192 }
193 }
194 else {
195 break;
196 }
197 }
198 if is_table_line {
199 continue;
200 }
201 else {
202 state.restore(checkpoint_before_nl);
203 break;
204 }
205 }
206 else {
207 break;
208 }
209 }
210 state.finish_at(item_checkpoint, ET::Table);
211 }
212 MarkdownTokenType::Newline | MarkdownTokenType::Whitespace => {
213 state.bump();
214 }
215 _ => {
216 self.parse_paragraph(state);
217 }
218 }
219 }
220 else {
221 state.advance();
222 }
223 }
224
225 let root = state.finish_at(checkpoint, ET::Root);
226 Ok(root)
227 })
228 }
229}
230
231impl<'config> MarkdownParser<'config> {
232 fn is_block_start(&self, kind: MarkdownTokenType) -> bool {
233 matches!(
234 kind,
235 MarkdownTokenType::Heading1
236 | MarkdownTokenType::Heading2
237 | MarkdownTokenType::Heading3
238 | MarkdownTokenType::Heading4
239 | MarkdownTokenType::Heading5
240 | MarkdownTokenType::Heading6
241 | MarkdownTokenType::BlockquoteMarker
242 | MarkdownTokenType::CodeFence
243 | MarkdownTokenType::ListMarker
244 | MarkdownTokenType::HorizontalRule
245 | MarkdownTokenType::MathBlock
246 | MarkdownTokenType::FrontMatter
247 | MarkdownTokenType::FootnoteDefinition
248 )
249 }
250
251 fn parse_paragraph<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MarkdownLanguage, S>) {
252 let checkpoint = state.checkpoint();
253 while state.not_at_end() {
254 if let Some(next_kind) = state.peek_kind() {
255 if next_kind == MarkdownTokenType::Newline {
256 let cp = state.checkpoint();
257 state.bump();
258 if let Some(after_nl) = state.peek_kind() {
259 if after_nl == MarkdownTokenType::Newline || self.is_block_start(after_nl) {
260 state.restore(cp);
261 break;
262 }
263 }
264 else {
265 break;
266 }
267 }
268 else if self.is_block_start(next_kind) {
269 break;
270 }
271 }
272 self.parse_inline(state);
273 }
274 state.finish_at(checkpoint, ET::Paragraph);
275 }
276
277 fn parse_inlines_until_newline<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MarkdownLanguage, S>) {
278 while state.not_at_end() {
279 if let Some(kind) = state.peek_kind() {
280 if kind == MarkdownTokenType::Newline {
281 break;
282 }
283 }
284 self.parse_inline(state);
285 }
286 }
287
288 fn parse_inline<'a, S: Source + ?Sized>(&self, state: &mut ParserState<'a, MarkdownLanguage, S>) {
289 let checkpoint = state.checkpoint();
290 if let Some(kind) = state.peek_kind() {
291 match kind {
292 MarkdownTokenType::Emphasis | MarkdownTokenType::Strong | MarkdownTokenType::Strikethrough => {
293 let marker_kind = kind;
294 state.bump(); while state.not_at_end() && state.peek_kind() != Some(marker_kind) && state.peek_kind() != Some(MarkdownTokenType::Newline) {
296 self.parse_inline(state);
297 }
298 if state.peek_kind() == Some(marker_kind) {
299 state.bump(); }
301 state.finish_at(checkpoint, ET::from(marker_kind));
302 }
303 MarkdownTokenType::Link | MarkdownTokenType::Image => {
304 let is_image = kind == MarkdownTokenType::Image;
305 state.bump(); while state.not_at_end() && state.peek_text().as_deref() != Some("]") && state.peek_kind() != Some(MarkdownTokenType::Newline) {
308 self.parse_inline(state);
309 }
310 if state.peek_text().as_deref() == Some("]") {
311 state.bump();
312 }
313 if state.peek_text().as_deref() == Some("(") {
315 state.bump();
316 while state.not_at_end() && state.peek_text().as_deref() != Some(")") && state.peek_kind() != Some(MarkdownTokenType::Newline) {
317 state.bump();
318 }
319 if state.peek_text().as_deref() == Some(")") {
320 state.bump();
321 }
322 }
323 state.finish_at(checkpoint, if is_image { ET::Image } else { ET::Link });
324 }
325 MarkdownTokenType::InlineCode | MarkdownTokenType::MathInline | MarkdownTokenType::Superscript | MarkdownTokenType::Subscript | MarkdownTokenType::FootnoteReference => {
326 state.bump();
327 state.finish_at(checkpoint, ET::from(kind));
328 }
329 _ => {
330 state.bump();
331 }
332 }
333 }
334 else {
335 state.advance();
336 }
337 }
338}