1use std::ops::{Deref, DerefMut};
2
3use pulldown_cmark::{CodeBlockKind, Event, Options, Tag, TagEnd};
4
5use crate::note_editor::{
6 ast::{self, Node, SourceRange, TaskKind},
7 rich_text::{RichText, Style, TextSegment},
8};
9
10pub struct Parser<'a>(pulldown_cmark::TextMergeWithOffset<'a, pulldown_cmark::OffsetIter<'a>>);
11
12impl<'a> Deref for Parser<'a> {
13 type Target = pulldown_cmark::TextMergeWithOffset<'a, pulldown_cmark::OffsetIter<'a>>;
14 fn deref(&self) -> &Self::Target {
15 &self.0
16 }
17}
18
19impl DerefMut for Parser<'_> {
20 fn deref_mut(&mut self) -> &mut Self::Target {
21 &mut self.0
22 }
23}
24
25impl<'a> Iterator for Parser<'a> {
26 type Item = (Event<'a>, SourceRange<usize>);
27 fn next(&mut self) -> Option<Self::Item> {
28 self.deref_mut().next()
29 }
30}
31
32#[derive(Clone, Debug, PartialEq, Default)]
33pub struct ParserState {
34 task_kind: Vec<ast::TaskKind>,
35 item_kind: Vec<ast::ItemKind>,
36}
37
38impl<'a> Parser<'a> {
39 pub fn new(text: &'a str) -> Self {
46 let mut options = Options::all();
47
48 options.remove(Options::ENABLE_SMART_PUNCTUATION);
56
57 let parser = pulldown_cmark::TextMergeWithOffset::new(
58 pulldown_cmark::Parser::new_ext(text, options).into_offset_iter(),
59 );
60
61 Self(parser)
62 }
63
64 pub fn parse(mut self) -> Vec<Node> {
65 let mut result = Vec::new();
66 let mut state = ParserState::default();
67
68 while let Some((event, _)) = self.next() {
69 match event {
70 Event::Start(tag) if Self::is_container_tag(&tag) => {
71 if let Some(node) = self.parse_container(tag, &mut state) {
72 result.push(node);
73 }
74 }
75 _ => {}
76 }
77 }
78
79 result
80 }
81
82 pub fn parse_container(&mut self, tag: Tag, state: &mut ParserState) -> Option<Node> {
83 let mut nodes = Vec::new();
84 let mut text_segments = Vec::new();
85 let mut inline_styles = Vec::new();
86
87 match tag {
88 Tag::List(Some(start)) => {
89 state.item_kind.push(ast::ItemKind::Ordered(start));
90 }
91 Tag::List(..) => {
92 state.item_kind.push(ast::ItemKind::Unordered);
93 }
94 _ => {}
95 };
96
97 while let Some((event, source_range)) = self.next() {
98 match event {
99 Event::Start(inner_tag) if Self::is_container_tag(&inner_tag) => {
100 if let Some(node) = self.parse_container(inner_tag, state) {
101 nodes.push(node);
102 }
103 }
104
105 Event::Start(inner_tag) if Self::is_inline_tag(&inner_tag) => {
106 if let Some(style) = Self::tag_to_style(&inner_tag) {
107 inline_styles.push(style);
108 }
109 }
110
111 Event::TaskListMarker(checked) => {
112 state.task_kind.push(if checked {
113 TaskKind::Checked
114 } else {
115 TaskKind::Unchecked
116 });
117 }
118
119 Event::Code(text) => {
120 let text_segment = TextSegment::styled(&text, Style::Code);
121 text_segments.push(text_segment);
122 }
123
124 Event::Text(text) => {
125 let mut text_segment = TextSegment::plain(&text);
126 inline_styles.iter().for_each(|style| {
127 text_segment.add_style(style);
128 });
129 text_segments.push(text_segment);
130 }
131
132 Event::SoftBreak => {
133 let text_segment = TextSegment::empty_line();
134 text_segments.push(text_segment);
135 }
136
137 Event::End(tag_end) if Self::tags_match(&tag, &tag_end) => {
138 let text = if !text_segments.is_empty() {
139 RichText::from(text_segments)
140 } else {
141 RichText::empty()
142 };
143
144 return match tag {
145 Tag::Heading { level, .. } => Some(Node::Heading {
146 level: level.into(),
147 text,
148 source_range,
149 }),
150 Tag::Item => {
151 if !text.is_empty() {
156 nodes.insert(
157 0,
158 Node::Paragraph {
159 text,
160 source_range: source_range.clone(),
161 },
162 );
163 }
164
165 let item = if let Some(kind) = state.task_kind.pop() {
166 Some(Node::Task {
167 kind,
168 nodes,
169 source_range,
170 })
171 } else {
172 Some(Node::Item {
173 kind: state
174 .item_kind
175 .last()
176 .cloned()
177 .unwrap_or(ast::ItemKind::Unordered),
178 nodes,
179 source_range,
180 })
181 };
182
183 if let Some(ast::ItemKind::Ordered(start)) = state.item_kind.last_mut()
184 {
185 *start += 1;
186 };
187
188 item
189 }
190 Tag::List(..) => {
191 state.item_kind.pop();
192
193 Some(Node::List {
194 nodes,
195 source_range,
196 })
197 }
198 Tag::CodeBlock(kind) => Some(Node::CodeBlock {
199 lang: match kind {
200 CodeBlockKind::Fenced(lang) => Some(lang.to_string()),
201 _ => None,
202 },
203 text,
204 source_range,
205 }),
206 Tag::BlockQuote(kind) => Some(Node::BlockQuote {
207 kind: kind.map(|kind| kind.into()),
208 nodes,
209 source_range,
210 }),
211 Tag::Paragraph => Some(Node::Paragraph { text, source_range }),
212 _ => None,
213 };
214 }
215 _ => {}
216 }
217 }
218
219 None
220 }
221
222 fn is_container_tag(tag: &Tag) -> bool {
223 matches!(
224 tag,
225 Tag::Paragraph
226 | Tag::Item
227 | Tag::List(..)
228 | Tag::BlockQuote(..)
229 | Tag::CodeBlock(..)
230 | Tag::Heading { .. }
231 )
232 }
233
234 fn is_inline_tag(tag: &Tag) -> bool {
235 matches!(tag, Tag::Emphasis | Tag::Strong | Tag::Strikethrough)
236 }
237
238 fn tags_match(start: &Tag, end: &TagEnd) -> bool {
239 fn tag_to_end(tag: &Tag) -> Option<TagEnd> {
240 match tag {
241 Tag::Heading { level, .. } => Some(TagEnd::Heading(*level)),
242 Tag::List(ordered) => Some(TagEnd::List(ordered.is_some())),
243 Tag::Item => Some(TagEnd::Item),
244 Tag::BlockQuote(kind) => Some(TagEnd::BlockQuote(*kind)),
245 Tag::CodeBlock(..) => Some(TagEnd::CodeBlock),
246 Tag::Paragraph => Some(TagEnd::Paragraph),
247 _ => None,
248 }
249 }
250
251 if let Some(start) = tag_to_end(start) {
252 std::mem::discriminant(&start) == std::mem::discriminant(end)
253 } else {
254 false
255 }
256 }
257
258 fn tag_to_style(tag: &Tag) -> Option<Style> {
259 match tag {
260 Tag::Emphasis => Some(Style::Emphasis),
261 Tag::Strong => Some(Style::Strong),
262 Tag::Strikethrough => Some(Style::Strikethrough),
263 _ => None,
264 }
265 }
266}
267
268pub fn from_str(text: &str) -> Vec<Node> {
269 Parser::new(text).parse()
270}
271
// Snapshot tests for the Markdown parser: each fixture is parsed with
// `from_str` and the result is compared against a stored `insta` snapshot.
272#[cfg(test)]
273mod tests {
274 use indoc::indoc;
275 use insta::assert_snapshot;
276
277 use super::*;
278
 // Runs every `(snapshot name, Markdown source)` fixture below through the parser.
279 #[test]
280 fn test_parser() {
281 let tests = [
282 (
283 "paragraphs",
284 indoc! { r#"## Paragraphs
285 To create paragraphs in Markdown, use a **blank line** to separate blocks of text. Each block of text separated by a blank line is treated as a distinct paragraph.
286
287 This is a paragraph.
288
289 This is another paragraph.
290
291 A blank line between lines of text creates separate paragraphs. This is the default behavior in Markdown.
292 "#},
293 ),
294 (
295 "headings",
296 indoc! { r#"## Headings
297 To create a heading, add up to six `#` symbols before your heading text. The number of `#` symbols determines the size of the heading.
298
299 # This is a heading 1
300 ## This is a heading 2
301 ### This is a heading 3
302 #### This is a heading 4
303 ##### This is a heading 5
304 ###### This is a heading 6
305 "#},
306 ),
307 (
308 "lists",
309 indoc! { r#"## Lists
310 You can create an unordered list by adding a `-`, `*`, or `+` before the text.
311
312 - First list item
313 - Second list item
314 - Third list item
315
316 To create an ordered list, start each line with a number followed by a `.` or `)` symbol.
317
318 1. First list item
319 2. Second list item
320 3. Third list item
321
322 1) First list item
323 2) Second list item
324 3) Third list item
325 "#},
326 ),
327 (
328 "lists_line_breaks",
329 indoc! { r#"## Lists with line breaks
330 You can use line breaks within an ordered list without altering the numbering.
331
332 1. First list item
333
334 2. Second list item
335 3. Third list item
336
337 4. Fourth list item
338 5. Fifth list item
339 6. Sixth list item
340 "#},
341 ),
342 (
343 "task_lists",
344 indoc! { r#"## Task lists
345 To create a task list, start each list item with a hyphen and space followed by `[ ]`.
346
347 - [x] This is a completed task.
348 - [ ] This is an incomplete task.
349
350 You can toggle a task in Reading view by selecting the checkbox.
351
352 > [!tip]
353 > You can use any character inside the brackets to mark it as complete.
354 >
355 > - [x] Milk
356 > - [?] Eggs
357 > - [-] Eggs
358 "#},
359 ),
360 (
361 "nesting_lists",
362 indoc! { r#"## Nesting lists
363 You can nest any type of list—ordered, unordered, or task lists—under any other type of list.
364
365 To create a nested list, indent one or more list items. You can mix list types within a nested structure:
366
367 1. First list item
368 1. Ordered nested list item
369 2. Second list item
370 - Unordered nested list item
371 "#},
372 ),
373 (
374 "nesting_task_lists",
375 indoc! { r#"## Nesting task lists
376 Similarly, you can create a nested task list by indenting one or more list items:
377
378 - [ ] Task item 1
379 - [ ] Subtask 1
380 - [ ] Task item 2
381 - [ ] Subtask 2
382 "#},
383 ),
384 (
402 "code_blocks",
403 indoc! { r#"## Code blocks
404 To format code as a block, enclose it with three backticks or three tildes.
405
406 ```md
407 cd ~/Desktop
408 ```
409
410 You can also create a code block by indenting the text using `Tab` or 4 blank spaces.
411
412 cd ~/Desktop
413
414 "#},
415 ),
416 (
417 "code_syntax_highlighting_in_blocks",
418 indoc! { r#"## Code syntax highlighting in blocks
419 You can add syntax highlighting to a code block, by adding a language code after the first set of backticks.
420
421 ```js
422 function fancyAlert(arg) {
423 if(arg) {
424 $.facebox({div:'#foo'})
425 }
426 }
427 ```
428 "#},
429 ),
430 ];
431
 // Each snapshot holds the fixture text, a `---` separator and the output of
 // `ast::nodes_to_sexp` for the parsed nodes (presumably an S-expression dump
 // of the tree; confirm against `ast`).
432 tests.into_iter().for_each(|(name, text)| {
433 assert_snapshot!(
434 name,
435 format!(
436 "{}\n ---\n\n{}",
437 text,
438 ast::nodes_to_sexp(&from_str(text), 0)
439 )
440 );
441 });
442 }
443}