1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
use super::{Parser, PlainEnglish};
use crate::{Span, Token, TokenKind};

/// A parser that wraps the [`PlainEnglish`] parser that allows one to parse CommonMark files.
///
/// Will ignore code blocks and tables.
pub struct Markdown;

impl Parser for Markdown {
    fn parse(&mut self, source: &[char]) -> Vec<Token> {
        let mut english_parser = PlainEnglish;

        let source_str: String = source.iter().collect();
        let md_parser =
            pulldown_cmark::Parser::new_ext(&source_str, pulldown_cmark::Options::all());

        let mut tokens = Vec::new();

        let mut traversed_bytes = 0;
        let mut traversed_chars = 0;

        let mut stack = Vec::new();

        // NOTE: the range spits out __byte__ indices, not char indices.
        // This is why we keep track above.
        for (event, range) in md_parser.into_offset_iter() {
            if range.start > traversed_bytes {
                traversed_chars += source_str[traversed_bytes..range.start].chars().count();
                traversed_bytes = range.start;
            }

            match event {
                pulldown_cmark::Event::HardBreak => {
                    tokens.push(Token {
                        span: Span::new_with_len(traversed_chars, 1),
                        kind: TokenKind::Newline(1),
                    });
                }
                pulldown_cmark::Event::Start(tag) => stack.push(tag),
                pulldown_cmark::Event::End(pulldown_cmark::Tag::Paragraph)
                | pulldown_cmark::Event::End(pulldown_cmark::Tag::Item) => tokens.push(Token {
                    span: Span::new_with_len(traversed_chars, 1),
                    kind: TokenKind::Newline(1),
                }),
                pulldown_cmark::Event::End(_) => {
                    stack.pop();
                }
                pulldown_cmark::Event::Code(code) => {
                    let chunk_len = code.chars().count();

                    tokens.push(Token {
                        span: Span::new(traversed_chars, chunk_len),
                        kind: TokenKind::Unlintable,
                    });
                }
                pulldown_cmark::Event::Text(text) => {
                    let chunk_len = text.chars().count();

                    if let Some(tag) = stack.last() {
                        use pulldown_cmark::Tag;

                        if matches!(tag, Tag::CodeBlock(..)) {
                            tokens.push(Token {
                                span: Span::new(traversed_chars, text.chars().count()),
                                kind: TokenKind::Unlintable,
                            });
                            continue;
                        }

                        if !(matches!(tag, Tag::Paragraph)
                            || matches!(tag, Tag::Link(..))
                            || matches!(tag, Tag::Heading(..))
                            || matches!(tag, Tag::Item)
                            || matches!(tag, Tag::TableCell)
                            || matches!(tag, Tag::Emphasis)
                            || matches!(tag, Tag::Strong)
                            || matches!(tag, Tag::Strikethrough))
                        {
                            continue;
                        }
                    }

                    let mut new_tokens =
                        english_parser.parse(&source[traversed_chars..traversed_chars + chunk_len]);

                    new_tokens
                        .iter_mut()
                        .for_each(|token| token.span.offset(traversed_chars));

                    tokens.append(&mut new_tokens);
                }
                _ => (),
            }
        }

        tokens
    }
}