1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
//! Scan Markdown text and identify math block events.
use std::collections::VecDeque;

use serde_derive::{Deserialize, Serialize};

/// A pair of strings that open and close a math block.
///
/// `left` is the text that starts the block and `right` the text that ends
/// it. The two may be identical (see `Delimiter::same`).
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Delimiter {
    /// Left (opening) delimiter.
    pub left: String,
    /// Right (closing) delimiter.
    pub right: String,
}

impl Delimiter {
    /// Build a delimiter whose opening and closing strings are both
    /// `delimiter`.
    pub fn same(delimiter: String) -> Self {
        let right = delimiter.clone();
        Self {
            left: delimiter,
            right,
        }
    }

    /// Leading byte of the left delimiter.
    ///
    /// # Panics
    ///
    /// Panics if the left delimiter is empty.
    pub fn first(&self) -> u8 {
        let opening = self.left.as_bytes();
        opening[0]
    }

    /// Report whether `to_match` begins with the bytes of the left delimiter.
    ///
    /// An input shorter than the left delimiter never matches.
    pub fn match_left(&self, to_match: &[u8]) -> bool {
        to_match.starts_with(self.left.as_bytes())
    }
}

/// An event for parsing in a Markdown file.
///
/// Every variant carries a byte offset into the scanned string.
#[derive(Debug)]
pub enum Event {
    /// A beginning of text or math block: the offset just past a left
    /// delimiter (math starts) or just past a right delimiter (text resumes).
    Begin(usize),
    /// An end of a text block: the offset where a left delimiter starts.
    TextEnd(usize),
    /// An end of an inline math block: the offset where the inline right
    /// delimiter starts.
    InlineEnd(usize),
    /// An end of a display math block: the offset where the block right
    /// delimiter starts.
    BlockEnd(usize),
}

/// Scanner for text to identify block and inline math `Event`s.
///
/// Events can be gathered all at once with `run` (they accumulate in
/// `events`) or pulled one at a time through the `Iterator` implementation.
#[derive(Debug)]
pub struct Scan<'a> {
    /// The text being scanned.
    string: &'a str,
    /// The same text viewed as bytes, for byte-wise inspection.
    bytes: &'a [u8],
    /// Byte offset of the next byte to process.
    index: usize,
    /// Buffer for block and inline math `Event`s.
    pub events: VecDeque<Event>,
    /// Delimiter pair for display (block) math.
    block_delimiter: &'a Delimiter,
    /// Delimiter pair for inline math.
    inline_delimiter: &'a Delimiter,
}

impl<'a> Iterator for Scan<'a> {
    type Item = Event;

    /// Yield the next buffered `Event`, scanning further input whenever the
    /// buffer is empty.
    ///
    /// Returns `None` as soon as scanning reports that it cannot continue.
    fn next(&mut self) -> Option<Self::Item> {
        // Refill the buffer one processing step at a time until something
        // shows up or the scanner gives up.
        while self.events.is_empty() {
            if self.process_byte().is_err() {
                return None;
            }
        }
        self.events.pop_front()
    }
}

impl<'a> Scan<'a> {
    /// Set up a `Scan` for `string` with given delimiters.
    ///
    /// Scanning starts at the first byte and the event buffer starts empty.
    pub fn new(
        string: &'a str,
        block_delimiter: &'a Delimiter,
        inline_delimiter: &'a Delimiter,
    ) -> Self {
        Self {
            string,
            bytes: string.as_bytes(),
            index: 0,
            events: VecDeque::new(),
            block_delimiter,
            inline_delimiter,
        }
    }

    /// Scan, identify and store all `Event`s in `self.events`.
    ///
    /// Scanning stops at the first processing step that reports `Err(())`:
    /// the end of input, an unterminated code span or an unterminated math
    /// block.
    pub fn run(&mut self) {
        while self.process_byte().is_ok() {}
    }

    /// Get byte currently pointed to. Returns `Err(())` if out of bound.
    fn get_byte(&self) -> Result<u8, ()> {
        self.bytes.get(self.index).copied().ok_or(())
    }

    /// Increment index.
    fn inc(&mut self) {
        self.index += 1;
    }

    /// Scan one byte, proceed process based on the byte.
    ///
    /// - Start of delimiter => call `process_delimit`. The block delimiter is
    ///   tried before the inline one.
    /// - `\` => skip one byte.
    /// - `` ` `` => call `process_backtick`.
    ///
    /// Return `Err(())` if no more bytes to process.
    fn process_byte(&mut self) -> Result<(), ()> {
        let byte = self.get_byte()?;
        self.inc();
        match byte {
            b if b == self.block_delimiter.first() || b == self.inline_delimiter.first() => {
                // Step back onto the candidate byte so the whole left
                // delimiter can be compared against the remaining input.
                self.index -= 1;
                if self.block_delimiter.match_left(&self.bytes[self.index..]) {
                    self.process_delimit(false)?;
                } else if self.inline_delimiter.match_left(&self.bytes[self.index..]) {
                    self.process_delimit(true)?;
                } else {
                    // Only the first byte matched: treat it as ordinary text.
                    self.inc();
                }
            }
            b'\\' => {
                // Skip the byte following the backslash.
                self.inc();
            }
            b'`' => self.process_backtick()?,
            _ => (),
        }
        Ok(())
    }

    /// Fully skip a backtick-delimited code block.
    ///
    /// The opening backtick has already been consumed by the caller. The
    /// closing run must contain exactly as many backticks as the opening
    /// run; longer runs are skipped over and the search continues.
    ///
    /// Return `Err(())` if no more bytes to process.
    fn process_backtick(&mut self) -> Result<(), ()> {
        // Measure the opening run (one backtick is already behind us).
        let mut n_back_ticks = 1;
        while self.get_byte()? == b'`' {
            self.inc();
            n_back_ticks += 1;
        }

        // Build the closing fence once instead of on every search.
        let fence = "`".repeat(n_back_ticks);
        loop {
            let offset = self.string[self.index..]
                .find(fence.as_str())
                .ok_or(())?;
            self.index += offset + n_back_ticks;
            if self.get_byte()? != b'`' {
                return Ok(());
            }
            // The run is longer than the opening one, so it does not close
            // the code span. Skip excessive backticks and search again.
            self.inc();
            while self.get_byte()? == b'`' {
                self.inc();
            }
        }
    }

    /// Skip a full math block.
    ///
    /// Add `Event`s to mark the start and end of the math block and
    /// surrounding text blocks. `self.index` must point at the first byte of
    /// the left delimiter; `inline` selects the inline delimiter pair,
    /// otherwise the block pair is used. A right delimiter preceded by an odd
    /// number of backslashes is treated as escaped and skipped.
    ///
    /// Return `Err(())` if no more bytes to process, i.e. no unescaped right
    /// delimiter is found. The `TextEnd`/`Begin` events pushed for the opening
    /// delimiter stay in the buffer in that case.
    fn process_delimit(&mut self, inline: bool) -> Result<(), ()> {
        if self.index > 0 {
            self.events.push_back(Event::TextEnd(self.index));
        }

        let delim = if inline {
            self.inline_delimiter
        } else {
            self.block_delimiter
        };
        self.index += delim.left.len();
        self.events.push_back(Event::Begin(self.index));

        loop {
            self.index += self.string[self.index..]
                .find(delim.right.as_str())
                .ok_or(())?;

            // Count the backslashes immediately before the candidate. Walking
            // a bounded slice backwards cannot step below offset 0, unlike a
            // manually decremented cursor, which underflowed when backslashes
            // reached the very start of the input.
            let n_backslashes = self.bytes[..self.index]
                .iter()
                .rev()
                .take_while(|&&b| b == b'\\')
                .count();
            let escaped = n_backslashes % 2 == 1;

            if escaped {
                // Not a real terminator: step over it and keep looking.
                self.index += delim.right.len();
                continue;
            }

            let end_event = if inline {
                Event::InlineEnd(self.index)
            } else {
                Event::BlockEnd(self.index)
            };
            self.events.push_back(end_event);
            self.index += delim.right.len();
            self.events.push_back(Event::Begin(self.index));
            return Ok(());
        }
    }
}