// mdbook_katex/scan.rs

//! Scan Markdown text and identify math block events.
use super::*;
3
4/// A pair of strings are delimiters.
5#[derive(Clone, Debug, Deserialize, Serialize)]
6pub struct Delimiter {
7    /// Left delimiter.
8    pub left: String,
9    /// Right delimiter.
10    pub right: String,
11}
12
13impl Delimiter {
14    /// Same left and right `delimiter`.
15    pub fn same(delimiter: String) -> Self {
16        Self {
17            left: delimiter.clone(),
18            right: delimiter,
19        }
20    }
21
22    /// The first byte of the left delimiter.
23    pub fn first(&self) -> u8 {
24        self.left.as_bytes()[0]
25    }
26
27    /// Whether `to_match` matches the left delimiter.
28    pub fn match_left(&self, to_match: &[u8]) -> bool {
29        if self.left.len() > to_match.len() {
30            return false;
31        }
32        for (we, they) in self.left.as_bytes().iter().zip(to_match) {
33            if we != they {
34                return false;
35            }
36        }
37        true
38    }
39}
40
/// An event for parsing in a Markdown file.
///
/// Every variant carries a byte offset into the scanned text.
#[derive(Debug)]
pub enum Event {
    /// A text or math block begins at this offset.
    Begin(usize),
    /// A text block ends at this offset.
    TextEnd(usize),
    /// An inline math block ends at this offset.
    InlineEnd(usize),
    /// A display math block ends at this offset.
    BlockEnd(usize),
}
53
54/// Scanner for text to identify block and inline math `Event`s.
55#[derive(Debug)]
56pub struct Scan<'a> {
57    string: &'a str,
58    bytes: &'a [u8],
59    index: usize,
60    /// Buffer for block and inline math `Event`s.
61    pub events: VecDeque<Event>,
62    block_delimiter: &'a Delimiter,
63    inline_delimiter: &'a Delimiter,
64}
65
66impl Iterator for Scan<'_> {
67    type Item = Event;
68
69    fn next(&mut self) -> Option<Self::Item> {
70        loop {
71            match self.events.pop_front() {
72                Some(item) => return Some(item),
73                None => self.process_byte().ok()?,
74            }
75        }
76    }
77}
78
79impl<'a> Scan<'a> {
80    /// Set up a `Scan` for `string` with given delimiters.
81    pub fn new(
82        string: &'a str,
83        block_delimiter: &'a Delimiter,
84        inline_delimiter: &'a Delimiter,
85    ) -> Self {
86        Self {
87            string,
88            bytes: string.as_bytes(),
89            index: 0,
90            events: VecDeque::new(),
91            block_delimiter,
92            inline_delimiter,
93        }
94    }
95
96    /// Scan, identify and store all `Event`s in `self.events`.
97    pub fn run(&mut self) {
98        while let Ok(()) = self.process_byte() {}
99    }
100
101    /// Get byte currently pointed to. Returns `Err(())` if out of bound.
102    fn get_byte(&self) -> Result<u8, ()> {
103        self.bytes.get(self.index).map(|b| b.to_owned()).ok_or(())
104    }
105
106    /// Increment index.
107    fn inc(&mut self) {
108        self.index += 1;
109    }
110
111    /// Scan one byte, proceed process based on the byte.
112    /// - Start of delimiter => call `process_delimit`.
113    /// - `\` => skip one byte.
114    /// - `` ` `` => call `process_backtick`.
115    ///   Return `Err(())` if no more bytes to process.
116    fn process_byte(&mut self) -> Result<(), ()> {
117        let byte = self.get_byte()?;
118        self.inc();
119        match byte {
120            b if b == self.block_delimiter.first()
121                && self
122                    .block_delimiter
123                    .match_left(&self.bytes[(self.index - 1)..]) =>
124            {
125                self.index -= 1;
126                self.process_delimit(false)?;
127            }
128            b if b == self.inline_delimiter.first()
129                && self
130                    .inline_delimiter
131                    .match_left(&self.bytes[(self.index - 1)..]) =>
132            {
133                self.index -= 1;
134                self.process_delimit(true)?;
135            }
136            b'\\' => {
137                self.inc();
138            }
139            b'`' => self.process_backtick()?,
140            _ => (),
141        }
142        Ok(())
143    }
144
145    /// Fully skip a backtick-delimited code block.
146    /// Guaranteed to match the number of backticks in delimiters.
147    /// Return `Err(())` if no more bytes to process.
148    fn process_backtick(&mut self) -> Result<(), ()> {
149        let mut n_back_ticks = 1;
150        loop {
151            let byte = self.get_byte()?;
152            if byte == b'`' {
153                self.inc();
154                n_back_ticks += 1;
155            } else {
156                break;
157            }
158        }
159        loop {
160            self.index += self.string[self.index..]
161                .find(&"`".repeat(n_back_ticks))
162                .ok_or(())?
163                + n_back_ticks;
164            if self.get_byte()? == b'`' {
165                // Skip excessive backticks.
166                self.inc();
167                while let b'`' = self.get_byte()? {
168                    self.inc();
169                }
170            } else {
171                break;
172            }
173        }
174        Ok(())
175    }
176
177    /// Skip a full math block.
178    /// Add `Event`s to mark the start and end of the math block and
179    /// surrounding text blocks.
180    /// Return `Err(())` if no more bytes to process.
181    fn process_delimit(&mut self, inline: bool) -> Result<(), ()> {
182        if self.index > 0 {
183            self.events.push_back(Event::TextEnd(self.index));
184        }
185
186        let delim = if inline {
187            self.inline_delimiter
188        } else {
189            self.block_delimiter
190        };
191        self.index += delim.left.len();
192        self.events.push_back(Event::Begin(self.index));
193
194        loop {
195            self.index += self.string[self.index..].find(&delim.right).ok_or(())?;
196
197            // Check `\`.
198            let mut escaped = false;
199            let mut checking = self.index;
200            loop {
201                checking -= 1;
202                if self.bytes.get(checking) == Some(&b'\\') {
203                    escaped = !escaped;
204                } else {
205                    break;
206                }
207            }
208            if !escaped {
209                let end_event = if inline {
210                    Event::InlineEnd(self.index)
211                } else {
212                    Event::BlockEnd(self.index)
213                };
214                self.events.push_back(end_event);
215                self.index += delim.right.len();
216                self.events.push_back(Event::Begin(self.index));
217                break;
218            } else {
219                self.index += delim.right.len();
220            }
221        }
222
223        Ok(())
224    }
225}