comment_parser/
parse.rs

1use std::fmt;
2use std::iter::FusedIterator;
3use std::ops::Range;
4
5use line_span::{find_line_range, find_next_line_start};
6
7use crate::syntax::SyntaxRule;
8
/// A comment found in the parsed text.
///
/// Events contain [`raw`] and [`text`].
///
/// Text is the contents of the comment, while raw additionally includes
/// characters that depend on the type of comment, such as the comment
/// delimiters or "start and end symbols" of the comment.
///
/// - `LineComment`'s `raw` is the whole line the comment is on, including
///   anything that precedes the comment on that line.
/// - `BlockComment`'s `raw` is the comment text together with its
///   delimiters, and nothing else from the surrounding lines.
///
/// *The above is only true for events parsed by [`CommentParser`].*
///
/// [`text`]: enum.Event.html#method.text
/// [`raw`]: enum.Event.html#method.raw
/// [`CommentParser`]: struct.CommentParser.html
///
/// # Example
///
/// ```rust
/// # use comment_parser::Event;
/// let line = Event::LineComment("  // Foo Bar", " Foo Bar");
/// assert_eq!(line.text(), " Foo Bar");
/// assert_eq!(line.raw(),  "  // Foo Bar");
///
/// let block = Event::BlockComment("/* Foo\n  Bar */", " Foo\n  Bar ");
/// assert_eq!(block.text(), " Foo\n  Bar ");
/// assert_eq!(block.raw(),  "/* Foo\n  Bar */");
///
/// # use comment_parser::{get_syntax, CommentParser};
/// #
/// # let code = "  \n  // Foo Bar\r\n foo /* Foo\n  Bar */ foo\n";
/// #
/// # let mut parser = CommentParser::new(code, get_syntax("rust").unwrap());
/// # assert_eq!(parser.next(), Some(line));
/// # assert_eq!(parser.next(), Some(block));
/// # assert_eq!(parser.next(), None);
/// ```
// An event is just two borrowed slices, so it is cheap to copy and its
// equality is a total equivalence relation.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Event<'a> {
    /// `LineComment(raw, text)`
    LineComment(&'a str, &'a str),
    /// `BlockComment(raw, text)`
    BlockComment(&'a str, &'a str),
}
52
53impl<'a> Event<'a> {
54    /// Returns the raw part of an `Event`.
55    #[inline]
56    pub fn raw(&self) -> &str {
57        use Event::*;
58        match self {
59            LineComment(raw, _) | BlockComment(raw, _) => raw,
60        }
61    }
62
63    /// Returns the text part of an `Event`.
64    #[inline]
65    pub fn text(&self) -> &str {
66        use Event::*;
67        match self {
68            LineComment(_, text) | BlockComment(_, text) => text,
69        }
70    }
71}
72
73impl<'a> fmt::Debug for Event<'a> {
74    /// Renders [`raw`] as `_` as both [`raw`] and
75    /// [`text`] are similar.
76    ///
77    /// [`text`]: enum.Event.html#method.text
78    /// [`raw`]: enum.Event.html#method.raw
79    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
80        use Event::*;
81        let name = match self {
82            LineComment(..) => "LineComment",
83            BlockComment(..) => "BlockComment",
84        };
85        fmt.debug_tuple(name)
86            .field(&format_args!("_"))
87            .field(&self.text())
88            .finish()
89    }
90}
91
/// Internal scan result: everything a syntax rule can match.
///
/// Unlike the public `Event`, this also has a `String` variant for string
/// literals. Each variant is `(raw, text)`, where `raw` is the matched
/// span and `text` is the part between the delimiters.
// Only borrowed slices are stored, so the type is trivially copyable and
// comparable, which keeps it easy to assert on.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RawEvent<'a> {
    /// `LineComment(raw, text)`
    LineComment(&'a str, &'a str),
    /// `BlockComment(raw, text)`
    BlockComment(&'a str, &'a str),
    /// `String(raw, text)`
    String(&'a str, &'a str),
}
98
99impl<'a> RawEvent<'a> {
100    #[inline]
101    fn into_event(self) -> Option<Event<'a>> {
102        use RawEvent::*;
103        match self {
104            LineComment(raw, text) => Some(Event::LineComment(raw, text)),
105            BlockComment(raw, text) => Some(Event::BlockComment(raw, text)),
106            String(..) => None,
107        }
108    }
109}
110
111/// `CommentParser` parses `text` and produces [`Event`]s.
112///
113/// [`Event`]: enum.Event.html
114#[allow(missing_debug_implementations)]
115#[derive(Clone)]
116pub struct CommentParser<'a> {
117    text: &'a str,
118    index: usize,
119    rules: &'a [SyntaxRule<'a>],
120    max_rule_len: usize,
121}
122
123impl<'a> CommentParser<'a> {
124    /// Creates a `CommentParser` which parses `text` based on
125    /// `rules` and produces [`Event`]s.
126    ///
127    /// # Panics
128    ///
129    /// Note that `CommentParser` panics immediately upon calling `new`,
130    /// if any [`SyntaxRule`] contains an empty `&[u8]`.
131    ///
132    /// [`SyntaxRule`]: enum.SyntaxRule.html
133    #[inline]
134    pub fn new(text: &'a str, rules: &'a [SyntaxRule]) -> Self {
135        assert!(SyntaxRule::check_rules(rules), "empty syntax rule");
136
137        Self {
138            text,
139            index: 0,
140            rules,
141            max_rule_len: SyntaxRule::max_rule_len(rules),
142        }
143    }
144
145    fn next_event(&mut self) -> Option<RawEvent<'a>> {
146        let bytes = self.text.as_bytes();
147
148        let rule = bytes[self.index..]
149            .windows(self.max_rule_len)
150            .enumerate()
151            .filter_map(|(i, w)| {
152                let rule = self
153                    .rules
154                    .iter()
155                    .position(|rule| w.starts_with(rule.start()))?;
156                Some((self.index + i, &self.rules[rule]))
157            })
158            .next();
159
160        if let Some((start, rule)) = rule {
161            Some(match rule.parse_rule() {
162                ParseRule::LineComment => self.parse_line_comment(start, rule),
163                ParseRule::BlockComment => self.parse_block_comment(start, rule),
164                ParseRule::String => self.parse_string(start, rule),
165            })
166        } else {
167            self.index = bytes.len();
168            None
169        }
170    }
171
172    fn parse_line_comment(&mut self, start: usize, rule: &SyntaxRule) -> RawEvent<'a> {
173        let after_start = start + rule.start().len();
174        let Range { start, end } = find_line_range(self.text, start);
175
176        self.index = find_next_line_start(self.text, end).unwrap_or_else(|| self.text.len());
177
178        let line = &self.text[start..end];
179        let comment = &self.text[after_start..end];
180
181        RawEvent::LineComment(line, comment)
182    }
183
184    fn parse_block_comment(&mut self, start: usize, rule: &SyntaxRule) -> RawEvent<'a> {
185        let after_start = start + rule.start().len();
186
187        let rule_end = rule.end();
188
189        let (before_end, end) = self.text.as_bytes()[after_start..]
190            .windows(rule_end.len())
191            .position(|w| w == rule_end)
192            .map(|i| {
193                let i = after_start + i;
194                (i, i + rule_end.len())
195            })
196            .unwrap_or_else(|| {
197                let i = self.text.len();
198                (i, i)
199            });
200
201        self.index = end;
202
203        let lines = &self.text[start..end];
204        let comment = &self.text[after_start..before_end];
205
206        RawEvent::BlockComment(lines, comment)
207    }
208
209    fn parse_string(&mut self, start: usize, rule: &SyntaxRule) -> RawEvent<'a> {
210        let after_start = start + rule.start().len();
211        let rule_end = rule.start();
212
213        let mut skip = false;
214
215        let (before_end, end) = self.text.as_bytes()[after_start..]
216            .windows(rule_end.len())
217            .position(|w| {
218                if skip {
219                    skip = false;
220                    false
221                // TODO: This should be part of SyntaxRule
222                } else if w[0] == b'\\' {
223                    skip = true;
224                    false
225                } else {
226                    w == rule_end
227                }
228            })
229            .map(|i| {
230                let i = after_start + i;
231                (i, i + rule_end.len())
232            })
233            .unwrap_or_else(|| {
234                let i = self.text.len();
235                (i, i)
236            });
237
238        self.index = end;
239
240        let lines = &self.text[start..end];
241        let string = &self.text[after_start..before_end];
242
243        RawEvent::String(lines, string)
244    }
245}
246
247impl<'a> Iterator for CommentParser<'a> {
248    type Item = Event<'a>;
249
250    fn next(&mut self) -> Option<Self::Item> {
251        if self.index == self.text.len() {
252            return None;
253        }
254
255        while let Some(event) = self.next_event() {
256            let event = event.into_event();
257            if event.is_some() {
258                return event;
259            }
260        }
261
262        None
263    }
264}
265
266impl<'a> FusedIterator for CommentParser<'a> {}
267
/// The kind of construct a `SyntaxRule` describes, without its
/// delimiters; used to pick the matching parse routine.
// Field-less, so it is free to copy, compare and print.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ParseRule {
    /// A comment that runs to the end of its line.
    LineComment,
    /// A comment enclosed by a start and an end delimiter.
    BlockComment,
    /// A string literal.
    String,
}
273
274impl<'a> SyntaxRule<'a> {
275    #[inline]
276    fn parse_rule(&self) -> ParseRule {
277        use SyntaxRule::*;
278        match self {
279            LineComment(..) => ParseRule::LineComment,
280            BlockComment(..) => ParseRule::BlockComment,
281            String(..) => ParseRule::String,
282        }
283    }
284
285    #[inline]
286    fn start(&self) -> &[u8] {
287        use SyntaxRule::*;
288        match self {
289            LineComment(start) | BlockComment(start, _) | String(start) => start,
290        }
291    }
292
293    #[inline]
294    fn end(&self) -> &[u8] {
295        use SyntaxRule::*;
296        match self {
297            BlockComment(_, end) => end,
298            _ => unreachable!(),
299        }
300    }
301
302    #[inline]
303    fn max_rule_len(rules: &[Self]) -> usize {
304        rules
305            .iter()
306            .map(Self::start)
307            .map(<[u8]>::len)
308            .max()
309            .unwrap_or(0)
310    }
311
312    /// Returns `true` if the rules are valid.
313    #[inline]
314    fn check_rules(rules: &[Self]) -> bool {
315        !rules.iter().any(|rule| {
316            use SyntaxRule::*;
317            match rule {
318                LineComment(start) | String(start) => start.is_empty(),
319                BlockComment(start, end) => start.is_empty() || end.is_empty(),
320            }
321        })
322    }
323}