1use std::fmt;
2use std::iter::FusedIterator;
3use std::ops::Range;
4
5use line_span::{find_line_range, find_next_line_start};
6
7use crate::syntax::SyntaxRule;
8
/// A comment found in the parsed text.
///
/// Both variants carry two slices of the source text, in this order:
/// the raw span (returned by `raw`) and the comment text (returned by
/// `text`).
#[derive(PartialEq, Eq, Clone)]
pub enum Event<'a> {
    /// A line comment, as `(raw, text)`.
    LineComment(&'a str, &'a str),
    /// A block comment, as `(raw, text)`.
    BlockComment(&'a str, &'a str),
}
52
53impl<'a> Event<'a> {
54 #[inline]
56 pub fn raw(&self) -> &str {
57 use Event::*;
58 match self {
59 LineComment(raw, _) | BlockComment(raw, _) => raw,
60 }
61 }
62
63 #[inline]
65 pub fn text(&self) -> &str {
66 use Event::*;
67 match self {
68 LineComment(_, text) | BlockComment(_, text) => text,
69 }
70 }
71}
72
73impl<'a> fmt::Debug for Event<'a> {
74 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
80 use Event::*;
81 let name = match self {
82 LineComment(..) => "LineComment",
83 BlockComment(..) => "BlockComment",
84 };
85 fmt.debug_tuple(name)
86 .field(&format_args!("_"))
87 .field(&self.text())
88 .finish()
89 }
90}
91
/// Internal parse result: everything the scanner recognizes, including
/// string literals.
///
/// Every variant holds `(raw, inner)`: the whole matched span followed by
/// the part after the start delimiter.
#[derive(Debug, Clone)]
enum RawEvent<'a> {
    /// A line comment.
    LineComment(&'a str, &'a str),
    /// A block comment.
    BlockComment(&'a str, &'a str),
    /// A string literal.
    String(&'a str, &'a str),
}
98
99impl<'a> RawEvent<'a> {
100 #[inline]
101 fn into_event(self) -> Option<Event<'a>> {
102 use RawEvent::*;
103 match self {
104 LineComment(raw, text) => Some(Event::LineComment(raw, text)),
105 BlockComment(raw, text) => Some(Event::BlockComment(raw, text)),
106 String(..) => None,
107 }
108 }
109}
110
111#[allow(missing_debug_implementations)]
115#[derive(Clone)]
116pub struct CommentParser<'a> {
117 text: &'a str,
118 index: usize,
119 rules: &'a [SyntaxRule<'a>],
120 max_rule_len: usize,
121}
122
123impl<'a> CommentParser<'a> {
124 #[inline]
134 pub fn new(text: &'a str, rules: &'a [SyntaxRule]) -> Self {
135 assert!(SyntaxRule::check_rules(rules), "empty syntax rule");
136
137 Self {
138 text,
139 index: 0,
140 rules,
141 max_rule_len: SyntaxRule::max_rule_len(rules),
142 }
143 }
144
145 fn next_event(&mut self) -> Option<RawEvent<'a>> {
146 let bytes = self.text.as_bytes();
147
148 let rule = bytes[self.index..]
149 .windows(self.max_rule_len)
150 .enumerate()
151 .filter_map(|(i, w)| {
152 let rule = self
153 .rules
154 .iter()
155 .position(|rule| w.starts_with(rule.start()))?;
156 Some((self.index + i, &self.rules[rule]))
157 })
158 .next();
159
160 if let Some((start, rule)) = rule {
161 Some(match rule.parse_rule() {
162 ParseRule::LineComment => self.parse_line_comment(start, rule),
163 ParseRule::BlockComment => self.parse_block_comment(start, rule),
164 ParseRule::String => self.parse_string(start, rule),
165 })
166 } else {
167 self.index = bytes.len();
168 None
169 }
170 }
171
172 fn parse_line_comment(&mut self, start: usize, rule: &SyntaxRule) -> RawEvent<'a> {
173 let after_start = start + rule.start().len();
174 let Range { start, end } = find_line_range(self.text, start);
175
176 self.index = find_next_line_start(self.text, end).unwrap_or_else(|| self.text.len());
177
178 let line = &self.text[start..end];
179 let comment = &self.text[after_start..end];
180
181 RawEvent::LineComment(line, comment)
182 }
183
184 fn parse_block_comment(&mut self, start: usize, rule: &SyntaxRule) -> RawEvent<'a> {
185 let after_start = start + rule.start().len();
186
187 let rule_end = rule.end();
188
189 let (before_end, end) = self.text.as_bytes()[after_start..]
190 .windows(rule_end.len())
191 .position(|w| w == rule_end)
192 .map(|i| {
193 let i = after_start + i;
194 (i, i + rule_end.len())
195 })
196 .unwrap_or_else(|| {
197 let i = self.text.len();
198 (i, i)
199 });
200
201 self.index = end;
202
203 let lines = &self.text[start..end];
204 let comment = &self.text[after_start..before_end];
205
206 RawEvent::BlockComment(lines, comment)
207 }
208
209 fn parse_string(&mut self, start: usize, rule: &SyntaxRule) -> RawEvent<'a> {
210 let after_start = start + rule.start().len();
211 let rule_end = rule.start();
212
213 let mut skip = false;
214
215 let (before_end, end) = self.text.as_bytes()[after_start..]
216 .windows(rule_end.len())
217 .position(|w| {
218 if skip {
219 skip = false;
220 false
221 } else if w[0] == b'\\' {
223 skip = true;
224 false
225 } else {
226 w == rule_end
227 }
228 })
229 .map(|i| {
230 let i = after_start + i;
231 (i, i + rule_end.len())
232 })
233 .unwrap_or_else(|| {
234 let i = self.text.len();
235 (i, i)
236 });
237
238 self.index = end;
239
240 let lines = &self.text[start..end];
241 let string = &self.text[after_start..before_end];
242
243 RawEvent::String(lines, string)
244 }
245}
246
247impl<'a> Iterator for CommentParser<'a> {
248 type Item = Event<'a>;
249
250 fn next(&mut self) -> Option<Self::Item> {
251 if self.index == self.text.len() {
252 return None;
253 }
254
255 while let Some(event) = self.next_event() {
256 let event = event.into_event();
257 if event.is_some() {
258 return event;
259 }
260 }
261
262 None
263 }
264}
265
266impl<'a> FusedIterator for CommentParser<'a> {}
267
/// The kind of construct a `SyntaxRule` describes, used to pick the parsing
/// routine for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParseRule {
    /// A comment that runs to the end of its line.
    LineComment,
    /// A comment enclosed by a start and an end delimiter.
    BlockComment,
    /// A string literal.
    String,
}
273
274impl<'a> SyntaxRule<'a> {
275 #[inline]
276 fn parse_rule(&self) -> ParseRule {
277 use SyntaxRule::*;
278 match self {
279 LineComment(..) => ParseRule::LineComment,
280 BlockComment(..) => ParseRule::BlockComment,
281 String(..) => ParseRule::String,
282 }
283 }
284
285 #[inline]
286 fn start(&self) -> &[u8] {
287 use SyntaxRule::*;
288 match self {
289 LineComment(start) | BlockComment(start, _) | String(start) => start,
290 }
291 }
292
293 #[inline]
294 fn end(&self) -> &[u8] {
295 use SyntaxRule::*;
296 match self {
297 BlockComment(_, end) => end,
298 _ => unreachable!(),
299 }
300 }
301
302 #[inline]
303 fn max_rule_len(rules: &[Self]) -> usize {
304 rules
305 .iter()
306 .map(Self::start)
307 .map(<[u8]>::len)
308 .max()
309 .unwrap_or(0)
310 }
311
312 #[inline]
314 fn check_rules(rules: &[Self]) -> bool {
315 !rules.iter().any(|rule| {
316 use SyntaxRule::*;
317 match rule {
318 LineComment(start) | String(start) => start.is_empty(),
319 BlockComment(start, end) => start.is_empty() || end.is_empty(),
320 }
321 })
322 }
323}