no_comment/without_comments.rs
1use std::collections::VecDeque;
2
3/// Buffer type used in the `WithoutComments` iterator, `Deref`s to `VecDeque<char>`.
4/// The capacity of the inner `VecDeque<char>` is constant, it represents the maximum length of
5/// buffer needed to match any open or close pattern for the current language.
6#[derive(Deref, Debug)]
7#[repr(transparent)]
8struct Buf(VecDeque<char>);
9
10impl Buf {
11 fn new(max_len: usize) -> Self {
12 Self(VecDeque::with_capacity(max_len))
13 }
14
15 fn is_full(&self) -> bool {
16 self.len() == self.capacity()
17 }
18
19 /// Fill up inner `VecDeque<char>` to capacity from provided iterator. This is the only way
20 /// to add elements to the buffer.
21 fn fill_up(&mut self, iter: &mut impl Iterator<Item = char>) {
22 while !self.is_full() {
23 match iter.next() {
24 None => break,
25 Some(x) => self.0.push_back(x),
26 }
27 }
28 }
29
30 /// Checks whether the beginning of the buffer matches the provided pattern, the buffer should
31 /// be full when this method is called.
32 fn matches(&self, pat: &str) -> bool {
33 self.iter().take(pat.len()).copied().eq(pat.chars())
34 }
35
36 /// Assert that the buffer is not empty and pop the first element
37 fn pop_front(&mut self) -> char {
38 self.0.pop_front().unwrap()
39 }
40
41 /// Assert that the buffer has at least n elements and pop the first n elements
42 fn pop_front_n(&mut self, n: usize) {
43 let _ = self.0.drain(..n);
44 }
45}
46
/// Three-state counterpart of `Option`: besides "here is an item" and "iteration is over" it
/// can also say "nothing to hand out right now, ask again", which `Option` cannot express.
#[derive(Debug)]
enum Tription<T> {
    /// A value of type `T` is produced
    Some(T),
    /// The iteration is finished
    None,
    /// Nothing is produced this time, the caller should try again
    Wait,
}
59
60impl<T> From<Option<T>> for Tription<T> {
61 fn from(o: Option<T>) -> Self {
62 match o {
63 Some(t) => Tription::Some(t),
64 None => Tription::None,
65 }
66 }
67}
68
/// The rules describing one kind of comment in one language, e.g. 'block comment in rust' or
/// 'line comment in haskell'.
#[derive(Debug, Clone, Copy)]
pub struct Comment {
    /// Pattern that starts the comment, such as `/*`
    pub(crate) open_pat: &'static str,
    /// Pattern that ends the comment, such as `*/`
    pub(crate) close_pat: &'static str,
    /// `true` if comments of this kind may contain each other. Rust block comments nest,
    /// C block comments don't.
    pub(crate) nests: bool,
    /// `true` if the close pattern is still emitted as part of the text. The `*/` of a rust
    /// block comment is dropped, the `\n` ending a rust line comment is kept.
    pub(crate) keep_close_pat: bool,
    /// `true` if the close pattern may show up in regular text without a matching open
    /// pattern. In rust a stray `*/` causes a panic while `\n` is ordinary text.
    pub(crate) allow_close_pat: bool,
}
87
88/// `char` iterator that removes comments based on a list of `Comment` specifications.
89/// Unclosed comments (`//...` or `/*...` or equivalents) continue until the end of the iterator.
90/// Closing unopened block comments (`... */` or equivalent) causes a panic.
91pub struct WithoutComments<I: Iterator<Item = char>> {
92 /// Inner `char` iterator
93 iter: I,
94 /// Buffer used to match against open and close patterns
95 buf: Buf,
96 /// List of types of comments and associated rules
97 comments: Box<[Comment]>,
98 /// The current state. None represents normal text, i.e. not currently in a comment,
99 /// Some(idx, nesting) represents that the iterator is currently in a comment, idx
100 /// is the index of the current comment in self.comments, nesting is None if the current
101 /// comment doesn't nest and Some(d) otherwise, where d is the current nesting depth
102 /// starting at 0.
103 state: Option<(usize, Option<usize>)>,
104}
105
106impl<I: Iterator<Item = char>> WithoutComments<I> {
107 fn new(iter: I, comments: Box<[Comment]>, buf_len: usize) -> Self {
108 Self {
109 iter,
110 // buffer will be filled in first call to self.next_()
111 buf: Buf::new(buf_len),
112 comments,
113 state: None,
114 }
115 }
116
117 /// Inner equivalent of `Iterator::next` returning a `Tription` instead of an `Option`.
118 /// This is for the case where a block comment follows right after another
119 /// (`/* ... *//* ... */` or equivalent), after reading `*/`, the buffer needs to be filled
120 /// to make sure that any eventual `/*` will be matched, this is done in the next call to
121 /// `next_`, thus, the calling loop in `Iterator::next` is told to wait one more iteration.
122 fn next_(&mut self) -> Tription<char> {
123 // at least one element missing from previous call
124 self.buf.fill_up(&mut self.iter);
125
126 if self.buf.is_empty() {
127 return Tription::None;
128 }
129
130 // if in comment
131 if let Some((idx, ref mut nesting)) = self.state {
132 let comment = &self.comments[idx];
133 let &Comment {
134 open_pat,
135 close_pat,
136 keep_close_pat,
137 ..
138 } = comment;
139
140 // check close before open to make thinks like python's '''...''' work
141 if self.buf.matches(close_pat) {
142 // matched close pattern
143
144 if !keep_close_pat {
145 self.buf.pop_front_n(close_pat.len());
146 }
147
148 match nesting {
149 // non-nesting comment or top-level comment
150 None | Some(0) => self.state = None,
151 // nested comment
152 Some(d) => *d -= 1,
153 }
154 } else if let Some(depth) = nesting {
155 if self.buf.matches(open_pat) {
156 // matched nesting open pattern
157 self.buf.pop_front_n(open_pat.len());
158 *depth += 1;
159 } else {
160 self.buf.pop_front();
161 }
162 } else {
163 self.buf.pop_front();
164 }
165
166 Tription::Wait
167 } else {
168 // if in text
169 // #![feature(bindings_after_at)]
170 // for comment @ Comment(open_pat, .., nests) in self.comments.as_ref() {
171 // for each rule...
172 for (idx, comment) in self.comments.iter().enumerate() {
173 let Comment {
174 open_pat,
175 close_pat,
176 nests,
177 allow_close_pat,
178 ..
179 } = comment;
180
181 // if it matches open pattern, open
182 if self.buf.matches(open_pat) {
183 self.buf.pop_front_n(open_pat.len());
184
185 let nesting = match nests {
186 true => Some(0),
187 false => None,
188 };
189 self.state = Some((idx, nesting));
190 return Tription::Wait;
191 } else if !allow_close_pat && self.buf.matches(close_pat) {
192 // if close pattern forbidden, panic
193 panic!("Got \"{}\" without matching \"{}\"", close_pat, open_pat)
194 }
195 }
196
197 Tription::Some(self.buf.pop_front())
198 }
199 }
200}
201
202impl<I: Iterator<Item = char>> Iterator for WithoutComments<I> {
203 type Item = char;
204
205 /// Simply calls `WithoutComments::next_`, a return value of `Tription::Wait` signifies
206 /// that another attempt should be made, `Tription::Some` and `Tription::None` are
207 /// equivalent to the same variants of the `Option` type.
208 fn next(&mut self) -> Option<Self::Item> {
209 loop {
210 match self.next_() {
211 Tription::None => return None,
212 Tription::Some(c) => return Some(c),
213 Tription::Wait => (),
214 }
215 }
216 }
217}
218
219/// A trait to implement the `without_comments` method on all `Iterator<Item=char>`
220pub trait IntoWithoutComments
221where
222 Self: Sized + Iterator<Item = char>,
223{
224 /// Returns a `WithoutComments` iterator containing self
225 ///
226 /// # Arguments
227 ///
228 /// * `language` - A boxed slice containing all the comments that the returned iterator
229 /// will be removing
230 ///
231 /// # Example
232 ///
233 /// ```
234 /// use no_comment::{IntoWithoutComments, languages};
235 /// let with_comments = "S/*he */be/*lie*/ve//d";
236 /// let without_comments = with_comments
237 /// .chars()
238 /// .without_comments(languages::rust())
239 /// .collect::<String>();
240 /// assert_eq!(&without_comments, "Sbeve");
241 /// ```
242 fn without_comments(self, language: Box<[Comment]>) -> WithoutComments<Self> {
243 let mut buf_len = 0;
244 for &Comment {
245 open_pat,
246 close_pat,
247 ..
248 } in language.iter()
249 {
250 if open_pat.len() > buf_len {
251 buf_len = open_pat.len()
252 }
253 if close_pat.len() > buf_len {
254 buf_len = close_pat.len()
255 }
256 }
257 assert_ne!(buf_len, 0);
258 WithoutComments::new(self, language, buf_len)
259 }
260}
261
262/// Blanket implementation
263impl<I: Iterator<Item = char>> IntoWithoutComments for I {}