no_comment/
without_comments.rs

1use std::collections::VecDeque;
2
3/// Buffer type used in the `WithoutComments` iterator, `Deref`s to `VecDeque<char>`.
4/// The capacity of the inner `VecDeque<char>` is constant, it represents the maximum length of
5/// buffer needed to match any open or close pattern for the current language.
6#[derive(Deref, Debug)]
7#[repr(transparent)]
8struct Buf(VecDeque<char>);
9
10impl Buf {
11    fn new(max_len: usize) -> Self {
12        Self(VecDeque::with_capacity(max_len))
13    }
14
15    fn is_full(&self) -> bool {
16        self.len() == self.capacity()
17    }
18
19    /// Fill up inner `VecDeque<char>` to capacity from provided iterator. This is the only way
20    /// to add elements to the buffer.
21    fn fill_up(&mut self, iter: &mut impl Iterator<Item = char>) {
22        while !self.is_full() {
23            match iter.next() {
24                None => break,
25                Some(x) => self.0.push_back(x),
26            }
27        }
28    }
29
30    /// Checks whether the beginning of the buffer matches the provided pattern, the buffer should
31    /// be full when this method is called.
32    fn matches(&self, pat: &str) -> bool {
33        self.iter().take(pat.len()).copied().eq(pat.chars())
34    }
35
36    /// Assert that the buffer is not empty and pop the first element
37    fn pop_front(&mut self) -> char {
38        self.0.pop_front().unwrap()
39    }
40
41    /// Assert that the buffer has at least n elements and pop the first n elements
42    fn pop_front_n(&mut self, n: usize) {
43        let _ = self.0.drain(..n);
44    }
45}
46
/// Three-state counterpart of `Option`.
///
/// Besides `Some` and `None` it has a `Wait` value, which means "nothing can be returned right
/// now, ask again", whereas `None` means the iteration has concluded.
#[derive(Debug)]
enum Tription<T> {
    /// The iteration produced an item of type `T`.
    Some(T),
    /// The iteration is over.
    None,
    /// Nothing to hand out on this attempt; the caller should try once more.
    Wait,
}
59
60impl<T> From<Option<T>> for Tription<T> {
61    fn from(o: Option<T>) -> Self {
62        match o {
63            Some(t) => Tription::Some(t),
64            None => Tription::None,
65        }
66    }
67}
68
/// Rule set describing how one kind of comment is recognised in a language, e.g. "block
/// comment in rust" or "line comment in haskell".
#[derive(Clone, Copy, Debug)]
pub struct Comment {
    /// Pattern that opens the comment, such as `/*`.
    pub(crate) open_pat: &'static str,
    /// Pattern that closes the comment, such as `*/`.
    pub(crate) close_pat: &'static str,
    /// `true` if comments of this kind can be nested inside each other. Rust block comments
    /// nest, C block comments do not.
    pub(crate) nests: bool,
    /// `true` if the close pattern is still returned as part of the text. The `*/` of a rust
    /// block comment is dropped, while the `\n` ending a rust line comment is kept.
    pub(crate) keep_close_pat: bool,
    /// `true` if the close pattern may appear in regular text without a matching open
    /// pattern. In rust a stray `*/` causes a panic, while `\n` is treated normally.
    pub(crate) allow_close_pat: bool,
}
87
88/// `char` iterator that removes comments based on a list of `Comment` specifications.
89/// Unclosed comments (`//...` or `/*...` or equivalents) continue until the end of the iterator.
90/// Closing unopened block comments (`... */` or equivalent) causes a panic.
91pub struct WithoutComments<I: Iterator<Item = char>> {
92    /// Inner `char` iterator
93    iter: I,
94    /// Buffer used to match against open and close patterns
95    buf: Buf,
96    /// List of types of comments and associated rules
97    comments: Box<[Comment]>,
98    /// The current state. None represents normal text, i.e. not currently in a comment,
99    /// Some(idx, nesting) represents that the iterator is currently in a comment, idx
100    /// is the index of the current comment in self.comments, nesting is None if the current
101    /// comment doesn't nest and Some(d) otherwise, where d is the current nesting depth
102    /// starting at 0.
103    state: Option<(usize, Option<usize>)>,
104}
105
106impl<I: Iterator<Item = char>> WithoutComments<I> {
107    fn new(iter: I, comments: Box<[Comment]>, buf_len: usize) -> Self {
108        Self {
109            iter,
110            // buffer will be filled in first call to self.next_()
111            buf: Buf::new(buf_len),
112            comments,
113            state: None,
114        }
115    }
116
117    /// Inner equivalent of `Iterator::next` returning a `Tription` instead of an `Option`.
118    /// This is for the case where a block comment follows right after another
119    /// (`/* ... *//* ... */` or equivalent), after reading `*/`, the buffer needs to be filled
120    /// to make sure that any eventual `/*` will be matched, this is done in the next call to
121    /// `next_`, thus, the calling loop in `Iterator::next` is told to wait one more iteration.
122    fn next_(&mut self) -> Tription<char> {
123        // at least one element missing from previous call
124        self.buf.fill_up(&mut self.iter);
125
126        if self.buf.is_empty() {
127            return Tription::None;
128        }
129
130        // if in comment
131        if let Some((idx, ref mut nesting)) = self.state {
132            let comment = &self.comments[idx];
133            let &Comment {
134                open_pat,
135                close_pat,
136                keep_close_pat,
137                ..
138            } = comment;
139
140            // check close before open to make thinks like python's '''...''' work
141            if self.buf.matches(close_pat) {
142                // matched close pattern
143
144                if !keep_close_pat {
145                    self.buf.pop_front_n(close_pat.len());
146                }
147
148                match nesting {
149                    // non-nesting comment or top-level comment
150                    None | Some(0) => self.state = None,
151                    // nested comment
152                    Some(d) => *d -= 1,
153                }
154            } else if let Some(depth) = nesting {
155                if self.buf.matches(open_pat) {
156                    // matched nesting open pattern
157                    self.buf.pop_front_n(open_pat.len());
158                    *depth += 1;
159                } else {
160                    self.buf.pop_front();
161                }
162            } else {
163                self.buf.pop_front();
164            }
165
166            Tription::Wait
167        } else {
168            // if in text
169            // #![feature(bindings_after_at)]
170            // for comment @ Comment(open_pat, .., nests) in self.comments.as_ref() {
171            // for each rule...
172            for (idx, comment) in self.comments.iter().enumerate() {
173                let Comment {
174                    open_pat,
175                    close_pat,
176                    nests,
177                    allow_close_pat,
178                    ..
179                } = comment;
180
181                // if it matches open pattern, open
182                if self.buf.matches(open_pat) {
183                    self.buf.pop_front_n(open_pat.len());
184
185                    let nesting = match nests {
186                        true => Some(0),
187                        false => None,
188                    };
189                    self.state = Some((idx, nesting));
190                    return Tription::Wait;
191                } else if !allow_close_pat && self.buf.matches(close_pat) {
192                    // if close pattern forbidden, panic
193                    panic!("Got \"{}\" without matching \"{}\"", close_pat, open_pat)
194                }
195            }
196
197            Tription::Some(self.buf.pop_front())
198        }
199    }
200}
201
202impl<I: Iterator<Item = char>> Iterator for WithoutComments<I> {
203    type Item = char;
204
205    /// Simply calls `WithoutComments::next_`, a return value of `Tription::Wait` signifies
206    /// that another attempt should be made, `Tription::Some` and `Tription::None` are
207    /// equivalent to the same variants of the `Option` type.
208    fn next(&mut self) -> Option<Self::Item> {
209        loop {
210            match self.next_() {
211                Tription::None => return None,
212                Tription::Some(c) => return Some(c),
213                Tription::Wait => (),
214            }
215        }
216    }
217}
218
219/// A trait to implement the `without_comments` method on all `Iterator<Item=char>`
220pub trait IntoWithoutComments
221where
222    Self: Sized + Iterator<Item = char>,
223{
224    /// Returns a `WithoutComments` iterator containing self
225    ///
226    /// # Arguments
227    ///
228    /// * `language` - A boxed slice containing all the comments that the returned iterator
229    /// will be removing
230    ///
231    /// # Example
232    ///
233    /// ```
234    /// use no_comment::{IntoWithoutComments, languages};
235    /// let with_comments = "S/*he */be/*lie*/ve//d";
236    /// let without_comments = with_comments
237    ///     .chars()
238    ///     .without_comments(languages::rust())
239    ///     .collect::<String>();
240    /// assert_eq!(&without_comments, "Sbeve");
241    /// ```
242    fn without_comments(self, language: Box<[Comment]>) -> WithoutComments<Self> {
243        let mut buf_len = 0;
244        for &Comment {
245            open_pat,
246            close_pat,
247            ..
248        } in language.iter()
249        {
250            if open_pat.len() > buf_len {
251                buf_len = open_pat.len()
252            }
253            if close_pat.len() > buf_len {
254                buf_len = close_pat.len()
255            }
256        }
257        assert_ne!(buf_len, 0);
258        WithoutComments::new(self, language, buf_len)
259    }
260}
261
262/// Blanket implementation
263impl<I: Iterator<Item = char>> IntoWithoutComments for I {}