logos_iter/
peekable.rs

1// Peek logic and tests are taken from core::iter::Peekable
2//
3// Rust copyright:
4// The Rust Project is dual-licensed under Apache 2.0 and MIT
5// terms.
6
7use crate::LogosIter;
8use logos::{Logos, Source, Span};
9
/// Peekable version of a Logos lexer.
///
/// It is needed because [`Iterator::peekable`] has no methods like
/// `peek_span()` or `peek_slice()` and so on.
///
/// Methods like `peek_*()` require a mutable reference to the lexer, because
/// peeking may have to pull the next token out of the wrapped lexer.
///
/// # Example
/// ```rust
/// use logos::{Lexer, Logos};
/// use logos_iter::LogosIter;
///
/// #[derive(Debug, Logos, PartialEq, Eq)]
/// enum Token {
///     #[error]
///     Error,
///     #[token("foo")]
///     Foo,
///     #[token("bar")]
///     Bar,
/// }
///
/// let mut lexer = Token::lexer("foobar").peekable_lexer();
///
/// assert_eq!(lexer.next(), Some(Token::Foo));
/// assert_eq!(lexer.peek(), Some(&Token::Bar));
/// assert_eq!(lexer.peek_slice(), "bar");
/// assert_eq!(lexer.peek_span(), 3..6);
/// ```
pub struct PeekableLexer<'source, L, T>
where
    L: LogosIter<'source, T>,
    T: Logos<'source>,
{
    /// The wrapped lexer. While `peeked` is `Some`, it has already been
    /// advanced one token past what this wrapper has handed out.
    pub(crate) lexer: L,
    /// Look-ahead buffer: the token pulled early from `lexer` together with
    /// the positional data captured at that moment, or `None` when nothing is
    /// buffered.
    pub(crate) peeked: Option<Peeked<'source, T>>,
}
47
/// Snapshot stored when the wrapped lexer is advanced ahead of the consumer.
pub(crate) struct Peeked<'source, T>
where
    T: Logos<'source>,
{
    /// Result of the early `next()` call on the wrapped lexer; `None` records
    /// that the lexer returned no further token.
    token: Option<T>,
    /// Span the wrapper reported just before the early advance; this is what
    /// `span()` keeps returning while the token is only buffered.
    prev_span: Span,
    /// Span the wrapped lexer reported right after the early advance.
    span: Span,
    /// Remainder the wrapped lexer reported right after the early advance.
    remainder: &'source <T::Source as Source>::Slice,
}
57
58impl<'source, T> Clone for Peeked<'source, T>
59where
60    T: Logos<'source> + Clone,
61{
62    fn clone(&self) -> Self {
63        Self {
64            token: self.token.clone(),
65            prev_span: self.span.clone(),
66            span: self.span.clone(),
67            remainder: self.remainder,
68        }
69    }
70}
71
72impl<'source, L, T> PeekableLexer<'source, L, T>
73where
74    L: LogosIter<'source, T>,
75    T: Logos<'source>,
76{
77    fn peek_impl(&mut self) -> &mut Peeked<'source, T> {
78        let lexer = &mut self.lexer;
79        self.peeked.get_or_insert_with(|| Peeked {
80            prev_span: lexer.span(),
81            token: lexer.next(),
82            span: lexer.span(),
83            remainder: lexer.remainder(),
84        })
85    }
86
87    pub fn peek(&mut self) -> Option<&T> {
88        self.peek_impl().token.as_ref()
89    }
90
91    pub fn peek_mut(&mut self) -> Option<&mut T> {
92        self.peek_impl().token.as_mut()
93    }
94
95    pub fn peek_span(&mut self) -> Span {
96        self.peek_impl().span.clone()
97    }
98
99    pub fn peek_slice(&mut self) -> &'source <T::Source as logos::Source>::Slice {
100        let span = self.peek_span();
101        // SAFETY: span is in range of source
102        unsafe { self.lexer.source().slice_unchecked(span) }
103    }
104
105    pub fn peek_remainder(&mut self) -> &'source <T::Source as logos::Source>::Slice {
106        self.peek_impl().remainder
107    }
108
109    pub fn next_if(&mut self, func: impl FnOnce(&T) -> bool) -> Option<T> {
110        let prev_span = self.span();
111
112        match self.next() {
113            Some(matched) if func(&matched) => Some(matched),
114            other => {
115                // Since we called `self.next()`, we consumed `self.peeked`.
116                assert!(self.peeked.is_none());
117                self.peeked = Some(Peeked {
118                    token: other,
119                    prev_span,
120                    span: self.lexer.span(),
121                    remainder: self.lexer.remainder(),
122                });
123                None
124            }
125        }
126    }
127
128    pub fn next_if_eq<U>(&mut self, expected: &U) -> Option<T>
129    where
130        T: PartialEq<U>,
131    {
132        self.next_if(|next| next == expected)
133    }
134
135    /// Returns inner lexer
136    ///
137    /// Inner lexer state can be like you already called [`Iterator::next`],
138    /// if you used one of `peek_*` or `next_if_*` methods
139    pub fn into_inner(self) -> L {
140        self.lexer
141    }
142}
143
144impl<'source, L, T> Clone for PeekableLexer<'source, L, T>
145where
146    L: LogosIter<'source, T> + Clone,
147    T: Logos<'source> + Clone,
148    T::Extras: Clone,
149{
150    fn clone(&self) -> Self {
151        Self {
152            lexer: self.lexer.clone(),
153            peeked: self.peeked.clone(),
154        }
155    }
156}
157
158impl<'source, L, T> Iterator for PeekableLexer<'source, L, T>
159where
160    L: LogosIter<'source, T>,
161    T: Logos<'source>,
162{
163    type Item = T;
164
165    fn next(&mut self) -> Option<Self::Item> {
166        match self.peeked.take() {
167            None => self.lexer.next(),
168            Some(Peeked { token, .. }) => token,
169        }
170    }
171
172    fn size_hint(&self) -> (usize, Option<usize>) {
173        let peek_len = match self.peeked {
174            Some(Peeked { token: None, .. }) => return (0, Some(0)),
175            Some(Peeked { token: Some(_), .. }) => 1,
176            None => 0,
177        };
178        let (lo, hi) = self.lexer.size_hint();
179        let lo = lo.saturating_add(peek_len);
180        let hi = match hi {
181            Some(x) => x.checked_add(peek_len),
182            None => None,
183        };
184        (lo, hi)
185    }
186
187    fn count(mut self) -> usize {
188        match self.peeked.take() {
189            Some(Peeked { token: None, .. }) => 0,
190            Some(Peeked { token: Some(_), .. }) => 1 + self.lexer.count(),
191            None => self.lexer.count(),
192        }
193    }
194
195    fn last(mut self) -> Option<T> {
196        let peek_opt = match self.peeked.take() {
197            Some(Peeked { token: None, .. }) => return None,
198            Some(Peeked {
199                token: v @ Some(_), ..
200            }) => v,
201            None => None,
202        };
203        self.lexer.last().or(peek_opt)
204    }
205
206    fn nth(&mut self, n: usize) -> Option<T> {
207        match self.peeked.take() {
208            Some(Peeked { token: None, .. }) => None,
209            Some(Peeked {
210                token: v @ Some(_), ..
211            }) if n == 0 => v,
212            Some(Peeked { token: Some(_), .. }) => self.lexer.nth(n - 1),
213            None => self.lexer.nth(n),
214        }
215    }
216
217    fn fold<Acc, Fold>(self, init: Acc, mut fold: Fold) -> Acc
218    where
219        Fold: FnMut(Acc, T) -> Acc,
220    {
221        let acc = match self.peeked {
222            Some(Peeked { token: None, .. }) => return init,
223            Some(Peeked { token: Some(v), .. }) => fold(init, v),
224            None => init,
225        };
226        self.lexer.fold(acc, fold)
227    }
228}
229
230impl<'source, L, T> LogosIter<'source, T> for PeekableLexer<'source, L, T>
231where
232    L: LogosIter<'source, T>,
233    T: Logos<'source>,
234{
235    fn span(&self) -> Span {
236        match &self.peeked {
237            None => self.lexer.span(),
238            Some(Peeked { prev_span, .. }) => prev_span.clone(),
239        }
240    }
241
242    fn slice(&self) -> &'source <T::Source as logos::Source>::Slice {
243        let span = self.span();
244        // SAFETY: span is in range of source
245        unsafe { self.lexer.source().slice_unchecked(span) }
246    }
247
248    fn source(&self) -> &'source T::Source {
249        self.lexer.source()
250    }
251
252    fn remainder(&self) -> &'source <T::Source as logos::Source>::Slice {
253        match &self.peeked {
254            None => self.lexer.remainder(),
255            Some(Peeked { prev_span, .. }) => {
256                let source = self.lexer.source();
257                // SAFETY: span is in range of source
258                unsafe { source.slice_unchecked(prev_span.end..source.len()) }
259            }
260        }
261    }
262
263    fn bump(&mut self, n: usize) {
264        match self.peeked.take() {
265            None => self.lexer.bump(n),
266            Some(Peeked { span, .. }) => {
267                let token_len = span.end - span.start;
268                let n = n - token_len;
269                self.lexer.bump(n);
270            }
271        }
272    }
273
274    fn extras(&self) -> &T::Extras {
275        self.lexer.extras()
276    }
277
278    fn extras_mut(&mut self) -> &mut T::Extras {
279        self.lexer.extras_mut()
280    }
281}
282
283#[cfg(test)]
284mod tests {
285    extern crate alloc;
286
287    use super::*;
288    use logos::Lexer;
289
290    use alloc::{vec, vec::Vec};
291
    /// Token set used by the tests in this module.
    ///
    /// The declaration order is significant: `test_peek_try_folds` casts
    /// tokens with `as i32`, which yields the variant's position in this
    /// list.
    #[derive(Debug, Logos, PartialEq, Eq, Copy, Clone)]
    enum Token {
        // Error variant; the `logos::skip` callback on `" "` makes the lexer
        // skip single spaces instead of emitting a token for them.
        #[error]
        #[token(" ", logos::skip)]
        Error,
        #[token("0")]
        Zero,
        #[token("1")]
        One,
        #[token("2")]
        Two,
        #[token("3")]
        Three,
        #[token("4")]
        Four,
        #[token("5")]
        Five,
        #[token("heart")]
        Heart,
        #[token("of")]
        Of,
        #[token("gold")]
        Gold,
        #[token("trillian")]
        Trillian,
        #[token("zaphod")]
        Zaphod,
    }
320
321    fn make_it(s: &'static str) -> PeekableLexer<'static, Lexer<'static, Token>, Token> {
322        Token::lexer(s).peekable_lexer()
323    }
324
    /// Checks `bump` on its own, after a `peek`, and after a `next`.
    #[test]
    fn test_lexer_bump() {
        // `heart` 2 spaces `of` 3 spaces `gold`
        const XS: &str = "heart  of   gold";

        // peek and bump: bumping by the length of the peeked `heart`
        // discards it, so the next peek sees `of`
        let mut it = make_it(XS);
        assert_eq!(it.peek(), Some(&Token::Heart));
        it.bump(5);
        assert_eq!(it.peek(), Some(&Token::Of));
        // `of` is only peeked, so the remainder still starts in front of it
        assert_eq!(it.remainder(), "  of   gold");
        assert_eq!(it.next(), Some(Token::Of));
        assert_eq!(it.remainder(), "   gold");

        // bump without any look-ahead
        let mut it = make_it(XS);
        it.bump(5);
        assert_eq!(it.remainder(), "  of   gold");
        assert_eq!(it.next(), Some(Token::Of));

        // next and bump
        let mut it = make_it(XS);
        assert_eq!(it.next(), Some(Token::Heart));
        it.bump(4); // `  of`
        assert_eq!(it.remainder(), "   gold");
        assert_eq!(it.next(), Some(Token::Gold));
    }
352
    /// Walks the whole input while interleaving `peek_*` and `next`.
    ///
    /// The `peek_*` accessors must describe the upcoming token, while
    /// `slice`/`span`/`remainder` must keep describing the last consumed one.
    #[test]
    fn test_lexer_peekable() {
        const XS: &str = "0 1 2 3 4 5 heart of gold";
        let mut it = make_it(XS);

        // Peeking at the very first token must not move the visible position.
        assert_eq!(it.remainder(), XS);
        assert_eq!(it.peek().unwrap(), &Token::Zero);
        assert_eq!(it.peek_slice(), "0");
        assert_eq!(it.peek_span(), 0..1);
        assert_eq!(it.peek_remainder(), " 1 2 3 4 5 heart of gold");
        assert_eq!(it.remainder(), XS);
        assert_eq!(it.next().unwrap(), Token::Zero);
        assert_eq!(it.slice(), "0");
        assert_eq!(it.span(), 0..1);
        assert_eq!(it.remainder(), " 1 2 3 4 5 heart of gold");
        // Plain `next` calls without any look-ahead.
        assert_eq!(it.next().unwrap(), Token::One);
        assert_eq!(it.slice(), "1");
        assert_eq!(it.span(), 2..3);
        assert_eq!(it.remainder(), " 2 3 4 5 heart of gold");
        assert_eq!(it.next().unwrap(), Token::Two);
        assert_eq!(it.slice(), "2");
        assert_eq!(it.span(), 4..5);
        assert_eq!(it.remainder(), " 3 4 5 heart of gold");
        // Peeking twice in a row must give the same answers.
        assert_eq!(it.peek().unwrap(), &Token::Three);
        assert_eq!(it.peek_slice(), "3");
        assert_eq!(it.peek_span(), 6..7);
        assert_eq!(it.peek_remainder(), " 4 5 heart of gold");
        assert_eq!(it.peek().unwrap(), &Token::Three);
        assert_eq!(it.peek_slice(), "3");
        assert_eq!(it.peek_span(), 6..7);
        assert_eq!(it.peek_remainder(), " 4 5 heart of gold");
        assert_eq!(it.next().unwrap(), Token::Three);
        assert_eq!(it.slice(), "3");
        assert_eq!(it.span(), 6..7);
        assert_eq!(it.remainder(), " 4 5 heart of gold");
        assert_eq!(it.next().unwrap(), Token::Four);
        assert_eq!(it.slice(), "4");
        assert_eq!(it.span(), 8..9);
        assert_eq!(it.remainder(), " 5 heart of gold");
        // From here on every token is peeked first and then consumed.
        assert_eq!(it.peek().unwrap(), &Token::Five);
        assert_eq!(it.peek_slice(), "5");
        assert_eq!(it.peek_span(), 10..11);
        assert_eq!(it.peek_remainder(), " heart of gold");
        assert_eq!(it.next().unwrap(), Token::Five);
        assert_eq!(it.slice(), "5");
        assert_eq!(it.span(), 10..11);
        assert_eq!(it.remainder(), " heart of gold");
        assert_eq!(it.peek().unwrap(), &Token::Heart);
        assert_eq!(it.peek_slice(), "heart");
        assert_eq!(it.peek_span(), 12..17);
        assert_eq!(it.peek_remainder(), " of gold");
        assert_eq!(it.next().unwrap(), Token::Heart);
        assert_eq!(it.slice(), "heart");
        assert_eq!(it.span(), 12..17);
        assert_eq!(it.remainder(), " of gold");
        assert_eq!(it.peek().unwrap(), &Token::Of);
        assert_eq!(it.peek_slice(), "of");
        assert_eq!(it.peek_span(), 18..20);
        assert_eq!(it.peek_remainder(), " gold");
        assert_eq!(it.next().unwrap(), Token::Of);
        assert_eq!(it.slice(), "of");
        assert_eq!(it.span(), 18..20);
        assert_eq!(it.remainder(), " gold");
        assert_eq!(it.peek().unwrap(), &Token::Gold);
        assert_eq!(it.peek_slice(), "gold");
        assert_eq!(it.peek_span(), 21..25);
        assert_eq!(it.peek_remainder(), "");
        assert_eq!(it.next().unwrap(), Token::Gold);
        assert_eq!(it.slice(), "gold");
        assert_eq!(it.span(), 21..25);
        assert_eq!(it.remainder(), "");
        // End of input: both peeking and advancing report `None`.
        assert!(it.peek().is_none());
        assert!(it.next().is_none());
    }
427
    /// `count` must include a token that is sitting in the look-ahead buffer.
    #[test]
    fn test_iterator_peekable_count() {
        const XS: &str = "0 1 2 3 4 5";
        const YS: &str = "1 0";
        const ZS: &str = "";

        let xs = make_it(XS);
        assert_eq!(xs.count(), 6);

        // Same input, but with the first token already peeked.
        let mut it = make_it(XS);
        assert_eq!(it.peek(), Some(&Token::Zero));
        assert_eq!(it.count(), 6);

        assert_eq!(make_it(YS).count(), 2);

        let mut it = make_it(YS);
        assert_eq!(it.peek(), Some(&Token::One));
        assert_eq!(it.count(), 2);

        // Empty input: nothing to count and nothing to peek.
        assert_eq!(make_it(ZS).count(), 0);

        let mut it = make_it(ZS);
        assert_eq!(it.peek(), None);
    }
452
    /// `nth` must treat a peeked token as element 0.
    // `nth(0)` is used on purpose here, hence the lint exception.
    #[allow(clippy::iter_nth_zero)]
    #[test]
    fn test_iterator_peekable_nth() {
        const XS: &str = "0 1 2 3 4 5";
        let mut it = make_it(XS);

        // Each `nth` call is preceded by a `peek` that fills the buffer.
        assert_eq!(it.peek(), Some(&Token::Zero));
        assert_eq!(it.nth(0), Some(Token::Zero));
        assert_eq!(it.peek(), Some(&Token::One));
        assert_eq!(it.nth(1), Some(Token::Two));
        assert_eq!(it.peek(), Some(&Token::Three));
        assert_eq!(it.nth(2), Some(Token::Five));
        assert_eq!(it.next(), None);
    }
467
    /// `last` must take the look-ahead buffer into account.
    #[test]
    fn test_iterator_peekable_last() {
        const XS: &str = "0 1 2 3 4 5";
        const YS: &str = "0";

        // More tokens follow the peeked one: the final token wins.
        let mut it = make_it(XS);
        assert_eq!(it.peek(), Some(&Token::Zero));
        assert_eq!(it.last(), Some(Token::Five));

        // The peeked token is the only one left: it is the last.
        let mut it = make_it(YS);
        assert_eq!(it.peek(), Some(&Token::Zero));
        assert_eq!(it.last(), Some(Token::Zero));

        // End of input was already peeked: there is no last token.
        let mut it = make_it(YS);
        assert_eq!(it.next(), Some(Token::Zero));
        assert_eq!(it.peek(), None);
        assert_eq!(it.last(), None);
    }
486
487    #[test]
488    fn test_iterator_peekable_fold() {
489        const XS: &str = "0 1 2 3 4 5";
490
491        let mut it = make_it(XS);
492        let xs: Vec<_> = it.clone().collect();
493        assert_eq!(it.peek(), Some(&Token::Zero));
494        let i = it.fold(0, |i, x| {
495            assert_eq!(x, xs[i]);
496            i + 1
497        });
498        assert_eq!(i, xs.len());
499    }
500
    /// `next_if_eq` must consume a token only when it equals the expected
    /// one, both with and without a preceding `peek`.
    #[test]
    fn test_iterator_peekable_next_if_eq() {
        const XS: &str = "heart of gold";
        let mut it = make_it(XS);
        // try before `peek()`
        assert_eq!(it.next_if_eq(&Token::Trillian), None);
        assert_eq!(it.next_if_eq(&Token::Heart), Some(Token::Heart));
        // try after peek()
        assert_eq!(it.peek(), Some(&Token::Of));
        assert_eq!(it.next_if_eq(&Token::Of), Some(Token::Of));
        assert_eq!(it.next_if_eq(&Token::Zaphod), None);
        // make sure `next()` still behaves
        assert_eq!(it.next(), Some(Token::Gold));
    }
516
517    #[test]
518    fn test_iterator_peekable_mut() {
519        const XS: &str = "1 2 3";
520        let mut it = make_it(XS);
521        if let Some(p) = it.peek_mut() {
522            if *p == Token::One {
523                *p = Token::Five;
524            }
525        }
526        assert_eq!(it.collect::<Vec<_>>(), vec![
527            Token::Five,
528            Token::Two,
529            Token::Three
530        ]);
531    }
532
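    /*
    NOTE(review): the three tests below look like unfinished ports of the
    `remember_peek_none` tests of `core::iter::Peekable`. They are kept
    commented out because they do not compile as written (for example,
    `Iterator::nth` takes a `usize`, not a `Token`).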
534
535    #[test]
536        fn test_iterator_peekable_remember_peek_none_1() {
537            // Check that the loop using .peek() terminates
538            let lexer = make_it("1 2 3");
539            let data: Vec<Token> = lexer.clone().collect();
540            let mut iter = lexer.cycle();
541
542            let mut n = 0;
543            while let Some(_) = iter.next() {
544                let is_the_last = iter.peek().is_none();
545                assert_eq!(is_the_last, n == data.len() - 1);
546                n += 1;
547                if n > data.len() {
548                    break;
549                }
550            }
551            assert_eq!(n, data.len());
552        }
553
554    #[test]
555        fn test_iterator_peekable_remember_peek_none_2() {
556            let lexer = make_it("0");
557            let data: Vec<Token> = lexer.collect();
558            let mut iter = lexer.cycle();
559            iter.next();
560            assert_eq!(iter.peek(), None);
561            assert_eq!(iter.last(), None);
562        }
563
564    #[test]
565    fn test_iterator_peekable_remember_peek_none_3() {
566        let lexer = make_it("Token::Zero");
567        let data: Vec<Token> = lexer.collect();
568        let mut iter = lexer.cycle();
569        iter.peek();
570        assert_eq!(iter.nth(Token::Zero), Some(&Token::Zero));
571
572        let mut iter = lexer.cycle();
573        iter.next();
574        assert_eq!(iter.peek(), None);
575        assert_eq!(iter.nth(Token::Zero), None);
576    }
577
578    */
579
    /// `try_fold` / `try_for_each` must start with a peeked token and stop
    /// exactly where the closure short-circuits.
    #[test]
    fn test_peek_try_folds() {
        const XS: &str = "1 2 3 4 5";
        const YS: &str = "heart of gold 4 3 2 1";
        const ZS: &str = "2 3 4";

        // Folds tokens through their `as i32` values; `checked_add` returns
        // `None` on overflow, which would short-circuit the fold.
        let f = &|acc, x| i32::checked_add(2 * acc as i32, x as i32);

        // Without look-ahead the wrapper must agree with the plain lexer.
        assert_eq!(make_it(XS).try_fold(7, f), Lexer::new(XS).try_fold(7, f));

        // With a peeked token the result must be the same.
        let mut iter = make_it(XS);
        assert_eq!(iter.peek(), Some(&Token::One));
        assert_eq!(iter.try_fold(7, f), Lexer::new(XS).try_fold(7, f));

        // The fold stops at `Four`, which is consumed; `Three` is next.
        let mut iter = make_it(YS);
        assert_eq!(iter.peek(), Some(&Token::Heart));
        assert_eq!(
            iter.try_fold(0, |acc, x| {
                if x == Token::Four {
                    None
                } else {
                    Some(acc + 1)
                }
            }),
            None
        );
        assert_eq!(iter.peek(), Some(&Token::Three));

        // `try_for_each(Err)` consumes exactly one token per call.
        let mut iter = make_it(ZS);
        assert_eq!(iter.peek(), Some(&Token::Two));
        assert_eq!(iter.try_for_each(Err), Err(Token::Two));
        assert_eq!(iter.peek(), Some(&Token::Three));
        assert_eq!(iter.try_for_each(Err), Err(Token::Three));
        assert_eq!(iter.peek(), Some(&Token::Four));
        assert_eq!(iter.try_for_each(Err), Err(Token::Four));
        assert_eq!(iter.peek(), None);
        assert_eq!(iter.try_for_each(Err), Ok(()));
    }
618
    /// Regression test: peeking must not change what `span()` reports.
    #[test]
    fn issue_1() {
        // Callback that parses the matched digits into the token's payload.
        fn parse_num(lex: &mut Lexer<Issue1Token>) -> u32 {
            lex.slice().parse().unwrap()
        }

        #[derive(Debug, Logos, Eq, PartialEq)]
        enum Issue1Token {
            #[error]
            Error,
            #[regex(r"\d+", parse_num)]
            I32Literal(u32),
            #[token("+")]
            Plus,
        }

        let mut l = Issue1Token::lexer("1+2").peekable_lexer();
        assert_eq!(l.next(), Some(Issue1Token::I32Literal(1)));
        assert_eq!(l.span(), 0..1);
        assert_eq!(l.peek(), Some(&Issue1Token::Plus));
        // since the lexer hasn't progressed, its span should be the same
        assert_eq!(l.span(), 0..1);
    }
642}