text_scanner/
lib.rs

1//! See [`Scanner`] docs for more information and its [methods]
2//! for many examples.
3//!
4//! [methods]: Scanner#implementations
5
6#![forbid(unsafe_code)]
7#![forbid(elided_lifetimes_in_paths)]
8
9#[cfg(feature = "ext")]
10pub mod ext;
11
/// Convenience module that re-exports [`Scanner`] together with the
/// [`ScannerItem`], [`ScannerResult`] and [`ScanResult`] type aliases
/// declared at the crate root.
12pub mod prelude {
13    pub use super::{ScanResult, Scanner, ScannerItem, ScannerResult};
14}
15
// Non-public module holding the `Sealed` marker trait, which is implemented
// here for `Scanner` and `str` only.
// NOTE(review): presumably used as a supertrait so that extension traits
// cannot be implemented outside this crate -- confirm against the traits
// that actually bound on `private::Sealed`.
16mod private {
17    pub trait Sealed {}
18
19    impl Sealed for crate::Scanner<'_> {}
20    impl Sealed for str {}
21}
22
23pub use char_ranges::{CharRanges, CharRangesExt, CharRangesOffset};
24
25use std::ops::Range;
26
/// A scanned value of type `T` paired with a `Range<usize>` of byte
/// indices into the scanner's text.
27pub type ScannerItem<T> = (Range<usize>, T);
28
/// Result of a scanning method that yields a value.
///
/// `Ok` holds the scanned [`ScannerItem`]. `Err` holds a range together with
/// the `&'text str` it covers; what that slice contains (an empty slice at
/// the cursor, the remaining text, or a partial match) depends on the method.
29pub type ScannerResult<'text, T> = Result<ScannerItem<T>, ScannerItem<&'text str>>;
30
/// Result of a scan that yields no value on success. The `Err` side has the
/// same shape as the `Err` side of [`ScannerResult`].
31pub type ScanResult<'text> = Result<(), ScannerItem<&'text str>>;
32
33#[cfg(test)]
macro_rules! assert_valid_cases {
    // Two-argument form: nothing is expected to remain after the match.
    ($method:ident, $cases:expr) => {
        $crate::assert_valid_cases!($method, $cases, "");
    };

    // Three-argument form: every case is scanned with `$remaining` appended,
    // and `$remaining` must be exactly what is left over afterwards.
    ($method:ident, $cases:expr, $remaining:expr) => {
        let suffix = $remaining;
        for case in $cases {
            // Only allocate a new string when there is a suffix to append.
            let input = if suffix.is_empty() {
                std::borrow::Cow::Borrowed(case)
            } else {
                std::borrow::Cow::Owned(format!("{case}{suffix}"))
            };
            let input = input.as_ref();

            let mut scanner = Scanner::new(input);
            let result = scanner.$method();

            // The whole case must be matched, starting at byte 0.
            assert_eq!(result, Ok((0..case.len(), case)));
            assert_eq!(scanner.remaining_text(), suffix);
        }
    };
}
56
57#[cfg(test)]
macro_rules! assert_invalid_cases {
    // Runs `$method` on a fresh scanner for every case and requires `Err`.
    ($method:ident, $cases:expr) => {
        for input in $cases {
            let mut scanner = Scanner::new(input);
            let result = scanner.$method();
            assert!(result.is_err(), "expected `Err`, received {:?}", result);
        }
    };
}
69
70#[cfg(test)]
71pub(crate) use assert_invalid_cases;
72#[cfg(test)]
73pub(crate) use assert_valid_cases;
74
75/// A `Scanner` is a UTF-8 [`char`] text scanner, implementing various methods
76/// for scanning a string slice, as well as backtracking capabilities, which
77/// can be used to implement lexers for tokenizing text or code. It is essentially
78/// just a fancy wrapper around [`CharRanges`].
79///
80/// **Note:** Cloning `Scanner` is essentially a copy, as it just contains
81/// a `&str` and a `usize` for its `cursor`. However, `Copy` is not
82/// implemented, to avoid accidentally copying immutable `Scanner`s.
83#[derive(Clone, Debug)]
84pub struct Scanner<'text> {
    // The complete input handed to `Scanner::new`; returned by `text()`.
85    text: &'text str,
    // Byte offset into `text` at which scanning continues; starts at 0 and
    // is returned by `cursor_pos()`.
86    cursor: usize,
87}
88
89impl<'text> Scanner<'text> {
90    /// Constructs a new [`Scanner`] with `text`.
91    #[inline]
92    pub fn new(text: &'text str) -> Self {
93        Self { text, cursor: 0 }
94    }
95
96    /// Returns the `text` the scanner was constructed with.
97    ///
98    /// **Note:** This has the same lifetime as the original `text`,
99    /// so the scanner can continue to be used while this exists.
100    ///
101    /// # Example
102    ///
103    /// ```rust
104    /// # use text_scanner::Scanner;
105    /// let mut scanner = Scanner::new("Hello World");
106    ///
107    /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
108    /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
109    ///
110    /// assert_eq!(scanner.text(), "Hello World");
111    /// assert_eq!(scanner.remaining_text(), "llo World");
112    /// ```
113    #[inline]
114    pub fn text(&self) -> &'text str {
115        self.text
116    }
117
118    /// Returns the remaining `text` of the scanner, i.e. the [`text()`]
119    /// after [`cursor_pos()`], in other words
120    /// <code style="white-space: nowrap;">self.[text()]\[self.[cursor_pos()]..]</code>.
121    ///
122    /// **Note:** This has the same lifetime as the original `text`,
123    /// so the scanner can continue to be used while this exists.
124    ///
125    /// # Example
126    ///
127    /// ```rust
128    /// # use text_scanner::Scanner;
129    /// let mut scanner = Scanner::new("Hello World");
130    ///
131    /// assert_eq!(scanner.text(), "Hello World");
132    /// assert_eq!(scanner.remaining_text(), "Hello World");
133    ///
134    /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
135    /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
136    ///
137    /// assert_eq!(scanner.text(), "Hello World");
138    /// assert_eq!(scanner.remaining_text(), "llo World");
139    /// ```
140    ///
141    /// [`text()`]: Self::text
142    /// [text()]: Self::text
143    /// [`cursor_pos()`]: Self::cursor_pos
144    /// [cursor_pos()]: Self::cursor_pos
145    #[inline]
146    pub fn remaining_text(&self) -> &'text str {
147        &self.text[self.cursor..]
148    }
149
150    /// Returns `true` if [`remaining_text()`] has text, i.e.
151    /// if it is not [empty].
152    ///
153    /// # Example
154    ///
155    /// ```rust
156    /// # use text_scanner::Scanner;
157    /// let mut scanner = Scanner::new("Foo");
158    ///
159    /// # assert_eq!(scanner.text(), "Foo");
160    /// assert_eq!(scanner.remaining_text(), "Foo");
161    /// assert_eq!(scanner.has_remaining_text(), true);
162    ///
163    /// assert_eq!(scanner.next(), Ok((0..1, 'F')));
164    /// assert_eq!(scanner.next(), Ok((1..2, 'o')));
165    /// assert_eq!(scanner.next(), Ok((2..3, 'o')));
166    ///
167    /// # assert_eq!(scanner.text(), "Foo");
168    /// assert_eq!(scanner.remaining_text(), "");
169    /// assert_eq!(scanner.has_remaining_text(), false);
170    /// ```
171    ///
172    /// [`remaining_text()`]: Self::remaining_text
173    /// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
174    #[inline]
175    pub fn has_remaining_text(&self) -> bool {
176        self.cursor < self.text.len()
177    }
178
179    /// Utility for turning a `Range<usize>` into `(Range<usize>, &'text str)`.
180    /// Where `range` is the start and end byte index relative to [`text()`].
181    ///
182    /// The same as `(range.clone(), &self.text()[range])`.
183    ///
184    /// [`text()`]: Self::text
185    #[inline]
186    pub fn ranged_text(&self, range: Range<usize>) -> ScannerItem<&'text str> {
187        (range.clone(), &self.text[range])
188    }
189
190    /// Returns the current cursor position of the
191    /// scanner, i.e. the byte offset into [`text()`].
192    ///
193    /// [`text()`]: Self::text
194    #[inline]
195    pub fn cursor_pos(&self) -> usize {
196        self.cursor
197    }
198
199    /// Replaces the current cursor position with `pos`,
200    /// while returning the old cursor position.
201    ///
202    /// # Panics
203    ///
204    /// If `pos` is not at a valid UTF-8 sequence boundary,
205    /// then the next operation using the cursor position
206    /// will panic.
207    ///
208    /// # Example
209    ///
210    /// ```rust
211    /// # use text_scanner::Scanner;
212    /// let mut scanner = Scanner::new("Hello World");
213    ///
214    /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
215    ///
216    /// let backtrack = scanner.cursor_pos();
217    ///
218    /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
219    /// assert_eq!(scanner.next(), Ok((2..3, 'l')));
220    /// assert_eq!(scanner.next(), Ok((3..4, 'l')));
221    ///
222    /// scanner.set_cursor_pos(backtrack);
223    ///
224    /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
225    /// assert_eq!(scanner.next(), Ok((2..3, 'l')));
226    /// assert_eq!(scanner.next(), Ok((3..4, 'l')));
227    /// ```
228    #[inline]
229    pub fn set_cursor_pos(&mut self, pos: usize) -> usize {
230        let old_pos = self.cursor;
231        self.cursor = pos;
232        old_pos
233    }
234
235    /// Resets the cursor position to the start, while returning
236    /// the old cursor position.
237    ///
238    /// # Example
239    ///
240    /// ```rust
241    /// # use text_scanner::Scanner;
242    /// # let mut scanner = Scanner::new("Hello World");
243    /// # assert_eq!(scanner.next(), Ok((0..1, 'H')));
244    /// # assert_eq!(scanner.next(), Ok((1..2, 'e')));
245    /// # assert_eq!(scanner.remaining_text(), "llo World");
246    /// let old_pos = scanner.reset();
247    /// // same as
248    /// let old_pos = scanner.set_cursor_pos(0);
249    /// # assert_eq!(scanner.remaining_text(), "Hello World");
250    /// # assert_eq!(scanner.next(), Ok((0..1, 'H')));
251    /// ```
252    #[inline]
253    pub fn reset(&mut self) -> usize {
254        self.set_cursor_pos(0)
255    }
256
257    /// Advances the scanner cursor and returns the next
258    /// [`char`] and its [`Range`], if any.
259    ///
260    /// # Example
261    ///
262    /// ```rust
263    /// # use text_scanner::Scanner;
264    /// let mut scanner = Scanner::new("Hello");
265    ///
266    /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
267    /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
268    ///
269    /// assert_eq!(scanner.remaining_text(), "llo");
270    ///
271    /// assert_eq!(scanner.next(), Ok((2..3, 'l')));
272    /// assert_eq!(scanner.next(), Ok((3..4, 'l')));
273    /// assert_eq!(scanner.next(), Ok((4..5, 'o')));
274    /// assert_eq!(scanner.next(), Err((5..5, "")));
275    ///
276    /// assert_eq!(scanner.remaining_text(), "");
277    /// ```
278    #[inline]
279    #[allow(clippy::should_implement_trait)]
280    pub fn next(&mut self) -> ScannerResult<'text, char> {
281        let (r, c) = self.peek()?;
282        self.cursor = r.end;
283        Ok((r, c))
284    }
285
286    /// Returns the next [`char`] and its [`Range`], if any,
287    /// without advancing the cursor position.
288    ///
289    /// See also [`peek_str()`], [`peek_nth()`], and [`peek_iter()`].
290    ///
291    /// # Example
292    ///
293    /// ```rust
294    /// # use text_scanner::Scanner;
295    /// let mut scanner = Scanner::new("Hello World");
296    ///
297    /// assert_eq!(scanner.peek(), Ok((0..1, 'H')));
298    /// assert_eq!(scanner.peek(), Ok((0..1, 'H')));
299    ///
300    /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
301    ///
302    /// assert_eq!(scanner.peek(), Ok((1..2, 'e')));
303    /// assert_eq!(scanner.peek(), Ok((1..2, 'e')));
304    ///
305    /// assert_eq!(scanner.remaining_text(), "ello World");
306    /// ```
307    ///
308    /// [`peek_str()`]: Self::peek_str
309    /// [`peek_nth()`]: Self::peek_nth
310    /// [`peek_iter()`]: Self::peek_iter
311    #[inline]
312    pub fn peek(&self) -> ScannerResult<'text, char> {
313        match self.peek_iter().next() {
314            Some((r, c)) => Ok((r, c)),
315            // No character remaining
316            None => Err((self.cursor..self.cursor, "")),
317        }
318    }
319
320    /// Returns the `n`th [`char`] and its [`Range`], if any,
321    /// without advancing the cursor position.
322    ///
323    /// See also [`peek_str()`] and [`peek_iter()`].
324    ///
325    /// # Example
326    ///
327    /// ```rust
328    /// # use text_scanner::Scanner;
329    /// let mut scanner = Scanner::new("Hello World");
330    ///
331    /// assert_eq!(scanner.peek_nth(0), Ok((0..1, 'H')));
332    /// assert_eq!(scanner.peek_nth(1), Ok((1..2, 'e')));
333    /// assert_eq!(scanner.peek_nth(2), Ok((2..3, 'l')));
334    ///
335    /// assert_eq!(scanner.peek_nth(6), Ok((6..7, 'W')));
336    ///
337    /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
338    ///
339    /// assert_eq!(scanner.remaining_text(), "ello World");
340    /// ```
341    ///
342    /// [`peek_str()`]: Self::peek_str
343    /// [`peek_iter()`]: Self::peek_iter
344    #[inline]
345    pub fn peek_nth(&self, n: usize) -> ScannerResult<'text, char> {
346        match self.peek_iter().nth(n) {
347            Some((r, c)) => Ok((r, c)),
348            None => Err(self.ranged_text(self.cursor..self.text.len())),
349        }
350    }
351
352    /// Returns an iterator that produces all the remaining [`char`]s
353    /// and their [`Range`]s, if any, without advancing the cursor position.
354    ///
355    /// **Note:** This has the same lifetime as the original `text`,
356    /// so the scanner can continue to be used while this exists.
357    ///
358    /// See also [`peek_str()`].
359    ///
360    /// # Example
361    ///
362    /// ```rust
363    /// # use text_scanner::Scanner;
364    /// let mut scanner = Scanner::new("Hello World");
365    ///
366    /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
367    /// assert_eq!(scanner.remaining_text(), "ello World");
368    ///
369    /// let mut peek = scanner.peek_iter();
370    /// assert_eq!(peek.next(), Some((1..2, 'e')));
371    /// assert_eq!(peek.next(), Some((2..3, 'l')));
372    /// assert_eq!(peek.next(), Some((3..4, 'l')));
373    /// assert_eq!(scanner.remaining_text(), "ello World");
374    ///
375    /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
376    /// assert_eq!(scanner.next(), Ok((2..3, 'l')));
377    /// assert_eq!(scanner.remaining_text(), "lo World");
378    /// ```
379    ///
380    /// [`peek_str()`]: Self::peek_str
381    #[inline]
382    pub fn peek_iter(&self) -> CharRangesOffset<'text> {
383        self.remaining_text().char_ranges().offset(self.cursor)
384    }
385
386    /// Advances the scanner cursor and returns [`Ok`] with a string
387    /// slice of the following `n` characters. If less than `n` are
388    /// remaining, then [`Err`] is returned, with the [remaining text],
389    /// if any, without advancing the cursor.
390    ///
391    /// **Note:** The returned string slice has the same lifetime as
392    /// the original `text`, so the scanner can continue to be used
393    /// while this exists.
394    ///
395    /// # Bytes vs Characters
396    ///
397    /// The [`Ok`] string slice contains `n` characters,
398    /// i.e. where `n` matches <code>str.[chars()].[count()]</code>
399    /// and **not** [`len()`] (which is the byte length of a string slice).
400    ///
401    /// Consider `"foo"` vs `"🦀🦀🦀"`, both string slices contain 3
402    /// characters. However `"foo"` has a length of 3 bytes, while `"🦀🦀🦀"`
403    /// has a length of 12 bytes, when encoded in UTF-8.
404    ///
405    /// # Panics
406    ///
407    /// Panics in non-optimized builds, if `n` is `0`.
408    ///
409    /// In optimized builds <code>Err(([cursor]..[cursor], &quot;&quot;))</code>
410    /// is returned instead, regardless of whether there is any remaining
411    /// characters.
412    ///
413    /// In short there is a <code>[debug_assert_ne!](n, 0)</code>.
414    ///
415    /// # Example
416    ///
417    /// ```rust
418    /// # use text_scanner::Scanner;
419    /// let mut scanner = Scanner::new("Foo Bar Baz");
420    ///
421    /// # assert_eq!(scanner.remaining_text(), "Foo Bar Baz");
422    /// assert_eq!(scanner.next_str(3), Ok((0..3, "Foo")));
423    /// assert_eq!(scanner.next_str(3), Ok((3..6, " Ba")));
424    /// assert_eq!(scanner.next_str(3), Ok((6..9, "r B")));
425    /// // Less than 3 characters are remaining, so `Err`
426    /// // is returned
427    /// assert_eq!(scanner.next_str(3), Err((9..11, "az")));
428    /// # assert_eq!(scanner.remaining_text(), "az");
429    /// # assert_eq!(scanner.next_str(2), Ok((9..11, "az")));
430    /// # assert_eq!(scanner.remaining_text(), "");
431    /// ```
432    ///
433    /// [remaining text]: Self::remaining_text
434    /// [chars()]: str::chars
435    /// [count()]: Iterator::count()
436    /// [`len()`]: str::len
437    /// [cursor]: Self::cursor_pos()
438    #[inline]
439    pub fn next_str(&mut self, chars: usize) -> ScannerResult<'text, &'text str> {
440        let (r, s) = self.peek_str(chars)?;
441        self.cursor = r.end;
442        Ok((r, s))
443    }
444
445    /// Returns [`Ok`] with a string slice of the following `n` characters,
446    /// if any, without advancing the cursor. If less than `n` are remaining,
447    /// then [`Err`] is returned, with the [remaining text].
448    ///
449    /// **Note:** The returned string slice has the same lifetime as
450    /// the original `text`, so the scanner can continue to be used
451    /// while this exists.
452    ///
453    /// # Bytes vs Characters
454    ///
455    /// The [`Ok`] string slice contains `n` characters,
456    /// i.e. where `n` matches <code>str.[chars()].[count()]</code>
457    /// and **not** [`len()`] (which is the byte length of a string slice).
458    ///
459    /// Consider `"foo"` vs `"🦀🦀🦀"`, both string slices contain 3
460    /// characters. However `"foo"` has a length of 3 bytes, while `"🦀🦀🦀"`
461    /// has a length of 12 bytes, when encoded in UTF-8.
462    ///
463    /// # Panics
464    ///
465    /// Panics in non-optimized builds, if `n` is `0`.
466    ///
467    /// In optimized builds <code>Err(([cursor]..[cursor], &quot;&quot;))</code>
468    /// is returned instead, regardless of whether there is any remaining
469    /// characters.
470    ///
471    /// In short there is a <code>[debug_assert_ne!](n, 0)</code>.
472    ///
473    /// # Example
474    ///
475    /// ```rust
476    /// # use text_scanner::Scanner;
477    /// let mut scanner = Scanner::new("Hello 👋 World 🌏");
478    ///
479    /// assert_eq!(scanner.remaining_text(), "Hello 👋 World 🌏");
480    /// // The emoji is a multi-byte character, thereby the returned
481    /// // range has a length of 10 and not 7.
482    /// assert_eq!(scanner.peek_str(7), Ok((0..10, "Hello 👋")));
483    /// # assert_eq!(scanner.remaining_text(), "Hello 👋 World 🌏");
484    ///
485    /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
486    /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
487    ///
488    /// assert_eq!(scanner.remaining_text(), "llo 👋 World 🌏");
489    /// assert_eq!(scanner.peek_str(7), Ok((2..12, "llo 👋 W")));
490    /// # assert_eq!(scanner.remaining_text(), "llo 👋 World 🌏");
491    /// ```
492    ///
493    /// [remaining text]: Self::remaining_text
494    /// [chars()]: str::chars
495    /// [count()]: Iterator::count()
496    /// [`len()`]: str::len
497    /// [cursor]: Self::cursor_pos()
498    #[inline]
499    pub fn peek_str(&self, n: usize) -> ScannerResult<'text, &'text str> {
500        debug_assert_ne!(n, 0, "`n` must be greater than 0");
501        if n == 0 {
502            return Err((self.cursor..self.cursor, ""));
503        }
504        let (last, _) = self.peek_nth(n - 1)?;
505        let r = self.cursor..last.end;
506        Ok(self.ranged_text(r))
507    }
508
509    /// Advances the scanner cursor and returns the next
510    /// [`char`] and its [`Range`], if `f(c)` returns `true`
511    /// where `c` is the next character.
512    ///
513    /// # Example
514    ///
515    /// ```rust
516    /// # use text_scanner::Scanner;
517    /// let mut scanner = Scanner::new("Hello World");
518    ///
519    /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((0..1, 'H')));
520    /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((1..2, 'e')));
521    /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((2..3, 'l')));
522    /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((3..4, 'l')));
523    /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((4..5, 'o')));
524    /// assert_eq!(scanner.accept_if(char::is_alphabetic), Err((5..5, "")));
525    ///
526    /// assert_eq!(scanner.remaining_text(), " World");
527    /// ```
528    #[inline]
529    pub fn accept_if<F>(&mut self, f: F) -> ScannerResult<'text, char>
530    where
531        F: FnOnce(char) -> bool,
532    {
533        let (r, c) = self.peek()?;
534        if f(c) {
535            self.cursor = r.end;
536            Ok((r, c))
537        } else {
538            Err((self.cursor..self.cursor, ""))
539        }
540    }
541
542    #[allow(dead_code)]
543    #[inline]
544    pub(crate) fn accept_if_ext<A, Args>(&mut self, accept: A) -> ScannerResult<'text, char>
545    where
546        A: ScanOne<Args>,
547    {
548        self.accept_if(|c| accept.scan_one(c))
549    }
550
551    /// Advances the scanner cursor and returns the next
552    /// [`char`] and its [`Range`], if the next character
553    /// matches `expected`.
554    ///
555    /// # Example
556    ///
557    /// ```rust
558    /// # use text_scanner::Scanner;
559    /// let mut scanner = Scanner::new("Hello World");
560    ///
561    /// assert_eq!(scanner.accept_char('H'), Ok((0..1, 'H')));
562    /// assert_eq!(scanner.accept_char('E'), Err((1..1, "")));
563    /// assert_eq!(scanner.accept_char('e'), Ok((1..2, 'e')));
564    /// assert_eq!(scanner.accept_char('W'), Err((2..2, "")));
565    ///
566    /// assert_eq!(scanner.remaining_text(), "llo World");
567    /// ```
568    #[inline]
569    pub fn accept_char(&mut self, expected: char) -> ScannerResult<'text, char> {
570        self.accept_if(|c| c == expected)
571    }
572
573    /// Advances the scanner cursor and returns the next
574    /// [`char`] and its [`Range`], if the next character
575    /// matches any `char` produced by `expected`.
576    ///
577    /// # Panics
578    ///
579    /// Panics in non-optimized builds, if `expected` is [empty].
580    ///
581    /// In optimized builds <code>Err(([cursor]..[cursor], &quot;&quot;))</code>
582    /// is returned instead, regardless of whether there is any remaining
583    /// characters.
584    ///
585    /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>.
586    ///
587    /// # Example
588    ///
589    /// ```rust
590    /// # use text_scanner::Scanner;
591    /// let mut scanner = Scanner::new("Hello World");
592    ///
593    /// let any = &['H', 'e', 'l', 'o', ' '];
594    /// assert_eq!(scanner.accept_char_any(any), Ok((0..1, 'H')));
595    /// assert_eq!(scanner.accept_char_any(any), Ok((1..2, 'e')));
596    /// assert_eq!(scanner.accept_char_any(any), Ok((2..3, 'l')));
597    /// assert_eq!(scanner.accept_char_any(any), Ok((3..4, 'l')));
598    /// assert_eq!(scanner.accept_char_any(any), Ok((4..5, 'o')));
599    /// assert_eq!(scanner.accept_char_any(any), Ok((5..6, ' ')));
600    /// assert_eq!(scanner.accept_char_any(any), Err((6..6, "")));
601    ///
602    /// assert_eq!(scanner.remaining_text(), "World");
603    /// ```
604    ///
605    /// [cursor]: Self::cursor_pos
606    /// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
607    pub fn accept_char_any(&mut self, expected: &[char]) -> ScannerResult<'text, char> {
608        debug_assert!(!expected.is_empty(), "`expected` is empty");
609        let (r, c) = self.peek()?;
610        if expected.contains(&c) {
611            self.cursor = r.end;
612            Ok((r, c))
613        } else {
614            Err((self.cursor..self.cursor, ""))
615        }
616    }
617
618    /// Advances the scanner cursor and returns `Ok` with the `&'text str`
619    /// and its [`Range`], if the next characters matches the characters
620    /// in `expected`. If not, then an `Err` is returned, with the longest
621    /// matching substring and its [`Range`].
622    ///
623    /// **Note:** The returned string slice has the same lifetime as
624    /// the original `text`, so the scanner can continue to be used
625    /// while this exists.
626    ///
627    /// If `expected` is only 1 character, then use [`accept_char()`]
628    /// instead.
629    ///
630    /// # Panics
631    ///
632    /// Panics in non-optimized builds, if `expected` is [empty].
633    ///
634    /// In optimized builds <code>Err(([cursor]..[cursor], &quot;&quot;))</code>
635    /// is returned instead, regardless of whether there is any remaining
636    /// characters.
637    ///
638    /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>.
639    ///
640    /// # Example
641    ///
642    /// ```rust
643    /// # use text_scanner::Scanner;
644    /// let mut scanner = Scanner::new("FooBaaar");
645    ///
646    /// // The next 3 characters matches "Foo", so `Ok` is returned
647    /// assert_eq!(scanner.accept_str("Foo"), Ok((0..3, "Foo")));
648    ///
649    /// // The next 3 characters is "Baa" not "Bar", so `Err` is
650    /// // returned, with the longest matching part, i.e. "Ba"
651    /// assert_eq!(scanner.accept_str("Bar"), Err((3..5, "Ba")));
652    ///
653    /// assert_eq!(scanner.remaining_text(), "Baaar");
654    /// ```
655    ///
656    /// [`accept_char()`]: Self::accept_char
657    /// [cursor]: Self::cursor_pos
658    /// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
659    pub fn accept_str(&mut self, expected: &str) -> ScannerResult<'text, &'text str> {
660        debug_assert!(!expected.is_empty(), "`expected` is empty");
661        if expected.is_empty() {
662            return Err((self.cursor..self.cursor, ""));
663        }
664
665        let start = self.cursor;
666
667        let mut chars = self.peek_iter();
668        for expected in expected.chars() {
669            match chars.next() {
670                Some((r, c)) if c == expected => {
671                    self.cursor = r.end;
672                }
673                _ => {
674                    let end = self.cursor;
675                    self.cursor = start;
676                    return Err(self.ranged_text(start..end));
677                }
678            }
679        }
680
681        Ok(self.ranged_text(start..self.cursor))
682    }
683
684    /// Advances the scanner cursor and returns `Ok` with the `&'text str`
685    /// and its [`Range`], if the next characters matches any `&str`
686    /// in `expected`. If not, then an `Err` is returned, with the longest
687    /// matching substring and its [`Range`].
688    ///
689    /// **Warning:** The strings are tested in sequential order, thereby
690    /// if `accept_str_any()` is called with e.g. `["foo", "foobar"]`,
691    /// then `"foobar"` would never be tested, as `"foo"` would be
692    /// matched and return `Ok` beforehand. Instead simply change the
693    /// order of the strings into longest-to-shortest order,
694    /// i.e. `["foo", "foobar"]` into `["foobar", "foo"]`.
695    ///
696    /// **Note:** The returned string slice has the same lifetime as
697    /// the original `text`, so the scanner can continue to be used
698    /// while this exists.
699    ///
700    /// If `expected` only contains 1 character strings, then use
701    /// [`accept_char_any()`] instead.
702    ///
703    /// # Panics
704    ///
705    /// Panics in non-optimized builds, if `expected` is [empty],
706    /// or if `expected` contains an [empty][empty2] `&str`.
707    ///
708    /// In optimized builds <code>Err(([cursor]..[cursor], &quot;&quot;))</code>
709    /// is returned instead, regardless of whether there is any remaining
710    /// characters.
711    ///
712    /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>
713    /// (along with a similar assertion for the strings).
714    ///
715    /// # Example
716    ///
717    /// ```rust
718    /// # use text_scanner::Scanner;
719    /// let mut scanner = Scanner::new("FooBarFooBaaar");
720    ///
721    /// let any = &["Foo", "Bar"];
722    ///
723    /// // The next 3 characters matches "Foo", so `Ok` is returned
724    /// assert_eq!(scanner.accept_str_any(any), Ok((0..3, "Foo")));
725    /// assert_eq!(scanner.accept_str_any(any), Ok((3..6, "Bar")));
726    /// assert_eq!(scanner.accept_str_any(any), Ok((6..9, "Foo")));
727    ///
728    /// // The next 3 characters is "Baa" not "Foo" nor "Bar", so `Err`
729    /// // is returned, with the longest matching part, i.e. "Ba"
730    /// assert_eq!(scanner.accept_str_any(any), Err((9..11, "Ba")));
731    ///
732    /// assert_eq!(scanner.remaining_text(), "Baaar");
733    /// ```
734    ///
735    /// [`accept_char_any()`]: Self::accept_char_any
736    /// [cursor]: Self::cursor_pos
737    /// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
738    /// [empty2]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
739    pub fn accept_str_any(&mut self, expected: &[&str]) -> ScannerResult<'text, &'text str> {
740        debug_assert!(!expected.is_empty(), "`expected` is empty");
741        if expected.is_empty() {
742            return Err((self.cursor..self.cursor, ""));
743        }
744
745        let mut max_end = self.cursor;
746        for expected in expected {
747            match self.accept_str(expected) {
748                Ok((r, s)) => return Ok((r, s)),
749                Err((r, _s)) => {
750                    max_end = max_end.max(r.end);
751                }
752            }
753        }
754
755        let r = self.cursor..max_end;
756        Err(self.ranged_text(r))
757    }
758
759    /// Advances the scanner cursor and skips zero-to-many characters,
760    /// **while** `f(c)` returns `true`, where `c` is the [remaining characters]
761    /// in sequential order.
762    ///
763    /// Returns the string slice and its [`Range`], of the matched
764    /// (i.e. skipped) characters.
765    ///
766    /// Returns <code>([cursor]..[cursor], &quot;&quot;)</code> if 0 characters
767    /// were matched (i.e. skipped).
768    ///
769    /// **Note:** The returned string slice has the same lifetime as
770    /// the original `text`, so the scanner can continue to be used
771    /// while this exists.
772    ///
773    /// # Example
774    ///
775    /// ```rust
776    /// # use text_scanner::Scanner;
777    /// let mut scanner = Scanner::new("Hello World");
778    ///
779    /// // Skip all alphabetic characters
780    /// assert_eq!(scanner.skip_while(|c| c.is_alphabetic()), (0..5, "Hello"));
781    ///
782    /// // Returns an empty range and an empty string slice
783    /// // since 0 characters were skipped
784    /// assert_eq!(scanner.skip_while(|c| c.is_alphabetic()), (5..5, ""));
785    ///
786    /// // Skip 1 whitespace character
787    /// assert_eq!(scanner.skip_while(char::is_whitespace), (5..6, " "));
788    ///
789    /// assert_eq!(scanner.remaining_text(), "World");
790    /// ```
791    ///
792    /// [remaining characters]: Self::remaining_text
793    /// [cursor]: Self::cursor_pos
794    pub fn skip_while<F>(&mut self, mut f: F) -> ScannerItem<&'text str>
795    where
796        F: FnMut(char) -> bool,
797    {
798        let start = self.cursor;
799
800        for (r, c) in self.peek_iter() {
801            if f(c) {
802                self.cursor = r.end;
803            } else {
804                break;
805            }
806        }
807
808        let r = start..self.cursor;
809        self.ranged_text(r)
810    }
811
812    #[allow(dead_code)]
813    #[inline]
814    pub(crate) fn skip_while_ext<A, Args>(&mut self, mut skip: A) -> ScannerItem<&'text str>
815    where
816        A: ScanMany<Args>,
817    {
818        self.skip_while(|c| skip.scan_many(c))
819    }
820
821    /// Skips zero-to-many characters matching `expected`, same as:
822    ///
823    /// ```rust
824    /// # use text_scanner::Scanner;
825    /// # let mut scanner = Scanner::new("Hello World");
826    /// # let expected = 'H';
827    /// scanner.skip_while(|c| c == expected);
828    /// # assert_eq!(scanner.remaining_text(), "ello World");
829    /// ```
830    #[inline]
831    pub fn skip_while_char(&mut self, expected: char) -> ScannerItem<&'text str> {
832        self.skip_while(|c| c == expected)
833    }
834
835    /// Skips zero-to-many characters, which match any
836    /// character in `expected`, same as:
837    ///
838    /// ```rust
839    /// # use text_scanner::Scanner;
840    /// # let mut scanner = Scanner::new("Hello World");
841    /// # let expected = ['H', 'e', 'L'];
842    /// scanner.skip_while(|c| expected.contains(&c));
843    /// # assert_eq!(scanner.remaining_text(), "llo World");
844    /// ```
845    #[inline]
846    pub fn skip_while_char_any(&mut self, expected: &[char]) -> ScannerItem<&'text str> {
847        self.skip_while(|c| expected.contains(&c))
848    }
849
850    /// Skips zero-to-many characters, while the next characters
851    /// matches the characters in `expected` completely.
852    ///
853    /// **Note:** The returned string slice has the same lifetime as
854    /// the original `text`, so the scanner can continue to be used
855    /// while this exists.
856    ///
857    /// If `expected` is only 1 character, then use [`skip_while_char()`]
858    /// instead.
859    ///
860    /// # Panics
861    ///
862    /// Panics in non-optimized builds, if `expected` is [empty].
863    ///
864    /// In optimized builds 0 characters are skipped, and
865    /// <code>([cursor]..[cursor], &quot;&quot;)</code> is returned instead,
866    /// regardless of whether there is any remaining characters.
867    ///
868    /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>.
869    ///
870    /// # Example
871    ///
872    /// ```rust
873    /// # use text_scanner::Scanner;
874    /// let mut scanner = Scanner::new("FooFooFooBarBaz");
875    /// assert_eq!(scanner.skip_while_str("Foo"), (0..9, "FooFooFoo"));
876    /// assert_eq!(scanner.remaining_text(), "BarBaz");
877    /// ```
878    ///
879    /// [`skip_while_char()`]: Self::skip_while_char
880    /// [cursor]: Self::cursor_pos
881    /// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
882    #[inline]
883    pub fn skip_while_str(&mut self, expected: &str) -> ScannerItem<&'text str> {
884        let start = self.cursor;
885
886        while self.accept_str(expected).is_ok() {}
887
888        self.ranged_text(start..self.cursor)
889    }
890
891    /// Skips zero-to-many characters, while the next characters
892    /// matches the characters of any `&str` in `expected` completely.
893    ///
894    /// **Warning:** The strings are tested in sequential order, thereby
895    /// if `skip_while_str_any()` is called with e.g. `["foo", "foobar"]`,
896    /// then `"foobar"` would never be tested, as `"foo"` would be
897    /// matched and continue beforehand. Instead simply change the
898    /// order of the strings into longest-to-shortest order,
899    /// i.e. `["foo", "foobar"]` into `["foobar", "foo"]`.
900    ///
901    /// **Note:** The returned string slice has the same lifetime as
902    /// the original `text`, so the scanner can continue to be used
903    /// while this exists.
904    ///
905    /// If `expected` only contains 1 character strings, then use
906    /// [`skip_while_char_any()`] instead.
907    ///
908    /// # Panics
909    ///
910    /// Panics in non-optimized builds, if `expected` is [empty],
911    /// or if `expected` contains an [empty][empty2] `&str`.
912    ///
913    /// In optimized builds 0 characters are skipped, and
914    /// <code>([cursor]..[cursor], &quot;&quot;)</code> is returned instead,
915    /// regardless of whether there is any remaining characters.
916    ///
917    /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>
918    /// (along with a similar assertion for the strings).
919    ///
920    /// # Example
921    ///
922    /// ```rust
923    /// # use text_scanner::Scanner;
924    /// let mut scanner = Scanner::new("FooBarFooBarFooBaaarBaz");
925    /// assert_eq!(scanner.skip_while_str_any(&["Foo", "Bar"]), (0..15, "FooBarFooBarFoo"));
926    /// assert_eq!(scanner.remaining_text(), "BaaarBaz");
927    /// ```
928    ///
929    /// [`skip_while_char_any()`]: Self::skip_while_char_any
930    /// [cursor]: Self::cursor_pos
931    /// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
932    /// [empty2]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
933    #[inline]
934    pub fn skip_while_str_any(&mut self, expected: &[&str]) -> ScannerItem<&'text str> {
935        let start = self.cursor;
936
937        while self.accept_str_any(expected).is_ok() {}
938
939        self.ranged_text(start..self.cursor)
940    }
941
942    /// Advances the scanner cursor and skips zero-to-many characters,
943    /// **while** `f(c)` returns `false`, where `c` is the [remaining characters]
944    /// in sequential order.
945    ///
946    /// Returns the string slice and its [`Range`], of the matched
947    /// (i.e. skipped) characters.
948    ///
949    /// Returns <code>([cursor]..[cursor], &quot;&quot;)</code> if 0 characters
950    /// were matched (i.e. skipped).
951    ///
952    /// **Note:** The returned string slice has the same lifetime as
953    /// the original `text`, so the scanner can continue to be used
954    /// while this exists.
955    ///
956    /// # Example
957    ///
958    /// ```rust
959    /// # use text_scanner::Scanner;
960    /// let mut scanner = Scanner::new("Hello World");
961    ///
962    /// // Skip all characters until a whitespace is found
963    /// assert_eq!(scanner.skip_until(|c| c.is_whitespace()), (0..5, "Hello"));
964    ///
965    /// // Returns an empty range and an empty string slice
966    /// // since 0 characters were skipped
967    /// assert_eq!(scanner.skip_until(|c| c.is_whitespace()), (5..5, ""));
968    ///
969    /// // Skip 1 whitespace character
970    /// assert_eq!(scanner.skip_until(char::is_alphabetic), (5..6, " "));
971    ///
972    /// assert_eq!(scanner.remaining_text(), "World");
973    /// ```
974    ///
975    /// [remaining characters]: Self::remaining_text
976    /// [cursor]: Self::cursor_pos
977    #[inline]
978    pub fn skip_until<F>(&mut self, mut f: F) -> ScannerItem<&'text str>
979    where
980        F: FnMut(char) -> bool,
981    {
982        self.skip_while(|c| !f(c))
983    }
984
985    #[allow(dead_code)]
986    #[inline]
987    pub(crate) fn skip_until_ext<A, Args>(&mut self, mut skip: A) -> ScannerItem<&'text str>
988    where
989        A: ScanMany<Args>,
990    {
991        self.skip_until(|c| skip.scan_many(c))
992    }
993
994    /// Skips zero-to-many characters, until the next character
995    /// matches `expected`, same as:
996    ///
997    /// ```rust
998    /// # use text_scanner::Scanner;
999    /// # let mut scanner = Scanner::new("Hello World");
1000    /// # let expected = ' ';
1001    /// scanner.skip_until(|c| c == expected);
1002    /// # assert_eq!(scanner.remaining_text(), " World");
1003    /// ```
1004    #[inline]
1005    pub fn skip_until_char(&mut self, expected: char) -> ScannerItem<&'text str> {
1006        self.skip_until(|c| c == expected)
1007    }
1008
1009    /// Skips zero-to-many characters, until the next character
1010    /// match any in `expected`, same as:
1011    ///
1012    /// ```rust
1013    /// # use text_scanner::Scanner;
1014    /// # let mut scanner = Scanner::new("Hello World");
1015    /// # let expected = [' ', 'o'];
1016    /// scanner.skip_until(|c| expected.contains(&c));
1017    /// # assert_eq!(scanner.remaining_text(), "o World");
1018    /// ```
1019    #[inline]
1020    pub fn skip_until_char_any(&mut self, expected: &[char]) -> ScannerItem<&'text str> {
1021        self.skip_until(|c| expected.contains(&c))
1022    }
1023
1024    /// Skips zero-to-many characters, until the next characters
1025    /// matches the characters in `expected` completely.
1026    ///
1027    /// **Note:** The returned string slice has the same lifetime as
1028    /// the original `text`, so the scanner can continue to be used
1029    /// while this exists.
1030    ///
1031    /// If `expected` is only 1 character, then use [`skip_until_char()`]
1032    /// instead.
1033    ///
1034    /// # Panics
1035    ///
1036    /// Panics in non-optimized builds, if `expected` is [empty].
1037    ///
1038    /// In optimized builds 0 characters are skipped, and
1039    /// <code>([cursor]..[cursor], &quot;&quot;)</code> is returned instead,
1040    /// regardless of whether there is any remaining characters.
1041    ///
1042    /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>.
1043    ///
1044    /// # Example
1045    ///
1046    /// ```rust
1047    /// # use text_scanner::Scanner;
1048    /// let mut scanner = Scanner::new("FooFooFooBarBaz");
1049    /// assert_eq!(scanner.skip_until_str("Bar"), (0..9, "FooFooFoo"));
1050    /// assert_eq!(scanner.remaining_text(), "BarBaz");
1051    /// ```
1052    ///
1053    /// [`skip_until_char()`]: Self::skip_until_char
1054    /// [cursor]: Self::cursor_pos
1055    /// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
1056    pub fn skip_until_str(&mut self, expected: &str) -> ScannerItem<&'text str> {
1057        let remaining_text = self.remaining_text();
1058        let end = remaining_text
1059            .find(expected)
1060            .unwrap_or(remaining_text.len());
1061
1062        let start = self.cursor;
1063        self.cursor = end;
1064
1065        self.ranged_text(start..end)
1066    }
1067
1068    /// Skips zero-to-many characters, until the next characters
1069    /// matches the characters of any `&str` in `expected` completely.
1070    ///
1071    /// **Warning:** The strings are tested in sequential order, thereby
1072    /// if `skip_until_str_any()` is called with e.g. `["foo", "foobar"]`,
1073    /// then `"foobar"` would never be tested, as `"foo"` would be
1074    /// matched and continue beforehand. Instead simply change the
1075    /// order of the strings into longest-to-shortest order,
1076    /// i.e. `["foo", "foobar"]` into `["foobar", "foo"]`.
1077    ///
1078    /// **Note:** The returned string slice has the same lifetime as
1079    /// the original `text`, so the scanner can continue to be used
1080    /// while this exists.
1081    ///
1082    /// If `expected` only contains 1 character strings, then use
1083    /// [`skip_until_char_any()`] instead.
1084    ///
1085    /// # Panics
1086    ///
1087    /// Panics in non-optimized builds, if `expected` is [empty],
1088    /// or if `expected` contains an [empty][empty2] `&str`.
1089    ///
1090    /// In optimized builds 0 characters are skipped, and
1091    /// <code>([cursor]..[cursor], &quot;&quot;)</code> is returned instead,
1092    /// regardless of whether there is any remaining characters.
1093    ///
1094    /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>
1095    /// (along with a similar assertion for the strings).
1096    ///
1097    /// # Example
1098    ///
1099    /// ```rust
1100    /// # use text_scanner::Scanner;
1101    /// let mut scanner = Scanner::new("FooBarFooBarFooBaaarBaz");
1102    /// assert_eq!(scanner.skip_until_str_any(&["Baaar", "Baz"]), (0..15, "FooBarFooBarFoo"));
1103    /// assert_eq!(scanner.remaining_text(), "BaaarBaz");
1104    /// ```
1105    ///
1106    /// [`skip_until_char_any()`]: Self::skip_until_char_any
1107    /// [cursor]: Self::cursor_pos
1108    /// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
1109    /// [empty2]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
1110    pub fn skip_until_str_any(&mut self, expected: &[&str]) -> ScannerItem<&'text str> {
1111        let start = self.cursor;
1112
1113        while self.has_remaining_text() {
1114            if let Ok((r, _)) = self.accept_str_any(expected) {
1115                self.cursor = r.start;
1116                break;
1117            }
1118
1119            _ = self.next();
1120        }
1121
1122        self.ranged_text(start..self.cursor)
1123    }
1124
1125    /// Skips zero-to-many characters, while the next character
1126    /// is a [whitespace], same as:
1127    ///
1128    /// ```rust
1129    /// # use text_scanner::Scanner;
1130    /// # let mut scanner = Scanner::new("  Hello World");
1131    /// scanner.skip_while(char::is_whitespace);
1132    /// # assert_eq!(scanner.remaining_text(), "Hello World");
1133    /// ```
1134    ///
1135    /// [whitespace]: https://doc.rust-lang.org/std/primitive.char.html#method.is_whitespace
1136    #[inline]
1137    pub fn skip_whitespace(&mut self) -> ScannerItem<&'text str> {
1138        self.skip_while(char::is_whitespace)
1139    }
1140
1141    /// Advances the cursor if `f()` returns `Ok`, otherwise on `Err` the
1142    /// cursor position is backtracked to before `f()` was called.
1143    ///
1144    /// Utility for scanning [tokens], where an unexpected character during
1145    /// scanning, should restore the cursor position before the the scan
1146    /// was started.
1147    ///
1148    /// Additionally, returns `Err` if `f()` returns `Ok`, without advancing
1149    /// the cursor position.
1150    ///
1151    /// # Example
1152    ///
1153    /// ```rust
1154    /// # use text_scanner::{Scanner, ScannerItem};
1155    /// fn scan_word<'text>(scanner: &mut Scanner<'text>) -> Result<(), ScannerItem<&'text str>> {
1156    ///     // Get next char if alphabetic or return err
1157    ///     let (first, _c) = scanner.accept_if(char::is_alphabetic)?;
1158    ///     // Skip zero-to-many alphabetic characters
1159    ///     let (last, _s) = scanner.skip_while(char::is_alphabetic);
1160    ///     Ok(())
1161    /// }
1162    ///
1163    /// let text = "Hello World";
1164    /// let mut scanner = Scanner::new(text);
1165    ///
1166    /// assert_eq!(scanner.scan_with(scan_word), Ok((0..5, "Hello")));
1167    /// assert_eq!(scanner.scan_with(scan_word), Err((5..5, "")));
1168    /// assert_eq!(scanner.next(), Ok((5..6, ' ')));
1169    /// assert_eq!(scanner.scan_with(scan_word), Ok((6..11, "World")));
1170    /// # assert_eq!(scanner.remaining_text(), "");
1171    /// ```
1172    ///
1173    /// [tokens]: https://en.wikipedia.org/wiki/Lexical_analysis#Token
1174    #[inline]
1175    pub fn scan_with<F>(&mut self, f: F) -> ScannerResult<'text, &'text str>
1176    where
1177        F: FnOnce(&mut Self) -> ScanResult<'text>,
1178    {
1179        let start = self.cursor;
1180
1181        let mut scanner = self.clone();
1182
1183        match f(&mut scanner) {
1184            Ok(()) => {
1185                self.cursor = scanner.cursor;
1186
1187                if self.cursor == start {
1188                    return Err((start..start, ""));
1189                }
1190
1191                let r = start..self.cursor;
1192                Ok(self.ranged_text(r))
1193            }
1194            Err((last, _last_s)) => {
1195                let r = self.cursor..last.end;
1196                Err(self.ranged_text(r))
1197            }
1198        }
1199    }
1200
1201    /// Calls `f` with a <code>&mut [Scanner]</code> of this
1202    /// <code>&[Scanner]</code>, i.e. a [`Scanner`] with the
1203    /// same [`text()`], [`remaining_text()`], and [`cursor_pos()`].
1204    ///
1205    /// [`text()`]: Self::text
1206    /// [`remaining_text()`]: Self::remaining_text
1207    /// [`cursor_pos()`]: Self::cursor_pos
1208    pub fn peeking<T, F>(&self, f: F) -> T
1209    where
1210        F: FnOnce(&mut Self) -> T,
1211    {
1212        let mut scanner = self.clone();
1213        f(&mut scanner)
1214    }
1215
1216    /// This function accepts the following formats:
1217    ///
1218    /// - `0`
1219    /// - `1`
1220    /// - `5`
1221    /// - `123`
1222    /// - `00000`
1223    ///
1224    /// The following is **not** accepted by this function:
1225    ///
1226    /// - `0__000__0`
1227    /// - `_`
1228    /// - `___`
1229    /// - `_123`
1230    ///
1231    /// See also:
1232    /// - [`scan_digits_or_underscores()`]
1233    /// - [`scan_non_zero_digits()`]
1234    /// - [`scan_non_zero_digits_or_underscores()`]
1235    /// - [`scan_rust_int_dec()`]
1236    /// - [`scan_c_int_dec()`]
1237    /// - [`scan_python_int_dec()`]
1238    /// - _and [more extensions]_
1239    ///
1240    /// # Grammar
1241    ///
1242    /// The following [EBNF] grammar represents what this method accepts:
1243    ///
1244    /// ```text
1245    /// Digits ::= Digit Digit*
1246    /// Digit  ::= [0-9]
1247    /// ```
1248    ///
1249    /// [`scan_digits_or_underscores()`]: Self::scan_digits_or_underscores
1250    /// [`scan_non_zero_digits()`]: Self::scan_non_zero_digits
1251    /// [`scan_non_zero_digits_or_underscores()`]: Self::scan_non_zero_digits_or_underscores
1252    /// [`scan_rust_int_dec()`]: ext::RustScannerExt::scan_rust_int_dec
1253    /// [`scan_c_int_dec()`]: ext::CScannerExt::scan_c_int_dec
1254    /// [`scan_python_int_dec()`]: ext::PythonScannerExt::scan_python_int_dec
1255    /// [more extensions]: ext
1256    /// [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
1257    pub fn scan_digits(&mut self) -> ScannerResult<'text, &'text str> {
1258        let (first, _c) = self.accept_if_ext(char::is_ascii_digit)?;
1259        let (last, _s) = self.skip_while_ext(char::is_ascii_digit);
1260        Ok(self.ranged_text(first.start..last.end))
1261    }
1262
1263    /// This function accepts the following formats:
1264    ///
1265    /// - `0`
1266    /// - `1`
1267    /// - `5_`
1268    /// - `0000`
1269    /// - `12345`
1270    /// - `1_2_3`
1271    /// - `0__000__0`
1272    ///
1273    /// The following is **not** accepted by this function:
1274    ///
1275    /// - `_`
1276    /// - `___`
1277    /// - `_123`
1278    ///
1279    /// See also:
1280    /// - [`scan_digits()`]
1281    /// - [`scan_non_zero_digits()`]
1282    /// - [`scan_non_zero_digits_or_underscores()`]
1283    /// - [`scan_rust_int_dec()`]
1284    /// - [`scan_c_int_dec()`]
1285    /// - [`scan_python_int_dec()`]
1286    /// - _and [more extensions]_
1287    ///
1288    /// # Grammar
1289    ///
1290    /// The following [EBNF] grammar represents what this method accepts:
1291    ///
1292    /// ```text
1293    /// Digits ::= Digit ( Digit | '_' )*
1294    /// Digit  ::= [0-9]
1295    /// ```
1296    ///
1297    /// [`scan_digits()`]: Self::scan_digits
1298    /// [`scan_non_zero_digits()`]: Self::scan_non_zero_digits
1299    /// [`scan_non_zero_digits_or_underscores()`]: Self::scan_non_zero_digits_or_underscores
1300    /// [`scan_rust_int_dec()`]: ext::RustScannerExt::scan_rust_int_dec
1301    /// [`scan_c_int_dec()`]: ext::CScannerExt::scan_c_int_dec
1302    /// [`scan_python_int_dec()`]: ext::PythonScannerExt::scan_python_int_dec
1303    /// [more extensions]: ext
1304    /// [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
1305    pub fn scan_digits_or_underscores(&mut self) -> ScannerResult<'text, &'text str> {
1306        let (first, _c) = self.accept_if_ext(char::is_ascii_digit)?;
1307        let (last, _s) = self.skip_while(|c| c.is_ascii_digit() || (c == '_'));
1308        Ok(self.ranged_text(first.start..last.end))
1309    }
1310
1311    /// This function accepts the following formats:
1312    ///
1313    /// - `0`
1314    /// - `1`
1315    /// - `5`
1316    /// - `123`
1317    /// - `102030`
1318    ///
1319    /// The following is **not** accepted by this function:
1320    ///
1321    /// - `0000`
1322    /// - `01`
1323    /// - `012345`
1324    /// - `0__000__0`
1325    /// - `_`
1326    /// - `___`
1327    /// - `_123`
1328    ///
1329    /// See also:
1330    /// - [`scan_digits()`]
1331    /// - [`scan_digits_or_underscores()`]
1332    /// - [`scan_non_zero_digits_or_underscores()`]
1333    /// - [`scan_rust_int_dec()`]
1334    /// - [`scan_c_int_dec()`]
1335    /// - [`scan_python_int_dec()`]
1336    /// - _and [more extensions]_
1337    ///
1338    /// # Grammar
1339    ///
1340    /// The following [EBNF] grammar represents what this method accepts:
1341    ///
1342    /// ```text
1343    /// Digits       ::= ( '0' |
1344    ///                    NonZeroDigit Digit* )
1345    /// NonZeroDigit ::= [1-9]
1346    /// Digit        ::= [0-9]
1347    /// ```
1348    ///
1349    /// [`scan_digits()`]: Self::scan_digits
1350    /// [`scan_digits_or_underscores()`]: Self::scan_digits_or_underscores
1351    /// [`scan_non_zero_digits_or_underscores()`]: Self::scan_non_zero_digits_or_underscores
1352    /// [`scan_rust_int_dec()`]: ext::RustScannerExt::scan_rust_int_dec
1353    /// [`scan_c_int_dec()`]: ext::CScannerExt::scan_c_int_dec
1354    /// [`scan_python_int_dec()`]: ext::PythonScannerExt::scan_python_int_dec
1355    /// [more extensions]: ext
1356    /// [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
1357    pub fn scan_non_zero_digits(&mut self) -> ScannerResult<'text, &'text str> {
1358        self.scan_with(|scanner| {
1359            match scanner.accept_char('0') {
1360                Ok((r, _)) => {
1361                    if scanner.accept_if_ext(char::is_ascii_digit).is_ok() {
1362                        return Err(scanner.ranged_text(r));
1363                    }
1364                }
1365                _ => {
1366                    scanner.accept_if(char::is_ascii_non_zero_digit)?;
1367                    scanner.skip_while_ext(char::is_ascii_digit);
1368                }
1369            }
1370            Ok(())
1371        })
1372    }
1373
1374    /// This function accepts the following formats:
1375    ///
1376    /// - `0`
1377    /// - `1`
1378    /// - `5_`
1379    /// - `123`
1380    /// - `102030`
1381    /// - `1_2_3`
1382    /// - `0___`
1383    /// - `12345__`
1384    ///
1385    /// The following is **not** accepted by this function:
1386    ///
1387    /// - `0000`
1388    /// - `01`
1389    /// - `012345`
1390    /// - `0__000__0`
1391    /// - `_`
1392    /// - `___`
1393    /// - `_123`
1394    ///
1395    /// See also:
1396    /// - [`scan_digits()`]
1397    /// - [`scan_digits_or_underscores()`]
1398    /// - [`scan_non_zero_digits()`]
1399    /// - [`scan_rust_int_dec()`]
1400    /// - [`scan_c_int_dec()`]
1401    /// - [`scan_python_int_dec()`]
1402    /// - _and [more extensions]_
1403    ///
1404    /// # Grammar
1405    ///
1406    /// The following [EBNF] grammar represents what this method accepts:
1407    ///
1408    /// ```text
1409    /// Digits       ::= ( '0' |
1410    ///                    NonZeroDigit ( Digit | '_' )* )
1411    /// NonZeroDigit ::= [1-9]
1412    /// Digit        ::= [0-9]
1413    /// ```
1414    ///
1415    /// [`scan_digits()`]: Self::scan_digits
1416    /// [`scan_digits_or_underscores()`]: Self::scan_digits_or_underscores
1417    /// [`scan_non_zero_digits()`]: Self::scan_non_zero_digits
1418    /// [`scan_rust_int_dec()`]: ext::RustScannerExt::scan_rust_int_dec
1419    /// [`scan_c_int_dec()`]: ext::CScannerExt::scan_c_int_dec
1420    /// [`scan_python_int_dec()`]: ext::PythonScannerExt::scan_python_int_dec
1421    /// [more extensions]: ext
1422    /// [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
1423    pub fn scan_non_zero_digits_or_underscores(&mut self) -> ScannerResult<'text, &'text str> {
1424        self.scan_with(|scanner| {
1425            match scanner.accept_char('0') {
1426                Ok((first, _)) => {
1427                    let (last, _) = scanner.skip_while_char('_');
1428                    if scanner.accept_if_ext(char::is_ascii_digit).is_ok() {
1429                        return Err(scanner.ranged_text(first.start..last.end));
1430                    }
1431                }
1432                _ => {
1433                    scanner.accept_if(char::is_ascii_non_zero_digit)?;
1434                    scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
1435                }
1436            }
1437            Ok(())
1438        })
1439    }
1440}
1441
// Kept crate-private for now, as exposing it would mean that calling
// e.g. `accept_if()` with a closure requires explicit type annotations
// more often than desired.

/// A one-shot predicate over a single character.
///
/// `Args` records which argument form the wrapped callable takes,
/// i.e. `char` or `&char`, so that both forms can be implemented for
/// callables.
pub(crate) trait ScanOne<Args> {
    /// Consumes the predicate, and returns whether it accepts `next`.
    fn scan_one(self, next: char) -> bool;
}

/// Callables that take the character by value.
impl<P: FnOnce(char) -> bool> ScanOne<char> for P {
    #[inline]
    fn scan_one(self, next: char) -> bool {
        let predicate = self;
        predicate(next)
    }
}

/// Callables that take the character by reference.
impl<P: FnOnce(&char) -> bool> ScanOne<&char> for P {
    #[inline]
    fn scan_one(self, next: char) -> bool {
        let predicate = self;
        predicate(&next)
    }
}
1467
1468// Currently not publicly exported, as using e.g. `skip_while()` with a
1469// closure would require specifying types more often than desired.
1470pub(crate) trait ScanMany<Args>: ScanOne<Args> {
1471    fn scan_many(&mut self, next: char) -> bool;
1472}
1473
1474impl<F> ScanMany<char> for F
1475where
1476    F: FnMut(char) -> bool,
1477{
1478    #[inline]
1479    fn scan_many(&mut self, next: char) -> bool {
1480        self(next)
1481    }
1482}
1483
1484impl<F> ScanMany<&char> for F
1485where
1486    F: FnMut(&char) -> bool,
1487{
1488    #[inline]
1489    fn scan_many(&mut self, next: char) -> bool {
1490        self(&next)
1491    }
1492}
1493
/// Crate-internal ASCII digit classifications for `char`.
///
/// The methods take `self` by value, which is why the
/// `wrong_self_convention` lint is silenced for the `is_*` names.
#[allow(clippy::wrong_self_convention)]
pub(crate) trait CharExt {
    /// Returns `true` for `'1'` through `'9'`.
    fn is_ascii_non_zero_digit(self) -> bool;

    /// Returns `true` for `'0'` through `'7'`.
    // `std::char::is_ascii_octdigit` is unstable
    fn is_ascii_octdigit(self) -> bool;

    /// Returns `true` for `'0'` and `'1'`.
    fn is_ascii_bindigit(self) -> bool;
}

impl CharExt for char {
    #[inline]
    fn is_ascii_non_zero_digit(self) -> bool {
        ('1'..='9').contains(&self)
    }

    #[inline]
    fn is_ascii_octdigit(self) -> bool {
        ('0'..='7').contains(&self)
    }

    #[inline]
    fn is_ascii_bindigit(self) -> bool {
        self == '0' || self == '1'
    }
}
1520
1521// If you are looking for tests, then the majority
1522// are implemented in the form of doc tests
1523
#[cfg(test)]
mod tests {
    use super::*;

    /// The order of the strings passed to `accept_str_any()` decides
    /// which one is reported, when one is a prefix of another.
    #[test]
    fn test_accept_str_any_order() {
        let mut sc = Scanner::new("FooBarBaz");

        // Shorter string first
        let short_first = sc.accept_str_any(&["Foo", "FooBar"]);
        assert_eq!(short_first, Ok((0..3, "Foo")));
        assert_eq!(sc.remaining_text(), "BarBaz");

        sc.reset();

        // Longer string first
        let long_first = sc.accept_str_any(&["FooBar", "Foo"]);
        assert_eq!(long_first, Ok((0..6, "FooBar")));
        assert_eq!(sc.remaining_text(), "Baz");
    }

    #[test]
    fn test_scan_digits() {
        let valid = ["0", "1", "0000", "0123", "123", "123456789", "0123456789"];
        // Once with nothing after the digits, and once with trailing text
        assert_valid_cases!(scan_digits, valid);
        assert_valid_cases!(scan_digits, valid, "remaining");
    }

    #[test]
    fn test_scan_digits_invalid() {
        let invalid = ["_", "___", "_123"];
        assert_invalid_cases!(scan_digits, invalid);
    }

    #[test]
    fn test_scan_digits_or_underscores() {
        #[rustfmt::skip]
        let valid = [
            "0", "1", "5_",
            "0000", "0123", "123", "1_2_3",
            "123456789", "0123456789",
            "0__000__0",
        ];
        assert_valid_cases!(scan_digits_or_underscores, valid);
        assert_valid_cases!(scan_digits_or_underscores, valid, "remaining");
    }

    #[test]
    fn test_scan_digits_or_underscores_invalid() {
        let invalid = ["_", "___", "_123"];
        assert_invalid_cases!(scan_digits_or_underscores, invalid);
    }

    #[test]
    fn test_scan_non_zero_digits() {
        let valid = ["0", "1", "5", "123", "102030"];
        assert_valid_cases!(scan_non_zero_digits, valid);
        assert_valid_cases!(scan_non_zero_digits, valid, "remaining");
    }

    #[test]
    fn test_scan_non_zero_digits_invalid() {
        let invalid = ["0000", "01", "012345", "_", "___", "_123"];
        assert_invalid_cases!(scan_non_zero_digits, invalid);
    }

    #[test]
    fn test_scan_non_zero_digits_or_underscores() {
        let valid = ["0", "1", "5_", "123", "102030", "1_2_3", "0___", "12345__"];
        assert_valid_cases!(scan_non_zero_digits_or_underscores, valid);
        assert_valid_cases!(scan_non_zero_digits_or_underscores, valid, "remaining");
    }

    #[test]
    fn test_scan_non_zero_digits_or_underscores_invalid() {
        #[rustfmt::skip]
        let invalid = [
            // Leading zeros
            "00", "0000", "01", "012345", "0_0", "0__000__0",
            // Leading underscores
            "_", "___", "_123", "_0123",
        ];
        assert_invalid_cases!(scan_non_zero_digits_or_underscores, invalid);
    }
}