text_scanner/lib.rs
1//! See [`Scanner`] docs for more information and its [methods]
2//! for many examples.
3//!
4//! [methods]: Scanner#implementations
5
6#![forbid(unsafe_code)]
7#![forbid(elided_lifetimes_in_paths)]
8
9#[cfg(feature = "ext")]
10pub mod ext;
11
12pub mod prelude {
13 pub use super::{ScanResult, Scanner, ScannerItem, ScannerResult};
14}
15
16mod private {
17 pub trait Sealed {}
18
19 impl Sealed for crate::Scanner<'_> {}
20 impl Sealed for str {}
21}
22
23pub use char_ranges::{CharRanges, CharRangesExt, CharRangesOffset};
24
25use std::ops::Range;
26
/// A scanned value of type `T` paired with the byte [`Range`] it was
/// found at.
pub type ScannerItem<T> = (Range<usize>, T);

/// Result of a scan that produces a value.
///
/// - `Ok` holds the [`ScannerItem`] that was scanned.
/// - `Err` holds a [`ScannerItem`] with a string slice of the scanned text.
pub type ScannerResult<'text, T> = Result<ScannerItem<T>, ScannerItem<&'text str>>;

/// Result of a scan that produces no value on success.
///
/// `Err` carries the same kind of payload as [`ScannerResult`]'s `Err`.
pub type ScanResult<'text> = Result<(), ScannerItem<&'text str>>;
32
// Test helper: for every `expected` string in `$cases`, builds the input
// `expected + remaining`, calls `Scanner::$method()` on it and asserts that
// exactly `expected` was scanned (range `0..expected.len()`) and that
// `remaining` is left over. The two-argument form uses an empty `remaining`.
#[cfg(test)]
macro_rules! assert_valid_cases {
    ($method:ident, $cases:expr) => {
        $crate::assert_valid_cases!($method, $cases, "");
    };

    ($method:ident, $cases:expr, $remaining:expr) => {
        let remaining = $remaining;
        for expected in $cases {
            // Only allocate when there actually is a suffix to append.
            let input = if remaining.is_empty() {
                std::borrow::Cow::Borrowed(expected)
            } else {
                std::borrow::Cow::Owned(format!("{expected}{remaining}"))
            };
            let input: &str = &input;

            let mut scanner = Scanner::new(input);
            let result = scanner.$method();

            assert_eq!(result, Ok((0..expected.len(), expected)));
            assert_eq!(scanner.remaining_text(), remaining);
        }
    };
}
56
// Test helper: for every input in `$cases`, calls `Scanner::$method()` on a
// fresh scanner and panics unless the call returns `Err`.
#[cfg(test)]
macro_rules! assert_invalid_cases {
    ($method:ident, $cases:expr) => {
        for case in $cases {
            let mut scanner = Scanner::new(case);
            let actual = scanner.$method();
            assert!(actual.is_err(), "expected `Err`, received {:?}", actual);
        }
    };
}
69
70#[cfg(test)]
71pub(crate) use assert_invalid_cases;
72#[cfg(test)]
73pub(crate) use assert_valid_cases;
74
/// A UTF-8 [`char`] text scanner over a string slice.
///
/// `Scanner` offers methods for scanning text as well as for backtracking,
/// which makes it a building block for lexers that tokenize text or code.
/// It is essentially just a fancy wrapper around [`CharRanges`].
///
/// **Note:** A `Scanner` only holds a `&str` and a `usize` cursor, so
/// cloning it is essentially a copy. `Copy` is deliberately not
/// implemented, to avoid accidentally copying immutable `Scanner`s.
#[derive(Debug, Clone)]
pub struct Scanner<'text> {
    /// The complete text the scanner was constructed with.
    text: &'text str,
    /// Current position, as a byte offset into `text`.
    cursor: usize,
}
88
89impl<'text> Scanner<'text> {
90 /// Constructs a new [`Scanner`] with `text`.
91 #[inline]
92 pub fn new(text: &'text str) -> Self {
93 Self { text, cursor: 0 }
94 }
95
96 /// Returns the `text` the scanner was constructed with.
97 ///
98 /// **Note:** This has the same lifetime as the original `text`,
99 /// so the scanner can continue to be used while this exists.
100 ///
101 /// # Example
102 ///
103 /// ```rust
104 /// # use text_scanner::Scanner;
105 /// let mut scanner = Scanner::new("Hello World");
106 ///
107 /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
108 /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
109 ///
110 /// assert_eq!(scanner.text(), "Hello World");
111 /// assert_eq!(scanner.remaining_text(), "llo World");
112 /// ```
113 #[inline]
114 pub fn text(&self) -> &'text str {
115 self.text
116 }
117
118 /// Returns the remaining `text` of the scanner, i.e. the [`text()`]
119 /// after [`cursor_pos()`], in other words
120 /// <code style="white-space: nowrap;">self.[text()]\[self.[cursor_pos()]..]</code>.
121 ///
122 /// **Note:** This has the same lifetime as the original `text`,
123 /// so the scanner can continue to be used while this exists.
124 ///
125 /// # Example
126 ///
127 /// ```rust
128 /// # use text_scanner::Scanner;
129 /// let mut scanner = Scanner::new("Hello World");
130 ///
131 /// assert_eq!(scanner.text(), "Hello World");
132 /// assert_eq!(scanner.remaining_text(), "Hello World");
133 ///
134 /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
135 /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
136 ///
137 /// assert_eq!(scanner.text(), "Hello World");
138 /// assert_eq!(scanner.remaining_text(), "llo World");
139 /// ```
140 ///
141 /// [`text()`]: Self::text
142 /// [text()]: Self::text
143 /// [`cursor_pos()`]: Self::cursor_pos
144 /// [cursor_pos()]: Self::cursor_pos
145 #[inline]
146 pub fn remaining_text(&self) -> &'text str {
147 &self.text[self.cursor..]
148 }
149
150 /// Returns `true` if [`remaining_text()`] has text, i.e.
151 /// if it is not [empty].
152 ///
153 /// # Example
154 ///
155 /// ```rust
156 /// # use text_scanner::Scanner;
157 /// let mut scanner = Scanner::new("Foo");
158 ///
159 /// # assert_eq!(scanner.text(), "Foo");
160 /// assert_eq!(scanner.remaining_text(), "Foo");
161 /// assert_eq!(scanner.has_remaining_text(), true);
162 ///
163 /// assert_eq!(scanner.next(), Ok((0..1, 'F')));
164 /// assert_eq!(scanner.next(), Ok((1..2, 'o')));
165 /// assert_eq!(scanner.next(), Ok((2..3, 'o')));
166 ///
167 /// # assert_eq!(scanner.text(), "Foo");
168 /// assert_eq!(scanner.remaining_text(), "");
169 /// assert_eq!(scanner.has_remaining_text(), false);
170 /// ```
171 ///
172 /// [`remaining_text()`]: Self::remaining_text
173 /// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
174 #[inline]
175 pub fn has_remaining_text(&self) -> bool {
176 self.cursor < self.text.len()
177 }
178
179 /// Utility for turning a `Range<usize>` into `(Range<usize>, &'text str)`.
180 /// Where `range` is the start end end byte index relative to [`text()`].
181 ///
182 /// The same as `(range.clone(), &self.text()[range])`.
183 ///
184 /// [`text()`]: Self::text
185 #[inline]
186 pub fn ranged_text(&self, range: Range<usize>) -> ScannerItem<&'text str> {
187 (range.clone(), &self.text[range])
188 }
189
190 /// Returns the current cursor position of the
191 /// scanner, i.e. the byte offset into [`text()`].
192 ///
193 /// [`text()`]: Self::text
194 #[inline]
195 pub fn cursor_pos(&self) -> usize {
196 self.cursor
197 }
198
199 /// Replaces the current cursor position with `pos`,
200 /// while returning the old cursor position.
201 ///
202 /// # Panics
203 ///
204 /// If `pos` is not at a valid UTF-8 sequence boundary,
205 /// then the next operation using the cursor position
206 /// will panic.
207 ///
208 /// # Example
209 ///
210 /// ```rust
211 /// # use text_scanner::Scanner;
212 /// let mut scanner = Scanner::new("Hello World");
213 ///
214 /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
215 ///
216 /// let backtrack = scanner.cursor_pos();
217 ///
218 /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
219 /// assert_eq!(scanner.next(), Ok((2..3, 'l')));
220 /// assert_eq!(scanner.next(), Ok((3..4, 'l')));
221 ///
222 /// scanner.set_cursor_pos(backtrack);
223 ///
224 /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
225 /// assert_eq!(scanner.next(), Ok((2..3, 'l')));
226 /// assert_eq!(scanner.next(), Ok((3..4, 'l')));
227 /// ```
228 #[inline]
229 pub fn set_cursor_pos(&mut self, pos: usize) -> usize {
230 let old_pos = self.cursor;
231 self.cursor = pos;
232 old_pos
233 }
234
235 /// Resets the cursor position to the start, while returning
236 /// the old cursor position.
237 ///
238 /// # Example
239 ///
240 /// ```rust
241 /// # use text_scanner::Scanner;
242 /// # let mut scanner = Scanner::new("Hello World");
243 /// # assert_eq!(scanner.next(), Ok((0..1, 'H')));
244 /// # assert_eq!(scanner.next(), Ok((1..2, 'e')));
245 /// # assert_eq!(scanner.remaining_text(), "llo World");
246 /// let old_pos = scanner.reset();
247 /// // same as
248 /// let old_pos = scanner.set_cursor_pos(0);
249 /// # assert_eq!(scanner.remaining_text(), "Hello World");
250 /// # assert_eq!(scanner.next(), Ok((0..1, 'H')));
251 /// ```
252 #[inline]
253 pub fn reset(&mut self) -> usize {
254 self.set_cursor_pos(0)
255 }
256
257 /// Advances the scanner cursor and returns the next
258 /// [`char`] and its [`Range`], if any.
259 ///
260 /// # Example
261 ///
262 /// ```rust
263 /// # use text_scanner::Scanner;
264 /// let mut scanner = Scanner::new("Hello");
265 ///
266 /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
267 /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
268 ///
269 /// assert_eq!(scanner.remaining_text(), "llo");
270 ///
271 /// assert_eq!(scanner.next(), Ok((2..3, 'l')));
272 /// assert_eq!(scanner.next(), Ok((3..4, 'l')));
273 /// assert_eq!(scanner.next(), Ok((4..5, 'o')));
274 /// assert_eq!(scanner.next(), Err((5..5, "")));
275 ///
276 /// assert_eq!(scanner.remaining_text(), "");
277 /// ```
278 #[inline]
279 #[allow(clippy::should_implement_trait)]
280 pub fn next(&mut self) -> ScannerResult<'text, char> {
281 let (r, c) = self.peek()?;
282 self.cursor = r.end;
283 Ok((r, c))
284 }
285
286 /// Returns the next [`char`] and its [`Range`], if any,
287 /// without advancing the cursor position.
288 ///
289 /// See also [`peek_str()`], [`peek_nth()`], and [`peek_iter()`].
290 ///
291 /// # Example
292 ///
293 /// ```rust
294 /// # use text_scanner::Scanner;
295 /// let mut scanner = Scanner::new("Hello World");
296 ///
297 /// assert_eq!(scanner.peek(), Ok((0..1, 'H')));
298 /// assert_eq!(scanner.peek(), Ok((0..1, 'H')));
299 ///
300 /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
301 ///
302 /// assert_eq!(scanner.peek(), Ok((1..2, 'e')));
303 /// assert_eq!(scanner.peek(), Ok((1..2, 'e')));
304 ///
305 /// assert_eq!(scanner.remaining_text(), "ello World");
306 /// ```
307 ///
308 /// [`peek_str()`]: Self::peek_str
309 /// [`peek_nth()`]: Self::peek_nth
310 /// [`peek_iter()`]: Self::peek_iter
311 #[inline]
312 pub fn peek(&self) -> ScannerResult<'text, char> {
313 match self.peek_iter().next() {
314 Some((r, c)) => Ok((r, c)),
315 // No character remaining
316 None => Err((self.cursor..self.cursor, "")),
317 }
318 }
319
320 /// Returns the `n`th [`char`] and its [`Range`], if any,
321 /// without advancing the cursor position.
322 ///
323 /// See also [`peek_str()`] and [`peek_iter()`].
324 ///
325 /// # Example
326 ///
327 /// ```rust
328 /// # use text_scanner::Scanner;
329 /// let mut scanner = Scanner::new("Hello World");
330 ///
331 /// assert_eq!(scanner.peek_nth(0), Ok((0..1, 'H')));
332 /// assert_eq!(scanner.peek_nth(1), Ok((1..2, 'e')));
333 /// assert_eq!(scanner.peek_nth(2), Ok((2..3, 'l')));
334 ///
335 /// assert_eq!(scanner.peek_nth(6), Ok((6..7, 'W')));
336 ///
337 /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
338 ///
339 /// assert_eq!(scanner.remaining_text(), "ello World");
340 /// ```
341 ///
342 /// [`peek_str()`]: Self::peek_str
343 /// [`peek_iter()`]: Self::peek_iter
344 #[inline]
345 pub fn peek_nth(&self, n: usize) -> ScannerResult<'text, char> {
346 match self.peek_iter().nth(n) {
347 Some((r, c)) => Ok((r, c)),
348 None => Err(self.ranged_text(self.cursor..self.text.len())),
349 }
350 }
351
352 /// Returns an iterator that produces all the remaining [`char`]s
353 /// and their [`Range`]s, if any, without advancing the cursor position.
354 ///
355 /// **Note:** This has the same lifetime as the original `text`,
356 /// so the scanner can continue to be used while this exists.
357 ///
358 /// See also [`peek_str()`].
359 ///
360 /// # Example
361 ///
362 /// ```rust
363 /// # use text_scanner::Scanner;
364 /// let mut scanner = Scanner::new("Hello World");
365 ///
366 /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
367 /// assert_eq!(scanner.remaining_text(), "ello World");
368 ///
369 /// let mut peek = scanner.peek_iter();
370 /// assert_eq!(peek.next(), Some((1..2, 'e')));
371 /// assert_eq!(peek.next(), Some((2..3, 'l')));
372 /// assert_eq!(peek.next(), Some((3..4, 'l')));
373 /// assert_eq!(scanner.remaining_text(), "ello World");
374 ///
375 /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
376 /// assert_eq!(scanner.next(), Ok((2..3, 'l')));
377 /// assert_eq!(scanner.remaining_text(), "lo World");
378 /// ```
379 ///
380 /// [`peek_str()`]: Self::peek_str
381 #[inline]
382 pub fn peek_iter(&self) -> CharRangesOffset<'text> {
383 self.remaining_text().char_ranges().offset(self.cursor)
384 }
385
386 /// Advances the scanner cursor and returns [`Ok`] with a string
387 /// slice of the following `n` characters. If less than `n` are
388 /// remaining, then [`Err`] is returned, with the [remaining text],
389 /// if any, without advancing the cursor.
390 ///
391 /// **Note:** The returned string slice has the same lifetime as
392 /// the original `text`, so the scanner can continue to be used
393 /// while this exists.
394 ///
395 /// # Bytes vs Characters
396 ///
397 /// The [`Ok`] string slice contains `n` characters,
398 /// i.e. where `n` matches <code>str.[chars()].[count()]</code>
399 /// and **not** [`len()`] (which is the byte length of a string slice).
400 ///
401 /// Consider `"foo"` vs `"🦀🦀🦀"`, both string slices contain 3
402 /// characters. However `"foo"` has a length of 3 bytes, while `"🦀🦀🦀"`
403 /// has a length of 12 bytes, when encoded in UTF-8.
404 ///
405 /// # Panics
406 ///
407 /// Panics in non-optimized builds, if `n` is `0`.
408 ///
409 /// In optimized builds <code>Err(([cursor]..[cursor], ""))</code>
410 /// is returned instead, regardless of whether there is any remaining
411 /// characters.
412 ///
413 /// In short there is a <code>[debug_assert_ne!](n, 0)</code>.
414 ///
415 /// # Example
416 ///
417 /// ```rust
418 /// # use text_scanner::Scanner;
419 /// let mut scanner = Scanner::new("Foo Bar Baz");
420 ///
421 /// # assert_eq!(scanner.remaining_text(), "Foo Bar Baz");
422 /// assert_eq!(scanner.next_str(3), Ok((0..3, "Foo")));
423 /// assert_eq!(scanner.next_str(3), Ok((3..6, " Ba")));
424 /// assert_eq!(scanner.next_str(3), Ok((6..9, "r B")));
425 /// // Less than 3 characters are remaining, so `Err`
426 /// // is returned
427 /// assert_eq!(scanner.next_str(3), Err((9..11, "az")));
428 /// # assert_eq!(scanner.remaining_text(), "az");
429 /// # assert_eq!(scanner.next_str(2), Ok((9..11, "az")));
430 /// # assert_eq!(scanner.remaining_text(), "");
431 /// ```
432 ///
433 /// [remaining text]: Self::remaining_text
434 /// [chars()]: str::chars
435 /// [count()]: Iterator::count()
436 /// [`len()`]: str::len
437 /// [cursor]: Self::cursor_pos()
438 #[inline]
439 pub fn next_str(&mut self, chars: usize) -> ScannerResult<'text, &'text str> {
440 let (r, s) = self.peek_str(chars)?;
441 self.cursor = r.end;
442 Ok((r, s))
443 }
444
445 /// Returns [`Ok`] with a string slice of the following `n` characters,
446 /// if any, without advancing the cursor. If less than `n` are remaining,
447 /// then [`Err`] is returned, with the [remaining text].
448 ///
449 /// **Note:** The returned string slice has the same lifetime as
450 /// the original `text`, so the scanner can continue to be used
451 /// while this exists.
452 ///
453 /// # Bytes vs Characters
454 ///
455 /// The [`Ok`] string slice contains `n` characters,
456 /// i.e. where `n` matches <code>str.[chars()].[count()]</code>
457 /// and **not** [`len()`] (which is the byte length of a string slice).
458 ///
459 /// Consider `"foo"` vs `"🦀🦀🦀"`, both string slices contain 3
460 /// characters. However `"foo"` has a length of 3 bytes, while `"🦀🦀🦀"`
461 /// has a length of 12 bytes, when encoded in UTF-8.
462 ///
463 /// # Panics
464 ///
465 /// Panics in non-optimized builds, if `n` is `0`.
466 ///
467 /// In optimized builds <code>Err(([cursor]..[cursor], ""))</code>
468 /// is returned instead, regardless of whether there is any remaining
469 /// characters.
470 ///
471 /// In short there is a <code>[debug_assert_ne!](n, 0)</code>.
472 ///
473 /// # Example
474 ///
475 /// ```rust
476 /// # use text_scanner::Scanner;
477 /// let mut scanner = Scanner::new("Hello 👋 World 🌏");
478 ///
479 /// assert_eq!(scanner.remaining_text(), "Hello 👋 World 🌏");
480 /// // The emoji is a multi-byte character, thereby the returned
481 /// // range has a length of 10 and not 7.
482 /// assert_eq!(scanner.peek_str(7), Ok((0..10, "Hello 👋")));
483 /// # assert_eq!(scanner.remaining_text(), "Hello 👋 World 🌏");
484 ///
485 /// assert_eq!(scanner.next(), Ok((0..1, 'H')));
486 /// assert_eq!(scanner.next(), Ok((1..2, 'e')));
487 ///
488 /// assert_eq!(scanner.remaining_text(), "llo 👋 World 🌏");
489 /// assert_eq!(scanner.peek_str(7), Ok((2..12, "llo 👋 W")));
490 /// # assert_eq!(scanner.remaining_text(), "llo 👋 World 🌏");
491 /// ```
492 ///
493 /// [remaining text]: Self::remaining_text
494 /// [chars()]: str::chars
495 /// [count()]: Iterator::count()
496 /// [`len()`]: str::len
497 /// [cursor]: Self::cursor_pos()
498 #[inline]
499 pub fn peek_str(&self, n: usize) -> ScannerResult<'text, &'text str> {
500 debug_assert_ne!(n, 0, "`n` must be greater than 0");
501 if n == 0 {
502 return Err((self.cursor..self.cursor, ""));
503 }
504 let (last, _) = self.peek_nth(n - 1)?;
505 let r = self.cursor..last.end;
506 Ok(self.ranged_text(r))
507 }
508
509 /// Advances the scanner cursor and returns the next
510 /// [`char`] and its [`Range`], if `f(c)` returns `true`
511 /// where `c` is the next character.
512 ///
513 /// # Example
514 ///
515 /// ```rust
516 /// # use text_scanner::Scanner;
517 /// let mut scanner = Scanner::new("Hello World");
518 ///
519 /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((0..1, 'H')));
520 /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((1..2, 'e')));
521 /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((2..3, 'l')));
522 /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((3..4, 'l')));
523 /// assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((4..5, 'o')));
524 /// assert_eq!(scanner.accept_if(char::is_alphabetic), Err((5..5, "")));
525 ///
526 /// assert_eq!(scanner.remaining_text(), " World");
527 /// ```
528 #[inline]
529 pub fn accept_if<F>(&mut self, f: F) -> ScannerResult<'text, char>
530 where
531 F: FnOnce(char) -> bool,
532 {
533 let (r, c) = self.peek()?;
534 if f(c) {
535 self.cursor = r.end;
536 Ok((r, c))
537 } else {
538 Err((self.cursor..self.cursor, ""))
539 }
540 }
541
542 #[allow(dead_code)]
543 #[inline]
544 pub(crate) fn accept_if_ext<A, Args>(&mut self, accept: A) -> ScannerResult<'text, char>
545 where
546 A: ScanOne<Args>,
547 {
548 self.accept_if(|c| accept.scan_one(c))
549 }
550
551 /// Advances the scanner cursor and returns the next
552 /// [`char`] and its [`Range`], if the next character
553 /// matches `expected`.
554 ///
555 /// # Example
556 ///
557 /// ```rust
558 /// # use text_scanner::Scanner;
559 /// let mut scanner = Scanner::new("Hello World");
560 ///
561 /// assert_eq!(scanner.accept_char('H'), Ok((0..1, 'H')));
562 /// assert_eq!(scanner.accept_char('E'), Err((1..1, "")));
563 /// assert_eq!(scanner.accept_char('e'), Ok((1..2, 'e')));
564 /// assert_eq!(scanner.accept_char('W'), Err((2..2, "")));
565 ///
566 /// assert_eq!(scanner.remaining_text(), "llo World");
567 /// ```
568 #[inline]
569 pub fn accept_char(&mut self, expected: char) -> ScannerResult<'text, char> {
570 self.accept_if(|c| c == expected)
571 }
572
573 /// Advances the scanner cursor and returns the next
574 /// [`char`] and its [`Range`], if the next character
575 /// matches any `char` produced by `expected`.
576 ///
577 /// # Panics
578 ///
579 /// Panics in non-optimized builds, if `expected` is [empty].
580 ///
581 /// In optimized builds <code>Err(([cursor]..[cursor], ""))</code>
582 /// is returned instead, regardless of whether there is any remaining
583 /// characters.
584 ///
585 /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>.
586 ///
587 /// # Example
588 ///
589 /// ```rust
590 /// # use text_scanner::Scanner;
591 /// let mut scanner = Scanner::new("Hello World");
592 ///
593 /// let any = &['H', 'e', 'l', 'o', ' '];
594 /// assert_eq!(scanner.accept_char_any(any), Ok((0..1, 'H')));
595 /// assert_eq!(scanner.accept_char_any(any), Ok((1..2, 'e')));
596 /// assert_eq!(scanner.accept_char_any(any), Ok((2..3, 'l')));
597 /// assert_eq!(scanner.accept_char_any(any), Ok((3..4, 'l')));
598 /// assert_eq!(scanner.accept_char_any(any), Ok((4..5, 'o')));
599 /// assert_eq!(scanner.accept_char_any(any), Ok((5..6, ' ')));
600 /// assert_eq!(scanner.accept_char_any(any), Err((6..6, "")));
601 ///
602 /// assert_eq!(scanner.remaining_text(), "World");
603 /// ```
604 ///
605 /// [cursor]: Self::cursor_pos
606 /// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
607 pub fn accept_char_any(&mut self, expected: &[char]) -> ScannerResult<'text, char> {
608 debug_assert!(!expected.is_empty(), "`expected` is empty");
609 let (r, c) = self.peek()?;
610 if expected.contains(&c) {
611 self.cursor = r.end;
612 Ok((r, c))
613 } else {
614 Err((self.cursor..self.cursor, ""))
615 }
616 }
617
618 /// Advances the scanner cursor and returns `Ok` with the `&'text str`
619 /// and its [`Range`], if the next characters matches the characters
620 /// in `expected`. If not, then an `Err` is returned, with the longest
621 /// matching substring and its [`Range`].
622 ///
623 /// **Note:** The returned string slice has the same lifetime as
624 /// the original `text`, so the scanner can continue to be used
625 /// while this exists.
626 ///
627 /// If `expected` is only 1 character, then use [`accept_char()`]
628 /// instead.
629 ///
630 /// # Panics
631 ///
632 /// Panics in non-optimized builds, if `expected` is [empty].
633 ///
634 /// In optimized builds <code>Err(([cursor]..[cursor], ""))</code>
635 /// is returned instead, regardless of whether there is any remaining
636 /// characters.
637 ///
638 /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>.
639 ///
640 /// # Example
641 ///
642 /// ```rust
643 /// # use text_scanner::Scanner;
644 /// let mut scanner = Scanner::new("FooBaaar");
645 ///
646 /// // The next 3 characters matches "Foo", so `Ok` is returned
647 /// assert_eq!(scanner.accept_str("Foo"), Ok((0..3, "Foo")));
648 ///
649 /// // The next 3 characters is "Baa" not "Bar", so `Err` is
650 /// // returned, with the longest matching part, i.e. "Ba"
651 /// assert_eq!(scanner.accept_str("Bar"), Err((3..5, "Ba")));
652 ///
653 /// assert_eq!(scanner.remaining_text(), "Baaar");
654 /// ```
655 ///
656 /// [`accept_char()`]: Self::accept_char
657 /// [cursor]: Self::cursor_pos
658 /// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
659 pub fn accept_str(&mut self, expected: &str) -> ScannerResult<'text, &'text str> {
660 debug_assert!(!expected.is_empty(), "`expected` is empty");
661 if expected.is_empty() {
662 return Err((self.cursor..self.cursor, ""));
663 }
664
665 let start = self.cursor;
666
667 let mut chars = self.peek_iter();
668 for expected in expected.chars() {
669 match chars.next() {
670 Some((r, c)) if c == expected => {
671 self.cursor = r.end;
672 }
673 _ => {
674 let end = self.cursor;
675 self.cursor = start;
676 return Err(self.ranged_text(start..end));
677 }
678 }
679 }
680
681 Ok(self.ranged_text(start..self.cursor))
682 }
683
684 /// Advances the scanner cursor and returns `Ok` with the `&'text str`
685 /// and its [`Range`], if the next characters matches any `&str`
686 /// in `expected`. If not, then an `Err` is returned, with the longest
687 /// matching substring and its [`Range`].
688 ///
689 /// **Warning:** The strings are tested in sequential order, thereby
690 /// if `accept_str_any()` is called with e.g. `["foo", "foobar"]`,
691 /// then `"foobar"` would never be tested, as `"foo"` would be
692 /// matched and return `Ok` beforehand. Instead simply change the
693 /// order of the strings into longest-to-shortest order,
694 /// i.e. `["foo", "foobar"]` into `["foobar", "foo"]`.
695 ///
696 /// **Note:** The returned string slice has the same lifetime as
697 /// the original `text`, so the scanner can continue to be used
698 /// while this exists.
699 ///
700 /// If `expected` only contains 1 character strings, then use
701 /// [`accept_char_any()`] instead.
702 ///
703 /// # Panics
704 ///
705 /// Panics in non-optimized builds, if `expected` is [empty],
706 /// or if `expected` contains an [empty][empty2] `&str`.
707 ///
708 /// In optimized builds <code>Err(([cursor]..[cursor], ""))</code>
709 /// is returned instead, regardless of whether there is any remaining
710 /// characters.
711 ///
712 /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>
713 /// (along with a similar assertion for the strings).
714 ///
715 /// # Example
716 ///
717 /// ```rust
718 /// # use text_scanner::Scanner;
719 /// let mut scanner = Scanner::new("FooBarFooBaaar");
720 ///
721 /// let any = &["Foo", "Bar"];
722 ///
723 /// // The next 3 characters matches "Foo", so `Ok` is returned
724 /// assert_eq!(scanner.accept_str_any(any), Ok((0..3, "Foo")));
725 /// assert_eq!(scanner.accept_str_any(any), Ok((3..6, "Bar")));
726 /// assert_eq!(scanner.accept_str_any(any), Ok((6..9, "Foo")));
727 ///
728 /// // The next 3 characters is "Baa" not "Foo" nor "Bar", so `Err`
729 /// // is returned, with the longest matching part, i.e. "Ba"
730 /// assert_eq!(scanner.accept_str_any(any), Err((9..11, "Ba")));
731 ///
732 /// assert_eq!(scanner.remaining_text(), "Baaar");
733 /// ```
734 ///
735 /// [`accept_char_any()`]: Self::accept_char_any
736 /// [cursor]: Self::cursor_pos
737 /// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
738 /// [empty2]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
739 pub fn accept_str_any(&mut self, expected: &[&str]) -> ScannerResult<'text, &'text str> {
740 debug_assert!(!expected.is_empty(), "`expected` is empty");
741 if expected.is_empty() {
742 return Err((self.cursor..self.cursor, ""));
743 }
744
745 let mut max_end = self.cursor;
746 for expected in expected {
747 match self.accept_str(expected) {
748 Ok((r, s)) => return Ok((r, s)),
749 Err((r, _s)) => {
750 max_end = max_end.max(r.end);
751 }
752 }
753 }
754
755 let r = self.cursor..max_end;
756 Err(self.ranged_text(r))
757 }
758
759 /// Advances the scanner cursor and skips zero-to-many characters,
760 /// **while** `f(c)` returns `true`, where `c` is the [remaining characters]
761 /// in sequential order.
762 ///
763 /// Returns the string slice and its [`Range`], of the matched
764 /// (i.e. skipped) characters.
765 ///
766 /// Returns <code>([cursor]..[cursor], "")</code> if 0 characters
767 /// were matched (i.e. skipped).
768 ///
769 /// **Note:** The returned string slice has the same lifetime as
770 /// the original `text`, so the scanner can continue to be used
771 /// while this exists.
772 ///
773 /// # Example
774 ///
775 /// ```rust
776 /// # use text_scanner::Scanner;
777 /// let mut scanner = Scanner::new("Hello World");
778 ///
779 /// // Skip all alphabetic characters
780 /// assert_eq!(scanner.skip_while(|c| c.is_alphabetic()), (0..5, "Hello"));
781 ///
782 /// // Returns an empty range and an empty string slice
783 /// // since 0 characters were skipped
784 /// assert_eq!(scanner.skip_while(|c| c.is_alphabetic()), (5..5, ""));
785 ///
786 /// // Skip 1 whitespace character
787 /// assert_eq!(scanner.skip_while(char::is_whitespace), (5..6, " "));
788 ///
789 /// assert_eq!(scanner.remaining_text(), "World");
790 /// ```
791 ///
792 /// [remaining characters]: Self::remaining_text
793 /// [cursor]: Self::cursor_pos
794 pub fn skip_while<F>(&mut self, mut f: F) -> ScannerItem<&'text str>
795 where
796 F: FnMut(char) -> bool,
797 {
798 let start = self.cursor;
799
800 for (r, c) in self.peek_iter() {
801 if f(c) {
802 self.cursor = r.end;
803 } else {
804 break;
805 }
806 }
807
808 let r = start..self.cursor;
809 self.ranged_text(r)
810 }
811
812 #[allow(dead_code)]
813 #[inline]
814 pub(crate) fn skip_while_ext<A, Args>(&mut self, mut skip: A) -> ScannerItem<&'text str>
815 where
816 A: ScanMany<Args>,
817 {
818 self.skip_while(|c| skip.scan_many(c))
819 }
820
821 /// Skips zero-to-many characters matching `expected`, same as:
822 ///
823 /// ```rust
824 /// # use text_scanner::Scanner;
825 /// # let mut scanner = Scanner::new("Hello World");
826 /// # let expected = 'H';
827 /// scanner.skip_while(|c| c == expected);
828 /// # assert_eq!(scanner.remaining_text(), "ello World");
829 /// ```
830 #[inline]
831 pub fn skip_while_char(&mut self, expected: char) -> ScannerItem<&'text str> {
832 self.skip_while(|c| c == expected)
833 }
834
835 /// Skips zero-to-many characters, which match any
836 /// character in `expected`, same as:
837 ///
838 /// ```rust
839 /// # use text_scanner::Scanner;
840 /// # let mut scanner = Scanner::new("Hello World");
841 /// # let expected = ['H', 'e', 'L'];
842 /// scanner.skip_while(|c| expected.contains(&c));
843 /// # assert_eq!(scanner.remaining_text(), "llo World");
844 /// ```
845 #[inline]
846 pub fn skip_while_char_any(&mut self, expected: &[char]) -> ScannerItem<&'text str> {
847 self.skip_while(|c| expected.contains(&c))
848 }
849
850 /// Skips zero-to-many characters, while the next characters
851 /// matches the characters in `expected` completely.
852 ///
853 /// **Note:** The returned string slice has the same lifetime as
854 /// the original `text`, so the scanner can continue to be used
855 /// while this exists.
856 ///
857 /// If `expected` is only 1 character, then use [`skip_while_char()`]
858 /// instead.
859 ///
860 /// # Panics
861 ///
862 /// Panics in non-optimized builds, if `expected` is [empty].
863 ///
864 /// In optimized builds 0 characters are skipped, and
865 /// <code>([cursor]..[cursor], "")</code> is returned instead,
866 /// regardless of whether there is any remaining characters.
867 ///
868 /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>.
869 ///
870 /// # Example
871 ///
872 /// ```rust
873 /// # use text_scanner::Scanner;
874 /// let mut scanner = Scanner::new("FooFooFooBarBaz");
875 /// assert_eq!(scanner.skip_while_str("Foo"), (0..9, "FooFooFoo"));
876 /// assert_eq!(scanner.remaining_text(), "BarBaz");
877 /// ```
878 ///
879 /// [`skip_while_char()`]: Self::skip_while_char
880 /// [cursor]: Self::cursor_pos
881 /// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
882 #[inline]
883 pub fn skip_while_str(&mut self, expected: &str) -> ScannerItem<&'text str> {
884 let start = self.cursor;
885
886 while self.accept_str(expected).is_ok() {}
887
888 self.ranged_text(start..self.cursor)
889 }
890
891 /// Skips zero-to-many characters, while the next characters
892 /// matches the characters of any `&str` in `expected` completely.
893 ///
894 /// **Warning:** The strings are tested in sequential order, thereby
895 /// if `skip_while_str_any()` is called with e.g. `["foo", "foobar"]`,
896 /// then `"foobar"` would never be tested, as `"foo"` would be
897 /// matched and continue beforehand. Instead simply change the
898 /// order of the strings into longest-to-shortest order,
899 /// i.e. `["foo", "foobar"]` into `["foobar", "foo"]`.
900 ///
901 /// **Note:** The returned string slice has the same lifetime as
902 /// the original `text`, so the scanner can continue to be used
903 /// while this exists.
904 ///
905 /// If `expected` only contains 1 character strings, then use
906 /// [`skip_while_char_any()`] instead.
907 ///
908 /// # Panics
909 ///
910 /// Panics in non-optimized builds, if `expected` is [empty],
911 /// or if `expected` contains an [empty][empty2] `&str`.
912 ///
913 /// In optimized builds 0 characters are skipped, and
914 /// <code>([cursor]..[cursor], "")</code> is returned instead,
915 /// regardless of whether there is any remaining characters.
916 ///
917 /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>
918 /// (along with a similar assertion for the strings).
919 ///
920 /// # Example
921 ///
922 /// ```rust
923 /// # use text_scanner::Scanner;
924 /// let mut scanner = Scanner::new("FooBarFooBarFooBaaarBaz");
925 /// assert_eq!(scanner.skip_while_str_any(&["Foo", "Bar"]), (0..15, "FooBarFooBarFoo"));
926 /// assert_eq!(scanner.remaining_text(), "BaaarBaz");
927 /// ```
928 ///
929 /// [`skip_while_char_any()`]: Self::skip_while_char_any
930 /// [cursor]: Self::cursor_pos
931 /// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
932 /// [empty2]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
933 #[inline]
934 pub fn skip_while_str_any(&mut self, expected: &[&str]) -> ScannerItem<&'text str> {
935 let start = self.cursor;
936
937 while self.accept_str_any(expected).is_ok() {}
938
939 self.ranged_text(start..self.cursor)
940 }
941
942 /// Advances the scanner cursor and skips zero-to-many characters,
943 /// **while** `f(c)` returns `false`, where `c` is the [remaining characters]
944 /// in sequential order.
945 ///
946 /// Returns the string slice and its [`Range`], of the matched
947 /// (i.e. skipped) characters.
948 ///
949 /// Returns <code>([cursor]..[cursor], "")</code> if 0 characters
950 /// were matched (i.e. skipped).
951 ///
952 /// **Note:** The returned string slice has the same lifetime as
953 /// the original `text`, so the scanner can continue to be used
954 /// while this exists.
955 ///
956 /// # Example
957 ///
958 /// ```rust
959 /// # use text_scanner::Scanner;
960 /// let mut scanner = Scanner::new("Hello World");
961 ///
962 /// // Skip all characters until a whitespace is found
963 /// assert_eq!(scanner.skip_until(|c| c.is_whitespace()), (0..5, "Hello"));
964 ///
965 /// // Returns an empty range and an empty string slice
966 /// // since 0 characters were skipped
967 /// assert_eq!(scanner.skip_until(|c| c.is_whitespace()), (5..5, ""));
968 ///
969 /// // Skip 1 whitespace character
970 /// assert_eq!(scanner.skip_until(char::is_alphabetic), (5..6, " "));
971 ///
972 /// assert_eq!(scanner.remaining_text(), "World");
973 /// ```
974 ///
975 /// [remaining characters]: Self::remaining_text
976 /// [cursor]: Self::cursor_pos
977 #[inline]
978 pub fn skip_until<F>(&mut self, mut f: F) -> ScannerItem<&'text str>
979 where
980 F: FnMut(char) -> bool,
981 {
982 self.skip_while(|c| !f(c))
983 }
984
985 #[allow(dead_code)]
986 #[inline]
987 pub(crate) fn skip_until_ext<A, Args>(&mut self, mut skip: A) -> ScannerItem<&'text str>
988 where
989 A: ScanMany<Args>,
990 {
991 self.skip_until(|c| skip.scan_many(c))
992 }
993
994 /// Skips zero-to-many characters, until the next character
995 /// matches `expected`, same as:
996 ///
997 /// ```rust
998 /// # use text_scanner::Scanner;
999 /// # let mut scanner = Scanner::new("Hello World");
1000 /// # let expected = ' ';
1001 /// scanner.skip_until(|c| c == expected);
1002 /// # assert_eq!(scanner.remaining_text(), " World");
1003 /// ```
1004 #[inline]
1005 pub fn skip_until_char(&mut self, expected: char) -> ScannerItem<&'text str> {
1006 self.skip_until(|c| c == expected)
1007 }
1008
1009 /// Skips zero-to-many characters, until the next character
1010 /// match any in `expected`, same as:
1011 ///
1012 /// ```rust
1013 /// # use text_scanner::Scanner;
1014 /// # let mut scanner = Scanner::new("Hello World");
1015 /// # let expected = [' ', 'o'];
1016 /// scanner.skip_until(|c| expected.contains(&c));
1017 /// # assert_eq!(scanner.remaining_text(), "o World");
1018 /// ```
1019 #[inline]
1020 pub fn skip_until_char_any(&mut self, expected: &[char]) -> ScannerItem<&'text str> {
1021 self.skip_until(|c| expected.contains(&c))
1022 }
1023
1024 /// Skips zero-to-many characters, until the next characters
1025 /// matches the characters in `expected` completely.
1026 ///
1027 /// **Note:** The returned string slice has the same lifetime as
1028 /// the original `text`, so the scanner can continue to be used
1029 /// while this exists.
1030 ///
1031 /// If `expected` is only 1 character, then use [`skip_until_char()`]
1032 /// instead.
1033 ///
1034 /// # Panics
1035 ///
1036 /// Panics in non-optimized builds, if `expected` is [empty].
1037 ///
1038 /// In optimized builds 0 characters are skipped, and
1039 /// <code>([cursor]..[cursor], "")</code> is returned instead,
1040 /// regardless of whether there is any remaining characters.
1041 ///
1042 /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>.
1043 ///
1044 /// # Example
1045 ///
1046 /// ```rust
1047 /// # use text_scanner::Scanner;
1048 /// let mut scanner = Scanner::new("FooFooFooBarBaz");
1049 /// assert_eq!(scanner.skip_until_str("Bar"), (0..9, "FooFooFoo"));
1050 /// assert_eq!(scanner.remaining_text(), "BarBaz");
1051 /// ```
1052 ///
1053 /// [`skip_until_char()`]: Self::skip_until_char
1054 /// [cursor]: Self::cursor_pos
1055 /// [empty]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
1056 pub fn skip_until_str(&mut self, expected: &str) -> ScannerItem<&'text str> {
1057 let remaining_text = self.remaining_text();
1058 let end = remaining_text
1059 .find(expected)
1060 .unwrap_or(remaining_text.len());
1061
1062 let start = self.cursor;
1063 self.cursor = end;
1064
1065 self.ranged_text(start..end)
1066 }
1067
1068 /// Skips zero-to-many characters, until the next characters
1069 /// matches the characters of any `&str` in `expected` completely.
1070 ///
1071 /// **Warning:** The strings are tested in sequential order, thereby
1072 /// if `skip_until_str_any()` is called with e.g. `["foo", "foobar"]`,
1073 /// then `"foobar"` would never be tested, as `"foo"` would be
1074 /// matched and continue beforehand. Instead simply change the
1075 /// order of the strings into longest-to-shortest order,
1076 /// i.e. `["foo", "foobar"]` into `["foobar", "foo"]`.
1077 ///
1078 /// **Note:** The returned string slice has the same lifetime as
1079 /// the original `text`, so the scanner can continue to be used
1080 /// while this exists.
1081 ///
1082 /// If `expected` only contains 1 character strings, then use
1083 /// [`skip_until_char_any()`] instead.
1084 ///
1085 /// # Panics
1086 ///
1087 /// Panics in non-optimized builds, if `expected` is [empty],
1088 /// or if `expected` contains an [empty][empty2] `&str`.
1089 ///
1090 /// In optimized builds 0 characters are skipped, and
1091 /// <code>([cursor]..[cursor], "")</code> is returned instead,
1092 /// regardless of whether there is any remaining characters.
1093 ///
1094 /// In short there is a <code>[debug_assert!]\(!expected.is_empty())</code>
1095 /// (along with a similar assertion for the strings).
1096 ///
1097 /// # Example
1098 ///
1099 /// ```rust
1100 /// # use text_scanner::Scanner;
1101 /// let mut scanner = Scanner::new("FooBarFooBarFooBaaarBaz");
1102 /// assert_eq!(scanner.skip_until_str_any(&["Baaar", "Baz"]), (0..15, "FooBarFooBarFoo"));
1103 /// assert_eq!(scanner.remaining_text(), "BaaarBaz");
1104 /// ```
1105 ///
1106 /// [`skip_until_char_any()`]: Self::skip_until_char_any
1107 /// [cursor]: Self::cursor_pos
1108 /// [empty]: https://doc.rust-lang.org/std/primitive.slice.html#method.is_empty
1109 /// [empty2]: https://doc.rust-lang.org/std/primitive.str.html#method.is_empty
1110 pub fn skip_until_str_any(&mut self, expected: &[&str]) -> ScannerItem<&'text str> {
1111 let start = self.cursor;
1112
1113 while self.has_remaining_text() {
1114 if let Ok((r, _)) = self.accept_str_any(expected) {
1115 self.cursor = r.start;
1116 break;
1117 }
1118
1119 _ = self.next();
1120 }
1121
1122 self.ranged_text(start..self.cursor)
1123 }
1124
1125 /// Skips zero-to-many characters, while the next character
1126 /// is a [whitespace], same as:
1127 ///
1128 /// ```rust
1129 /// # use text_scanner::Scanner;
1130 /// # let mut scanner = Scanner::new(" Hello World");
1131 /// scanner.skip_while(char::is_whitespace);
1132 /// # assert_eq!(scanner.remaining_text(), "Hello World");
1133 /// ```
1134 ///
1135 /// [whitespace]: https://doc.rust-lang.org/std/primitive.char.html#method.is_whitespace
1136 #[inline]
1137 pub fn skip_whitespace(&mut self) -> ScannerItem<&'text str> {
1138 self.skip_while(char::is_whitespace)
1139 }
1140
1141 /// Advances the cursor if `f()` returns `Ok`, otherwise on `Err` the
1142 /// cursor position is backtracked to before `f()` was called.
1143 ///
1144 /// Utility for scanning [tokens], where an unexpected character during
1145 /// scanning, should restore the cursor position before the the scan
1146 /// was started.
1147 ///
1148 /// Additionally, returns `Err` if `f()` returns `Ok`, without advancing
1149 /// the cursor position.
1150 ///
1151 /// # Example
1152 ///
1153 /// ```rust
1154 /// # use text_scanner::{Scanner, ScannerItem};
1155 /// fn scan_word<'text>(scanner: &mut Scanner<'text>) -> Result<(), ScannerItem<&'text str>> {
1156 /// // Get next char if alphabetic or return err
1157 /// let (first, _c) = scanner.accept_if(char::is_alphabetic)?;
1158 /// // Skip zero-to-many alphabetic characters
1159 /// let (last, _s) = scanner.skip_while(char::is_alphabetic);
1160 /// Ok(())
1161 /// }
1162 ///
1163 /// let text = "Hello World";
1164 /// let mut scanner = Scanner::new(text);
1165 ///
1166 /// assert_eq!(scanner.scan_with(scan_word), Ok((0..5, "Hello")));
1167 /// assert_eq!(scanner.scan_with(scan_word), Err((5..5, "")));
1168 /// assert_eq!(scanner.next(), Ok((5..6, ' ')));
1169 /// assert_eq!(scanner.scan_with(scan_word), Ok((6..11, "World")));
1170 /// # assert_eq!(scanner.remaining_text(), "");
1171 /// ```
1172 ///
1173 /// [tokens]: https://en.wikipedia.org/wiki/Lexical_analysis#Token
1174 #[inline]
1175 pub fn scan_with<F>(&mut self, f: F) -> ScannerResult<'text, &'text str>
1176 where
1177 F: FnOnce(&mut Self) -> ScanResult<'text>,
1178 {
1179 let start = self.cursor;
1180
1181 let mut scanner = self.clone();
1182
1183 match f(&mut scanner) {
1184 Ok(()) => {
1185 self.cursor = scanner.cursor;
1186
1187 if self.cursor == start {
1188 return Err((start..start, ""));
1189 }
1190
1191 let r = start..self.cursor;
1192 Ok(self.ranged_text(r))
1193 }
1194 Err((last, _last_s)) => {
1195 let r = self.cursor..last.end;
1196 Err(self.ranged_text(r))
1197 }
1198 }
1199 }
1200
1201 /// Calls `f` with a <code>&mut [Scanner]</code> of this
1202 /// <code>&[Scanner]</code>, i.e. a [`Scanner`] with the
1203 /// same [`text()`], [`remaining_text()`], and [`cursor_pos()`].
1204 ///
1205 /// [`text()`]: Self::text
1206 /// [`remaining_text()`]: Self::remaining_text
1207 /// [`cursor_pos()`]: Self::cursor_pos
1208 pub fn peeking<T, F>(&self, f: F) -> T
1209 where
1210 F: FnOnce(&mut Self) -> T,
1211 {
1212 let mut scanner = self.clone();
1213 f(&mut scanner)
1214 }
1215
1216 /// This function accepts the following formats:
1217 ///
1218 /// - `0`
1219 /// - `1`
1220 /// - `5`
1221 /// - `123`
1222 /// - `00000`
1223 ///
1224 /// The following is **not** accepted by this function:
1225 ///
1226 /// - `0__000__0`
1227 /// - `_`
1228 /// - `___`
1229 /// - `_123`
1230 ///
1231 /// See also:
1232 /// - [`scan_digits_or_underscores()`]
1233 /// - [`scan_non_zero_digits()`]
1234 /// - [`scan_non_zero_digits_or_underscores()`]
1235 /// - [`scan_rust_int_dec()`]
1236 /// - [`scan_c_int_dec()`]
1237 /// - [`scan_python_int_dec()`]
1238 /// - _and [more extensions]_
1239 ///
1240 /// # Grammar
1241 ///
1242 /// The following [EBNF] grammar represents what this method accepts:
1243 ///
1244 /// ```text
1245 /// Digits ::= Digit Digit*
1246 /// Digit ::= [0-9]
1247 /// ```
1248 ///
1249 /// [`scan_digits_or_underscores()`]: Self::scan_digits_or_underscores
1250 /// [`scan_non_zero_digits()`]: Self::scan_non_zero_digits
1251 /// [`scan_non_zero_digits_or_underscores()`]: Self::scan_non_zero_digits_or_underscores
1252 /// [`scan_rust_int_dec()`]: ext::RustScannerExt::scan_rust_int_dec
1253 /// [`scan_c_int_dec()`]: ext::CScannerExt::scan_c_int_dec
1254 /// [`scan_python_int_dec()`]: ext::PythonScannerExt::scan_python_int_dec
1255 /// [more extensions]: ext
1256 /// [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
1257 pub fn scan_digits(&mut self) -> ScannerResult<'text, &'text str> {
1258 let (first, _c) = self.accept_if_ext(char::is_ascii_digit)?;
1259 let (last, _s) = self.skip_while_ext(char::is_ascii_digit);
1260 Ok(self.ranged_text(first.start..last.end))
1261 }
1262
1263 /// This function accepts the following formats:
1264 ///
1265 /// - `0`
1266 /// - `1`
1267 /// - `5_`
1268 /// - `0000`
1269 /// - `12345`
1270 /// - `1_2_3`
1271 /// - `0__000__0`
1272 ///
1273 /// The following is **not** accepted by this function:
1274 ///
1275 /// - `_`
1276 /// - `___`
1277 /// - `_123`
1278 ///
1279 /// See also:
1280 /// - [`scan_digits()`]
1281 /// - [`scan_non_zero_digits()`]
1282 /// - [`scan_non_zero_digits_or_underscores()`]
1283 /// - [`scan_rust_int_dec()`]
1284 /// - [`scan_c_int_dec()`]
1285 /// - [`scan_python_int_dec()`]
1286 /// - _and [more extensions]_
1287 ///
1288 /// # Grammar
1289 ///
1290 /// The following [EBNF] grammar represents what this method accepts:
1291 ///
1292 /// ```text
1293 /// Digits ::= Digit ( Digit | '_' )*
1294 /// Digit ::= [0-9]
1295 /// ```
1296 ///
1297 /// [`scan_digits()`]: Self::scan_digits
1298 /// [`scan_non_zero_digits()`]: Self::scan_non_zero_digits
1299 /// [`scan_non_zero_digits_or_underscores()`]: Self::scan_non_zero_digits_or_underscores
1300 /// [`scan_rust_int_dec()`]: ext::RustScannerExt::scan_rust_int_dec
1301 /// [`scan_c_int_dec()`]: ext::CScannerExt::scan_c_int_dec
1302 /// [`scan_python_int_dec()`]: ext::PythonScannerExt::scan_python_int_dec
1303 /// [more extensions]: ext
1304 /// [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
1305 pub fn scan_digits_or_underscores(&mut self) -> ScannerResult<'text, &'text str> {
1306 let (first, _c) = self.accept_if_ext(char::is_ascii_digit)?;
1307 let (last, _s) = self.skip_while(|c| c.is_ascii_digit() || (c == '_'));
1308 Ok(self.ranged_text(first.start..last.end))
1309 }
1310
1311 /// This function accepts the following formats:
1312 ///
1313 /// - `0`
1314 /// - `1`
1315 /// - `5`
1316 /// - `123`
1317 /// - `102030`
1318 ///
1319 /// The following is **not** accepted by this function:
1320 ///
1321 /// - `0000`
1322 /// - `01`
1323 /// - `012345`
1324 /// - `0__000__0`
1325 /// - `_`
1326 /// - `___`
1327 /// - `_123`
1328 ///
1329 /// See also:
1330 /// - [`scan_digits()`]
1331 /// - [`scan_digits_or_underscores()`]
1332 /// - [`scan_non_zero_digits_or_underscores()`]
1333 /// - [`scan_rust_int_dec()`]
1334 /// - [`scan_c_int_dec()`]
1335 /// - [`scan_python_int_dec()`]
1336 /// - _and [more extensions]_
1337 ///
1338 /// # Grammar
1339 ///
1340 /// The following [EBNF] grammar represents what this method accepts:
1341 ///
1342 /// ```text
1343 /// Digits ::= ( '0' |
1344 /// NonZeroDigit Digit* )
1345 /// NonZeroDigit ::= [1-9]
1346 /// Digit ::= [0-9]
1347 /// ```
1348 ///
1349 /// [`scan_digits()`]: Self::scan_digits
1350 /// [`scan_digits_or_underscores()`]: Self::scan_digits_or_underscores
1351 /// [`scan_non_zero_digits_or_underscores()`]: Self::scan_non_zero_digits_or_underscores
1352 /// [`scan_rust_int_dec()`]: ext::RustScannerExt::scan_rust_int_dec
1353 /// [`scan_c_int_dec()`]: ext::CScannerExt::scan_c_int_dec
1354 /// [`scan_python_int_dec()`]: ext::PythonScannerExt::scan_python_int_dec
1355 /// [more extensions]: ext
1356 /// [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
1357 pub fn scan_non_zero_digits(&mut self) -> ScannerResult<'text, &'text str> {
1358 self.scan_with(|scanner| {
1359 match scanner.accept_char('0') {
1360 Ok((r, _)) => {
1361 if scanner.accept_if_ext(char::is_ascii_digit).is_ok() {
1362 return Err(scanner.ranged_text(r));
1363 }
1364 }
1365 _ => {
1366 scanner.accept_if(char::is_ascii_non_zero_digit)?;
1367 scanner.skip_while_ext(char::is_ascii_digit);
1368 }
1369 }
1370 Ok(())
1371 })
1372 }
1373
1374 /// This function accepts the following formats:
1375 ///
1376 /// - `0`
1377 /// - `1`
1378 /// - `5_`
1379 /// - `123`
1380 /// - `102030`
1381 /// - `1_2_3`
1382 /// - `0___`
1383 /// - `12345__`
1384 ///
1385 /// The following is **not** accepted by this function:
1386 ///
1387 /// - `0000`
1388 /// - `01`
1389 /// - `012345`
1390 /// - `0__000__0`
1391 /// - `_`
1392 /// - `___`
1393 /// - `_123`
1394 ///
1395 /// See also:
1396 /// - [`scan_digits()`]
1397 /// - [`scan_digits_or_underscores()`]
1398 /// - [`scan_non_zero_digits()`]
1399 /// - [`scan_rust_int_dec()`]
1400 /// - [`scan_c_int_dec()`]
1401 /// - [`scan_python_int_dec()`]
1402 /// - _and [more extensions]_
1403 ///
1404 /// # Grammar
1405 ///
1406 /// The following [EBNF] grammar represents what this method accepts:
1407 ///
1408 /// ```text
1409 /// Digits ::= ( '0' |
1410 /// NonZeroDigit ( Digit | '_' )* )
1411 /// NonZeroDigit ::= [1-9]
1412 /// Digit ::= [0-9]
1413 /// ```
1414 ///
1415 /// [`scan_digits()`]: Self::scan_digits
1416 /// [`scan_digits_or_underscores()`]: Self::scan_digits_or_underscores
1417 /// [`scan_non_zero_digits()`]: Self::scan_non_zero_digits
1418 /// [`scan_rust_int_dec()`]: ext::RustScannerExt::scan_rust_int_dec
1419 /// [`scan_c_int_dec()`]: ext::CScannerExt::scan_c_int_dec
1420 /// [`scan_python_int_dec()`]: ext::PythonScannerExt::scan_python_int_dec
1421 /// [more extensions]: ext
1422 /// [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
1423 pub fn scan_non_zero_digits_or_underscores(&mut self) -> ScannerResult<'text, &'text str> {
1424 self.scan_with(|scanner| {
1425 match scanner.accept_char('0') {
1426 Ok((first, _)) => {
1427 let (last, _) = scanner.skip_while_char('_');
1428 if scanner.accept_if_ext(char::is_ascii_digit).is_ok() {
1429 return Err(scanner.ranged_text(first.start..last.end));
1430 }
1431 }
1432 _ => {
1433 scanner.accept_if(char::is_ascii_non_zero_digit)?;
1434 scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
1435 }
1436 }
1437 Ok(())
1438 })
1439 }
1440}
1441
// Kept crate-private for now: exporting it publicly would force callers
// of e.g. `accept_if()` to annotate closure types more often than desired.
/// A predicate that is evaluated against a single character.
///
/// `Args` tells the supported callable shapes apart, i.e. predicates
/// taking a `char` versus predicates taking a `&char`.
pub(crate) trait ScanOne<Args> {
    /// Consumes the predicate and evaluates it against `next`.
    fn scan_one(self, next: char) -> bool;
}
1447
1448impl<F> ScanOne<char> for F
1449where
1450 F: FnOnce(char) -> bool,
1451{
1452 #[inline]
1453 fn scan_one(self, next: char) -> bool {
1454 self(next)
1455 }
1456}
1457
1458impl<F> ScanOne<&char> for F
1459where
1460 F: FnOnce(&char) -> bool,
1461{
1462 #[inline]
1463 fn scan_one(self, next: char) -> bool {
1464 self(&next)
1465 }
1466}
1467
1468// Currently not publicly exported, as using e.g. `skip_while()` with a
1469// closure would require specifying types more often than desired.
1470pub(crate) trait ScanMany<Args>: ScanOne<Args> {
1471 fn scan_many(&mut self, next: char) -> bool;
1472}
1473
1474impl<F> ScanMany<char> for F
1475where
1476 F: FnMut(char) -> bool,
1477{
1478 #[inline]
1479 fn scan_many(&mut self, next: char) -> bool {
1480 self(next)
1481 }
1482}
1483
1484impl<F> ScanMany<&char> for F
1485where
1486 F: FnMut(&char) -> bool,
1487{
1488 #[inline]
1489 fn scan_many(&mut self, next: char) -> bool {
1490 self(&next)
1491 }
1492}
1493
/// Crate-internal ASCII digit classification helpers.
///
/// The methods take `self` by value, hence the allowed
/// `wrong_self_convention` lint for the `is_*` names.
#[allow(clippy::wrong_self_convention)]
pub(crate) trait CharExt {
    /// Whether this is one of the ASCII digits `1` through `9`.
    fn is_ascii_non_zero_digit(self) -> bool;

    /// Whether this is one of the ASCII digits `0` through `7`.
    // `std::char::is_ascii_octdigit` is unstable
    // NOTE(review): confirm whether that still holds for the supported toolchains
    fn is_ascii_octdigit(self) -> bool;

    /// Whether this is the ASCII digit `0` or `1`.
    fn is_ascii_bindigit(self) -> bool;
}
1503
1504impl CharExt for char {
1505 #[inline]
1506 fn is_ascii_non_zero_digit(self) -> bool {
1507 matches!(self, '1'..='9')
1508 }
1509
1510 #[inline]
1511 fn is_ascii_octdigit(self) -> bool {
1512 matches!(self, '0'..='7')
1513 }
1514
1515 #[inline]
1516 fn is_ascii_bindigit(self) -> bool {
1517 matches!(self, '0' | '1')
1518 }
1519}
1520
1521// If you are looking for tests, then the majority
1522// are implemented in the form of doc tests
1523
#[cfg(test)]
mod tests {
    use super::*;

    /// `accept_str_any()` tries the strings in the given order, so a
    /// shorter prefix listed first wins over a longer string.
    #[test]
    fn test_accept_str_any_order() {
        let mut scanner = Scanner::new("FooBarBaz");

        let prefix_first = scanner.accept_str_any(&["Foo", "FooBar"]);
        assert_eq!(prefix_first, Ok((0..3, "Foo")));
        assert_eq!(scanner.remaining_text(), "BarBaz");

        scanner.reset();

        let longest_first = scanner.accept_str_any(&["FooBar", "Foo"]);
        assert_eq!(longest_first, Ok((0..6, "FooBar")));
        assert_eq!(scanner.remaining_text(), "Baz");
    }

    #[test]
    fn test_scan_digits() {
        let accepted = ["0", "1", "0000", "0123", "123", "123456789", "0123456789"];

        // Once as the whole text, and once with trailing text
        assert_valid_cases!(scan_digits, accepted);
        assert_valid_cases!(scan_digits, accepted, "remaining");
    }

    #[test]
    fn test_scan_digits_invalid() {
        let rejected = ["_", "___", "_123"];

        assert_invalid_cases!(scan_digits, rejected);
    }

    #[test]
    fn test_scan_digits_or_underscores() {
        #[rustfmt::skip]
        let accepted = [
            "0", "1", "5_",
            "0000", "0123", "123", "1_2_3",
            "123456789", "0123456789",
            "0__000__0",
        ];

        // Once as the whole text, and once with trailing text
        assert_valid_cases!(scan_digits_or_underscores, accepted);
        assert_valid_cases!(scan_digits_or_underscores, accepted, "remaining");
    }

    #[test]
    fn test_scan_digits_or_underscores_invalid() {
        let rejected = ["_", "___", "_123"];

        assert_invalid_cases!(scan_digits_or_underscores, rejected);
    }

    #[test]
    fn test_scan_non_zero_digits() {
        let accepted = ["0", "1", "5", "123", "102030"];

        // Once as the whole text, and once with trailing text
        assert_valid_cases!(scan_non_zero_digits, accepted);
        assert_valid_cases!(scan_non_zero_digits, accepted, "remaining");
    }

    #[test]
    fn test_scan_non_zero_digits_invalid() {
        let rejected = ["0000", "01", "012345", "_", "___", "_123"];

        assert_invalid_cases!(scan_non_zero_digits, rejected);
    }

    #[test]
    fn test_scan_non_zero_digits_or_underscores() {
        let accepted = ["0", "1", "5_", "123", "102030", "1_2_3", "0___", "12345__"];

        // Once as the whole text, and once with trailing text
        assert_valid_cases!(scan_non_zero_digits_or_underscores, accepted);
        assert_valid_cases!(scan_non_zero_digits_or_underscores, accepted, "remaining");
    }

    #[test]
    fn test_scan_non_zero_digits_or_underscores_invalid() {
        #[rustfmt::skip]
        let rejected = [
            "00", "0000", "01", "012345",
            "0_0", "0__000__0",
            "_", "___", "_123", "_0123",
        ];

        assert_invalid_cases!(scan_non_zero_digits_or_underscores, rejected);
    }
}