Struct text_scanner::Scanner

source ·
pub struct Scanner<'text> { /* private fields */ }
Expand description

A Scanner is a UTF-8 char text scanner, implementing various methods for scanning a string slice, as well as backtracking capabilities, which can be used to implement lexers for tokenizing text or code. It is essentially just a fancy wrapper around CharRanges.

Note: Cloning Scanner is essentially a copy, as it just contains a &str and a usize for its cursor. However, Copy is not implemented, to avoid accidentally copying immutable Scanners.

Implementations§

source§

impl<'text> Scanner<'text>

source

pub fn new(text: &'text str) -> Self

Constructs a new Scanner with text.

Examples found in repository?
examples/math.rs (line 101)
99
100
101
102
103
104
    fn new(text: &'text str) -> Self {
        Self {
            scanner: Scanner::new(text),
            next: None,
        }
    }
source

pub fn text(&self) -> &'text str

Returns the text the scanner was constructed with.

Note: This has the same lifetime as the original text, so the scanner can continue to be used while this exists.

Example
let mut scanner = Scanner::new("Hello World");

assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((1..2, 'e')));

assert_eq!(scanner.text(), "Hello World");
assert_eq!(scanner.remaining_text(), "llo World");
Examples found in repository?
examples/math.rs (line 31)
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
    fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
        scanner.skip_whitespace();

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
            let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
            return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
        }

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
            let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());

            if scanner.accept_char('.').is_ok() {
                let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Float(f)));
            } else {
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Int(f)));
            }
        }

        if let Some(sym) = Sym::parse_token(scanner) {
            return Ok(Some(Self::Sym(sym)));
        }

        Ok(None)
    }
source

pub fn remaining_text(&self) -> &'text str

Returns the remaining text of the scanner, i.e. the text() after cursor_pos(), in other words self.text()[self.cursor_pos()..].

Note: This has the same lifetime as the original text, so the scanner can continue to be used while this exists.

Example
let mut scanner = Scanner::new("Hello World");

assert_eq!(scanner.text(), "Hello World");
assert_eq!(scanner.remaining_text(), "Hello World");

assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((1..2, 'e')));

assert_eq!(scanner.text(), "Hello World");
assert_eq!(scanner.remaining_text(), "llo World");
source

pub fn has_remaining_text(&self) -> bool

Returns true if remaining_text() has text, i.e. if it is not empty.

Example
let mut scanner = Scanner::new("Foo");

assert_eq!(scanner.remaining_text(), "Foo");
assert_eq!(scanner.has_remaining_text(), true);

assert_eq!(scanner.next(), Ok((0..1, 'F')));
assert_eq!(scanner.next(), Ok((1..2, 'o')));
assert_eq!(scanner.next(), Ok((2..3, 'o')));

assert_eq!(scanner.remaining_text(), "");
assert_eq!(scanner.has_remaining_text(), false);
source

pub fn ranged_text(&self, range: Range<usize>) -> ScannerItem<&'text str>

Utility for turning a Range<usize> into (Range<usize>, &'text str), where range holds the start and end byte indices relative to text().

The same as (range.clone(), &self.text()[range]).

source

pub fn cursor_pos(&self) -> usize

Returns the current cursor position of the scanner, i.e. the byte offset into text().

source

pub fn set_cursor_pos(&mut self, pos: usize) -> usize

Replaces the current cursor position with pos, while returning the old cursor position.

Panics

If pos is not at a valid UTF-8 sequence boundary, then the next operation using the cursor position will panic.

Example
let mut scanner = Scanner::new("Hello World");

assert_eq!(scanner.next(), Ok((0..1, 'H')));

let backtrack = scanner.cursor_pos();

assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.next(), Ok((2..3, 'l')));
assert_eq!(scanner.next(), Ok((3..4, 'l')));

scanner.set_cursor_pos(backtrack);

assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.next(), Ok((2..3, 'l')));
assert_eq!(scanner.next(), Ok((3..4, 'l')));
source

pub fn reset(&mut self) -> usize

Resets the cursor position to the start, while returning the old cursor position.

Example
let old_pos = scanner.reset();
// same as
let old_pos = scanner.set_cursor_pos(0);
source

pub fn next(&mut self) -> ScannerResult<'text, char>

Advances the scanner cursor and returns the next char and its Range, if any.

Example
let mut scanner = Scanner::new("Hello");

assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((1..2, 'e')));

assert_eq!(scanner.remaining_text(), "llo");

assert_eq!(scanner.next(), Ok((2..3, 'l')));
assert_eq!(scanner.next(), Ok((3..4, 'l')));
assert_eq!(scanner.next(), Ok((4..5, 'o')));
assert_eq!(scanner.next(), Err((5..5, "")));

assert_eq!(scanner.remaining_text(), "");
source

pub fn peek(&self) -> ScannerResult<'text, char>

Returns the next char and its Range, if any, without advancing the cursor position.

See also peek_str(), peek_nth(), and peek_iter().

Example
let mut scanner = Scanner::new("Hello World");

assert_eq!(scanner.peek(), Ok((0..1, 'H')));
assert_eq!(scanner.peek(), Ok((0..1, 'H')));

assert_eq!(scanner.next(), Ok((0..1, 'H')));

assert_eq!(scanner.peek(), Ok((1..2, 'e')));
assert_eq!(scanner.peek(), Ok((1..2, 'e')));

assert_eq!(scanner.remaining_text(), "ello World");
source

pub fn peek_nth(&self, n: usize) -> ScannerResult<'text, char>

Returns the nth char and its Range, if any, without advancing the cursor position.

See also peek_str() and peek_iter().

Example
let mut scanner = Scanner::new("Hello World");

assert_eq!(scanner.peek_nth(0), Ok((0..1, 'H')));
assert_eq!(scanner.peek_nth(1), Ok((1..2, 'e')));
assert_eq!(scanner.peek_nth(2), Ok((2..3, 'l')));

assert_eq!(scanner.peek_nth(6), Ok((6..7, 'W')));

assert_eq!(scanner.next(), Ok((0..1, 'H')));

assert_eq!(scanner.remaining_text(), "ello World");
source

pub fn peek_iter(&self) -> CharRangesOffset<'text>

Returns an iterator that produces all the remaining chars and their Ranges, if any, without advancing the cursor position.

Note: This has the same lifetime as the original text, so the scanner can continue to be used while this exists.

See also peek_str().

Example
let mut scanner = Scanner::new("Hello World");

assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.remaining_text(), "ello World");

let mut peek = scanner.peek_iter();
assert_eq!(peek.next(), Some((1..2, 'e')));
assert_eq!(peek.next(), Some((2..3, 'l')));
assert_eq!(peek.next(), Some((3..4, 'l')));
assert_eq!(scanner.remaining_text(), "ello World");

assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.next(), Ok((2..3, 'l')));
assert_eq!(scanner.remaining_text(), "lo World");
source

pub fn next_str(&mut self, chars: usize) -> ScannerResult<'text, &'text str>

Advances the scanner cursor and returns Ok with a string slice of the following n characters, where n is the chars argument. If fewer than n characters are remaining, then Err is returned, with the remaining text, if any, without advancing the cursor.

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

Bytes vs Characters

The Ok string slice contains n characters, i.e. where n matches str.chars().count() and not len() (which is the byte length of a string slice).

Consider "foo" vs "🦀🦀🦀", both string slices contain 3 characters. However "foo" has a length of 3 bytes, while "🦀🦀🦀" has a length of 12 bytes, when encoded in UTF-8.

Panics

Panics in non-optimized builds, if n is 0.

In optimized builds Err((cursor..cursor, "")) is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert_ne!(n, 0).

Example
let mut scanner = Scanner::new("Foo Bar Baz");

assert_eq!(scanner.next_str(3), Ok((0..3, "Foo")));
assert_eq!(scanner.next_str(3), Ok((3..6, " Ba")));
assert_eq!(scanner.next_str(3), Ok((6..9, "r B")));
// Less than 3 characters are remaining, so `Err`
// is returned
assert_eq!(scanner.next_str(3), Err((9..11, "az")));
source

pub fn peek_str(&self, n: usize) -> ScannerResult<'text, &'text str>

Returns Ok with a string slice of the following n characters, if any, without advancing the cursor. If less than n are remaining, then Err is returned, with the remaining text.

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

Bytes vs Characters

The Ok string slice contains n characters, i.e. where n matches str.chars().count() and not len() (which is the byte length of a string slice).

Consider "foo" vs "🦀🦀🦀", both string slices contain 3 characters. However "foo" has a length of 3 bytes, while "🦀🦀🦀" has a length of 12 bytes, when encoded in UTF-8.

Panics

Panics in non-optimized builds, if n is 0.

In optimized builds Err((cursor..cursor, "")) is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert_ne!(n, 0).

Example
let mut scanner = Scanner::new("Hello 👋 World 🌏");

assert_eq!(scanner.remaining_text(), "Hello 👋 World 🌏");
// The emoji is a multi-byte character, thereby the returned
// range has a length of 10 and not 7.
assert_eq!(scanner.peek_str(7), Ok((0..10, "Hello 👋")));

assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((1..2, 'e')));

assert_eq!(scanner.remaining_text(), "llo 👋 World 🌏");
assert_eq!(scanner.peek_str(7), Ok((2..12, "llo 👋 W")));
source

pub fn accept_if<F>(&mut self, f: F) -> ScannerResult<'text, char> where F: FnOnce(char) -> bool,

Advances the scanner cursor and returns the next char and its Range, if f(c) returns true where c is the next character.

Example
let mut scanner = Scanner::new("Hello World");

assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((0..1, 'H')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((1..2, 'e')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((2..3, 'l')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((3..4, 'l')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((4..5, 'o')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Err((5..5, "")));

assert_eq!(scanner.remaining_text(), " World");
Examples found in repository?
examples/math.rs (line 29)
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
    fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
        scanner.skip_whitespace();

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
            let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
            return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
        }

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
            let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());

            if scanner.accept_char('.').is_ok() {
                let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Float(f)));
            } else {
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Int(f)));
            }
        }

        if let Some(sym) = Sym::parse_token(scanner) {
            return Ok(Some(Self::Sym(sym)));
        }

        Ok(None)
    }
source

pub fn accept_char(&mut self, expected: char) -> ScannerResult<'text, char>

Advances the scanner cursor and returns the next char and its Range, if the next character matches expected.

Example
let mut scanner = Scanner::new("Hello World");

assert_eq!(scanner.accept_char('H'), Ok((0..1, 'H')));
assert_eq!(scanner.accept_char('E'), Err((1..1, "")));
assert_eq!(scanner.accept_char('e'), Ok((1..2, 'e')));
assert_eq!(scanner.accept_char('W'), Err((2..2, "")));

assert_eq!(scanner.remaining_text(), "llo World");
Examples found in repository?
examples/math.rs (line 37)
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
    fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
        scanner.skip_whitespace();

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
            let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
            return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
        }

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
            let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());

            if scanner.accept_char('.').is_ok() {
                let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Float(f)));
            } else {
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Int(f)));
            }
        }

        if let Some(sym) = Sym::parse_token(scanner) {
            return Ok(Some(Self::Sym(sym)));
        }

        Ok(None)
    }
source

pub fn accept_char_any( &mut self, expected: &[char] ) -> ScannerResult<'text, char>

Advances the scanner cursor and returns the next char and its Range, if the next character matches any char produced by expected.

Panics

Panics in non-optimized builds, if expected is empty.

In optimized builds Err((cursor..cursor, "")) is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert!(!expected.is_empty()).

Example
let mut scanner = Scanner::new("Hello World");

let any = &['H', 'e', 'l', 'o', ' '];
assert_eq!(scanner.accept_char_any(any), Ok((0..1, 'H')));
assert_eq!(scanner.accept_char_any(any), Ok((1..2, 'e')));
assert_eq!(scanner.accept_char_any(any), Ok((2..3, 'l')));
assert_eq!(scanner.accept_char_any(any), Ok((3..4, 'l')));
assert_eq!(scanner.accept_char_any(any), Ok((4..5, 'o')));
assert_eq!(scanner.accept_char_any(any), Ok((5..6, ' ')));
assert_eq!(scanner.accept_char_any(any), Err((6..6, "")));

assert_eq!(scanner.remaining_text(), "World");
Examples found in repository?
examples/math.rs (line 60)
58
59
60
61
62
63
64
65
66
67
68
69
70
71
    fn parse_token(scanner: &mut Scanner<'text>) -> Option<Self> {
        let (_r, c) = scanner
            .accept_char_any(&['+', '-', '*', '/', '(', ')'])
            .ok()?;
        match c {
            '+' => Some(Self::Plus),
            '-' => Some(Self::Minus),
            '*' => Some(Self::Star),
            '/' => Some(Self::Slash),
            '(' => Some(Self::LParen),
            ')' => Some(Self::RParen),
            _ => unreachable!(),
        }
    }
source

pub fn accept_str(&mut self, expected: &str) -> ScannerResult<'text, &'text str>

Advances the scanner cursor and returns Ok with the &'text str and its Range, if the next characters match the characters in expected. If not, then an Err is returned, with the longest matching substring and its Range.

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

If expected is only 1 character, then use accept_char() instead.

Panics

Panics in non-optimized builds, if expected is empty.

In optimized builds Err((cursor..cursor, "")) is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert!(!expected.is_empty()).

Example
let mut scanner = Scanner::new("FooBaaar");

// The next 3 characters matches "Foo", so `Ok` is returned
assert_eq!(scanner.accept_str("Foo"), Ok((0..3, "Foo")));

// The next 3 characters is "Baa" not "Bar", so `Err` is
// returned, with the longest matching part, i.e. "Ba"
assert_eq!(scanner.accept_str("Bar"), Err((3..5, "Ba")));

assert_eq!(scanner.remaining_text(), "Baaar");
source

pub fn accept_str_any( &mut self, expected: &[&str] ) -> ScannerResult<'text, &'text str>

Advances the scanner cursor and returns Ok with the &'text str and its Range, if the next characters match any &str in expected. If not, then an Err is returned, with the longest matching substring and its Range.

Warning: The strings are tested in sequential order, thereby if accept_str_any() is called with e.g. ["foo", "foobar"], then "foobar" would never be tested, as "foo" would be matched and return Ok beforehand. Instead simply change the order of the strings into longest-to-shortest order, i.e. ["foo", "foobar"] into ["foobar", "foo"].

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

If expected only contains 1 character strings, then use accept_char_any() instead.

Panics

Panics in non-optimized builds, if expected is empty, or if expected contains an empty &str.

In optimized builds Err((cursor..cursor, "")) is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert!(!expected.is_empty()) (along with a similar assertion for the strings).

Example
let mut scanner = Scanner::new("FooBarFooBaaar");

let any = &["Foo", "Bar"];

// The next 3 characters matches "Foo", so `Ok` is returned
assert_eq!(scanner.accept_str_any(any), Ok((0..3, "Foo")));
assert_eq!(scanner.accept_str_any(any), Ok((3..6, "Bar")));
assert_eq!(scanner.accept_str_any(any), Ok((6..9, "Foo")));

// The next 3 characters is "Baa" not "Foo" nor "Bar", so `Err`
// is returned, with the longest matching part, i.e. "Ba"
assert_eq!(scanner.accept_str_any(any), Err((9..11, "Ba")));

assert_eq!(scanner.remaining_text(), "Baaar");
source

pub fn skip_while<F>(&mut self, f: F) -> ScannerItem<&'text str> where F: FnMut(char) -> bool,

Advances the scanner cursor and skips zero-to-many characters, while f(c) returns true, where c is each of the remaining characters, in sequential order.

Returns the string slice and its Range, of the matched (i.e. skipped) characters.

Returns (cursor..cursor, "") if 0 characters were matched (i.e. skipped).

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

Example
let mut scanner = Scanner::new("Hello World");

// Skip all alphabetic characters
assert_eq!(scanner.skip_while(|c| c.is_alphabetic()), (0..5, "Hello"));

// Returns an empty range and an empty string slice
// since 0 characters were skipped
assert_eq!(scanner.skip_while(|c| c.is_alphabetic()), (5..5, ""));

// Skip 1 whitespace character
assert_eq!(scanner.skip_while(char::is_whitespace), (5..6, " "));

assert_eq!(scanner.remaining_text(), "World");
Examples found in repository?
examples/math.rs (line 30)
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
    fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
        scanner.skip_whitespace();

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
            let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
            return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
        }

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
            let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());

            if scanner.accept_char('.').is_ok() {
                let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Float(f)));
            } else {
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Int(f)));
            }
        }

        if let Some(sym) = Sym::parse_token(scanner) {
            return Ok(Some(Self::Sym(sym)));
        }

        Ok(None)
    }
source

pub fn skip_while_char(&mut self, expected: char) -> ScannerItem<&'text str>

Skips zero-to-many characters matching expected, same as:

scanner.skip_while(|c| c == expected);
source

pub fn skip_while_char_any( &mut self, expected: &[char] ) -> ScannerItem<&'text str>

Skips zero-to-many characters, which match any character in expected, same as:

scanner.skip_while(|c| expected.contains(&c));
source

pub fn skip_while_str(&mut self, expected: &str) -> ScannerItem<&'text str>

Skips zero-to-many characters, while the next characters match the characters in expected completely.

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

If expected is only 1 character, then use skip_while_char() instead.

Panics

Panics in non-optimized builds, if expected is empty.

In optimized builds 0 characters are skipped, and (cursor..cursor, "") is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert!(!expected.is_empty()).

Example
let mut scanner = Scanner::new("FooFooFooBarBaz");
assert_eq!(scanner.skip_while_str("Foo"), (0..9, "FooFooFoo"));
assert_eq!(scanner.remaining_text(), "BarBaz");
source

pub fn skip_while_str_any( &mut self, expected: &[&str] ) -> ScannerItem<&'text str>

Skips zero-to-many characters, while the next characters match the characters of any &str in expected completely.

Warning: The strings are tested in sequential order, thereby if skip_while_str_any() is called with e.g. ["foo", "foobar"], then "foobar" would never be tested, as "foo" would be matched and continue beforehand. Instead simply change the order of the strings into longest-to-shortest order, i.e. ["foo", "foobar"] into ["foobar", "foo"].

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

If expected only contains 1 character strings, then use skip_while_char_any() instead.

Panics

Panics in non-optimized builds, if expected is empty, or if expected contains an empty &str.

In optimized builds 0 characters are skipped, and (cursor..cursor, "") is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert!(!expected.is_empty()) (along with a similar assertion for the strings).

Example
let mut scanner = Scanner::new("FooBarFooBarFooBaaarBaz");
assert_eq!(scanner.skip_while_str_any(&["Foo", "Bar"]), (0..15, "FooBarFooBarFoo"));
assert_eq!(scanner.remaining_text(), "BaaarBaz");
source

pub fn skip_until<F>(&mut self, f: F) -> ScannerItem<&'text str> where F: FnMut(char) -> bool,

Advances the scanner cursor and skips zero-to-many characters, while f(c) returns false, where c is each of the remaining characters, in sequential order.

Returns the string slice and its Range, of the matched (i.e. skipped) characters.

Returns (cursor..cursor, "") if 0 characters were matched (i.e. skipped).

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

Example
let mut scanner = Scanner::new("Hello World");

// Skip all characters until a whitespace is found
assert_eq!(scanner.skip_until(|c| c.is_whitespace()), (0..5, "Hello"));

// Returns an empty range and an empty string slice
// since 0 characters were skipped
assert_eq!(scanner.skip_until(|c| c.is_whitespace()), (5..5, ""));

// Skip 1 whitespace character
assert_eq!(scanner.skip_until(char::is_alphabetic), (5..6, " "));

assert_eq!(scanner.remaining_text(), "World");
source

pub fn skip_until_char(&mut self, expected: char) -> ScannerItem<&'text str>

Skips zero-to-many characters, until the next character matches expected, same as:

scanner.skip_until(|c| c == expected);
source

pub fn skip_until_char_any( &mut self, expected: &[char] ) -> ScannerItem<&'text str>

Skips zero-to-many characters, until the next character matches any in expected, same as:

scanner.skip_until(|c| expected.contains(&c));
source

pub fn skip_until_str(&mut self, expected: &str) -> ScannerItem<&'text str>

Skips zero-to-many characters, until the next characters match the characters in expected completely.

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

If expected is only 1 character, then use skip_until_char() instead.

Panics

Panics in non-optimized builds, if expected is empty.

In optimized builds 0 characters are skipped, and (cursor..cursor, "") is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert!(!expected.is_empty()).

Example
let mut scanner = Scanner::new("FooFooFooBarBaz");
assert_eq!(scanner.skip_until_str("Bar"), (0..9, "FooFooFoo"));
assert_eq!(scanner.remaining_text(), "BarBaz");
source

pub fn skip_until_str_any( &mut self, expected: &[&str] ) -> ScannerItem<&'text str>

Skips zero-to-many characters, until the next characters match the characters of any &str in expected completely.

Warning: The strings are tested in sequential order, thereby if skip_until_str_any() is called with e.g. ["foo", "foobar"], then "foobar" would never be tested, as "foo" would be matched and stop the skipping beforehand. Instead simply change the order of the strings into longest-to-shortest order, i.e. ["foo", "foobar"] into ["foobar", "foo"].

Note: The returned string slice has the same lifetime as the original text, so the scanner can continue to be used while this exists.

If expected only contains 1 character strings, then use skip_until_char_any() instead.

Panics

Panics in non-optimized builds, if expected is empty, or if expected contains an empty &str.

In optimized builds 0 characters are skipped, and (cursor..cursor, "") is returned instead, regardless of whether there are any remaining characters.

In short there is a debug_assert!(!expected.is_empty()) (along with a similar assertion for the strings).

Example
let mut scanner = Scanner::new("FooBarFooBarFooBaaarBaz");
assert_eq!(scanner.skip_until_str_any(&["Baaar", "Baz"]), (0..15, "FooBarFooBarFoo"));
assert_eq!(scanner.remaining_text(), "BaaarBaz");
source

pub fn skip_whitespace(&mut self) -> ScannerItem<&'text str>

Skips zero-to-many characters, while the next character is a whitespace, same as:

scanner.skip_while(char::is_whitespace);
Examples found in repository?
examples/math.rs (line 27)
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
    fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
        scanner.skip_whitespace();

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
            let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
            return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
        }

        if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
            let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());

            if scanner.accept_char('.').is_ok() {
                let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Float(f)));
            } else {
                let text = &scanner.text()[first.start..last.end];
                let f = text.parse()?;
                return Ok(Some(Self::Int(f)));
            }
        }

        if let Some(sym) = Sym::parse_token(scanner) {
            return Ok(Some(Self::Sym(sym)));
        }

        Ok(None)
    }
source

pub fn scan_with<F>(&mut self, f: F) -> ScannerResult<'text, &'text str> where F: FnOnce(&mut Self) -> ScanResult<'text>,

Advances the cursor if f() returns Ok, otherwise on Err the cursor position is backtracked to before f() was called.

Utility for scanning tokens, where an unexpected character during scanning should restore the cursor position to where it was before the scan was started.

Additionally, returns Err if f() returns Ok without having advanced the cursor position.

Example
fn scan_word<'text>(scanner: &mut Scanner<'text>) -> Result<(), ScannerItem<&'text str>> {
    // Get next char if alphabetic or return err
    let (first, _c) = scanner.accept_if(char::is_alphabetic)?;
    // Skip zero-to-many alphabetic characters
    let (last, _s) = scanner.skip_while(char::is_alphabetic);
    Ok(())
}

let text = "Hello World";
let mut scanner = Scanner::new(text);

assert_eq!(scanner.scan_with(scan_word), Ok((0..5, "Hello")));
assert_eq!(scanner.scan_with(scan_word), Err((5..5, "")));
assert_eq!(scanner.next(), Ok((5..6, ' ')));
assert_eq!(scanner.scan_with(scan_word), Ok((6..11, "World")));
source

pub fn peeking<T, F>(&self, f: F) -> T where F: FnOnce(&mut Self) -> T,

Calls f with a &mut Scanner of this &Scanner, i.e. a Scanner with the same text(), remaining_text(), and cursor_pos().

source

pub fn scan_digits(&mut self) -> ScannerResult<'text, &'text str>

This function accepts the following formats:

  • 0
  • 1
  • 5
  • 123
  • 00000

The following is not accepted by this function:

  • 0__000__0
  • _
  • ___
  • _123

See also:

Grammar

The following EBNF grammar represents what this method accepts:

Digits ::= Digit Digit*
Digit  ::= [0-9]
source

pub fn scan_digits_or_underscores(&mut self) -> ScannerResult<'text, &'text str>

This function accepts the following formats:

  • 0
  • 1
  • 5_
  • 0000
  • 12345
  • 1_2_3
  • 0__000__0

The following is not accepted by this function:

  • _
  • ___
  • _123

See also:

Grammar

The following EBNF grammar represents what this method accepts:

Digits ::= Digit ( Digit | '_' )*
Digit  ::= [0-9]
source

pub fn scan_non_zero_digits(&mut self) -> ScannerResult<'text, &'text str>

This function accepts the following formats:

  • 0
  • 1
  • 5
  • 123
  • 102030

The following is not accepted by this function:

  • 0000
  • 01
  • 012345
  • 0__000__0
  • _
  • ___
  • _123

See also:

Grammar

The following EBNF grammar represents what this method accepts:

Digits       ::= ( '0' |
                   NonZeroDigit Digit* )
NonZeroDigit ::= [1-9]
Digit        ::= [0-9]
source

pub fn scan_non_zero_digits_or_underscores( &mut self ) -> ScannerResult<'text, &'text str>

This function accepts the following formats:

  • 0
  • 1
  • 5_
  • 123
  • 102030
  • 1_2_3
  • 0___
  • 12345__

The following is not accepted by this function:

  • 0000
  • 01
  • 012345
  • 0__000__0
  • _
  • ___
  • _123

See also:

Grammar

The following EBNF grammar represents what this method accepts:

Digits       ::= ( '0' |
                   NonZeroDigit ( Digit | '_' )* )
NonZeroDigit ::= [1-9]
Digit        ::= [0-9]

Trait Implementations§

source§

impl<'text> CScannerExt<'text> for Scanner<'text>

source§

fn scan_c_line_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_c_block_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_c_identifier(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_c_int_dec(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_c_int_hex(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_c_int_oct(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_c_float(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_c_char(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_c_string(&mut self) -> ScannerResult<'text, &'text str>

source§

impl<'text> Clone for Scanner<'text>

source§

fn clone(&self) -> Scanner<'text>

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl<'text> CssScannerExt<'text> for Scanner<'text>

source§

fn scan_css_block_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_css_identifier(&mut self) -> ScannerResult<'text, &'text str>

Scans a single CSS identifier. Read more
source§

fn scan_css_at_keyword(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_css_hash(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_css_string(&mut self) -> ScannerResult<'text, &'text str>

Scans a single CSS string. Read more
source§

fn scan_css_number(&mut self) -> ScannerResult<'text, &'text str>

Scans a single CSS number. Read more
source§

impl<'text> Debug for Scanner<'text>

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl<'text> JavaScannerExt<'text> for Scanner<'text>

source§

fn scan_java_line_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_block_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_identifier(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_keyword(&mut self) -> ScannerResult<'text, &'text str>

Note: null, true, and false are not keywords, but literals, see scan_java_null_literal() and scan_java_boolean_literal().
source§

fn scan_java_reserved_keyword(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_contextual_keyword(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_operator(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_separator(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_null_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_boolean_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_int_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_int_dec_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_int_hex_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_int_oct_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_int_bin_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_float_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_float_dec_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_float_hex_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_char_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_java_string_literal(&mut self) -> ScannerResult<'text, &'text str>

source§

impl<'text> JsonCScannerExt<'text> for Scanner<'text>

source§

fn scan_jsonc_line_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_jsonc_block_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

impl<'text> JsonScannerExt<'text> for Scanner<'text>

source§

fn scan_json_string(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_json_number(&mut self) -> ScannerResult<'text, &'text str>

source§

impl<'text> PythonScannerExt<'text> for Scanner<'text>

source§

fn scan_python_line_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_explicit_line_joiner(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_identifier(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_keyword(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_soft_keyword(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_operator(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_delimiter(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_int_dec(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_int_hex(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_int_oct(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_int_bin(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_float(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_string(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_short_string(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_long_string(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_bytes(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_short_bytes(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_python_long_bytes(&mut self) -> ScannerResult<'text, &'text str>

source§

impl<'text> RustScannerExt<'text> for Scanner<'text>

source§

fn scan_rust_line_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_rust_block_comment(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_rust_identifier(&mut self) -> ScannerResult<'text, &'text str>

Scans a single Rust identifier. Read more
source§

fn scan_rust_raw_identifier(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_rust_char(&mut self) -> ScannerResult<'text, &'text str>

Scans a single Rust character. Read more
source§

fn scan_rust_string(&mut self) -> ScannerResult<'text, &'text str>

Scans a single Rust string. Read more
source§

fn scan_rust_raw_string(&mut self) -> ScannerResult<'text, &'text str>

Scans a single raw Rust string. Read more
source§

fn scan_rust_int_dec(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_rust_int_hex(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_rust_int_oct(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_rust_int_bin(&mut self) -> ScannerResult<'text, &'text str>

source§

fn scan_rust_float(&mut self) -> ScannerResult<'text, &'text str>

source§

impl<'text> ScssScannerExt<'text> for Scanner<'text>

source§

fn scan_scss_line_comment(&mut self) -> ScannerResult<'text, &'text str>

Auto Trait Implementations§

§

impl<'text> RefUnwindSafe for Scanner<'text>

§

impl<'text> Send for Scanner<'text>

§

impl<'text> Sync for Scanner<'text>

§

impl<'text> Unpin for Scanner<'text>

§

impl<'text> UnwindSafe for Scanner<'text>

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.