Struct text_scanner::Scanner
source · pub struct Scanner<'text> { /* private fields */ }
Expand description
A Scanner
is a UTF-8 char
text scanner, implementing various methods
for scanning a string slice, as well as backtracking capabilities, which
can be used to implement lexers for tokenizing text or code. It is essentially
just a fancy wrapper around CharRanges
.
Note: Cloning Scanner
is essentially a copy, as it just contains
a &str
and a usize
for its cursor
. However, Copy
is not
implemented, to avoid accidentally copying immutable Scanner
s.
Implementations§
source§impl<'text> Scanner<'text>
impl<'text> Scanner<'text>
sourcepub fn text(&self) -> &'text str
pub fn text(&self) -> &'text str
Returns the text
the scanner was constructed with.
Note: This has the same lifetime as the original text
,
so the scanner can continue to be used while this exists.
Example
let mut scanner = Scanner::new("Hello World");
assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.text(), "Hello World");
assert_eq!(scanner.remaining_text(), "llo World");
Examples found in repository?
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
scanner.skip_whitespace();
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
}
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
if scanner.accept_char('.').is_ok() {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Float(f)));
} else {
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Int(f)));
}
}
if let Some(sym) = Sym::parse_token(scanner) {
return Ok(Some(Self::Sym(sym)));
}
Ok(None)
}
sourcepub fn remaining_text(&self) -> &'text str
pub fn remaining_text(&self) -> &'text str
Returns the remaining text
of the scanner, i.e. the text()
after cursor_pos()
, in other words
self.text()[self.cursor_pos()..]
.
Note: This has the same lifetime as the original text
,
so the scanner can continue to be used while this exists.
Example
let mut scanner = Scanner::new("Hello World");
assert_eq!(scanner.text(), "Hello World");
assert_eq!(scanner.remaining_text(), "Hello World");
assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.text(), "Hello World");
assert_eq!(scanner.remaining_text(), "llo World");
sourcepub fn has_remaining_text(&self) -> bool
pub fn has_remaining_text(&self) -> bool
Returns true
if remaining_text()
has text, i.e.
if it is not empty.
Example
let mut scanner = Scanner::new("Foo");
assert_eq!(scanner.remaining_text(), "Foo");
assert_eq!(scanner.has_remaining_text(), true);
assert_eq!(scanner.next(), Ok((0..1, 'F')));
assert_eq!(scanner.next(), Ok((1..2, 'o')));
assert_eq!(scanner.next(), Ok((2..3, 'o')));
assert_eq!(scanner.remaining_text(), "");
assert_eq!(scanner.has_remaining_text(), false);
sourcepub fn ranged_text(&self, range: Range<usize>) -> ScannerItem<&'text str>
pub fn ranged_text(&self, range: Range<usize>) -> ScannerItem<&'text str>
Utility for turning a Range<usize>
into (Range<usize>, &'text str)
.
Where range
is the start and end byte index relative to text()
.
The same as (range.clone(), &self.text()[range])
.
sourcepub fn cursor_pos(&self) -> usize
pub fn cursor_pos(&self) -> usize
Returns the current cursor position of the
scanner, i.e. the byte offset into text()
.
sourcepub fn set_cursor_pos(&mut self, pos: usize) -> usize
pub fn set_cursor_pos(&mut self, pos: usize) -> usize
Replaces the current cursor position with pos
,
while returning the old cursor position.
Panics
If pos
is not at a valid UTF-8 sequence boundary,
then the next operation using the cursor position
will panic.
Example
let mut scanner = Scanner::new("Hello World");
assert_eq!(scanner.next(), Ok((0..1, 'H')));
let backtrack = scanner.cursor_pos();
assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.next(), Ok((2..3, 'l')));
assert_eq!(scanner.next(), Ok((3..4, 'l')));
scanner.set_cursor_pos(backtrack);
assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.next(), Ok((2..3, 'l')));
assert_eq!(scanner.next(), Ok((3..4, 'l')));
sourcepub fn reset(&mut self) -> usize
pub fn reset(&mut self) -> usize
Resets the cursor position to the start, while returning the old cursor position.
Example
let old_pos = scanner.reset();
// same as
let old_pos = scanner.set_cursor_pos(0);
sourcepub fn next(&mut self) -> ScannerResult<'text, char>
pub fn next(&mut self) -> ScannerResult<'text, char>
Advances the scanner cursor and returns the next
char
and its Range
, if any.
Example
let mut scanner = Scanner::new("Hello");
assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.remaining_text(), "llo");
assert_eq!(scanner.next(), Ok((2..3, 'l')));
assert_eq!(scanner.next(), Ok((3..4, 'l')));
assert_eq!(scanner.next(), Ok((4..5, 'o')));
assert_eq!(scanner.next(), Err((5..5, "")));
assert_eq!(scanner.remaining_text(), "");
sourcepub fn peek(&self) -> ScannerResult<'text, char>
pub fn peek(&self) -> ScannerResult<'text, char>
Returns the next char
and its Range
, if any,
without advancing the cursor position.
See also peek_str()
, peek_nth()
, and peek_iter()
.
Example
let mut scanner = Scanner::new("Hello World");
assert_eq!(scanner.peek(), Ok((0..1, 'H')));
assert_eq!(scanner.peek(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.peek(), Ok((1..2, 'e')));
assert_eq!(scanner.peek(), Ok((1..2, 'e')));
assert_eq!(scanner.remaining_text(), "ello World");
sourcepub fn peek_nth(&self, n: usize) -> ScannerResult<'text, char>
pub fn peek_nth(&self, n: usize) -> ScannerResult<'text, char>
Returns the n
th char
and its Range
, if any,
without advancing the cursor position. Here n is zero-based, so peek_nth(0) returns the same character as peek().
See also peek_str()
and peek_iter()
.
Example
let mut scanner = Scanner::new("Hello World");
assert_eq!(scanner.peek_nth(0), Ok((0..1, 'H')));
assert_eq!(scanner.peek_nth(1), Ok((1..2, 'e')));
assert_eq!(scanner.peek_nth(2), Ok((2..3, 'l')));
assert_eq!(scanner.peek_nth(6), Ok((6..7, 'W')));
assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.remaining_text(), "ello World");
sourcepub fn peek_iter(&self) -> CharRangesOffset<'text> ⓘ
pub fn peek_iter(&self) -> CharRangesOffset<'text> ⓘ
Returns an iterator that produces all the remaining char
s
and their Range
s, if any, without advancing the cursor position.
Note: This has the same lifetime as the original text
,
so the scanner can continue to be used while this exists.
See also peek_str()
.
Example
let mut scanner = Scanner::new("Hello World");
assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.remaining_text(), "ello World");
let mut peek = scanner.peek_iter();
assert_eq!(peek.next(), Some((1..2, 'e')));
assert_eq!(peek.next(), Some((2..3, 'l')));
assert_eq!(peek.next(), Some((3..4, 'l')));
assert_eq!(scanner.remaining_text(), "ello World");
assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.next(), Ok((2..3, 'l')));
assert_eq!(scanner.remaining_text(), "lo World");
sourcepub fn next_str(&mut self, chars: usize) -> ScannerResult<'text, &'text str>
pub fn next_str(&mut self, chars: usize) -> ScannerResult<'text, &'text str>
Advances the scanner cursor and returns Ok
with a string
slice of the following n
characters (where n is the value of the chars argument). If fewer than n
are
remaining, then Err
is returned, with the remaining text,
if any, without advancing the cursor.
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
Bytes vs Characters
The Ok
string slice contains n
characters,
i.e. where n
matches str.chars().count()
and not len()
(which is the byte length of a string slice).
Consider "foo"
vs "🦀🦀🦀"
, both string slices contain 3
characters. However "foo"
has a length of 3 bytes, while "🦀🦀🦀"
has a length of 12 bytes, when encoded in UTF-8.
Panics
Panics in non-optimized builds, if n
is 0
.
In optimized builds Err((cursor..cursor, ""))
is returned instead, regardless of whether there are any remaining
characters.
In short there is a debug_assert_ne!(n, 0)
.
Example
let mut scanner = Scanner::new("Foo Bar Baz");
assert_eq!(scanner.next_str(3), Ok((0..3, "Foo")));
assert_eq!(scanner.next_str(3), Ok((3..6, " Ba")));
assert_eq!(scanner.next_str(3), Ok((6..9, "r B")));
// Less than 3 characters are remaining, so `Err`
// is returned
assert_eq!(scanner.next_str(3), Err((9..11, "az")));
sourcepub fn peek_str(&self, n: usize) -> ScannerResult<'text, &'text str>
pub fn peek_str(&self, n: usize) -> ScannerResult<'text, &'text str>
Returns Ok
with a string slice of the following n
characters,
if any, without advancing the cursor. If less than n
are remaining,
then Err
is returned, with the remaining text.
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
Bytes vs Characters
The Ok
string slice contains n
characters,
i.e. where n
matches str.chars().count()
and not len()
(which is the byte length of a string slice).
Consider "foo"
vs "🦀🦀🦀"
, both string slices contain 3
characters. However "foo"
has a length of 3 bytes, while "🦀🦀🦀"
has a length of 12 bytes, when encoded in UTF-8.
Panics
Panics in non-optimized builds, if n
is 0
.
In optimized builds Err((cursor..cursor, ""))
is returned instead, regardless of whether there are any remaining
characters.
In short there is a debug_assert_ne!(n, 0)
.
Example
let mut scanner = Scanner::new("Hello 👋 World 🌏");
assert_eq!(scanner.remaining_text(), "Hello 👋 World 🌏");
// The emoji is a multi-byte character, so the returned
// range has a length of 10 and not 7.
assert_eq!(scanner.peek_str(7), Ok((0..10, "Hello 👋")));
assert_eq!(scanner.next(), Ok((0..1, 'H')));
assert_eq!(scanner.next(), Ok((1..2, 'e')));
assert_eq!(scanner.remaining_text(), "llo 👋 World 🌏");
assert_eq!(scanner.peek_str(7), Ok((2..12, "llo 👋 W")));
sourcepub fn accept_if<F>(&mut self, f: F) -> ScannerResult<'text, char>where
F: FnOnce(char) -> bool,
pub fn accept_if<F>(&mut self, f: F) -> ScannerResult<'text, char>where F: FnOnce(char) -> bool,
Advances the scanner cursor and returns the next
char
and its Range
, if f(c)
returns true
where c
is the next character.
Example
let mut scanner = Scanner::new("Hello World");
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((0..1, 'H')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((1..2, 'e')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((2..3, 'l')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((3..4, 'l')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Ok((4..5, 'o')));
assert_eq!(scanner.accept_if(char::is_alphabetic), Err((5..5, "")));
assert_eq!(scanner.remaining_text(), " World");
Examples found in repository?
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
scanner.skip_whitespace();
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
}
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
if scanner.accept_char('.').is_ok() {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Float(f)));
} else {
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Int(f)));
}
}
if let Some(sym) = Sym::parse_token(scanner) {
return Ok(Some(Self::Sym(sym)));
}
Ok(None)
}
sourcepub fn accept_char(&mut self, expected: char) -> ScannerResult<'text, char>
pub fn accept_char(&mut self, expected: char) -> ScannerResult<'text, char>
Advances the scanner cursor and returns the next
char
and its Range
, if the next character
matches expected
.
Example
let mut scanner = Scanner::new("Hello World");
assert_eq!(scanner.accept_char('H'), Ok((0..1, 'H')));
assert_eq!(scanner.accept_char('E'), Err((1..1, "")));
assert_eq!(scanner.accept_char('e'), Ok((1..2, 'e')));
assert_eq!(scanner.accept_char('W'), Err((2..2, "")));
assert_eq!(scanner.remaining_text(), "llo World");
Examples found in repository?
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
scanner.skip_whitespace();
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
}
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
if scanner.accept_char('.').is_ok() {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Float(f)));
} else {
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Int(f)));
}
}
if let Some(sym) = Sym::parse_token(scanner) {
return Ok(Some(Self::Sym(sym)));
}
Ok(None)
}
sourcepub fn accept_char_any(
&mut self,
expected: &[char]
) -> ScannerResult<'text, char>
pub fn accept_char_any( &mut self, expected: &[char] ) -> ScannerResult<'text, char>
Advances the scanner cursor and returns the next
char
and its Range
, if the next character
matches any char
produced by expected
.
Panics
Panics in non-optimized builds, if expected
is empty.
In optimized builds Err((cursor..cursor, ""))
is returned instead, regardless of whether there are any remaining
characters.
In short there is a debug_assert!(!expected.is_empty())
.
Example
let mut scanner = Scanner::new("Hello World");
let any = &['H', 'e', 'l', 'o', ' '];
assert_eq!(scanner.accept_char_any(any), Ok((0..1, 'H')));
assert_eq!(scanner.accept_char_any(any), Ok((1..2, 'e')));
assert_eq!(scanner.accept_char_any(any), Ok((2..3, 'l')));
assert_eq!(scanner.accept_char_any(any), Ok((3..4, 'l')));
assert_eq!(scanner.accept_char_any(any), Ok((4..5, 'o')));
assert_eq!(scanner.accept_char_any(any), Ok((5..6, ' ')));
assert_eq!(scanner.accept_char_any(any), Err((6..6, "")));
assert_eq!(scanner.remaining_text(), "World");
Examples found in repository?
58 59 60 61 62 63 64 65 66 67 68 69 70 71
fn parse_token(scanner: &mut Scanner<'text>) -> Option<Self> {
let (_r, c) = scanner
.accept_char_any(&['+', '-', '*', '/', '(', ')'])
.ok()?;
match c {
'+' => Some(Self::Plus),
'-' => Some(Self::Minus),
'*' => Some(Self::Star),
'/' => Some(Self::Slash),
'(' => Some(Self::LParen),
')' => Some(Self::RParen),
_ => unreachable!(),
}
}
sourcepub fn accept_str(&mut self, expected: &str) -> ScannerResult<'text, &'text str>
pub fn accept_str(&mut self, expected: &str) -> ScannerResult<'text, &'text str>
Advances the scanner cursor and returns Ok
with the &'text str
and its Range
, if the next characters match the characters
in expected
. If not, then an Err
is returned, with the longest
matching substring and its Range
.
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
If expected
is only 1 character, then use accept_char()
instead.
Panics
Panics in non-optimized builds, if expected
is empty.
In optimized builds Err((cursor..cursor, ""))
is returned instead, regardless of whether there are any remaining
characters.
In short there is a debug_assert!(!expected.is_empty())
.
Example
let mut scanner = Scanner::new("FooBaaar");
// The next 3 characters match "Foo", so `Ok` is returned
assert_eq!(scanner.accept_str("Foo"), Ok((0..3, "Foo")));
// The next 3 characters are "Baa" not "Bar", so `Err` is
// returned, with the longest matching part, i.e. "Ba"
assert_eq!(scanner.accept_str("Bar"), Err((3..5, "Ba")));
assert_eq!(scanner.remaining_text(), "Baaar");
sourcepub fn accept_str_any(
&mut self,
expected: &[&str]
) -> ScannerResult<'text, &'text str>
pub fn accept_str_any( &mut self, expected: &[&str] ) -> ScannerResult<'text, &'text str>
Advances the scanner cursor and returns Ok
with the &'text str
and its Range
, if the next characters match any &str
in expected
. If not, then an Err
is returned, with the longest
matching substring and its Range
.
Warning: The strings are tested in sequential order, therefore
if accept_str_any()
is called with e.g. ["foo", "foobar"]
,
then "foobar"
would never be tested, as "foo"
would be
matched and return Ok
beforehand. Instead simply change the
order of the strings into longest-to-shortest order,
i.e. ["foo", "foobar"]
into ["foobar", "foo"]
.
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
If expected
only contains 1 character strings, then use
accept_char_any()
instead.
Panics
Panics in non-optimized builds, if expected
is empty,
or if expected
contains an empty &str
.
In optimized builds Err((cursor..cursor, ""))
is returned instead, regardless of whether there are any remaining
characters.
In short there is a debug_assert!(!expected.is_empty())
(along with a similar assertion for the strings).
Example
let mut scanner = Scanner::new("FooBarFooBaaar");
let any = &["Foo", "Bar"];
// The next 3 characters match "Foo", so `Ok` is returned
assert_eq!(scanner.accept_str_any(any), Ok((0..3, "Foo")));
assert_eq!(scanner.accept_str_any(any), Ok((3..6, "Bar")));
assert_eq!(scanner.accept_str_any(any), Ok((6..9, "Foo")));
// The next 3 characters are "Baa" not "Foo" nor "Bar", so `Err`
// is returned, with the longest matching part, i.e. "Ba"
assert_eq!(scanner.accept_str_any(any), Err((9..11, "Ba")));
assert_eq!(scanner.remaining_text(), "Baaar");
sourcepub fn skip_while<F>(&mut self, f: F) -> ScannerItem<&'text str>where
F: FnMut(char) -> bool,
pub fn skip_while<F>(&mut self, f: F) -> ScannerItem<&'text str>where F: FnMut(char) -> bool,
Advances the scanner cursor and skips zero-to-many characters,
while f(c)
returns true
, where c
is each remaining character
in sequential order.
Returns the string slice and its Range
, of the matched
(i.e. skipped) characters.
Returns (cursor..cursor, "")
if 0 characters
were matched (i.e. skipped).
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
Example
let mut scanner = Scanner::new("Hello World");
// Skip all alphabetic characters
assert_eq!(scanner.skip_while(|c| c.is_alphabetic()), (0..5, "Hello"));
// Returns an empty range and an empty string slice
// since 0 characters were skipped
assert_eq!(scanner.skip_while(|c| c.is_alphabetic()), (5..5, ""));
// Skip 1 whitespace character
assert_eq!(scanner.skip_while(char::is_whitespace), (5..6, " "));
assert_eq!(scanner.remaining_text(), "World");
Examples found in repository?
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
scanner.skip_whitespace();
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
}
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
if scanner.accept_char('.').is_ok() {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Float(f)));
} else {
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Int(f)));
}
}
if let Some(sym) = Sym::parse_token(scanner) {
return Ok(Some(Self::Sym(sym)));
}
Ok(None)
}
sourcepub fn skip_while_char(&mut self, expected: char) -> ScannerItem<&'text str>
pub fn skip_while_char(&mut self, expected: char) -> ScannerItem<&'text str>
Skips zero-to-many characters matching expected
, same as:
scanner.skip_while(|c| c == expected);
sourcepub fn skip_while_char_any(
&mut self,
expected: &[char]
) -> ScannerItem<&'text str>
pub fn skip_while_char_any( &mut self, expected: &[char] ) -> ScannerItem<&'text str>
Skips zero-to-many characters, which match any
character in expected
, same as:
scanner.skip_while(|c| expected.contains(&c));
sourcepub fn skip_while_str(&mut self, expected: &str) -> ScannerItem<&'text str>
pub fn skip_while_str(&mut self, expected: &str) -> ScannerItem<&'text str>
Skips zero-to-many characters, while the next characters
match the characters in expected
completely.
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
If expected
is only 1 character, then use skip_while_char()
instead.
Panics
Panics in non-optimized builds, if expected
is empty.
In optimized builds 0 characters are skipped, and
(cursor..cursor, "")
is returned instead,
regardless of whether there are any remaining characters.
In short there is a debug_assert!(!expected.is_empty())
.
Example
let mut scanner = Scanner::new("FooFooFooBarBaz");
assert_eq!(scanner.skip_while_str("Foo"), (0..9, "FooFooFoo"));
assert_eq!(scanner.remaining_text(), "BarBaz");
sourcepub fn skip_while_str_any(
&mut self,
expected: &[&str]
) -> ScannerItem<&'text str>
pub fn skip_while_str_any( &mut self, expected: &[&str] ) -> ScannerItem<&'text str>
Skips zero-to-many characters, while the next characters
match the characters of any &str
in expected
completely.
Warning: The strings are tested in sequential order, therefore
if skip_while_str_any()
is called with e.g. ["foo", "foobar"]
,
then "foobar"
would never be tested, as "foo"
would be
matched and continue beforehand. Instead simply change the
order of the strings into longest-to-shortest order,
i.e. ["foo", "foobar"]
into ["foobar", "foo"]
.
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
If expected
only contains 1 character strings, then use
skip_while_char_any()
instead.
Panics
Panics in non-optimized builds, if expected
is empty,
or if expected
contains an empty &str
.
In optimized builds 0 characters are skipped, and
(cursor..cursor, "")
is returned instead,
regardless of whether there are any remaining characters.
In short there is a debug_assert!(!expected.is_empty())
(along with a similar assertion for the strings).
Example
let mut scanner = Scanner::new("FooBarFooBarFooBaaarBaz");
assert_eq!(scanner.skip_while_str_any(&["Foo", "Bar"]), (0..15, "FooBarFooBarFoo"));
assert_eq!(scanner.remaining_text(), "BaaarBaz");
sourcepub fn skip_until<F>(&mut self, f: F) -> ScannerItem<&'text str>where
F: FnMut(char) -> bool,
pub fn skip_until<F>(&mut self, f: F) -> ScannerItem<&'text str>where F: FnMut(char) -> bool,
Advances the scanner cursor and skips zero-to-many characters,
while f(c)
returns false
, where c
is each remaining character
in sequential order.
Returns the string slice and its Range
, of the matched
(i.e. skipped) characters.
Returns (cursor..cursor, "")
if 0 characters
were matched (i.e. skipped).
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
Example
let mut scanner = Scanner::new("Hello World");
// Skip all characters until a whitespace is found
assert_eq!(scanner.skip_until(|c| c.is_whitespace()), (0..5, "Hello"));
// Returns an empty range and an empty string slice
// since 0 characters were skipped
assert_eq!(scanner.skip_until(|c| c.is_whitespace()), (5..5, ""));
// Skip 1 whitespace character
assert_eq!(scanner.skip_until(char::is_alphabetic), (5..6, " "));
assert_eq!(scanner.remaining_text(), "World");
sourcepub fn skip_until_char(&mut self, expected: char) -> ScannerItem<&'text str>
pub fn skip_until_char(&mut self, expected: char) -> ScannerItem<&'text str>
Skips zero-to-many characters, until the next character
matches expected
, same as:
scanner.skip_until(|c| c == expected);
sourcepub fn skip_until_char_any(
&mut self,
expected: &[char]
) -> ScannerItem<&'text str>
pub fn skip_until_char_any( &mut self, expected: &[char] ) -> ScannerItem<&'text str>
Skips zero-to-many characters, until the next character
matches any character in expected
, same as:
scanner.skip_until(|c| expected.contains(&c));
sourcepub fn skip_until_str(&mut self, expected: &str) -> ScannerItem<&'text str>
pub fn skip_until_str(&mut self, expected: &str) -> ScannerItem<&'text str>
Skips zero-to-many characters, until the next characters
match the characters in expected
completely.
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
If expected
is only 1 character, then use skip_until_char()
instead.
Panics
Panics in non-optimized builds, if expected
is empty.
In optimized builds 0 characters are skipped, and
(cursor..cursor, "")
is returned instead,
regardless of whether there are any remaining characters.
In short there is a debug_assert!(!expected.is_empty())
.
Example
let mut scanner = Scanner::new("FooFooFooBarBaz");
assert_eq!(scanner.skip_until_str("Bar"), (0..9, "FooFooFoo"));
assert_eq!(scanner.remaining_text(), "BarBaz");
sourcepub fn skip_until_str_any(
&mut self,
expected: &[&str]
) -> ScannerItem<&'text str>
pub fn skip_until_str_any( &mut self, expected: &[&str] ) -> ScannerItem<&'text str>
Skips zero-to-many characters, until the next characters
match the characters of any &str
in expected
completely.
Warning: The strings are tested in sequential order, therefore
if skip_until_str_any()
is called with e.g. ["foo", "foobar"]
,
then "foobar"
would never be tested, as "foo"
would be
matched and end the skip beforehand. Instead simply change the
order of the strings into longest-to-shortest order,
i.e. ["foo", "foobar"]
into ["foobar", "foo"]
.
Note: The returned string slice has the same lifetime as
the original text
, so the scanner can continue to be used
while this exists.
If expected
only contains 1 character strings, then use
skip_until_char_any()
instead.
Panics
Panics in non-optimized builds, if expected
is empty,
or if expected
contains an empty &str
.
In optimized builds 0 characters are skipped, and
(cursor..cursor, "")
is returned instead,
regardless of whether there are any remaining characters.
In short there is a debug_assert!(!expected.is_empty())
(along with a similar assertion for the strings).
Example
let mut scanner = Scanner::new("FooBarFooBarFooBaaarBaz");
assert_eq!(scanner.skip_until_str_any(&["Baaar", "Baz"]), (0..15, "FooBarFooBarFoo"));
assert_eq!(scanner.remaining_text(), "BaaarBaz");
sourcepub fn skip_whitespace(&mut self) -> ScannerItem<&'text str>
pub fn skip_whitespace(&mut self) -> ScannerItem<&'text str>
Skips zero-to-many characters, while the next character is a whitespace, same as:
scanner.skip_while(char::is_whitespace);
Examples found in repository?
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
fn parse_token(scanner: &mut Scanner<'text>) -> Result<Option<Self>, Box<dyn error::Error>> {
scanner.skip_whitespace();
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_alphabetic() || (c == '_')) {
let (last, _s) = scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
return Ok(Some(Self::Ident(&scanner.text()[first.start..last.end])));
}
if let Ok((first, _c)) = scanner.accept_if(|c| c.is_ascii_digit()) {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
if scanner.accept_char('.').is_ok() {
let (last, _s) = scanner.skip_while(|c| c.is_ascii_digit());
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Float(f)));
} else {
let text = &scanner.text()[first.start..last.end];
let f = text.parse()?;
return Ok(Some(Self::Int(f)));
}
}
if let Some(sym) = Sym::parse_token(scanner) {
return Ok(Some(Self::Sym(sym)));
}
Ok(None)
}
sourcepub fn scan_with<F>(&mut self, f: F) -> ScannerResult<'text, &'text str>where
F: FnOnce(&mut Self) -> ScanResult<'text>,
pub fn scan_with<F>(&mut self, f: F) -> ScannerResult<'text, &'text str>where F: FnOnce(&mut Self) -> ScanResult<'text>,
Advances the cursor if f()
returns Ok
, otherwise on Err
the
cursor position is backtracked to before f()
was called.
Utility for scanning tokens, where an unexpected character during scanning should restore the cursor position to where it was before the scan was started.
Additionally, returns Err
if f()
returns Ok
, but did not advance
the cursor position.
Example
fn scan_word<'text>(scanner: &mut Scanner<'text>) -> Result<(), ScannerItem<&'text str>> {
// Get next char if alphabetic or return err
let (first, _c) = scanner.accept_if(char::is_alphabetic)?;
// Skip zero-to-many alphabetic characters
let (last, _s) = scanner.skip_while(char::is_alphabetic);
Ok(())
}
let text = "Hello World";
let mut scanner = Scanner::new(text);
assert_eq!(scanner.scan_with(scan_word), Ok((0..5, "Hello")));
assert_eq!(scanner.scan_with(scan_word), Err((5..5, "")));
assert_eq!(scanner.next(), Ok((5..6, ' ')));
assert_eq!(scanner.scan_with(scan_word), Ok((6..11, "World")));
sourcepub fn peeking<T, F>(&self, f: F) -> Twhere
F: FnOnce(&mut Self) -> T,
pub fn peeking<T, F>(&self, f: F) -> Twhere F: FnOnce(&mut Self) -> T,
Calls f
with a &mut Scanner
of this
&Scanner
, i.e. a Scanner
with the
same text()
, remaining_text()
, and cursor_pos()
.
sourcepub fn scan_digits(&mut self) -> ScannerResult<'text, &'text str>
pub fn scan_digits(&mut self) -> ScannerResult<'text, &'text str>
This function accepts the following formats:
0
1
5
123
00000
The following is not accepted by this function:
0__000__0
_
___
_123
See also:
scan_digits_or_underscores()
scan_non_zero_digits()
scan_non_zero_digits_or_underscores()
scan_rust_int_dec()
scan_c_int_dec()
scan_python_int_dec()
- and more extensions
Grammar
The following EBNF grammar represents what this method accepts:
Digits ::= Digit Digit*
Digit ::= [0-9]
sourcepub fn scan_digits_or_underscores(&mut self) -> ScannerResult<'text, &'text str>
pub fn scan_digits_or_underscores(&mut self) -> ScannerResult<'text, &'text str>
This function accepts the following formats:
0
1
5_
0000
12345
1_2_3
0__000__0
The following is not accepted by this function:
_
___
_123
See also:
scan_digits()
scan_non_zero_digits()
scan_non_zero_digits_or_underscores()
scan_rust_int_dec()
scan_c_int_dec()
scan_python_int_dec()
- and more extensions
Grammar
The following EBNF grammar represents what this method accepts:
Digits ::= Digit ( Digit | '_' )*
Digit ::= [0-9]
sourcepub fn scan_non_zero_digits(&mut self) -> ScannerResult<'text, &'text str>
pub fn scan_non_zero_digits(&mut self) -> ScannerResult<'text, &'text str>
This function accepts the following formats:
0
1
5
123
102030
The following is not accepted by this function:
0000
01
012345
0__000__0
_
___
_123
See also:
scan_digits()
scan_digits_or_underscores()
scan_non_zero_digits_or_underscores()
scan_rust_int_dec()
scan_c_int_dec()
scan_python_int_dec()
- and more extensions
Grammar
The following EBNF grammar represents what this method accepts:
Digits ::= ( '0' |
NonZeroDigit Digit* )
NonZeroDigit ::= [1-9]
Digit ::= [0-9]
sourcepub fn scan_non_zero_digits_or_underscores(
&mut self
) -> ScannerResult<'text, &'text str>
pub fn scan_non_zero_digits_or_underscores( &mut self ) -> ScannerResult<'text, &'text str>
This function accepts the following formats:
0
1
5_
123
102030
1_2_3
0___
12345__
The following is not accepted by this function:
0000
01
012345
0__000__0
_
___
_123
See also:
scan_digits()
scan_digits_or_underscores()
scan_non_zero_digits()
scan_rust_int_dec()
scan_c_int_dec()
scan_python_int_dec()
- and more extensions
Grammar
The following EBNF grammar represents what this method accepts:
Digits ::= ( '0' |
NonZeroDigit ( Digit | '_' )* )
NonZeroDigit ::= [1-9]
Digit ::= [0-9]
Trait Implementations§
source§impl<'text> CScannerExt<'text> for Scanner<'text>
impl<'text> CScannerExt<'text> for Scanner<'text>
fn scan_c_line_comment(&mut self) -> ScannerResult<'text, &'text str>
fn scan_c_block_comment(&mut self) -> ScannerResult<'text, &'text str>
fn scan_c_identifier(&mut self) -> ScannerResult<'text, &'text str>
fn scan_c_int_dec(&mut self) -> ScannerResult<'text, &'text str>
fn scan_c_int_hex(&mut self) -> ScannerResult<'text, &'text str>
fn scan_c_int_oct(&mut self) -> ScannerResult<'text, &'text str>
fn scan_c_float(&mut self) -> ScannerResult<'text, &'text str>
fn scan_c_char(&mut self) -> ScannerResult<'text, &'text str>
fn scan_c_string(&mut self) -> ScannerResult<'text, &'text str>
source§impl<'text> CssScannerExt<'text> for Scanner<'text>
impl<'text> CssScannerExt<'text> for Scanner<'text>
source§fn scan_css_block_comment(&mut self) -> ScannerResult<'text, &'text str>
fn scan_css_block_comment(&mut self) -> ScannerResult<'text, &'text str>
source§fn scan_css_identifier(&mut self) -> ScannerResult<'text, &'text str>
fn scan_css_identifier(&mut self) -> ScannerResult<'text, &'text str>
fn scan_css_at_keyword(&mut self) -> ScannerResult<'text, &'text str>
fn scan_css_hash(&mut self) -> ScannerResult<'text, &'text str>
source§fn scan_css_string(&mut self) -> ScannerResult<'text, &'text str>
fn scan_css_string(&mut self) -> ScannerResult<'text, &'text str>
source§fn scan_css_number(&mut self) -> ScannerResult<'text, &'text str>
fn scan_css_number(&mut self) -> ScannerResult<'text, &'text str>
source§impl<'text> JavaScannerExt<'text> for Scanner<'text>
impl<'text> JavaScannerExt<'text> for Scanner<'text>
fn scan_java_line_comment(&mut self) -> ScannerResult<'text, &'text str>
fn scan_java_block_comment(&mut self) -> ScannerResult<'text, &'text str>
fn scan_java_identifier(&mut self) -> ScannerResult<'text, &'text str>
source§fn scan_java_keyword(&mut self) -> ScannerResult<'text, &'text str>
fn scan_java_keyword(&mut self) -> ScannerResult<'text, &'text str>
null
, true
, and false
are not keywords, but literals,
see scan_java_null_literal()
and scan_java_boolean_literal()
.