1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
use log::trace;
use crate::{
internal::{find_matches_impl::FindMatchesImpl, ScannerImpl},
Match, Position, PositionProvider, ScannerModeSwitcher,
};
/// Outcome of peeking ahead for `n` matches.
#[derive(Debug, PartialEq)]
pub enum PeekResult {
    /// All `n` requested matches were found.
    Matches(Vec<Match>),

    /// Fewer than `n` matches were found because the end of the haystack was reached first.
    MatchesReachedEnd(Vec<Match>),

    /// Fewer than `n` matches were found because the token type of the last match would have
    /// triggered a mode switch.
    ///
    /// The payload is a pair: the matches that were found, and the index of the mode that would
    /// be switched to on the last match.
    MatchesReachedModeSwitch((Vec<Match>, usize)),

    /// No match was found at all.
    NotFound,
}
/// Iterator that yields every non-overlapping [`Match`] of a haystack.
///
/// Iteration continues until no further match can be found.
///
/// The lifetime `'h` is the lifetime of the haystack being searched.
///
/// Instances are obtained from the [`crate::Scanner::find_iter`] method.
#[derive(Debug)]
pub struct FindMatches<'h> {
    // The only field: every operation of this type is forwarded to this crate-internal
    // implementation.
    inner: FindMatchesImpl<'h>,
}
impl<'h> FindMatches<'h> {
    /// Builds a `FindMatches` iterator that searches `input` using `scanner_impl`.
    pub(crate) fn new(scanner_impl: ScannerImpl, input: &'h str) -> Self {
        let inner = FindMatchesImpl::new(scanner_impl, input);
        Self { inner }
    }

    /// Consumes the iterator and returns it positioned at `offset`.
    ///
    /// `offset` is relative to the start of the haystack. A parser that resets the scanner to a
    /// certain position can use this method, for example when it backtracks to an earlier
    /// position in the input or when it switches between scanner modes on its own.
    ///
    /// An offset greater than the length of the haystack is set to the length of the haystack.
    pub fn with_offset(self, offset: usize) -> Self {
        let inner = self.inner.with_offset(offset);
        Self { inner }
    }

    /// Moves the iterator to `position`, relative to the start of the haystack.
    ///
    /// This offers the same functionality as [`Self::with_offset`], but mutates the iterator in
    /// place instead of consuming and returning it.
    pub fn set_offset(&mut self, position: usize) {
        self.inner.set_offset(position);
    }

    /// Returns the current byte offset, counted from the start of the haystack.
    ///
    /// This is the end offset of the last match found by the iterator.
    #[inline]
    pub fn offset(&self) -> usize {
        self.inner.offset()
    }

    /// Finds the next match in the haystack.
    ///
    /// When a match is found, the iterator is advanced to the end of that match and the match is
    /// returned.
    ///
    /// When no match is found at the current position, the haystack is advanced by one and the
    /// search is retried, repeatedly, until a match is found or the iterator is exhausted.
    ///
    /// `None` is returned once the iterator is exhausted without finding a match.
    ///
    /// The `Iterator` implementation of `FindMatches` is built on this method.
    #[inline]
    pub fn next_match(&mut self) -> Option<Match> {
        self.inner.next_match()
    }

    /// Looks `n` matches ahead without consuming them and reports the outcome as a
    /// [`PeekResult`].
    ///
    /// Peeking always stops at the end of the haystack, or when the last match would trigger a
    /// mode switch. The peek operation does not perform that mode switch: this keeps the state of
    /// the scanner unchanged and avoids returning a mix of tokens from different modes.
    pub fn peek_n(&mut self, n: usize) -> PeekResult {
        self.inner.peek_n(n)
    }

    /// Advances the haystack to `position` and returns the new position in the haystack.
    ///
    /// This is used to skip a number of characters in the haystack, for example the characters of
    /// matches that were previously peeked.
    ///
    /// * If the new position is greater than the length of the haystack, the length of the
    ///   haystack is returned.
    /// * If the new position is less than the current position, the current position is
    ///   returned, i.e. moving backwards in the haystack is not allowed.
    ///
    /// The current position is the end index of the last match found by the iterator, so that the
    /// next call to `next_match` starts searching for matches at the following position.
    pub fn advance_to(&mut self, position: usize) -> usize {
        self.inner.advance_to(position)
    }
}
impl Iterator for FindMatches<'_> {
    type Item = Match;

    /// Yields the next match by forwarding to the inherent `next_match` method.
    fn next(&mut self) -> Option<Self::Item> {
        Self::next_match(self)
    }
}
impl PositionProvider for FindMatches<'_> {
    /// Translates a byte `offset` into its line and column numbers.
    ///
    /// The line number is the index of the line offset in the vector plus one; the column number
    /// is the offset minus the line offset.
    ///
    /// For an offset greater than the length of the haystack, the last recorded line is returned
    /// and the column number is calculated from the last recorded position.
    fn position(&self, offset: usize) -> Position {
        self.inner.position(offset)
    }

    /// Moves the haystack offset to the given position.
    fn set_offset(&mut self, offset: usize) {
        self.inner.set_offset(offset);
    }
}
impl ScannerModeSwitcher for FindMatches<'_> {
    /// Makes `mode` the current scanner mode of the scanner implementation.
    ///
    /// The scanner normally changes its mode by itself, based on the transitions defined in the
    /// active scanner mode. This method lets a parser explicitly switch to a different set of
    /// DFAs instead.
    fn set_mode(&mut self, mode: usize) {
        // Log the requested mode before forwarding it to the inner implementation.
        trace!("Set scanner mode to {}", mode);
        self.inner.set_mode(mode);
    }

    /// Reports the current scanner mode. Intended for tests and debugging purposes.
    #[allow(dead_code)]
    #[inline]
    fn current_mode(&self) -> usize {
        self.inner.current_mode()
    }

    /// Looks up the name of the scanner mode with the given `index`.
    ///
    /// The lookup is forwarded to the inner implementation; `None` is returned when it reports
    /// no name for `index`.
    fn mode_name(&self, index: usize) -> Option<&str> {
        self.inner.mode_name(index)
    }
}