1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
// devela::text::parse::scanner::ascii
#[cfg(doc)]
use crate::TextParseErrorKind;
use crate::{AsciiSet, TextRange, TextScanner, TextUnit};
use crate::{is, unwrap, whilst};
/// Returns `true` if `byte` is one of the ASCII whitespace bytes recognized by the
/// scanner: space, tab, line feed, carriage return, form feed (`0x0C`),
/// or vertical tab (`0x0B`).
///
/// Note that [`u8::is_ascii_whitespace`] does *not* include vertical tab,
/// which is why the set is spelled out here.
const fn is_ascii_ws_byte(byte: u8) -> bool {
    matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | 0x0C | 0x0B)
}

/// Returns `true` if `byte` is ASCII horizontal whitespace: space or tab.
const fn is_ascii_hws_byte(byte: u8) -> bool {
    matches!(byte, b' ' | b'\t')
}

/// ASCII whitespace, identifiers.
impl<'a> TextScanner<'a> {
    /// Skips ASCII horizontal and vertical whitespace.
    ///
    /// Currently skips: space, tab, line feed, carriage return,
    /// form feed, and vertical tab.
    ///
    /// Stops before the first byte that is not whitespace,
    /// or when no more bytes can be peeked.
    pub const fn skip_ascii_ws(&mut self) {
        whilst! { let Some(byte) = self.peek_byte(); {
            is! { is_ascii_ws_byte(byte), self._cursor_bump(1), break }
        }}
    }

    /// Skips ASCII horizontal whitespace.
    ///
    /// Currently skips: space, tab.
    ///
    /// Stops before the first byte that is neither,
    /// or when no more bytes can be peeked.
    pub const fn skip_ascii_hws(&mut self) {
        whilst! { let Some(byte) = self.peek_byte(); {
            is! { is_ascii_hws_byte(byte), self._cursor_bump(1), break }
        }}
    }

    /// Consumes and returns an ASCII identifier range.
    ///
    /// Identifier syntax:
    /// - first byte: `A..=Z`, `a..=z`, or `_`
    /// - following bytes: ASCII alphanumeric or `_`
    ///
    /// Returns `None` if the next byte is not a valid identifier start.
    #[must_use]
    pub const fn take_ascii_ident(&mut self) -> Option<TextRange> {
        self.take_ascii_run(AsciiSet::IDENT_HEAD, AsciiSet::IDENT_TAIL)
    }

    /// Consumes and returns an ASCII identifier-tail range.
    ///
    /// Identifier-tail syntax:
    /// - bytes: ASCII alphanumeric or `_`
    ///
    /// Unlike [`take_ascii_ident`][Self::take_ascii_ident],
    /// this accepts digits as the first consumed byte.
    ///
    /// Returns `None` if no identifier-tail byte was consumed.
    #[must_use]
    pub const fn take_ascii_ident_tail(&mut self) -> Option<TextRange> {
        self.take_ascii_set(AsciiSet::IDENT_TAIL)
    }

    /// Returns `range` trimmed of leading and trailing ASCII whitespace.
    ///
    /// Currently trims: space, tab, line feed, carriage return,
    /// form feed, and vertical tab.
    ///
    /// The scanner itself is not modified. If `range` consists only of
    /// whitespace the result is empty; if `range.start >= range.end`
    /// nothing is inspected and the same bounds are returned.
    ///
    /// # Panics
    /// The scanner's bytes are indexed directly, so a non-empty `range`
    /// that extends past them panics.
    #[must_use]
    pub const fn trim_ascii_ws(&self, range: TextRange) -> TextRange {
        let mut start = range.start.0;
        let mut end = range.end.0;
        // Move `start` forward over leading whitespace.
        whilst! { start < end; {
            if is_ascii_ws_byte(self.bytes[start as usize]) { start += 1; } else { break; }
        }}
        // Move the exclusive `end` backward over trailing whitespace.
        whilst! { start < end; {
            if is_ascii_ws_byte(self.bytes[(end - 1) as usize]) { end -= 1; } else { break; }
        }}
        TextRange::from_prim(start, end)
    }

    /// Returns `range` trimmed of leading and trailing ASCII horizontal whitespace.
    ///
    /// Currently trims: space, tab.
    ///
    /// The scanner itself is not modified. If `range` consists only of
    /// spaces and tabs the result is empty; if `range.start >= range.end`
    /// nothing is inspected and the same bounds are returned.
    ///
    /// # Panics
    /// The scanner's bytes are indexed directly, so a non-empty `range`
    /// that extends past them panics.
    #[must_use]
    pub const fn trim_ascii_hws(&self, range: TextRange) -> TextRange {
        let mut start = range.start.0;
        let mut end = range.end.0;
        // Move `start` forward over leading spaces/tabs.
        whilst! { start < end; {
            if is_ascii_hws_byte(self.bytes[start as usize]) { start += 1; } else { break; }
        }}
        // Move the exclusive `end` backward over trailing spaces/tabs.
        whilst! { start < end; {
            if is_ascii_hws_byte(self.bytes[(end - 1) as usize]) { end -= 1; } else { break; }
        }}
        TextRange::from_prim(start, end)
    }
}
/// `AsciiSet` scanning.
impl<'a> TextScanner<'a> {
    /// Consumes the next byte if it belongs to `set`.
    ///
    /// Returns `true` if a byte was consumed.
    /// Returns `false`, leaving the cursor untouched, if the next byte
    /// is not in `set` or there is no next byte.
    ///
    /// Non-ASCII bytes never match.
    #[must_use]
    pub const fn eat_ascii_set(&mut self, set: AsciiSet) -> bool {
        match self.peek_byte() {
            Some(byte) if set.contains_byte(byte) => {
                self._cursor_bump(1);
                true
            }
            _ => false,
        }
    }

    /// Skips bytes while they belong to `set`.
    ///
    /// Stops before the first byte outside `set`, or at the end of input.
    ///
    /// Returns the number of skipped bytes.
    ///
    /// Non-ASCII bytes never match.
    pub const fn skip_ascii_set(&mut self, set: AsciiSet) -> TextUnit {
        let start = self.cursor.index.0;
        whilst! { let Some(byte) = self.peek_byte(); {
            is! { set.contains_byte(byte), self._cursor_bump(1), break }
        }}
        self.cursor.index.0 - start
    }

    /// Skips bytes until the next byte belonging to `set`.
    ///
    /// Stops before the matching byte.
    ///
    /// If no byte from `set` is found, skips to the end of input.
    ///
    /// Returns the number of skipped bytes.
    ///
    /// Non-ASCII bytes never match.
    pub const fn skip_until_ascii_set(&mut self, set: AsciiSet) -> TextUnit {
        let start = self.cursor.index.0;
        whilst! { let Some(byte) = self.peek_byte(); {
            is! { set.contains_byte(byte), break }
            self._cursor_bump(1);
        }}
        self.cursor.index.0 - start
    }

    /// Consumes and returns a range of bytes belonging to `set`.
    ///
    /// Returns `None` if no byte was consumed.
    ///
    /// Non-ASCII bytes never match.
    #[must_use]
    pub const fn take_ascii_set(&mut self, set: AsciiSet) -> Option<TextRange> {
        let start = self.mark();
        // Same scan as `skip_ascii_set`; only the covered range matters here.
        let _ = self.skip_ascii_set(set);
        is! { self.cursor.index.0 == start.index.0, None, Some(self.range_from(start)) }
    }

    /// Consumes and returns an ASCII run with distinct head and tail sets.
    ///
    /// The first byte must belong to `head`.
    /// Following bytes may belong to `tail`.
    ///
    /// Returns `None`, without consuming anything, if there is no next byte
    /// or the first byte does not belong to `head`.
    ///
    /// Non-ASCII bytes never match.
    #[must_use]
    pub const fn take_ascii_run(&mut self, head: AsciiSet, tail: AsciiSet) -> Option<TextRange> {
        let start = self.mark();
        // Returns `None` early when there is no byte left to inspect.
        let byte = unwrap![some? self.peek_byte()];
        is! { !head.contains_byte(byte), return None }
        self._cursor_bump(1);
        // The tail is an ordinary (possibly empty) run of `tail` bytes.
        let _ = self.skip_ascii_set(tail);
        Some(self.range_from(start))
    }

    /// Consumes and returns the range up to, but excluding, the next byte belonging to `set`.
    ///
    /// If no byte from `set` is found, consumes to the end of input.
    ///
    /// The returned range is empty if the next byte already belongs to `set`.
    ///
    /// Non-ASCII bytes never match.
    pub const fn take_until_ascii_set(&mut self, set: AsciiSet) -> TextRange {
        let start = self.mark();
        // Same scan as `skip_until_ascii_set`; only the covered range matters here.
        let _ = self.skip_until_ascii_set(set);
        self.range_from(start)
    }
}