apollo_saphyr_parser/input/
str.rs1use crate::{
2 char_traits::{
3 is_alpha, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, is_flow, is_z,
4 },
5 input::{Input, SkipTabs},
6};
7use alloc::string::String;
8
/// An input source that reads directly from a borrowed string slice.
///
/// Nothing is copied or buffered: consuming characters simply re-points `buffer` at the
/// not-yet-consumed tail of the original string.
// `StrInput` repeats the name of the enclosing `input` module on purpose, so the lint is
// silenced rather than the type renamed.
#[allow(clippy::module_name_repetitions)]
pub struct StrInput<'a> {
    /// The part of the input that has not been consumed yet.
    buffer: &'a str,
    /// The largest lookahead that has been requested so far.
    lookahead: usize,
}

impl<'a> StrInput<'a> {
    /// Creates an input positioned at the start of `input`, with no lookahead requested.
    #[must_use]
    pub fn new(input: &'a str) -> Self {
        StrInput {
            lookahead: 0,
            buffer: input,
        }
    }
}
31
32impl Input for StrInput<'_> {
33 #[inline]
34 fn lookahead(&mut self, x: usize) {
35 self.lookahead = self.lookahead.max(x);
39 }
40
41 #[inline]
42 fn buflen(&self) -> usize {
43 self.lookahead
44 }
45
46 #[inline]
47 fn bufmaxlen(&self) -> usize {
48 BUFFER_LEN
49 }
50
51 fn buf_is_empty(&self) -> bool {
52 self.buflen() == 0
53 }
54
55 #[inline]
56 fn raw_read_ch(&mut self) -> char {
57 let mut chars = self.buffer.chars();
58 if let Some(c) = chars.next() {
59 self.buffer = chars.as_str();
60 c
61 } else {
62 '\0'
63 }
64 }
65
66 #[inline]
67 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
68 if let Some((c, sub_str)) = split_first_char(self.buffer) {
69 if is_breakz(c) {
70 None
71 } else {
72 self.buffer = sub_str;
73 Some(c)
74 }
75 } else {
76 None
77 }
78 }
79
80 #[inline]
81 fn skip(&mut self) {
82 let mut chars = self.buffer.chars();
83 if chars.next().is_some() {
84 self.buffer = chars.as_str();
85 }
86 }
87
88 #[inline]
89 fn skip_n(&mut self, count: usize) {
90 let mut chars = self.buffer.chars();
91 for _ in 0..count {
92 if chars.next().is_none() {
93 break;
94 }
95 }
96 self.buffer = chars.as_str();
97 }
98
99 #[inline]
100 fn peek(&self) -> char {
101 self.buffer.chars().next().unwrap_or('\0')
102 }
103
104 #[inline]
105 fn peek_nth(&self, n: usize) -> char {
106 let mut chars = self.buffer.chars();
107 for _ in 0..n {
108 if chars.next().is_none() {
109 return '\0';
110 }
111 }
112 chars.next().unwrap_or('\0')
113 }
114
115 #[inline]
116 fn look_ch(&mut self) -> char {
117 self.lookahead(1);
118 self.peek()
119 }
120
121 #[inline]
122 fn next_char_is(&self, c: char) -> bool {
123 self.peek() == c
124 }
125
126 #[inline]
127 fn nth_char_is(&self, n: usize, c: char) -> bool {
128 self.peek_nth(n) == c
129 }
130
131 #[inline]
132 fn next_2_are(&self, c1: char, c2: char) -> bool {
133 let mut chars = self.buffer.chars();
134 chars.next() == Some(c1) && chars.next() == Some(c2)
135 }
136
137 #[inline]
138 fn next_3_are(&self, c1: char, c2: char, c3: char) -> bool {
139 let mut chars = self.buffer.chars();
140 chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3)
141 }
142
143 #[inline]
144 fn next_is_document_indicator(&self) -> bool {
145 if self.buffer.len() < 3 {
146 false
147 } else {
148 let bytes = self.buffer.as_bytes();
150 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
151 && (bytes[0] == b'.' || bytes[0] == b'-')
152 && bytes[0] == bytes[1]
153 && bytes[1] == bytes[2]
154 }
155 }
156
157 #[inline]
158 fn next_is_document_start(&self) -> bool {
159 if self.buffer.len() < 3 {
160 false
161 } else {
162 let bytes = self.buffer.as_bytes();
164 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
165 && bytes[0] == b'-'
166 && bytes[1] == b'-'
167 && bytes[2] == b'-'
168 }
169 }
170
171 #[inline]
172 fn next_is_document_end(&self) -> bool {
173 if self.buffer.len() < 3 {
174 false
175 } else {
176 let bytes = self.buffer.as_bytes();
178 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
179 && bytes[0] == b'.'
180 && bytes[1] == b'.'
181 && bytes[2] == b'.'
182 }
183 }
184
185 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> (usize, Result<SkipTabs, &'static str>) {
186 assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
187
188 let mut new_str = self.buffer;
189 let mut has_yaml_ws = false;
190 let mut encountered_tab = false;
191
192 if skip_tabs == SkipTabs::Yes {
195 loop {
196 if let Some(sub_str) = new_str.strip_prefix(' ') {
197 has_yaml_ws = true;
198 new_str = sub_str;
199 } else if let Some(sub_str) = new_str.strip_prefix('\t') {
200 encountered_tab = true;
201 new_str = sub_str;
202 } else {
203 break;
204 }
205 }
206 } else {
207 while let Some(sub_str) = new_str.strip_prefix(' ') {
208 has_yaml_ws = true;
209 new_str = sub_str;
210 }
211 }
212
213 let mut chars_consumed = self.buffer.len() - new_str.len();
216
217 if !new_str.is_empty() && new_str.as_bytes()[0] == b'#' {
218 if !encountered_tab && !has_yaml_ws {
219 return (
220 chars_consumed,
221 Err("comments must be separated from other tokens by whitespace"),
222 );
223 }
224
225 while let Some((c, sub_str)) = split_first_char(new_str) {
227 if is_breakz(c) {
228 break;
229 }
230 new_str = sub_str;
231 chars_consumed += 1;
232 }
233 }
234
235 self.buffer = new_str;
236
237 (
238 chars_consumed,
239 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)),
240 )
241 }
242
243 #[allow(clippy::inline_always)]
244 #[inline(always)]
245 fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool {
246 let c = self.buffer.as_bytes()[0];
247 if self.buffer.len() > 1 {
248 let nc = self.buffer.as_bytes()[1];
249 match c {
250 b':' if is_blank_or_breakz(nc as char) || (in_flow && is_flow(nc as char)) => false,
252 c if in_flow && is_flow(c as char) => false,
253 _ => true,
254 }
255 } else {
256 match c {
257 b':' => false,
259 c if in_flow && is_flow(c as char) => false,
260 _ => true,
261 }
262 }
263 }
264
265 #[inline]
266 fn next_is_blank_or_break(&self) -> bool {
267 !self.buffer.is_empty()
268 && (is_blank(self.buffer.as_bytes()[0] as char)
269 || is_break(self.buffer.as_bytes()[0] as char))
270 }
271
272 #[inline]
273 fn next_is_blank_or_breakz(&self) -> bool {
274 self.buffer.is_empty()
275 || (is_blank(self.buffer.as_bytes()[0] as char)
276 || is_breakz(self.buffer.as_bytes()[0] as char))
277 }
278
279 #[inline]
280 fn next_is_blank(&self) -> bool {
281 !self.buffer.is_empty() && is_blank(self.buffer.as_bytes()[0] as char)
282 }
283
284 #[inline]
285 fn next_is_break(&self) -> bool {
286 !self.buffer.is_empty() && is_break(self.buffer.as_bytes()[0] as char)
287 }
288
289 #[inline]
290 fn next_is_breakz(&self) -> bool {
291 self.buffer.is_empty() || is_breakz(self.buffer.as_bytes()[0] as char)
292 }
293
294 #[inline]
295 fn next_is_z(&self) -> bool {
296 self.buffer.is_empty() || is_z(self.buffer.as_bytes()[0] as char)
297 }
298
299 #[inline]
300 fn next_is_flow(&self) -> bool {
301 !self.buffer.is_empty() && is_flow(self.buffer.as_bytes()[0] as char)
302 }
303
304 #[inline]
305 fn next_is_digit(&self) -> bool {
306 !self.buffer.is_empty() && is_digit(self.buffer.as_bytes()[0] as char)
307 }
308
309 #[inline]
310 fn next_is_alpha(&self) -> bool {
311 !self.buffer.is_empty() && is_alpha(self.buffer.as_bytes()[0] as char)
312 }
313
314 fn skip_while_non_breakz(&mut self) -> usize {
315 let mut new_str = self.buffer;
316 let mut count = 0;
317
318 while let Some((c, sub_str)) = split_first_char(new_str) {
320 if is_breakz(c) {
321 break;
322 }
323 new_str = sub_str;
324 count += 1;
325 }
326
327 self.buffer = new_str;
328
329 count
330 }
331
332 fn skip_while_blank(&mut self) -> usize {
333 let mut i = 0;
335 while i < self.buffer.len() {
336 if !is_blank(self.buffer.as_bytes()[i] as char) {
337 break;
338 }
339 i += 1;
340 }
341 self.buffer = &self.buffer[i..];
342 i
343 }
344
345 fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
346 let mut not_alpha = None;
347
348 let mut chars = self.buffer.chars();
350 for c in chars.by_ref() {
351 if !is_alpha(c) {
352 not_alpha = Some(c);
353 break;
354 }
355 }
356
357 let remaining_string = if let Some(c) = not_alpha {
358 let n_bytes_read = chars.as_str().as_ptr() as usize - self.buffer.as_ptr() as usize;
359 let last_char_bytes = c.len_utf8();
360 &self.buffer[n_bytes_read - last_char_bytes..]
361 } else {
362 chars.as_str()
363 };
364
365 let n_bytes_to_append = remaining_string.as_ptr() as usize - self.buffer.as_ptr() as usize;
366 out.reserve(n_bytes_to_append);
367 out.push_str(&self.buffer[..n_bytes_to_append]);
368 self.buffer = remaining_string;
369
370 n_bytes_to_append
371 }
372}
373
/// The value `StrInput` reports from `bufmaxlen`.
///
/// No buffer of this size is allocated anywhere in this file: `StrInput` reads straight from
/// the string it borrows, so this is purely a number handed back to the caller.
const BUFFER_LEN: usize = 128;
397
/// Splits `s` into its first character and everything after it.
///
/// Returns `None` when `s` is empty.
#[inline]
fn split_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    // `len_utf8` is exactly the number of bytes `first` occupies at the front of `s`.
    Some((first, &s[first.len_utf8()..]))
}
406
#[cfg(test)]
mod test {
    use crate::input::Input;

    use super::StrInput;

    /// `---` is a document start at end of input or before a break or blank; `...` is not.
    #[test]
    pub fn is_document_start() {
        // (input, expected `next_is_document_start`); every input is a document indicator.
        let cases = [
            ("---\n", true),
            ("---", true),
            ("...\n", false),
            ("--- ", true),
        ];
        for &(text, expect_start) in &cases {
            let input = StrInput::new(text);
            assert_eq!(input.next_is_document_start(), expect_start);
            assert!(input.next_is_document_indicator());
        }
    }

    /// `...` is a document end at end of input or before a break or blank; `---` is not.
    #[test]
    pub fn is_document_end() {
        // (input, expected `next_is_document_end`); every input is a document indicator.
        let cases = [
            ("...\n", true),
            ("...", true),
            ("---\n", false),
            ("... ", true),
        ];
        for &(text, expect_end) in &cases {
            let input = StrInput::new(text);
            assert_eq!(input.next_is_document_end(), expect_end);
            assert!(input.next_is_document_indicator());
        }
    }
}