saphyr_parser_bw/input/
str.rs1use crate::{
2 char_traits::{is_blank_or_breakz, is_breakz, is_flow},
3 input::{BorrowedInput, Input, SkipTabs},
4};
5use alloc::string::String;
6
/// A parser input that reads directly from a borrowed `&str`.
///
/// Consuming characters never copies anything: it only moves the start of `buffer` forward.
#[allow(clippy::module_name_repetitions)]
pub struct StrInput<'a> {
    /// The complete string handed to [`StrInput::new`]; it is never reassigned afterwards.
    original: &'a str,
    /// The part of `original` that has not been consumed yet.
    buffer: &'a str,
    /// The largest lookahead that has been requested so far.
    lookahead: usize,
}
26
27impl<'a> StrInput<'a> {
28 #[must_use]
30 pub fn new(input: &'a str) -> Self {
31 Self {
32 original: input,
33 buffer: input,
34 lookahead: 0,
35 }
36 }
37
38 #[inline]
43 #[must_use]
44 fn consumed_bytes(&self) -> usize {
45 self.original.len() - self.buffer.len()
46 }
47}
48
49impl Input for StrInput<'_> {
50 #[inline]
51 fn lookahead(&mut self, x: usize) {
52 self.lookahead = self.lookahead.max(x);
56 }
57
58 #[inline]
59 fn buflen(&self) -> usize {
60 self.lookahead
61 }
62
63 #[inline]
64 fn bufmaxlen(&self) -> usize {
65 BUFFER_LEN
66 }
67
68 fn buf_is_empty(&self) -> bool {
69 self.buflen() == 0
70 }
71
72 #[inline]
73 fn raw_read_ch(&mut self) -> char {
74 let mut chars = self.buffer.chars();
75 if let Some(c) = chars.next() {
76 self.buffer = chars.as_str();
77 c
78 } else {
79 '\0'
80 }
81 }
82
83 #[inline]
84 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
85 if let Some((c, sub_str)) = split_first_char(self.buffer) {
86 if is_breakz(c) {
87 None
88 } else {
89 self.buffer = sub_str;
90 Some(c)
91 }
92 } else {
93 None
94 }
95 }
96
97 #[inline]
98 fn skip(&mut self) {
99 if !self.buffer.is_empty() {
100 let b = self.buffer.as_bytes()[0];
101 if b < 0x80 {
102 self.buffer = &self.buffer[1..];
103 } else {
104 let mut chars = self.buffer.chars();
105 chars.next();
106 self.buffer = chars.as_str();
107 }
108 }
109 }
110
111 #[inline]
112 fn skip_n(&mut self, count: usize) {
113 let mut chars = self.buffer.chars();
114 for _ in 0..count {
115 if chars.next().is_none() {
116 break;
117 }
118 }
119 self.buffer = chars.as_str();
120 }
121
122 #[inline]
123 fn peek(&self) -> char {
124 if self.buffer.is_empty() {
125 return '\0';
126 }
127 let b = self.buffer.as_bytes()[0];
128 if b < 0x80 {
129 b as char
130 } else {
131 self.buffer.chars().next().unwrap()
132 }
133 }
134
135 #[inline]
136 fn peek_nth(&self, n: usize) -> char {
137 if n == 0 {
138 return self.peek();
139 }
140 let bytes = self.buffer.as_bytes();
141 if n == 1 && bytes.len() >= 2 && bytes[0] < 0x80 && bytes[1] < 0x80 {
142 return bytes[1] as char;
143 }
144 let mut chars = self.buffer.chars();
145 for _ in 0..n {
146 if chars.next().is_none() {
147 return '\0';
148 }
149 }
150 chars.next().unwrap_or('\0')
151 }
152
153 #[inline]
154 fn byte_offset(&self) -> Option<usize> {
155 Some(self.consumed_bytes())
156 }
157
158 #[inline]
159 fn slice_bytes(&self, start: usize, end: usize) -> Option<&str> {
160 debug_assert!(start <= end);
161 debug_assert!(end <= self.original.len());
162 self.original.get(start..end)
163 }
164
165 #[inline]
166 fn look_ch(&mut self) -> char {
167 self.lookahead(1);
168 self.peek()
169 }
170
171 #[inline]
172 fn next_char_is(&self, c: char) -> bool {
173 self.peek() == c
174 }
175
176 #[inline]
177 fn nth_char_is(&self, n: usize, c: char) -> bool {
178 self.peek_nth(n) == c
179 }
180
181 #[inline]
182 fn next_2_are(&self, c1: char, c2: char) -> bool {
183 let mut chars = self.buffer.chars();
184 chars.next() == Some(c1) && chars.next() == Some(c2)
185 }
186
187 #[inline]
188 fn next_3_are(&self, c1: char, c2: char, c3: char) -> bool {
189 let mut chars = self.buffer.chars();
190 chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3)
191 }
192
193 #[inline]
194 fn next_is_document_indicator(&self) -> bool {
195 if self.buffer.len() < 3 {
196 false
197 } else {
198 let bytes = self.buffer.as_bytes();
200 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
201 && (bytes[0] == b'.' || bytes[0] == b'-')
202 && bytes[0] == bytes[1]
203 && bytes[1] == bytes[2]
204 }
205 }
206
207 #[inline]
208 fn next_is_document_start(&self) -> bool {
209 if self.buffer.len() < 3 {
210 false
211 } else {
212 let bytes = self.buffer.as_bytes();
214 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
215 && bytes[0] == b'-'
216 && bytes[1] == b'-'
217 && bytes[2] == b'-'
218 }
219 }
220
221 #[inline]
222 fn next_is_document_end(&self) -> bool {
223 if self.buffer.len() < 3 {
224 false
225 } else {
226 let bytes = self.buffer.as_bytes();
228 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
229 && bytes[0] == b'.'
230 && bytes[1] == b'.'
231 && bytes[2] == b'.'
232 }
233 }
234
235 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> (usize, Result<SkipTabs, &'static str>) {
236 assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
237
238 let mut new_str = self.buffer;
239 let mut has_yaml_ws = false;
240 let mut encountered_tab = false;
241
242 if skip_tabs == SkipTabs::Yes {
245 loop {
246 if let Some(sub_str) = new_str.strip_prefix(' ') {
247 has_yaml_ws = true;
248 new_str = sub_str;
249 } else if let Some(sub_str) = new_str.strip_prefix('\t') {
250 encountered_tab = true;
251 new_str = sub_str;
252 } else {
253 break;
254 }
255 }
256 } else {
257 while let Some(sub_str) = new_str.strip_prefix(' ') {
258 has_yaml_ws = true;
259 new_str = sub_str;
260 }
261 }
262
263 let mut chars_consumed = self.buffer.len() - new_str.len();
266
267 if !new_str.is_empty() && new_str.as_bytes()[0] == b'#' {
268 if !encountered_tab && !has_yaml_ws {
269 return (
270 chars_consumed,
271 Err("comments must be separated from other tokens by whitespace"),
272 );
273 }
274
275 while let Some((c, sub_str)) = split_first_char(new_str) {
277 if is_breakz(c) {
278 break;
279 }
280 new_str = sub_str;
281 chars_consumed += 1;
282 }
283 }
284
285 self.buffer = new_str;
286
287 (
288 chars_consumed,
289 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)),
290 )
291 }
292
293 #[allow(clippy::inline_always)]
294 #[inline(always)]
295 fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool {
296 let nc = self.peek_nth(1);
297 match self.peek() {
298 ':' if is_blank_or_breakz(nc) || (in_flow && is_flow(nc)) => false,
300 c if in_flow && is_flow(c) => false,
301 _ => true,
302 }
303 }
304
305 #[inline]
306 fn next_is_blank_or_break(&self) -> bool {
307 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | b'\n' | b'\r')
308 }
309
310 #[inline]
311 fn next_is_blank_or_breakz(&self) -> bool {
312 self.buffer.is_empty()
313 || matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | 0 | b'\n' | b'\r')
314 }
315
316 #[inline]
317 fn next_is_blank(&self) -> bool {
318 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t')
319 }
320
321 #[inline]
322 fn next_is_break(&self) -> bool {
323 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b'\n' | b'\r')
324 }
325
326 #[inline]
327 fn next_is_breakz(&self) -> bool {
328 self.buffer.is_empty() || matches!(self.buffer.as_bytes()[0], 0 | b'\n' | b'\r')
329 }
330
331 #[inline]
332 fn next_is_z(&self) -> bool {
333 self.buffer.is_empty() || self.buffer.as_bytes()[0] == 0
334 }
335
336 #[inline]
337 fn next_is_flow(&self) -> bool {
338 !self.buffer.is_empty()
339 && matches!(self.buffer.as_bytes()[0], b',' | b'[' | b']' | b'{' | b'}')
340 }
341
342 #[inline]
343 fn next_is_digit(&self) -> bool {
344 !self.buffer.is_empty() && self.buffer.as_bytes()[0].is_ascii_digit()
345 }
346
347 #[inline]
355 fn next_is_alpha(&self) -> bool {
356 !self.buffer.is_empty()
357 && matches!(self.buffer.as_bytes()[0], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
358 }
359
360 fn skip_while_non_breakz(&mut self) -> usize {
361 let mut byte_pos = 0;
362 let mut chars_consumed = 0;
363
364 for (i, c) in self.buffer.char_indices() {
365 if is_breakz(c) {
366 break;
367 }
368 byte_pos = i + c.len_utf8();
369 chars_consumed += 1;
370 }
371
372 self.buffer = &self.buffer[byte_pos..];
373 chars_consumed
374 }
375
376 #[inline]
377 fn skip_while_blank(&mut self) -> usize {
378 let bytes = self.buffer.as_bytes();
379
380 let mut i = 0;
381 while i < bytes.len() {
382 match bytes[i] {
383 b' ' | b'\t' => i += 1,
384 _ => break,
385 }
386 }
387
388 self.buffer = &self.buffer[i..];
389 i
390 }
391
392 fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
401 let bytes = self.buffer.as_bytes();
402 let mut i = 0;
403
404 while i < bytes.len() {
406 match bytes[i] {
407 b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-' => i += 1,
408 _ => break,
409 }
410 }
411
412 out.push_str(&self.buffer[..i]);
414 self.buffer = &self.buffer[i..];
415
416 i
417 }
418
419 fn fetch_while_is_yaml_non_space(&mut self, out: &mut String) -> usize {
420 let byte_pos = self
421 .buffer
422 .chars()
423 .take_while(|c| crate::char_traits::is_yaml_non_space(*c))
424 .map(char::len_utf8)
425 .sum();
426
427 out.push_str(&self.buffer[..byte_pos]);
428
429 self.buffer = &self.buffer[byte_pos..];
430
431 byte_pos
432 }
433
434 fn fetch_plain_scalar_chunk(
435 &mut self,
436 out: &mut String,
437 _count: usize,
438 flow_level_gt_0: bool,
439 ) -> (bool, usize) {
440 let bytes = self.buffer.as_bytes();
441 let len = bytes.len();
442 let mut byte_pos = 0;
443 let mut chars_consumed = 0;
444
445 while byte_pos < len {
446 let b = bytes[byte_pos];
447 if b < 0x80 {
448 let c = b as char;
449 if crate::char_traits::is_blank_or_breakz(c) {
450 out.push_str(&self.buffer[..byte_pos]);
451 self.buffer = &self.buffer[byte_pos..];
452 return (true, chars_consumed);
453 }
454 if flow_level_gt_0 && crate::char_traits::is_flow(c) {
455 out.push_str(&self.buffer[..byte_pos]);
456 self.buffer = &self.buffer[byte_pos..];
457 return (true, chars_consumed);
458 }
459 if c == ':' {
460 let next_byte = if byte_pos + 1 < len {
461 bytes[byte_pos + 1]
462 } else {
463 0
464 };
465 let is_stop = if next_byte < 0x80 {
467 let nc = next_byte as char;
468 crate::char_traits::is_blank_or_breakz(nc)
469 || (flow_level_gt_0 && crate::char_traits::is_flow(nc))
470 } else {
471 false
472 };
473
474 if is_stop {
475 out.push_str(&self.buffer[..byte_pos]);
476 self.buffer = &self.buffer[byte_pos..];
477 return (true, chars_consumed);
478 }
479 }
480 byte_pos += 1;
481 chars_consumed += 1;
482 } else {
483 let mut chars = self.buffer[byte_pos..].chars();
484 let c = chars.next().unwrap();
485 byte_pos += c.len_utf8();
486 chars_consumed += 1;
487 }
488 }
489
490 out.push_str(&self.buffer[..byte_pos]);
491 self.buffer = &self.buffer[byte_pos..];
492 (true, chars_consumed)
495 }
496}
497
498impl<'a> BorrowedInput<'a> for StrInput<'a> {
499 #[inline]
500 fn slice_borrowed(&self, start: usize, end: usize) -> Option<&'a str> {
501 debug_assert!(start <= end);
502 debug_assert!(end <= self.original.len());
503 self.original.get(start..end)
504 }
505}
506
/// The maximum buffer length reported by `bufmaxlen`.
///
/// No buffer of this size is allocated in this file: the input is read straight from the string.
const BUFFER_LEN: usize = 128;
530
/// Split `s` into its first character and the remainder of the string.
///
/// Returns `None` when `s` is empty.
#[inline]
fn split_first_char(s: &str) -> Option<(char, &str)> {
    s.chars()
        .next()
        .map(|first| (first, &s[first.len_utf8()..]))
}
539
#[cfg(test)]
mod test {
    use crate::input::Input;

    use super::StrInput;

    /// `---` counts as a document start when followed by a line break, a space or the end of
    /// input; `...` is a document indicator but not a document start.
    #[test]
    pub fn is_document_start() {
        for &text in &["---\n", "---", "--- "] {
            let input = StrInput::new(text);
            assert!(input.next_is_document_start());
            assert!(input.next_is_document_indicator());
        }

        let input = StrInput::new("...\n");
        assert!(!input.next_is_document_start());
        assert!(input.next_is_document_indicator());
    }

    /// `...` counts as a document end when followed by a line break, a space or the end of
    /// input; `---` is a document indicator but not a document end.
    #[test]
    pub fn is_document_end() {
        for &text in &["...\n", "...", "... "] {
            let input = StrInput::new(text);
            assert!(input.next_is_document_end());
            assert!(input.next_is_document_indicator());
        }

        let input = StrInput::new("---\n");
        assert!(!input.next_is_document_end());
        assert!(input.next_is_document_indicator());
    }
}