1use crate::{
2 char_traits::{is_blank_or_breakz, is_breakz, is_flow},
3 input::{BorrowedInput, Input, SkipTabs},
4};
5use alloc::string::String;
6
/// A parser input that reads directly from an in-memory string slice.
#[allow(clippy::module_name_repetitions)]
pub struct StrInput<'a> {
    /// The complete string given to [`StrInput::new`]; it is never reassigned.
    original: &'a str,
    /// The not-yet-consumed tail of `original`. Consuming input shrinks it from the front.
    buffer: &'a str,
    /// The largest lookahead that has been requested so far.
    lookahead: usize,
}

impl<'a> StrInput<'a> {
    /// Creates an input positioned at the very beginning of `input`.
    ///
    /// Nothing is consumed yet and no lookahead has been requested.
    #[must_use]
    pub fn new(input: &'a str) -> Self {
        let (original, buffer) = (input, input);
        Self {
            original,
            buffer,
            lookahead: 0,
        }
    }

    /// Returns how many bytes of the original string have been consumed.
    ///
    /// This is the difference between the length of the original string and
    /// the length of what is still left to read.
    #[inline]
    #[must_use]
    fn consumed_bytes(&self) -> usize {
        let remaining = self.buffer.len();
        self.original.len() - remaining
    }
}
48
49impl Input for StrInput<'_> {
50 #[inline]
51 fn lookahead(&mut self, x: usize) {
52 self.lookahead = self.lookahead.max(x);
56 }
57
58 #[inline]
59 fn buflen(&self) -> usize {
60 self.lookahead
61 }
62
63 #[inline]
64 fn bufmaxlen(&self) -> usize {
65 BUFFER_LEN
66 }
67
68 fn buf_is_empty(&self) -> bool {
69 self.buflen() == 0
70 }
71
72 #[inline]
73 fn raw_read_ch(&mut self) -> char {
74 let mut chars = self.buffer.chars();
75 if let Some(c) = chars.next() {
76 self.buffer = chars.as_str();
77 c
78 } else {
79 '\0'
80 }
81 }
82
83 #[inline]
84 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
85 if let Some((c, sub_str)) = split_first_char(self.buffer) {
86 if is_breakz(c) {
87 None
88 } else {
89 self.buffer = sub_str;
90 Some(c)
91 }
92 } else {
93 None
94 }
95 }
96
97 #[inline]
98 fn skip(&mut self) {
99 if !self.buffer.is_empty() {
100 let b = self.buffer.as_bytes()[0];
101 if b < 0x80 {
102 self.buffer = &self.buffer[1..];
103 } else {
104 let mut chars = self.buffer.chars();
105 chars.next();
106 self.buffer = chars.as_str();
107 }
108 }
109 }
110
111 #[inline]
112 fn skip_n(&mut self, count: usize) {
113 let mut chars = self.buffer.chars();
114 for _ in 0..count {
115 if chars.next().is_none() {
116 break;
117 }
118 }
119 self.buffer = chars.as_str();
120 }
121
122 #[inline]
123 fn peek(&self) -> char {
124 if self.buffer.is_empty() {
125 return '\0';
126 }
127 let b = self.buffer.as_bytes()[0];
128 if b < 0x80 {
129 b as char
130 } else {
131 self.buffer.chars().next().unwrap()
132 }
133 }
134
135 #[inline]
136 fn peek_nth(&self, n: usize) -> char {
137 if n == 0 {
138 return self.peek();
139 }
140 let bytes = self.buffer.as_bytes();
141 if n == 1 && bytes.len() >= 2 && bytes[0] < 0x80 && bytes[1] < 0x80 {
142 return bytes[1] as char;
143 }
144 let mut chars = self.buffer.chars();
145 for _ in 0..n {
146 if chars.next().is_none() {
147 return '\0';
148 }
149 }
150 chars.next().unwrap_or('\0')
151 }
152
153 #[inline]
154 fn byte_offset(&self) -> Option<usize> {
155 Some(self.consumed_bytes())
156 }
157
158 #[inline]
159 fn slice_bytes(&self, start: usize, end: usize) -> Option<&str> {
160 debug_assert!(start <= end);
161 debug_assert!(end <= self.original.len());
162 self.original.get(start..end)
163 }
164
165 #[inline]
166 fn may_contain_comments(&self) -> bool {
167 self.original.as_bytes().contains(&b'#')
168 }
169
170 #[inline]
171 fn look_ch(&mut self) -> char {
172 self.lookahead(1);
173 self.peek()
174 }
175
176 #[inline]
177 fn next_char_is(&self, c: char) -> bool {
178 self.peek() == c
179 }
180
181 #[inline]
182 fn nth_char_is(&self, n: usize, c: char) -> bool {
183 self.peek_nth(n) == c
184 }
185
186 #[inline]
187 fn next_2_are(&self, c1: char, c2: char) -> bool {
188 let mut chars = self.buffer.chars();
189 chars.next() == Some(c1) && chars.next() == Some(c2)
190 }
191
192 #[inline]
193 fn next_3_are(&self, c1: char, c2: char, c3: char) -> bool {
194 let mut chars = self.buffer.chars();
195 chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3)
196 }
197
198 #[inline]
199 fn next_is_document_indicator(&self) -> bool {
200 if self.buffer.len() < 3 {
201 false
202 } else {
203 let bytes = self.buffer.as_bytes();
205 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
206 && (bytes[0] == b'.' || bytes[0] == b'-')
207 && bytes[0] == bytes[1]
208 && bytes[1] == bytes[2]
209 }
210 }
211
212 #[inline]
213 fn next_is_document_start(&self) -> bool {
214 if self.buffer.len() < 3 {
215 false
216 } else {
217 let bytes = self.buffer.as_bytes();
219 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
220 && bytes[0] == b'-'
221 && bytes[1] == b'-'
222 && bytes[2] == b'-'
223 }
224 }
225
226 #[inline]
227 fn next_is_document_end(&self) -> bool {
228 if self.buffer.len() < 3 {
229 false
230 } else {
231 let bytes = self.buffer.as_bytes();
233 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
234 && bytes[0] == b'.'
235 && bytes[1] == b'.'
236 && bytes[2] == b'.'
237 }
238 }
239
240 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> (usize, Result<SkipTabs, &'static str>) {
241 assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
242
243 let mut new_str = self.buffer;
244 let mut has_yaml_ws = false;
245 let mut encountered_tab = false;
246
247 if skip_tabs == SkipTabs::Yes {
249 loop {
250 if let Some(sub_str) = new_str.strip_prefix(' ') {
251 has_yaml_ws = true;
252 new_str = sub_str;
253 } else if let Some(sub_str) = new_str.strip_prefix('\t') {
254 encountered_tab = true;
255 new_str = sub_str;
256 } else {
257 break;
258 }
259 }
260 } else {
261 while let Some(sub_str) = new_str.strip_prefix(' ') {
262 has_yaml_ws = true;
263 new_str = sub_str;
264 }
265 }
266
267 let mut chars_consumed = self.buffer.len() - new_str.len();
270
271 if !new_str.is_empty() && new_str.as_bytes()[0] == b'#' {
272 if !encountered_tab && !has_yaml_ws {
273 return (
274 chars_consumed,
275 Err("comments must be separated from other tokens by whitespace"),
276 );
277 }
278
279 while let Some((c, sub_str)) = split_first_char(new_str) {
281 if is_breakz(c) {
282 break;
283 }
284 new_str = sub_str;
285 chars_consumed += 1;
286 }
287 }
288
289 self.buffer = new_str;
290
291 (
292 chars_consumed,
293 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)),
294 )
295 }
296
297 fn skip_ws_to_eol_blanks(&mut self, skip_tabs: SkipTabs) -> (usize, SkipTabs) {
298 assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
299
300 let bytes = self.buffer.as_bytes();
301 let mut i = 0;
302 let mut encountered_tab = false;
303 let mut has_yaml_ws = false;
304
305 if skip_tabs == SkipTabs::Yes {
306 while i < bytes.len() {
307 match bytes[i] {
308 b' ' => {
309 has_yaml_ws = true;
310 i += 1;
311 }
312 b'\t' => {
313 encountered_tab = true;
314 i += 1;
315 }
316 _ => break,
317 }
318 }
319 } else {
320 while i < bytes.len() && bytes[i] == b' ' {
321 has_yaml_ws = true;
322 i += 1;
323 }
324 }
325
326 self.buffer = &self.buffer[i..];
327
328 (i, SkipTabs::Result(encountered_tab, has_yaml_ws))
329 }
330
331 #[allow(clippy::inline_always)]
332 #[inline(always)]
333 fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool {
334 let nc = self.peek_nth(1);
335 match self.peek() {
336 ':' if is_blank_or_breakz(nc) || (in_flow && is_flow(nc)) => false,
338 c if in_flow && is_flow(c) => false,
339 _ => true,
340 }
341 }
342
343 #[inline]
344 fn next_is_blank_or_break(&self) -> bool {
345 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | b'\n' | b'\r')
346 }
347
348 #[inline]
349 fn next_is_blank_or_breakz(&self) -> bool {
350 self.buffer.is_empty()
351 || matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | 0 | b'\n' | b'\r')
352 }
353
354 #[inline]
355 fn next_is_blank(&self) -> bool {
356 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t')
357 }
358
359 #[inline]
360 fn next_is_break(&self) -> bool {
361 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b'\n' | b'\r')
362 }
363
364 #[inline]
365 fn next_is_breakz(&self) -> bool {
366 self.buffer.is_empty() || matches!(self.buffer.as_bytes()[0], 0 | b'\n' | b'\r')
367 }
368
369 #[inline]
370 fn next_is_z(&self) -> bool {
371 self.buffer.is_empty() || self.buffer.as_bytes()[0] == 0
372 }
373
374 #[inline]
375 fn next_is_flow(&self) -> bool {
376 !self.buffer.is_empty()
377 && matches!(self.buffer.as_bytes()[0], b',' | b'[' | b']' | b'{' | b'}')
378 }
379
380 #[inline]
381 fn next_is_digit(&self) -> bool {
382 !self.buffer.is_empty() && self.buffer.as_bytes()[0].is_ascii_digit()
383 }
384
385 #[inline]
393 fn next_is_alpha(&self) -> bool {
394 !self.buffer.is_empty()
395 && matches!(self.buffer.as_bytes()[0], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
396 }
397
398 fn skip_while_non_breakz(&mut self) -> usize {
399 let mut byte_pos = 0;
400 let mut chars_consumed = 0;
401
402 for (i, c) in self.buffer.char_indices() {
403 if is_breakz(c) {
404 break;
405 }
406 byte_pos = i + c.len_utf8();
407 chars_consumed += 1;
408 }
409
410 self.buffer = &self.buffer[byte_pos..];
411 chars_consumed
412 }
413
414 #[inline]
415 fn skip_while_blank(&mut self) -> usize {
416 let bytes = self.buffer.as_bytes();
417
418 let mut i = 0;
419 while i < bytes.len() {
420 match bytes[i] {
421 b' ' | b'\t' => i += 1,
422 _ => break,
423 }
424 }
425
426 self.buffer = &self.buffer[i..];
427 i
428 }
429
430 fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
439 let bytes = self.buffer.as_bytes();
440 let mut i = 0;
441
442 while i < bytes.len() {
444 match bytes[i] {
445 b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-' => i += 1,
446 _ => break,
447 }
448 }
449
450 out.push_str(&self.buffer[..i]);
452 self.buffer = &self.buffer[i..];
453
454 i
455 }
456
457 fn fetch_while_is_yaml_non_space(&mut self, out: &mut String) -> usize {
458 let mut byte_pos = 0;
459 let mut chars_consumed = 0;
460
461 for (i, c) in self.buffer.char_indices() {
462 if !crate::char_traits::is_yaml_non_space(c) || crate::char_traits::is_z(c) {
463 break;
464 }
465
466 byte_pos = i + c.len_utf8();
467 chars_consumed += 1;
468 }
469
470 out.push_str(&self.buffer[..byte_pos]);
471 self.buffer = &self.buffer[byte_pos..];
472
473 chars_consumed
474 }
475
476 fn fetch_plain_scalar_chunk(
477 &mut self,
478 out: &mut String,
479 _count: usize,
480 flow_level_gt_0: bool,
481 ) -> (bool, usize) {
482 let bytes = self.buffer.as_bytes();
483 let len = bytes.len();
484 let mut byte_pos = 0;
485 let mut chars_consumed = 0;
486
487 while byte_pos < len {
488 let b = bytes[byte_pos];
489 if b < 0x80 {
490 let c = b as char;
491 if crate::char_traits::is_blank_or_breakz(c) {
492 out.push_str(&self.buffer[..byte_pos]);
493 self.buffer = &self.buffer[byte_pos..];
494 return (true, chars_consumed);
495 }
496 if flow_level_gt_0 && crate::char_traits::is_flow(c) {
497 out.push_str(&self.buffer[..byte_pos]);
498 self.buffer = &self.buffer[byte_pos..];
499 return (true, chars_consumed);
500 }
501 if c == ':' {
502 let next_byte = if byte_pos + 1 < len {
503 bytes[byte_pos + 1]
504 } else {
505 0
506 };
507 let is_stop = if next_byte < 0x80 {
509 let nc = next_byte as char;
510 crate::char_traits::is_blank_or_breakz(nc)
511 || (flow_level_gt_0 && crate::char_traits::is_flow(nc))
512 } else {
513 false
514 };
515
516 if is_stop {
517 out.push_str(&self.buffer[..byte_pos]);
518 self.buffer = &self.buffer[byte_pos..];
519 return (true, chars_consumed);
520 }
521 }
522 byte_pos += 1;
523 chars_consumed += 1;
524 } else {
525 let mut chars = self.buffer[byte_pos..].chars();
526 let c = chars.next().unwrap();
527 byte_pos += c.len_utf8();
528 chars_consumed += 1;
529 }
530 }
531
532 out.push_str(&self.buffer[..byte_pos]);
533 self.buffer = &self.buffer[byte_pos..];
534 (true, chars_consumed)
537 }
538}
539
540impl<'a> BorrowedInput<'a> for StrInput<'a> {
541 #[inline]
542 fn slice_borrowed(&self, start: usize, end: usize) -> Option<&'a str> {
543 debug_assert!(start <= end);
544 debug_assert!(end <= self.original.len());
545 self.original.get(start..end)
546 }
547}
548
/// The value `StrInput` reports from `bufmaxlen`.
///
/// NOTE(review): nothing in this file allocates a buffer of this size; it
/// appears to be only a figure advertised to the caller — confirm against the
/// scanner's use of `bufmaxlen`.
const BUFFER_LEN: usize = 128;
572
/// Splits `s` into its first character and everything after it.
///
/// Returns `None` when `s` is empty.
#[inline]
fn split_first_char(s: &str) -> Option<(char, &str)> {
    let head = s.chars().next()?;
    // `len_utf8` is the encoded width of `head`, hence a valid char boundary.
    let tail = &s[head.len_utf8()..];
    Some((head, tail))
}
581
#[cfg(test)]
mod test {
    use alloc::string::String;

    use crate::input::{BorrowedInput, Input, SkipTabs};

    use super::StrInput;

    // `---` is a document start (and a document indicator) at end of input or
    // when followed by a blank or a line break; `...` is an indicator but not
    // a start.
    #[test]
    pub fn is_document_start() {
        let input = StrInput::new("---\n");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...\n");
        assert!(!input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("--- ");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
    }

    // Mirror image of `is_document_start` for the `...` marker.
    #[test]
    pub fn is_document_end() {
        let input = StrInput::new("...\n");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---\n");
        assert!(!input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("... ");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
    }

    // Offsets are counted in bytes (`é` is two bytes) and reading at the end
    // of the input yields NUL without moving.
    #[test]
    fn raw_reads_track_byte_offsets_and_eof() {
        let mut input = StrInput::new("aé");

        assert_eq!(input.raw_read_ch(), 'a');
        assert_eq!(input.byte_offset(), Some(1));
        assert_eq!(input.raw_read_ch(), 'é');
        assert_eq!(input.byte_offset(), Some(3));
        assert_eq!(input.raw_read_ch(), '\0');
        assert_eq!(input.byte_offset(), Some(3));
    }

    #[test]
    fn raw_read_non_breakz_stops_before_breakz() {
        let mut input = StrInput::new("a\n");

        assert_eq!(input.raw_read_non_breakz_ch(), Some('a'));
        assert_eq!(input.raw_read_non_breakz_ch(), None);
        // The line break must still be there.
        assert_eq!(input.peek(), '\n');

        let mut empty = StrInput::new("");
        assert_eq!(empty.raw_read_non_breakz_ch(), None);
    }

    #[test]
    fn skip_handles_ascii_unicode_and_eof() {
        let mut input = StrInput::new("éab");

        input.skip();
        assert_eq!(input.peek(), 'a');

        // Asking for more than is available stops at the end of the input.
        input.skip_n(8);
        assert_eq!(input.peek(), '\0');

        input.skip();
        assert_eq!(input.peek(), '\0');
    }

    #[test]
    fn peeking_past_end_returns_nul() {
        let ascii = StrInput::new("ab");
        assert_eq!(ascii.peek_nth(1), 'b');
        assert_eq!(ascii.peek_nth(3), '\0');

        let unicode = StrInput::new("éab");
        assert!(unicode.next_3_are('é', 'a', 'b'));
        assert!(!unicode.next_3_are('é', 'a', 'c'));
    }

    #[test]
    fn skip_ws_to_eol_without_tabs_stops_before_tab() {
        // Two spaces followed by a tab. The spaces are written as escapes so
        // that whitespace normalisation cannot change how many there are.
        let mut input = StrInput::new("\x20\x20\t# comment\n");

        let (consumed, result) = input.skip_ws_to_eol(SkipTabs::No);

        assert_eq!(consumed, 2);
        let result = result.unwrap();
        assert!(!result.found_tabs());
        assert!(result.has_valid_yaml_ws());
        assert_eq!(input.peek(), '\t');
    }

    #[test]
    fn skip_ws_to_eol_skips_comments_after_whitespace() {
        // Two spaces plus the nine characters of `# comment` make eleven.
        let mut input = StrInput::new("\x20\x20# comment\nnext");

        let (consumed, result) = input.skip_ws_to_eol(SkipTabs::Yes);

        assert_eq!(consumed, 11);
        let result = result.unwrap();
        assert!(!result.found_tabs());
        assert!(result.has_valid_yaml_ws());
        assert_eq!(input.peek(), '\n');
    }

    #[test]
    fn skip_ws_to_eol_rejects_unseparated_comment() {
        let mut input = StrInput::new("# comment\n");

        let (consumed, result) = input.skip_ws_to_eol(SkipTabs::Yes);

        assert_eq!(consumed, 0);
        assert_eq!(
            result.err(),
            Some("comments must be separated from other tokens by whitespace")
        );
        // Nothing may be consumed on error.
        assert_eq!(input.peek(), '#');
    }

    #[test]
    fn fetch_while_is_alpha_is_ascii_only() {
        let mut input = StrInput::new("abc_123-é");
        let mut out = String::new();

        assert_eq!(input.fetch_while_is_alpha(&mut out), 8);
        assert_eq!(out, "abc_123-");
        assert_eq!(input.peek(), 'é');
    }

    #[test]
    fn fetch_plain_scalar_chunk_handles_non_ascii_after_colon() {
        // A `:` followed by a non-ASCII character does not end the scalar.
        let mut input = StrInput::new("a:é ");
        let mut out = String::new();

        assert_eq!(
            input.fetch_plain_scalar_chunk(&mut out, 16, false),
            (true, 3)
        );
        assert_eq!(out, "a:é");
        assert_eq!(input.peek(), ' ');
    }

    #[test]
    fn fetch_plain_scalar_chunk_stops_at_flow_indicator() {
        let mut input = StrInput::new("abc,def");
        let mut out = String::new();

        assert_eq!(
            input.fetch_plain_scalar_chunk(&mut out, 16, true),
            (true, 3)
        );
        assert_eq!(out, "abc");
        assert_eq!(input.peek(), ',');
    }

    #[test]
    fn borrowed_slices_use_original_input_lifetime() {
        // Byte layout: `a` = 0, `é` = 1..3, `z` = 3.
        let input = StrInput::new("aéz");

        assert_eq!(BorrowedInput::slice_borrowed(&input, 1, 3), Some("é"));
        assert_eq!(input.slice_bytes(3, 4), Some("z"));
    }
}