1use crate::{
2 char_traits::{is_blank_or_breakz, is_breakz, is_flow},
3 input::{BorrowedInput, Input, SkipTabs},
4};
5use alloc::string::String;
6
/// A scanner input that reads directly out of a borrowed `&str`.
///
/// No characters are copied: reading simply shrinks `buffer` from the front,
/// while `original` is kept untouched so byte offsets and sub-slices can be
/// computed against the whole input.
// The lint fires because the type name repeats part of its module path; the
// name is kept for clarity at use sites.
#[allow(clippy::module_name_repetitions)]
pub struct StrInput<'a> {
    /// The complete input exactly as handed to [`StrInput::new`].
    original: &'a str,
    /// The not-yet-consumed tail of `original`.
    buffer: &'a str,
    /// The largest lookahead ever requested; reported back by `buflen`.
    lookahead: usize,
}
26
27impl<'a> StrInput<'a> {
28 #[must_use]
30 pub fn new(input: &'a str) -> Self {
31 Self {
32 original: input,
33 buffer: input,
34 lookahead: 0,
35 }
36 }
37
38 #[inline]
43 #[must_use]
44 fn consumed_bytes(&self) -> usize {
45 self.original.len() - self.buffer.len()
46 }
47}
48
49impl Input for StrInput<'_> {
50 #[inline]
51 fn lookahead(&mut self, x: usize) {
52 self.lookahead = self.lookahead.max(x);
56 }
57
58 #[inline]
59 fn buflen(&self) -> usize {
60 self.lookahead
61 }
62
63 #[inline]
64 fn bufmaxlen(&self) -> usize {
65 BUFFER_LEN
66 }
67
68 fn buf_is_empty(&self) -> bool {
69 self.buflen() == 0
70 }
71
72 #[inline]
73 fn raw_read_ch(&mut self) -> char {
74 let mut chars = self.buffer.chars();
75 if let Some(c) = chars.next() {
76 self.buffer = chars.as_str();
77 c
78 } else {
79 '\0'
80 }
81 }
82
83 #[inline]
84 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
85 if let Some((c, sub_str)) = split_first_char(self.buffer) {
86 if is_breakz(c) {
87 None
88 } else {
89 self.buffer = sub_str;
90 Some(c)
91 }
92 } else {
93 None
94 }
95 }
96
97 #[inline]
98 fn skip(&mut self) {
99 if !self.buffer.is_empty() {
100 let b = self.buffer.as_bytes()[0];
101 if b < 0x80 {
102 self.buffer = &self.buffer[1..];
103 } else {
104 let mut chars = self.buffer.chars();
105 chars.next();
106 self.buffer = chars.as_str();
107 }
108 }
109 }
110
111 #[inline]
112 fn skip_n(&mut self, count: usize) {
113 let mut chars = self.buffer.chars();
114 for _ in 0..count {
115 if chars.next().is_none() {
116 break;
117 }
118 }
119 self.buffer = chars.as_str();
120 }
121
122 #[inline]
123 fn peek(&self) -> char {
124 if self.buffer.is_empty() {
125 return '\0';
126 }
127 let b = self.buffer.as_bytes()[0];
128 if b < 0x80 {
129 b as char
130 } else {
131 self.buffer.chars().next().unwrap()
132 }
133 }
134
135 #[inline]
136 fn peek_nth(&self, n: usize) -> char {
137 if n == 0 {
138 return self.peek();
139 }
140 let bytes = self.buffer.as_bytes();
141 if n == 1 && bytes.len() >= 2 && bytes[0] < 0x80 && bytes[1] < 0x80 {
142 return bytes[1] as char;
143 }
144 let mut chars = self.buffer.chars();
145 for _ in 0..n {
146 if chars.next().is_none() {
147 return '\0';
148 }
149 }
150 chars.next().unwrap_or('\0')
151 }
152
153 #[inline]
154 fn byte_offset(&self) -> Option<usize> {
155 Some(self.consumed_bytes())
156 }
157
158 #[inline]
159 fn slice_bytes(&self, start: usize, end: usize) -> Option<&str> {
160 debug_assert!(start <= end);
161 debug_assert!(end <= self.original.len());
162 self.original.get(start..end)
163 }
164
165 #[inline]
166 fn look_ch(&mut self) -> char {
167 self.lookahead(1);
168 self.peek()
169 }
170
171 #[inline]
172 fn next_char_is(&self, c: char) -> bool {
173 self.peek() == c
174 }
175
176 #[inline]
177 fn nth_char_is(&self, n: usize, c: char) -> bool {
178 self.peek_nth(n) == c
179 }
180
181 #[inline]
182 fn next_2_are(&self, c1: char, c2: char) -> bool {
183 let mut chars = self.buffer.chars();
184 chars.next() == Some(c1) && chars.next() == Some(c2)
185 }
186
187 #[inline]
188 fn next_3_are(&self, c1: char, c2: char, c3: char) -> bool {
189 let mut chars = self.buffer.chars();
190 chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3)
191 }
192
193 #[inline]
194 fn next_is_document_indicator(&self) -> bool {
195 if self.buffer.len() < 3 {
196 false
197 } else {
198 let bytes = self.buffer.as_bytes();
200 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
201 && (bytes[0] == b'.' || bytes[0] == b'-')
202 && bytes[0] == bytes[1]
203 && bytes[1] == bytes[2]
204 }
205 }
206
207 #[inline]
208 fn next_is_document_start(&self) -> bool {
209 if self.buffer.len() < 3 {
210 false
211 } else {
212 let bytes = self.buffer.as_bytes();
214 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
215 && bytes[0] == b'-'
216 && bytes[1] == b'-'
217 && bytes[2] == b'-'
218 }
219 }
220
221 #[inline]
222 fn next_is_document_end(&self) -> bool {
223 if self.buffer.len() < 3 {
224 false
225 } else {
226 let bytes = self.buffer.as_bytes();
228 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
229 && bytes[0] == b'.'
230 && bytes[1] == b'.'
231 && bytes[2] == b'.'
232 }
233 }
234
235 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> (usize, Result<SkipTabs, &'static str>) {
236 assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
237
238 let mut new_str = self.buffer;
239 let mut has_yaml_ws = false;
240 let mut encountered_tab = false;
241
242 if skip_tabs == SkipTabs::Yes {
244 loop {
245 if let Some(sub_str) = new_str.strip_prefix(' ') {
246 has_yaml_ws = true;
247 new_str = sub_str;
248 } else if let Some(sub_str) = new_str.strip_prefix('\t') {
249 encountered_tab = true;
250 new_str = sub_str;
251 } else {
252 break;
253 }
254 }
255 } else {
256 while let Some(sub_str) = new_str.strip_prefix(' ') {
257 has_yaml_ws = true;
258 new_str = sub_str;
259 }
260 }
261
262 let mut chars_consumed = self.buffer.len() - new_str.len();
265
266 if !new_str.is_empty() && new_str.as_bytes()[0] == b'#' {
267 if !encountered_tab && !has_yaml_ws {
268 return (
269 chars_consumed,
270 Err("comments must be separated from other tokens by whitespace"),
271 );
272 }
273
274 while let Some((c, sub_str)) = split_first_char(new_str) {
276 if is_breakz(c) {
277 break;
278 }
279 new_str = sub_str;
280 chars_consumed += 1;
281 }
282 }
283
284 self.buffer = new_str;
285
286 (
287 chars_consumed,
288 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)),
289 )
290 }
291
292 #[allow(clippy::inline_always)]
293 #[inline(always)]
294 fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool {
295 let nc = self.peek_nth(1);
296 match self.peek() {
297 ':' if is_blank_or_breakz(nc) || (in_flow && is_flow(nc)) => false,
299 c if in_flow && is_flow(c) => false,
300 _ => true,
301 }
302 }
303
304 #[inline]
305 fn next_is_blank_or_break(&self) -> bool {
306 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | b'\n' | b'\r')
307 }
308
309 #[inline]
310 fn next_is_blank_or_breakz(&self) -> bool {
311 self.buffer.is_empty()
312 || matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | 0 | b'\n' | b'\r')
313 }
314
315 #[inline]
316 fn next_is_blank(&self) -> bool {
317 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t')
318 }
319
320 #[inline]
321 fn next_is_break(&self) -> bool {
322 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b'\n' | b'\r')
323 }
324
325 #[inline]
326 fn next_is_breakz(&self) -> bool {
327 self.buffer.is_empty() || matches!(self.buffer.as_bytes()[0], 0 | b'\n' | b'\r')
328 }
329
330 #[inline]
331 fn next_is_z(&self) -> bool {
332 self.buffer.is_empty() || self.buffer.as_bytes()[0] == 0
333 }
334
335 #[inline]
336 fn next_is_flow(&self) -> bool {
337 !self.buffer.is_empty()
338 && matches!(self.buffer.as_bytes()[0], b',' | b'[' | b']' | b'{' | b'}')
339 }
340
341 #[inline]
342 fn next_is_digit(&self) -> bool {
343 !self.buffer.is_empty() && self.buffer.as_bytes()[0].is_ascii_digit()
344 }
345
346 #[inline]
354 fn next_is_alpha(&self) -> bool {
355 !self.buffer.is_empty()
356 && matches!(self.buffer.as_bytes()[0], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
357 }
358
359 fn skip_while_non_breakz(&mut self) -> usize {
360 let mut byte_pos = 0;
361 let mut chars_consumed = 0;
362
363 for (i, c) in self.buffer.char_indices() {
364 if is_breakz(c) {
365 break;
366 }
367 byte_pos = i + c.len_utf8();
368 chars_consumed += 1;
369 }
370
371 self.buffer = &self.buffer[byte_pos..];
372 chars_consumed
373 }
374
375 #[inline]
376 fn skip_while_blank(&mut self) -> usize {
377 let bytes = self.buffer.as_bytes();
378
379 let mut i = 0;
380 while i < bytes.len() {
381 match bytes[i] {
382 b' ' | b'\t' => i += 1,
383 _ => break,
384 }
385 }
386
387 self.buffer = &self.buffer[i..];
388 i
389 }
390
391 fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
400 let bytes = self.buffer.as_bytes();
401 let mut i = 0;
402
403 while i < bytes.len() {
405 match bytes[i] {
406 b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-' => i += 1,
407 _ => break,
408 }
409 }
410
411 out.push_str(&self.buffer[..i]);
413 self.buffer = &self.buffer[i..];
414
415 i
416 }
417
418 fn fetch_while_is_yaml_non_space(&mut self, out: &mut String) -> usize {
419 let mut byte_pos = 0;
420 let mut chars_consumed = 0;
421
422 for (i, c) in self.buffer.char_indices() {
423 if !crate::char_traits::is_yaml_non_space(c) || crate::char_traits::is_z(c) {
424 break;
425 }
426
427 byte_pos = i + c.len_utf8();
428 chars_consumed += 1;
429 }
430
431 out.push_str(&self.buffer[..byte_pos]);
432 self.buffer = &self.buffer[byte_pos..];
433
434 chars_consumed
435 }
436
437 fn fetch_plain_scalar_chunk(
438 &mut self,
439 out: &mut String,
440 _count: usize,
441 flow_level_gt_0: bool,
442 ) -> (bool, usize) {
443 let bytes = self.buffer.as_bytes();
444 let len = bytes.len();
445 let mut byte_pos = 0;
446 let mut chars_consumed = 0;
447
448 while byte_pos < len {
449 let b = bytes[byte_pos];
450 if b < 0x80 {
451 let c = b as char;
452 if crate::char_traits::is_blank_or_breakz(c) {
453 out.push_str(&self.buffer[..byte_pos]);
454 self.buffer = &self.buffer[byte_pos..];
455 return (true, chars_consumed);
456 }
457 if flow_level_gt_0 && crate::char_traits::is_flow(c) {
458 out.push_str(&self.buffer[..byte_pos]);
459 self.buffer = &self.buffer[byte_pos..];
460 return (true, chars_consumed);
461 }
462 if c == ':' {
463 let next_byte = if byte_pos + 1 < len {
464 bytes[byte_pos + 1]
465 } else {
466 0
467 };
468 let is_stop = if next_byte < 0x80 {
470 let nc = next_byte as char;
471 crate::char_traits::is_blank_or_breakz(nc)
472 || (flow_level_gt_0 && crate::char_traits::is_flow(nc))
473 } else {
474 false
475 };
476
477 if is_stop {
478 out.push_str(&self.buffer[..byte_pos]);
479 self.buffer = &self.buffer[byte_pos..];
480 return (true, chars_consumed);
481 }
482 }
483 byte_pos += 1;
484 chars_consumed += 1;
485 } else {
486 let mut chars = self.buffer[byte_pos..].chars();
487 let c = chars.next().unwrap();
488 byte_pos += c.len_utf8();
489 chars_consumed += 1;
490 }
491 }
492
493 out.push_str(&self.buffer[..byte_pos]);
494 self.buffer = &self.buffer[byte_pos..];
495 (true, chars_consumed)
498 }
499}
500
501impl<'a> BorrowedInput<'a> for StrInput<'a> {
502 #[inline]
503 fn slice_borrowed(&self, start: usize, end: usize) -> Option<&'a str> {
504 debug_assert!(start <= end);
505 debug_assert!(end <= self.original.len());
506 self.original.get(start..end)
507 }
508}
509
/// The value `StrInput` reports from `bufmaxlen`.
///
/// `StrInput` reads straight from the borrowed string, so this is only the
/// number handed back to callers of `bufmaxlen`, not the size of a buffer
/// held by this type.
const BUFFER_LEN: usize = 128;
533
/// Splits `s` into its first character and everything after it.
///
/// Returns `None` when `s` is empty.
#[inline]
fn split_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    // `len_utf8` is the exact byte width of `first`, hence a valid boundary.
    Some((first, &s[first.len_utf8()..]))
}
542
#[cfg(test)]
mod test {
    use alloc::string::String;

    use crate::input::{BorrowedInput, Input, SkipTabs};

    use super::StrInput;

    /// `---` is recognised as a document start when followed by a break, a
    /// blank or the end of input; `...` is an indicator but not a start.
    #[test]
    pub fn is_document_start() {
        let input = StrInput::new("---\n");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...\n");
        assert!(!input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("--- ");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
    }

    /// `...` is recognised as a document end when followed by a break, a
    /// blank or the end of input; `---` is an indicator but not an end.
    #[test]
    pub fn is_document_end() {
        let input = StrInput::new("...\n");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---\n");
        assert!(!input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("... ");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
    }

    /// Byte offsets advance by the UTF-8 width of each character read, and
    /// reading at end of input yields NUL without moving.
    #[test]
    fn raw_reads_track_byte_offsets_and_eof() {
        let mut input = StrInput::new("aé");

        assert_eq!(input.raw_read_ch(), 'a');
        assert_eq!(input.byte_offset(), Some(1));
        assert_eq!(input.raw_read_ch(), 'é');
        assert_eq!(input.byte_offset(), Some(3));
        assert_eq!(input.raw_read_ch(), '\0');
        assert_eq!(input.byte_offset(), Some(3));
    }

    /// A breakz is reported as `None` and left in the input.
    #[test]
    fn raw_read_non_breakz_stops_before_breakz() {
        let mut input = StrInput::new("a\n");

        assert_eq!(input.raw_read_non_breakz_ch(), Some('a'));
        assert_eq!(input.raw_read_non_breakz_ch(), None);
        assert_eq!(input.peek(), '\n');

        let mut empty = StrInput::new("");
        assert_eq!(empty.raw_read_non_breakz_ch(), None);
    }

    /// `skip` handles multi-byte characters, `skip_n` clamps at end of input,
    /// and skipping at end of input is a no-op.
    #[test]
    fn skip_handles_ascii_unicode_and_eof() {
        let mut input = StrInput::new("éab");

        input.skip();
        assert_eq!(input.peek(), 'a');

        input.skip_n(8);
        assert_eq!(input.peek(), '\0');

        input.skip();
        assert_eq!(input.peek(), '\0');
    }

    /// Peeking beyond the end yields NUL; multi-character comparisons work
    /// across multi-byte characters.
    #[test]
    fn peeking_past_end_returns_nul() {
        let ascii = StrInput::new("ab");
        assert_eq!(ascii.peek_nth(1), 'b');
        assert_eq!(ascii.peek_nth(3), '\0');

        let unicode = StrInput::new("éab");
        assert!(unicode.next_3_are('é', 'a', 'b'));
        assert!(!unicode.next_3_are('é', 'a', 'c'));
    }

    /// With `SkipTabs::No` only the spaces are consumed and the tab is left
    /// as the next character.
    #[test]
    fn skip_ws_to_eol_without_tabs_stops_before_tab() {
        // Two leading spaces, written as escapes so the count that the
        // assertion below depends on is explicit.
        let mut input = StrInput::new("\x20\x20\t# comment\n");

        let (consumed, result) = input.skip_ws_to_eol(SkipTabs::No);

        assert_eq!(consumed, 2);
        let result = result.unwrap();
        assert!(!result.found_tabs());
        assert!(result.has_valid_yaml_ws());
        assert_eq!(input.peek(), '\t');
    }

    /// Whitespace followed by a comment is consumed up to, but not including,
    /// the line break.
    #[test]
    fn skip_ws_to_eol_skips_comments_after_whitespace() {
        // Two leading spaces (written as escapes) plus the nine characters of
        // `# comment` give the eleven characters asserted below.
        let mut input = StrInput::new("\x20\x20# comment\nnext");

        let (consumed, result) = input.skip_ws_to_eol(SkipTabs::Yes);

        assert_eq!(consumed, 11);
        let result = result.unwrap();
        assert!(!result.found_tabs());
        assert!(result.has_valid_yaml_ws());
        assert_eq!(input.peek(), '\n');
    }

    /// A comment with no preceding whitespace is an error and nothing is
    /// consumed.
    #[test]
    fn skip_ws_to_eol_rejects_unseparated_comment() {
        let mut input = StrInput::new("# comment\n");

        let (consumed, result) = input.skip_ws_to_eol(SkipTabs::Yes);

        assert_eq!(consumed, 0);
        assert_eq!(
            result.err(),
            Some("comments must be separated from other tokens by whitespace")
        );
        assert_eq!(input.peek(), '#');
    }

    /// Only ASCII letters, digits, `_` and `-` are fetched; a non-ASCII letter
    /// ends the run.
    #[test]
    fn fetch_while_is_alpha_is_ascii_only() {
        let mut input = StrInput::new("abc_123-é");
        let mut out = String::new();

        assert_eq!(input.fetch_while_is_alpha(&mut out), 8);
        assert_eq!(out, "abc_123-");
        assert_eq!(input.peek(), 'é');
    }

    /// A `:` followed by a non-ASCII character does not end the chunk.
    #[test]
    fn fetch_plain_scalar_chunk_handles_non_ascii_after_colon() {
        let mut input = StrInput::new("a:é ");
        let mut out = String::new();

        assert_eq!(
            input.fetch_plain_scalar_chunk(&mut out, 16, false),
            (true, 3)
        );
        assert_eq!(out, "a:é");
        assert_eq!(input.peek(), ' ');
    }

    /// Inside a flow context a flow indicator ends the chunk and is left in
    /// the input.
    #[test]
    fn fetch_plain_scalar_chunk_stops_at_flow_indicator() {
        let mut input = StrInput::new("abc,def");
        let mut out = String::new();

        assert_eq!(
            input.fetch_plain_scalar_chunk(&mut out, 16, true),
            (true, 3)
        );
        assert_eq!(out, "abc");
        assert_eq!(input.peek(), ',');
    }

    /// Both slicing methods address the original input by byte range.
    #[test]
    fn borrowed_slices_use_original_input_lifetime() {
        let input = StrInput::new("aéz");

        assert_eq!(BorrowedInput::slice_borrowed(&input, 1, 3), Some("é"));
        assert_eq!(input.slice_bytes(3, 4), Some("z"));
    }
}