granit_parser/input/
str.rs1use crate::{
2 char_traits::{is_blank_or_breakz, is_breakz, is_flow},
3 input::{BorrowedInput, Input, SkipTabs},
4};
5use alloc::string::String;
6
/// A parser input that reads its characters from a borrowed `&str`.
// The type name intentionally repeats the name of the enclosing `input` module.
#[allow(clippy::module_name_repetitions)]
pub struct StrInput<'a> {
    /// The complete string the input was created from.
    ///
    /// Nothing in this file reassigns it after construction; it is the reference point for byte
    /// offsets and for slicing.
    original: &'a str,
    /// The part of `original` that has not been consumed yet.
    ///
    /// Consuming characters replaces this with a shorter suffix of itself.
    buffer: &'a str,
    /// The largest lookahead that has been requested so far.
    lookahead: usize,
}

impl<'a> StrInput<'a> {
    /// Creates an input positioned at the very beginning of `input`.
    #[must_use]
    pub fn new(input: &'a str) -> Self {
        StrInput {
            lookahead: 0,
            original: input,
            buffer: input,
        }
    }

    /// Returns the number of bytes of `original` that have already been consumed.
    ///
    /// This is the difference in length between the original string and what is left of it.
    #[inline]
    #[must_use]
    fn consumed_bytes(&self) -> usize {
        let remaining = self.buffer.len();
        self.original.len() - remaining
    }
}
48
49impl Input for StrInput<'_> {
50 #[inline]
51 fn lookahead(&mut self, x: usize) {
52 self.lookahead = self.lookahead.max(x);
56 }
57
58 #[inline]
59 fn buflen(&self) -> usize {
60 self.lookahead
61 }
62
63 #[inline]
64 fn bufmaxlen(&self) -> usize {
65 BUFFER_LEN
66 }
67
68 fn buf_is_empty(&self) -> bool {
69 self.buflen() == 0
70 }
71
72 #[inline]
73 fn raw_read_ch(&mut self) -> char {
74 let mut chars = self.buffer.chars();
75 if let Some(c) = chars.next() {
76 self.buffer = chars.as_str();
77 c
78 } else {
79 '\0'
80 }
81 }
82
83 #[inline]
84 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
85 if let Some((c, sub_str)) = split_first_char(self.buffer) {
86 if is_breakz(c) {
87 None
88 } else {
89 self.buffer = sub_str;
90 Some(c)
91 }
92 } else {
93 None
94 }
95 }
96
97 #[inline]
98 fn skip(&mut self) {
99 if !self.buffer.is_empty() {
100 let b = self.buffer.as_bytes()[0];
101 if b < 0x80 {
102 self.buffer = &self.buffer[1..];
103 } else {
104 let mut chars = self.buffer.chars();
105 chars.next();
106 self.buffer = chars.as_str();
107 }
108 }
109 }
110
111 #[inline]
112 fn skip_n(&mut self, count: usize) {
113 let mut chars = self.buffer.chars();
114 for _ in 0..count {
115 if chars.next().is_none() {
116 break;
117 }
118 }
119 self.buffer = chars.as_str();
120 }
121
122 #[inline]
123 fn peek(&self) -> char {
124 if self.buffer.is_empty() {
125 return '\0';
126 }
127 let b = self.buffer.as_bytes()[0];
128 if b < 0x80 {
129 b as char
130 } else {
131 self.buffer.chars().next().unwrap()
132 }
133 }
134
135 #[inline]
136 fn peek_nth(&self, n: usize) -> char {
137 if n == 0 {
138 return self.peek();
139 }
140 let bytes = self.buffer.as_bytes();
141 if n == 1 && bytes.len() >= 2 && bytes[0] < 0x80 && bytes[1] < 0x80 {
142 return bytes[1] as char;
143 }
144 let mut chars = self.buffer.chars();
145 for _ in 0..n {
146 if chars.next().is_none() {
147 return '\0';
148 }
149 }
150 chars.next().unwrap_or('\0')
151 }
152
153 #[inline]
154 fn byte_offset(&self) -> Option<usize> {
155 Some(self.consumed_bytes())
156 }
157
158 #[inline]
159 fn slice_bytes(&self, start: usize, end: usize) -> Option<&str> {
160 debug_assert!(start <= end);
161 debug_assert!(end <= self.original.len());
162 self.original.get(start..end)
163 }
164
165 #[inline]
166 fn look_ch(&mut self) -> char {
167 self.lookahead(1);
168 self.peek()
169 }
170
171 #[inline]
172 fn next_char_is(&self, c: char) -> bool {
173 self.peek() == c
174 }
175
176 #[inline]
177 fn nth_char_is(&self, n: usize, c: char) -> bool {
178 self.peek_nth(n) == c
179 }
180
181 #[inline]
182 fn next_2_are(&self, c1: char, c2: char) -> bool {
183 let mut chars = self.buffer.chars();
184 chars.next() == Some(c1) && chars.next() == Some(c2)
185 }
186
187 #[inline]
188 fn next_3_are(&self, c1: char, c2: char, c3: char) -> bool {
189 let mut chars = self.buffer.chars();
190 chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3)
191 }
192
193 #[inline]
194 fn next_is_document_indicator(&self) -> bool {
195 if self.buffer.len() < 3 {
196 false
197 } else {
198 let bytes = self.buffer.as_bytes();
200 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
201 && (bytes[0] == b'.' || bytes[0] == b'-')
202 && bytes[0] == bytes[1]
203 && bytes[1] == bytes[2]
204 }
205 }
206
207 #[inline]
208 fn next_is_document_start(&self) -> bool {
209 if self.buffer.len() < 3 {
210 false
211 } else {
212 let bytes = self.buffer.as_bytes();
214 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
215 && bytes[0] == b'-'
216 && bytes[1] == b'-'
217 && bytes[2] == b'-'
218 }
219 }
220
221 #[inline]
222 fn next_is_document_end(&self) -> bool {
223 if self.buffer.len() < 3 {
224 false
225 } else {
226 let bytes = self.buffer.as_bytes();
228 (bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
229 && bytes[0] == b'.'
230 && bytes[1] == b'.'
231 && bytes[2] == b'.'
232 }
233 }
234
235 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> (usize, Result<SkipTabs, &'static str>) {
236 assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
237
238 let mut new_str = self.buffer;
239 let mut has_yaml_ws = false;
240 let mut encountered_tab = false;
241
242 if skip_tabs == SkipTabs::Yes {
245 loop {
246 if let Some(sub_str) = new_str.strip_prefix(' ') {
247 has_yaml_ws = true;
248 new_str = sub_str;
249 } else if let Some(sub_str) = new_str.strip_prefix('\t') {
250 encountered_tab = true;
251 new_str = sub_str;
252 } else {
253 break;
254 }
255 }
256 } else {
257 while let Some(sub_str) = new_str.strip_prefix(' ') {
258 has_yaml_ws = true;
259 new_str = sub_str;
260 }
261 }
262
263 let mut chars_consumed = self.buffer.len() - new_str.len();
266
267 if !new_str.is_empty() && new_str.as_bytes()[0] == b'#' {
268 if !encountered_tab && !has_yaml_ws {
269 return (
270 chars_consumed,
271 Err("comments must be separated from other tokens by whitespace"),
272 );
273 }
274
275 while let Some((c, sub_str)) = split_first_char(new_str) {
277 if is_breakz(c) {
278 break;
279 }
280 new_str = sub_str;
281 chars_consumed += 1;
282 }
283 }
284
285 self.buffer = new_str;
286
287 (
288 chars_consumed,
289 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)),
290 )
291 }
292
293 #[allow(clippy::inline_always)]
294 #[inline(always)]
295 fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool {
296 let nc = self.peek_nth(1);
297 match self.peek() {
298 ':' if is_blank_or_breakz(nc) || (in_flow && is_flow(nc)) => false,
300 c if in_flow && is_flow(c) => false,
301 _ => true,
302 }
303 }
304
305 #[inline]
306 fn next_is_blank_or_break(&self) -> bool {
307 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | b'\n' | b'\r')
308 }
309
310 #[inline]
311 fn next_is_blank_or_breakz(&self) -> bool {
312 self.buffer.is_empty()
313 || matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | 0 | b'\n' | b'\r')
314 }
315
316 #[inline]
317 fn next_is_blank(&self) -> bool {
318 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t')
319 }
320
321 #[inline]
322 fn next_is_break(&self) -> bool {
323 !self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b'\n' | b'\r')
324 }
325
326 #[inline]
327 fn next_is_breakz(&self) -> bool {
328 self.buffer.is_empty() || matches!(self.buffer.as_bytes()[0], 0 | b'\n' | b'\r')
329 }
330
331 #[inline]
332 fn next_is_z(&self) -> bool {
333 self.buffer.is_empty() || self.buffer.as_bytes()[0] == 0
334 }
335
336 #[inline]
337 fn next_is_flow(&self) -> bool {
338 !self.buffer.is_empty()
339 && matches!(self.buffer.as_bytes()[0], b',' | b'[' | b']' | b'{' | b'}')
340 }
341
342 #[inline]
343 fn next_is_digit(&self) -> bool {
344 !self.buffer.is_empty() && self.buffer.as_bytes()[0].is_ascii_digit()
345 }
346
347 #[inline]
355 fn next_is_alpha(&self) -> bool {
356 !self.buffer.is_empty()
357 && matches!(self.buffer.as_bytes()[0], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
358 }
359
360 fn skip_while_non_breakz(&mut self) -> usize {
361 let mut byte_pos = 0;
362 let mut chars_consumed = 0;
363
364 for (i, c) in self.buffer.char_indices() {
365 if is_breakz(c) {
366 break;
367 }
368 byte_pos = i + c.len_utf8();
369 chars_consumed += 1;
370 }
371
372 self.buffer = &self.buffer[byte_pos..];
373 chars_consumed
374 }
375
376 #[inline]
377 fn skip_while_blank(&mut self) -> usize {
378 let bytes = self.buffer.as_bytes();
379
380 let mut i = 0;
381 while i < bytes.len() {
382 match bytes[i] {
383 b' ' | b'\t' => i += 1,
384 _ => break,
385 }
386 }
387
388 self.buffer = &self.buffer[i..];
389 i
390 }
391
392 fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
401 let bytes = self.buffer.as_bytes();
402 let mut i = 0;
403
404 while i < bytes.len() {
406 match bytes[i] {
407 b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-' => i += 1,
408 _ => break,
409 }
410 }
411
412 out.push_str(&self.buffer[..i]);
414 self.buffer = &self.buffer[i..];
415
416 i
417 }
418
419 fn fetch_while_is_yaml_non_space(&mut self, out: &mut String) -> usize {
420 let mut byte_pos = 0;
421 let mut chars_consumed = 0;
422
423 for (i, c) in self.buffer.char_indices() {
424 if !crate::char_traits::is_yaml_non_space(c) || crate::char_traits::is_z(c) {
425 break;
426 }
427
428 byte_pos = i + c.len_utf8();
429 chars_consumed += 1;
430 }
431
432 out.push_str(&self.buffer[..byte_pos]);
433 self.buffer = &self.buffer[byte_pos..];
434
435 chars_consumed
436 }
437
438 fn fetch_plain_scalar_chunk(
439 &mut self,
440 out: &mut String,
441 _count: usize,
442 flow_level_gt_0: bool,
443 ) -> (bool, usize) {
444 let bytes = self.buffer.as_bytes();
445 let len = bytes.len();
446 let mut byte_pos = 0;
447 let mut chars_consumed = 0;
448
449 while byte_pos < len {
450 let b = bytes[byte_pos];
451 if b < 0x80 {
452 let c = b as char;
453 if crate::char_traits::is_blank_or_breakz(c) {
454 out.push_str(&self.buffer[..byte_pos]);
455 self.buffer = &self.buffer[byte_pos..];
456 return (true, chars_consumed);
457 }
458 if flow_level_gt_0 && crate::char_traits::is_flow(c) {
459 out.push_str(&self.buffer[..byte_pos]);
460 self.buffer = &self.buffer[byte_pos..];
461 return (true, chars_consumed);
462 }
463 if c == ':' {
464 let next_byte = if byte_pos + 1 < len {
465 bytes[byte_pos + 1]
466 } else {
467 0
468 };
469 let is_stop = if next_byte < 0x80 {
471 let nc = next_byte as char;
472 crate::char_traits::is_blank_or_breakz(nc)
473 || (flow_level_gt_0 && crate::char_traits::is_flow(nc))
474 } else {
475 false
476 };
477
478 if is_stop {
479 out.push_str(&self.buffer[..byte_pos]);
480 self.buffer = &self.buffer[byte_pos..];
481 return (true, chars_consumed);
482 }
483 }
484 byte_pos += 1;
485 chars_consumed += 1;
486 } else {
487 let mut chars = self.buffer[byte_pos..].chars();
488 let c = chars.next().unwrap();
489 byte_pos += c.len_utf8();
490 chars_consumed += 1;
491 }
492 }
493
494 out.push_str(&self.buffer[..byte_pos]);
495 self.buffer = &self.buffer[byte_pos..];
496 (true, chars_consumed)
499 }
500}
501
502impl<'a> BorrowedInput<'a> for StrInput<'a> {
503 #[inline]
504 fn slice_borrowed(&self, start: usize, end: usize) -> Option<&'a str> {
505 debug_assert!(start <= end);
506 debug_assert!(end <= self.original.len());
507 self.original.get(start..end)
508 }
509}
510
/// The value `StrInput` reports from `bufmaxlen`.
///
/// `StrInput` only holds string slices and a counter, so nothing of this size is allocated in
/// this file; the constant is purely the number handed back to the caller.
const BUFFER_LEN: usize = 128;
534
/// Splits `s` into its first character and the remainder of the string.
///
/// Returns `None` when `s` is empty.
#[inline]
fn split_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    let rest = &s[first.len_utf8()..];
    Some((first, rest))
}
543
#[cfg(test)]
mod test {
    use crate::input::Input;

    use super::StrInput;

    /// `---` is a document start when followed by a blank, a line break or end of input.
    #[test]
    pub fn is_document_start() {
        let input = StrInput::new("---\n");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...\n");
        assert!(!input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("--- ");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        // Tab and carriage return terminate the marker as well.
        let input = StrInput::new("---\t");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---\r\n");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
    }

    /// `...` is a document end when followed by a blank, a line break or end of input.
    #[test]
    pub fn is_document_end() {
        let input = StrInput::new("...\n");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---\n");
        assert!(!input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("... ");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        // Tab and carriage return terminate the marker as well.
        let input = StrInput::new("...\t");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...\r\n");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
    }

    /// Inputs that are too short, lack a terminator after the marker, mix marker characters or
    /// do not start with the marker must be rejected by all three predicates.
    #[test]
    pub fn rejects_non_markers() {
        let rejected = [
            "", "-", ".", "--", "..", "---a", "...a", "----", "....", "--.", "-..", "..-", " ---",
            " ...",
        ];
        for &text in &rejected {
            let input = StrInput::new(text);
            assert!(!input.next_is_document_start(), "start: {:?}", text);
            assert!(!input.next_is_document_end(), "end: {:?}", text);
            assert!(!input.next_is_document_indicator(), "indicator: {:?}", text);
        }
    }
}