saphyr_parser/input/
str.rs1use crate::{
2 char_traits::{
3 is_alpha, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, is_flow, is_z,
4 },
5 input::{Input, SkipTabs},
6};
7
/// A parser input that reads directly from a borrowed `&str`.
#[allow(clippy::module_name_repetitions)]
#[derive(Debug)]
pub struct StrInput<'a> {
    /// The part of the input that has not been consumed yet.
    buffer: &'a str,
    /// The largest lookahead that has been requested so far.
    ///
    /// This is bookkeeping only: it is what `buflen` reports, not a count of buffered data.
    lookahead: usize,
}
19
20impl<'a> StrInput<'a> {
21 #[must_use]
23 pub fn new(input: &'a str) -> Self {
24 Self {
25 buffer: input,
26 lookahead: 0,
27 }
28 }
29}
30
31impl<'a> Input for StrInput<'a> {
32 #[inline]
33 fn lookahead(&mut self, x: usize) {
34 self.lookahead = self.lookahead.max(x);
38 }
39
40 #[inline]
41 fn buflen(&self) -> usize {
42 self.lookahead
43 }
44
45 #[inline]
46 fn bufmaxlen(&self) -> usize {
47 BUFFER_LEN
48 }
49
50 fn buf_is_empty(&self) -> bool {
51 self.buflen() == 0
52 }
53
54 #[inline]
55 fn raw_read_ch(&mut self) -> char {
56 let mut chars = self.buffer.chars();
57 if let Some(c) = chars.next() {
58 self.buffer = chars.as_str();
59 c
60 } else {
61 '\0'
62 }
63 }
64
65 #[inline]
66 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
67 if let Some((c, sub_str)) = split_first_char(self.buffer) {
68 if is_breakz(c) {
69 None
70 } else {
71 self.buffer = sub_str;
72 Some(c)
73 }
74 } else {
75 None
76 }
77 }
78
79 #[inline]
80 fn skip(&mut self) {
81 let mut chars = self.buffer.chars();
82 if chars.next().is_some() {
83 self.buffer = chars.as_str();
84 }
85 }
86
87 #[inline]
88 fn skip_n(&mut self, count: usize) {
89 let mut chars = self.buffer.chars();
90 for _ in 0..count {
91 if chars.next().is_none() {
92 break;
93 }
94 }
95 self.buffer = chars.as_str();
96 }
97
98 #[inline]
99 fn peek(&self) -> char {
100 self.buffer.chars().next().unwrap_or('\0')
101 }
102
103 #[inline]
104 fn peek_nth(&self, n: usize) -> char {
105 let mut chars = self.buffer.chars();
106 for _ in 0..n {
107 if chars.next().is_none() {
108 return '\0';
109 }
110 }
111 chars.next().unwrap_or('\0')
112 }
113
114 #[inline]
115 fn look_ch(&mut self) -> char {
116 self.lookahead(1);
117 self.peek()
118 }
119
120 #[inline]
121 fn next_char_is(&self, c: char) -> bool {
122 self.peek() == c
123 }
124
125 #[inline]
126 fn nth_char_is(&self, n: usize, c: char) -> bool {
127 self.peek_nth(n) == c
128 }
129
130 #[inline]
131 fn next_2_are(&self, c1: char, c2: char) -> bool {
132 let mut chars = self.buffer.chars();
133 chars.next().is_some_and(|c| c == c1) && chars.next().is_some_and(|c| c == c2)
134 }
135
136 #[inline]
137 fn next_3_are(&self, c1: char, c2: char, c3: char) -> bool {
138 let mut chars = self.buffer.chars();
139 chars.next().is_some_and(|c| c == c1)
140 && chars.next().is_some_and(|c| c == c2)
141 && chars.next().is_some_and(|c| c == c3)
142 }
143
144 #[inline]
145 fn next_is_document_indicator(&self) -> bool {
146 if self.buffer.len() < 3 {
147 false
148 } else {
149 let bytes = self.buffer.as_bytes();
151 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
152 && (bytes[0] == b'.' || bytes[0] == b'-')
153 && bytes[0] == bytes[1]
154 && bytes[1] == bytes[2]
155 }
156 }
157
158 #[inline]
159 fn next_is_document_start(&self) -> bool {
160 if self.buffer.len() < 3 {
161 false
162 } else {
163 let bytes = self.buffer.as_bytes();
165 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
166 && bytes[0] == b'-'
167 && bytes[1] == b'-'
168 && bytes[2] == b'-'
169 }
170 }
171
172 #[inline]
173 fn next_is_document_end(&self) -> bool {
174 if self.buffer.len() < 3 {
175 false
176 } else {
177 let bytes = self.buffer.as_bytes();
179 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
180 && bytes[0] == b'.'
181 && bytes[1] == b'.'
182 && bytes[2] == b'.'
183 }
184 }
185
186 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> (usize, Result<SkipTabs, &'static str>) {
187 assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
188
189 let mut new_str = self.buffer;
190 let mut has_yaml_ws = false;
191 let mut encountered_tab = false;
192
193 if skip_tabs == SkipTabs::Yes {
196 loop {
197 if let Some(sub_str) = new_str.strip_prefix(' ') {
198 has_yaml_ws = true;
199 new_str = sub_str;
200 } else if let Some(sub_str) = new_str.strip_prefix('\t') {
201 encountered_tab = true;
202 new_str = sub_str;
203 } else {
204 break;
205 }
206 }
207 } else {
208 while let Some(sub_str) = new_str.strip_prefix(' ') {
209 has_yaml_ws = true;
210 new_str = sub_str;
211 }
212 }
213
214 let mut chars_consumed = self.buffer.len() - new_str.len();
217
218 if !new_str.is_empty() && new_str.as_bytes()[0] == b'#' {
219 if !encountered_tab && !has_yaml_ws {
220 return (
221 chars_consumed,
222 Err("comments must be separated from other tokens by whitespace"),
223 );
224 }
225
226 while let Some((c, sub_str)) = split_first_char(new_str) {
228 if is_breakz(c) {
229 break;
230 }
231 new_str = sub_str;
232 chars_consumed += 1;
233 }
234 }
235
236 self.buffer = new_str;
237
238 (
239 chars_consumed,
240 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)),
241 )
242 }
243
244 #[allow(clippy::inline_always)]
245 #[inline(always)]
246 fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool {
247 let c = self.buffer.as_bytes()[0];
248 if self.buffer.len() > 1 {
249 let nc = self.buffer.as_bytes()[1];
250 match c {
251 b':' if is_blank_or_breakz(nc as char) || (in_flow && is_flow(nc as char)) => false,
253 c if in_flow && is_flow(c as char) => false,
254 _ => true,
255 }
256 } else {
257 match c {
258 b':' => false,
260 c if in_flow && is_flow(c as char) => false,
261 _ => true,
262 }
263 }
264 }
265
266 #[inline]
267 fn next_is_blank_or_break(&self) -> bool {
268 !self.buffer.is_empty()
269 && (is_blank(self.buffer.as_bytes()[0] as char)
270 || is_break(self.buffer.as_bytes()[0] as char))
271 }
272
273 #[inline]
274 fn next_is_blank_or_breakz(&self) -> bool {
275 self.buffer.is_empty()
276 || (is_blank(self.buffer.as_bytes()[0] as char)
277 || is_breakz(self.buffer.as_bytes()[0] as char))
278 }
279
280 #[inline]
281 fn next_is_blank(&self) -> bool {
282 !self.buffer.is_empty() && is_blank(self.buffer.as_bytes()[0] as char)
283 }
284
285 #[inline]
286 fn next_is_break(&self) -> bool {
287 !self.buffer.is_empty() && is_break(self.buffer.as_bytes()[0] as char)
288 }
289
290 #[inline]
291 fn next_is_breakz(&self) -> bool {
292 self.buffer.is_empty() || is_breakz(self.buffer.as_bytes()[0] as char)
293 }
294
295 #[inline]
296 fn next_is_z(&self) -> bool {
297 self.buffer.is_empty() || is_z(self.buffer.as_bytes()[0] as char)
298 }
299
300 #[inline]
301 fn next_is_flow(&self) -> bool {
302 !self.buffer.is_empty() && is_flow(self.buffer.as_bytes()[0] as char)
303 }
304
305 #[inline]
306 fn next_is_digit(&self) -> bool {
307 !self.buffer.is_empty() && is_digit(self.buffer.as_bytes()[0] as char)
308 }
309
310 #[inline]
311 fn next_is_alpha(&self) -> bool {
312 !self.buffer.is_empty() && is_alpha(self.buffer.as_bytes()[0] as char)
313 }
314
315 fn skip_while_non_breakz(&mut self) -> usize {
316 let mut new_str = self.buffer;
317 let mut count = 0;
318
319 while let Some((c, sub_str)) = split_first_char(new_str) {
321 if is_breakz(c) {
322 break;
323 }
324 new_str = sub_str;
325 count += 1;
326 }
327
328 self.buffer = new_str;
329
330 count
331 }
332
333 fn skip_while_blank(&mut self) -> usize {
334 let mut i = 0;
336 while i < self.buffer.len() {
337 if !is_blank(self.buffer.as_bytes()[i] as char) {
338 break;
339 }
340 i += 1;
341 }
342 self.buffer = &self.buffer[i..];
343 i
344 }
345
346 fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
347 let mut not_alpha = None;
348
349 let mut chars = self.buffer.chars();
351 for c in chars.by_ref() {
352 if !is_alpha(c) {
353 not_alpha = Some(c);
354 break;
355 }
356 }
357
358 let remaining_string = if let Some(c) = not_alpha {
359 let n_bytes_read = chars.as_str().as_ptr() as usize - self.buffer.as_ptr() as usize;
360 let last_char_bytes = c.len_utf8();
361 &self.buffer[n_bytes_read - last_char_bytes..]
362 } else {
363 chars.as_str()
364 };
365
366 let n_bytes_to_append = remaining_string.as_ptr() as usize - self.buffer.as_ptr() as usize;
367 out.reserve(n_bytes_to_append);
368 out.push_str(&self.buffer[..n_bytes_to_append]);
369 self.buffer = remaining_string;
370
371 n_bytes_to_append
372 }
373}
374
/// The maximum buffer length reported to callers through `bufmaxlen`.
///
/// The `str`-backed input never allocates a buffer of this size; the value is only reported.
const BUFFER_LEN: usize = 128;
398
/// Splits `s` into its first character and the remainder of the string.
///
/// Returns `None` when `s` is empty. The remainder borrows from `s`; nothing is copied.
#[inline]
fn split_first_char(s: &str) -> Option<(char, &str)> {
    let mut chars = s.chars();
    let c = chars.next()?;
    Some((c, chars.as_str()))
}
407
#[cfg(test)]
mod test {
    use crate::input::Input;

    use super::StrInput;

    /// `---` is a document start when followed by a break, a blank, or the end of the input;
    /// `...` never is.
    #[test]
    pub fn is_document_start() {
        let input = StrInput::new("---\n");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...\n");
        assert!(!input.next_is_document_start());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("--- ");
        assert!(input.next_is_document_start());
        assert!(input.next_is_document_indicator());
    }

    /// `...` is a document end when followed by a break, a blank, or the end of the input;
    /// `---` never is.
    #[test]
    pub fn is_document_end() {
        let input = StrInput::new("...\n");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("...");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("---\n");
        assert!(!input.next_is_document_end());
        assert!(input.next_is_document_indicator());
        let input = StrInput::new("... ");
        assert!(input.next_is_document_end());
        assert!(input.next_is_document_indicator());
    }

    /// Inputs that are too short, whose marker is followed by a non-blank character, or whose
    /// three characters are not all the same `-`/`.` must not be taken for a marker.
    #[test]
    pub fn rejects_non_indicators() {
        for text in [
            "", "--", "..", "---a", "...a", "----", "--.", "-.-", "..-", "+++",
        ] {
            let input = StrInput::new(text);
            assert!(!input.next_is_document_start(), "start: {text:?}");
            assert!(!input.next_is_document_end(), "end: {text:?}");
            assert!(!input.next_is_document_indicator(), "indicator: {text:?}");
        }
    }
}