saphyr_parser/input/
str.rs1use crate::{
2 char_traits::{
3 is_alpha, is_blank, is_blank_or_breakz, is_break, is_breakz, is_digit, is_flow, is_z,
4 },
5 input::{Input, SkipTabs},
6};
7
/// A parser input backed by a borrowed `&str`.
///
/// The whole source is available up front, so "reading" a character only means shrinking the
/// borrowed slice from the front; nothing is ever copied.
// The `Input` suffix mirrors the name of the module this type lives in, which is what the
// lint objects to; the name is kept because it is part of the public API.
#[allow(clippy::module_name_repetitions)]
#[derive(Debug, Clone)]
pub struct StrInput<'a> {
    /// The part of the source that has not been consumed yet.
    buffer: &'a str,
    /// The largest lookahead that has been requested so far (see `Input::lookahead`).
    lookahead: usize,
}

impl<'a> StrInput<'a> {
    /// Creates an input positioned at the start of `input`, with no lookahead requested yet.
    #[must_use]
    pub fn new(input: &'a str) -> Self {
        StrInput {
            lookahead: 0,
            buffer: input,
        }
    }
}
30
31impl Input for StrInput<'_> {
32 #[inline]
33 fn lookahead(&mut self, x: usize) {
34 self.lookahead = self.lookahead.max(x);
38 }
39
40 #[inline]
41 fn buflen(&self) -> usize {
42 self.lookahead
43 }
44
45 #[inline]
46 fn bufmaxlen(&self) -> usize {
47 BUFFER_LEN
48 }
49
50 fn buf_is_empty(&self) -> bool {
51 self.buflen() == 0
52 }
53
54 #[inline]
55 fn raw_read_ch(&mut self) -> char {
56 let mut chars = self.buffer.chars();
57 if let Some(c) = chars.next() {
58 self.buffer = chars.as_str();
59 c
60 } else {
61 '\0'
62 }
63 }
64
65 #[inline]
66 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
67 if let Some((c, sub_str)) = split_first_char(self.buffer) {
68 if is_breakz(c) {
69 None
70 } else {
71 self.buffer = sub_str;
72 Some(c)
73 }
74 } else {
75 None
76 }
77 }
78
79 #[inline]
80 fn skip(&mut self) {
81 let mut chars = self.buffer.chars();
82 if chars.next().is_some() {
83 self.buffer = chars.as_str();
84 }
85 }
86
87 #[inline]
88 fn skip_n(&mut self, count: usize) {
89 let mut chars = self.buffer.chars();
90 for _ in 0..count {
91 if chars.next().is_none() {
92 break;
93 }
94 }
95 self.buffer = chars.as_str();
96 }
97
98 #[inline]
99 fn peek(&self) -> char {
100 self.buffer.chars().next().unwrap_or('\0')
101 }
102
103 #[inline]
104 fn peek_nth(&self, n: usize) -> char {
105 let mut chars = self.buffer.chars();
106 for _ in 0..n {
107 if chars.next().is_none() {
108 return '\0';
109 }
110 }
111 chars.next().unwrap_or('\0')
112 }
113
114 #[inline]
115 fn look_ch(&mut self) -> char {
116 self.lookahead(1);
117 self.peek()
118 }
119
120 #[inline]
121 fn next_char_is(&self, c: char) -> bool {
122 self.peek() == c
123 }
124
125 #[inline]
126 fn nth_char_is(&self, n: usize, c: char) -> bool {
127 self.peek_nth(n) == c
128 }
129
130 #[inline]
131 fn next_2_are(&self, c1: char, c2: char) -> bool {
132 let mut chars = self.buffer.chars();
133 chars.next() == Some(c1) && chars.next() == Some(c2)
134 }
135
136 #[inline]
137 fn next_3_are(&self, c1: char, c2: char, c3: char) -> bool {
138 let mut chars = self.buffer.chars();
139 chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3)
140 }
141
142 #[inline]
143 fn next_is_document_indicator(&self) -> bool {
144 if self.buffer.len() < 3 {
145 false
146 } else {
147 let bytes = self.buffer.as_bytes();
149 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
150 && (bytes[0] == b'.' || bytes[0] == b'-')
151 && bytes[0] == bytes[1]
152 && bytes[1] == bytes[2]
153 }
154 }
155
156 #[inline]
157 fn next_is_document_start(&self) -> bool {
158 if self.buffer.len() < 3 {
159 false
160 } else {
161 let bytes = self.buffer.as_bytes();
163 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
164 && bytes[0] == b'-'
165 && bytes[1] == b'-'
166 && bytes[2] == b'-'
167 }
168 }
169
170 #[inline]
171 fn next_is_document_end(&self) -> bool {
172 if self.buffer.len() < 3 {
173 false
174 } else {
175 let bytes = self.buffer.as_bytes();
177 (bytes.len() == 3 || is_blank_or_breakz(bytes[3] as char))
178 && bytes[0] == b'.'
179 && bytes[1] == b'.'
180 && bytes[2] == b'.'
181 }
182 }
183
184 fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> (usize, Result<SkipTabs, &'static str>) {
185 assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
186
187 let mut new_str = self.buffer;
188 let mut has_yaml_ws = false;
189 let mut encountered_tab = false;
190
191 if skip_tabs == SkipTabs::Yes {
194 loop {
195 if let Some(sub_str) = new_str.strip_prefix(' ') {
196 has_yaml_ws = true;
197 new_str = sub_str;
198 } else if let Some(sub_str) = new_str.strip_prefix('\t') {
199 encountered_tab = true;
200 new_str = sub_str;
201 } else {
202 break;
203 }
204 }
205 } else {
206 while let Some(sub_str) = new_str.strip_prefix(' ') {
207 has_yaml_ws = true;
208 new_str = sub_str;
209 }
210 }
211
212 let mut chars_consumed = self.buffer.len() - new_str.len();
215
216 if !new_str.is_empty() && new_str.as_bytes()[0] == b'#' {
217 if !encountered_tab && !has_yaml_ws {
218 return (
219 chars_consumed,
220 Err("comments must be separated from other tokens by whitespace"),
221 );
222 }
223
224 while let Some((c, sub_str)) = split_first_char(new_str) {
226 if is_breakz(c) {
227 break;
228 }
229 new_str = sub_str;
230 chars_consumed += 1;
231 }
232 }
233
234 self.buffer = new_str;
235
236 (
237 chars_consumed,
238 Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)),
239 )
240 }
241
242 #[allow(clippy::inline_always)]
243 #[inline(always)]
244 fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool {
245 let c = self.buffer.as_bytes()[0];
246 if self.buffer.len() > 1 {
247 let nc = self.buffer.as_bytes()[1];
248 match c {
249 b':' if is_blank_or_breakz(nc as char) || (in_flow && is_flow(nc as char)) => false,
251 c if in_flow && is_flow(c as char) => false,
252 _ => true,
253 }
254 } else {
255 match c {
256 b':' => false,
258 c if in_flow && is_flow(c as char) => false,
259 _ => true,
260 }
261 }
262 }
263
264 #[inline]
265 fn next_is_blank_or_break(&self) -> bool {
266 !self.buffer.is_empty()
267 && (is_blank(self.buffer.as_bytes()[0] as char)
268 || is_break(self.buffer.as_bytes()[0] as char))
269 }
270
271 #[inline]
272 fn next_is_blank_or_breakz(&self) -> bool {
273 self.buffer.is_empty()
274 || (is_blank(self.buffer.as_bytes()[0] as char)
275 || is_breakz(self.buffer.as_bytes()[0] as char))
276 }
277
278 #[inline]
279 fn next_is_blank(&self) -> bool {
280 !self.buffer.is_empty() && is_blank(self.buffer.as_bytes()[0] as char)
281 }
282
283 #[inline]
284 fn next_is_break(&self) -> bool {
285 !self.buffer.is_empty() && is_break(self.buffer.as_bytes()[0] as char)
286 }
287
288 #[inline]
289 fn next_is_breakz(&self) -> bool {
290 self.buffer.is_empty() || is_breakz(self.buffer.as_bytes()[0] as char)
291 }
292
293 #[inline]
294 fn next_is_z(&self) -> bool {
295 self.buffer.is_empty() || is_z(self.buffer.as_bytes()[0] as char)
296 }
297
298 #[inline]
299 fn next_is_flow(&self) -> bool {
300 !self.buffer.is_empty() && is_flow(self.buffer.as_bytes()[0] as char)
301 }
302
303 #[inline]
304 fn next_is_digit(&self) -> bool {
305 !self.buffer.is_empty() && is_digit(self.buffer.as_bytes()[0] as char)
306 }
307
308 #[inline]
309 fn next_is_alpha(&self) -> bool {
310 !self.buffer.is_empty() && is_alpha(self.buffer.as_bytes()[0] as char)
311 }
312
313 fn skip_while_non_breakz(&mut self) -> usize {
314 let mut new_str = self.buffer;
315 let mut count = 0;
316
317 while let Some((c, sub_str)) = split_first_char(new_str) {
319 if is_breakz(c) {
320 break;
321 }
322 new_str = sub_str;
323 count += 1;
324 }
325
326 self.buffer = new_str;
327
328 count
329 }
330
331 fn skip_while_blank(&mut self) -> usize {
332 let mut i = 0;
334 while i < self.buffer.len() {
335 if !is_blank(self.buffer.as_bytes()[i] as char) {
336 break;
337 }
338 i += 1;
339 }
340 self.buffer = &self.buffer[i..];
341 i
342 }
343
344 fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
345 let mut not_alpha = None;
346
347 let mut chars = self.buffer.chars();
349 for c in chars.by_ref() {
350 if !is_alpha(c) {
351 not_alpha = Some(c);
352 break;
353 }
354 }
355
356 let remaining_string = if let Some(c) = not_alpha {
357 let n_bytes_read = chars.as_str().as_ptr() as usize - self.buffer.as_ptr() as usize;
358 let last_char_bytes = c.len_utf8();
359 &self.buffer[n_bytes_read - last_char_bytes..]
360 } else {
361 chars.as_str()
362 };
363
364 let n_bytes_to_append = remaining_string.as_ptr() as usize - self.buffer.as_ptr() as usize;
365 out.reserve(n_bytes_to_append);
366 out.push_str(&self.buffer[..n_bytes_to_append]);
367 self.buffer = remaining_string;
368
369 n_bytes_to_append
370 }
371}
372
373const BUFFER_LEN: usize = 128;
396
/// Splits `s` into its first character and the remainder of the string.
///
/// Returns `None` when `s` is empty.
#[inline]
fn split_first_char(s: &str) -> Option<(char, &str)> {
    s.chars()
        .next()
        // The remainder starts right after the UTF-8 encoding of the first character.
        .map(|first| (first, &s[first.len_utf8()..]))
}
405
#[cfg(test)]
mod test {
    use crate::input::Input;

    use super::StrInput;

    /// `---` (alone, or followed by a break or a space) is a document start; `...` is not.
    /// Every sample is a document indicator.
    #[test]
    pub fn is_document_start() {
        // (input text, expected result of `next_is_document_start`)
        let cases = [
            ("---\n", true),
            ("---", true),
            ("...\n", false),
            ("--- ", true),
        ];

        for &(text, expect_start) in cases.iter() {
            let input = StrInput::new(text);
            assert_eq!(input.next_is_document_start(), expect_start, "input: {:?}", text);
            assert!(input.next_is_document_indicator(), "input: {:?}", text);
        }
    }

    /// `...` (alone, or followed by a break or a space) is a document end; `---` is not.
    /// Every sample is a document indicator.
    #[test]
    pub fn is_document_end() {
        // (input text, expected result of `next_is_document_end`)
        let cases = [
            ("...\n", true),
            ("...", true),
            ("---\n", false),
            ("... ", true),
        ];

        for &(text, expect_end) in cases.iter() {
            let input = StrInput::new(text);
            assert_eq!(input.next_is_document_end(), expect_end, "input: {:?}", text);
            assert!(input.next_is_document_indicator(), "input: {:?}", text);
        }
    }
}