1use super::{QuoteStyle, Token, TokenType};
4use crate::{Error, Position, Result};
5
/// Scanning operations for YAML scalar values (plain, quoted, numeric and
/// block scalars), plus the small cursor interface those operations rely on.
pub trait ScalarScanner {
    /// Scans a plain (unquoted) scalar starting at the current character and
    /// returns it as a scalar token.
    fn scan_plain_scalar(&mut self) -> Result<Token>;

    /// Scans a quoted scalar whose opening quote is the current character.
    ///
    /// `quote_char` is the delimiter that opened the scalar (`'` or `"`); it
    /// determines the closing delimiter and the quote style of the token.
    fn scan_quoted_string(&mut self, quote_char: char) -> Result<Token>;

    /// Scans a numeric literal (optional leading `-`, digits, optional
    /// fractional part) and returns its text as a plain scalar token.
    fn scan_number(&mut self) -> Result<Token>;

    /// Scans a literal block scalar; the current character is expected to be
    /// the block indicator that introduces it.
    fn scan_literal_block_scalar(&mut self) -> Result<Token>;

    /// Scans a folded block scalar; the current character is expected to be
    /// the block indicator that introduces it.
    fn scan_folded_block_scalar(&mut self) -> Result<Token>;

    /// Scans the indicators that follow a block scalar indicator.
    ///
    /// Returns `(keep_chomping, explicit_indent)`: whether trailing line
    /// breaks are kept, and the explicit indentation indicator if one was
    /// given.
    fn scan_block_scalar_header(&mut self) -> Result<(bool, Option<usize>)>;

    /// Returns the scanner's current position in the input.
    fn current_position(&self) -> Position;

    /// Returns the character under the cursor, or `None` at end of input.
    fn current_char(&self) -> Option<char>;

    /// Advances the cursor by one character and returns the result of the
    /// underlying advance operation.
    fn advance_char(&mut self) -> Option<char>;

    /// Looks at the character `offset` positions ahead of the cursor without
    /// consuming anything; `None` if that position is past the end of input.
    fn peek_char(&self, offset: usize) -> Option<char>;

    /// Returns `true` when the cursor is at the start of a line.
    fn at_line_start(&self) -> bool;
}
41
42pub(super) fn is_plain_scalar_char(ch: char) -> bool {
44 !matches!(
45 ch,
46 ':' | ','
47 | '['
48 | ']'
49 | '{'
50 | '}'
51 | '#'
52 | '&'
53 | '*'
54 | '!'
55 | '|'
56 | '>'
57 | '\''
58 | '"'
59 | '%'
60 | '@'
61 | '`'
62 )
63}
64
65pub(super) fn process_escape_sequence(ch: char) -> Result<String> {
66 match ch {
67 'n' => Ok("\n".to_string()),
68 'r' => Ok("\r".to_string()),
69 't' => Ok("\t".to_string()),
70 '\\' => Ok("\\".to_string()),
71 '"' => Ok("\"".to_string()),
72 '\'' => Ok("'".to_string()),
73 '0' => Ok("\0".to_string()),
74 'a' => Ok("\x07".to_string()), 'b' => Ok("\x08".to_string()), 'f' => Ok("\x0C".to_string()), 'v' => Ok("\x0B".to_string()), 'e' => Ok("\x1B".to_string()), ' ' => Ok(" ".to_string()),
80 'N' => Ok("\u{85}".to_string()), '_' => Ok("\u{A0}".to_string()), 'L' => Ok("\u{2028}".to_string()), 'P' => Ok("\u{2029}".to_string()), _ => Err(Error::scan(
85 Position::new(),
86 format!("Invalid escape sequence: \\{}", ch),
87 )),
88 }
89}
90
91impl ScalarScanner for super::BasicScanner {
93 fn scan_plain_scalar(&mut self) -> Result<Token> {
94 let start_pos = self.position;
95 let mut value = String::new();
96
97 while let Some(ch) = self.current_char {
98 if self.flow_level == 0 {
100 match ch {
101 '\n' | '\r' => break,
102 ':' if self.peek_char(1).map_or(true, |c| c.is_whitespace()) => break,
103 '#' if value.is_empty()
104 || self.peek_char(-1).map_or(false, |c| c.is_whitespace()) =>
105 {
106 break;
107 }
108 _ => {}
109 }
110 } else {
111 match ch {
113 ',' | '[' | ']' | '{' | '}' => break,
114 ':' if self
115 .peek_char(1)
116 .map_or(true, |c| c.is_whitespace() || "]}".contains(c)) =>
117 {
118 break;
119 }
120 '#' if value.is_empty()
121 || self.peek_char(-1).map_or(false, |c| c.is_whitespace()) =>
122 {
123 break;
124 }
125 _ => {}
126 }
127 }
128
129 value.push(ch);
130 self.advance();
131 }
132
133 self.resource_tracker
135 .check_string_length(&self.limits, value.len())?;
136
137 let value = value.trim_end().to_string();
139 let normalized_value = Self::normalize_scalar(value);
140
141 Ok(Token::new(
142 TokenType::Scalar(normalized_value, QuoteStyle::Plain),
143 start_pos,
144 self.position,
145 ))
146 }
147
148 fn scan_quoted_string(&mut self, quote_char: char) -> Result<Token> {
149 let start_pos = self.position;
150 let mut value = String::new();
151
152 self.advance();
154
155 while let Some(ch) = self.current_char {
156 if ch == quote_char {
157 self.advance();
159 break;
160 } else if ch == '\\' && quote_char == '"' {
161 self.advance();
163 if let Some(escaped_char) = self.current_char {
164 match escaped_char {
165 'n' => value.push('\n'),
166 'r' => value.push('\r'),
167 't' => value.push('\t'),
168 '\\' => value.push('\\'),
169 '"' => value.push('"'),
170 '\'' => value.push('\''),
171 '0' => value.push('\0'),
172 'a' => value.push('\x07'), 'b' => value.push('\x08'), 'f' => value.push('\x0C'), 'v' => value.push('\x0B'), 'e' => value.push('\x1B'), ' ' => value.push(' '),
178 'N' => value.push('\u{85}'), '_' => value.push('\u{A0}'), 'L' => value.push('\u{2028}'), 'P' => value.push('\u{2029}'), _ => {
183 return Err(Error::scan(
185 self.position,
186 format!("Invalid escape sequence: \\{}", escaped_char),
187 ));
188 }
189 }
190 self.advance();
191 } else {
192 return Err(Error::scan(
193 self.position,
194 "Unterminated escape sequence".to_string(),
195 ));
196 }
197 } else {
198 value.push(ch);
199 self.advance();
200 }
201 }
202
203 self.resource_tracker
205 .check_string_length(&self.limits, value.len())?;
206
207 let quote_style = match quote_char {
208 '\'' => QuoteStyle::Single,
209 '"' => QuoteStyle::Double,
210 _ => QuoteStyle::Plain,
211 };
212
213 Ok(Token::new(
214 TokenType::Scalar(value, quote_style),
215 start_pos,
216 self.position,
217 ))
218 }
219
220 fn scan_number(&mut self) -> Result<Token> {
221 let start_pos = self.position;
222 let mut value = String::new();
223
224 if self.current_char == Some('-') {
226 value.push('-');
227 self.advance();
228 }
229
230 while let Some(ch) = self.current_char {
232 if ch.is_ascii_digit() {
233 value.push(ch);
234 self.advance();
235 } else if ch == '.' {
236 value.push(ch);
237 self.advance();
238 while let Some(ch) = self.current_char {
240 if ch.is_ascii_digit() {
241 value.push(ch);
242 self.advance();
243 } else {
244 break;
245 }
246 }
247 break;
248 } else {
249 break;
250 }
251 }
252
253 Ok(Token::new(
254 TokenType::Scalar(value, QuoteStyle::Plain),
255 start_pos,
256 self.position,
257 ))
258 }
259
260 fn scan_literal_block_scalar(&mut self) -> Result<Token> {
261 let start_pos = self.position;
262
263 self.advance();
265
266 let (keep_chomping, explicit_indent) = self.scan_block_scalar_header()?;
268
269 let mut base_indent = None;
271 let mut lines = Vec::new();
272 let mut current_line = String::new();
273
274 while let Some(ch) = self.current_char {
276 if ch == '\n' || ch == '\r' {
277 self.advance();
278 break;
279 }
280 self.advance();
281 }
282
283 while let Some(ch) = self.current_char {
285 if ch == '\n' || ch == '\r' {
286 lines.push(current_line.clone());
287 current_line.clear();
288 self.advance();
289
290 let mut temp_indent = 0usize;
292 let mut has_content = false;
293
294 while let Some(next_ch) = self.peek_char(temp_indent as isize) {
295 if next_ch == ' ' || next_ch == '\t' {
296 temp_indent += 1;
297 } else if next_ch == '\n' || next_ch == '\r' {
298 break;
300 } else {
301 has_content = true;
302 break;
303 }
304 }
305
306 if !has_content {
307 break;
309 }
310
311 if base_indent.is_none() && has_content {
313 base_indent = Some(explicit_indent.unwrap_or(temp_indent));
314 }
315 } else {
316 current_line.push(ch);
317 self.advance();
318 }
319 }
320
321 if !current_line.is_empty() {
323 lines.push(current_line);
324 }
325
326 let mut value = lines.join("\n");
328
329 if !keep_chomping {
331 value = value.trim_end_matches('\n').to_string();
332 }
333
334 self.resource_tracker
336 .check_string_length(&self.limits, value.len())?;
337
338 Ok(Token::new(
339 TokenType::BlockScalarLiteral(value),
340 start_pos,
341 self.position,
342 ))
343 }
344
345 fn scan_folded_block_scalar(&mut self) -> Result<Token> {
346 let start_pos = self.position;
347
348 self.advance();
350
351 let (keep_chomping, explicit_indent) = self.scan_block_scalar_header()?;
353
354 let mut base_indent = None;
356 let mut lines = Vec::new();
357 let mut current_line = String::new();
358
359 while let Some(ch) = self.current_char {
361 if ch == '\n' || ch == '\r' {
362 self.advance();
363 break;
364 }
365 self.advance();
366 }
367
368 while let Some(ch) = self.current_char {
370 if ch == '\n' || ch == '\r' {
371 lines.push(current_line.clone());
372 current_line.clear();
373 self.advance();
374
375 let mut temp_indent = 0usize;
377 let mut has_content = false;
378
379 while let Some(next_ch) = self.peek_char(temp_indent as isize) {
380 if next_ch == ' ' || next_ch == '\t' {
381 temp_indent += 1;
382 } else if next_ch == '\n' || next_ch == '\r' {
383 break;
384 } else {
385 has_content = true;
386 break;
387 }
388 }
389
390 if !has_content {
391 break;
392 }
393
394 if base_indent.is_none() && has_content {
395 base_indent = Some(explicit_indent.unwrap_or(temp_indent));
396 }
397 } else {
398 current_line.push(ch);
399 self.advance();
400 }
401 }
402
403 if !current_line.is_empty() {
404 lines.push(current_line);
405 }
406
407 let mut value = String::new();
409 let mut prev_was_empty = false;
410
411 for (i, line) in lines.iter().enumerate() {
412 if line.trim().is_empty() {
413 if !prev_was_empty && i > 0 {
414 value.push('\n');
415 }
416 prev_was_empty = true;
417 } else {
418 if i > 0 && !prev_was_empty {
419 value.push(' ');
420 } else if prev_was_empty && i > 0 {
421 value.push('\n');
422 }
423 value.push_str(line.trim());
424 prev_was_empty = false;
425 }
426 }
427
428 if !keep_chomping {
430 value = value.trim_end_matches('\n').to_string();
431 }
432
433 self.resource_tracker
435 .check_string_length(&self.limits, value.len())?;
436
437 Ok(Token::new(
438 TokenType::BlockScalarFolded(value),
439 start_pos,
440 self.position,
441 ))
442 }
443
444 fn scan_block_scalar_header(&mut self) -> Result<(bool, Option<usize>)> {
445 let mut keep_chomping = true;
446 let mut explicit_indent = None;
447
448 while let Some(ch) = self.current_char {
450 if ch == ' ' || ch == '\t' {
451 self.advance();
452 } else {
453 break;
454 }
455 }
456
457 if let Some(ch) = self.current_char {
459 if ch.is_ascii_digit() {
460 explicit_indent = Some(ch.to_digit(10).unwrap() as usize);
461 self.advance();
462 }
463 }
464
465 if let Some(ch) = self.current_char {
467 match ch {
468 '-' => {
469 keep_chomping = false; self.advance();
471 }
472 '+' => {
473 keep_chomping = true; self.advance();
475 }
476 _ => {}
477 }
478 }
479
480 Ok((keep_chomping, explicit_indent))
481 }
482
483 fn current_position(&self) -> Position {
485 self.position
486 }
487
488 fn current_char(&self) -> Option<char> {
489 self.current_char
490 }
491
492 fn advance_char(&mut self) -> Option<char> {
493 self.advance()
494 }
495
496 fn peek_char(&self, offset: usize) -> Option<char> {
497 self.peek_char(offset as isize)
498 }
499
500 fn at_line_start(&self) -> bool {
501 self.position.column == 1
502 }
503}