reifydb_testing/testscript/
parser.rs1use std::{collections::HashSet, error, fmt};
13
14use crate::testscript::command::{Argument, Block, Command};
15
16#[derive(Debug, Clone)]
17pub struct ParseError {
18 pub message: String,
19 pub line: u32,
20 pub column: usize,
21 pub input: LocatedSpan,
22 pub code: String,
23}
24
25impl fmt::Display for ParseError {
26 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
27 write!(f, "Parse error at line {}:{}: {}", self.line, self.column, self.message)
28 }
29}
30
31impl error::Error for ParseError {}
32
/// A recorded position in the input together with the text of its line.
#[derive(Debug, Clone)]
pub struct LocatedSpan {
    column: usize,
    line: u32,
    line_text: String,
}

impl LocatedSpan {
    /// Builds a span from a column, a line number and the text of that line.
    ///
    /// The first argument (the offset at which the line starts) is accepted
    /// but not stored.
    fn new(_line_start: usize, column: usize, line: u32, line_text: String) -> Self {
        Self {
            line_text,
            line,
            column,
        }
    }

    /// Returns the stored line number.
    pub fn location_line(&self) -> u32 {
        self.line
    }

    /// Returns the stored column.
    pub fn get_column(&self) -> usize {
        self.column
    }

    /// Returns the stored column; this is the same value as [`Self::get_column`].
    pub fn get_utf8_column(&self) -> usize {
        self.get_column()
    }

    /// Returns the stored line text as raw bytes.
    pub fn get_line_beginning(&self) -> &[u8] {
        self.line_text.as_bytes()
    }
}
65
66pub(crate) fn parse(input: &str) -> Result<Vec<Block>, ParseError> {
67 let mut parser = Parser::new(input);
68 parser.parse_blocks()
69}
70
/// Parses a single command from the start of `input` (test builds only).
///
/// # Errors
///
/// Returns a [`ParseError`] if the input does not start with a valid command.
#[cfg(test)]
pub(crate) fn parse_command(input: &str) -> Result<Command, ParseError> {
    Parser::new(input).parse_command()
}
76
/// Hand-written cursor over a test script, tracking position for error reporting.
struct Parser<'a> {
    /// The complete script text being parsed.
    input: &'a str,
    /// Byte offset of the cursor into `input`.
    pos: usize,
    /// Current line number; starts at 1 and increases on every consumed `'\n'`.
    line: u32,
    /// Current column; starts at 1, increases per consumed character and
    /// resets to 1 after a `'\n'`.
    column: usize,
    /// Byte offset in `input` at which the current line begins.
    line_start_pos: usize,
}
84
85impl<'a> Parser<'a> {
86 fn new(input: &'a str) -> Self {
87 Parser {
88 input,
89 pos: 0,
90 line: 1,
91 column: 1,
92 line_start_pos: 0,
93 }
94 }
95
96 fn current_char(&self) -> Option<char> {
97 self.input[self.pos..].chars().next()
98 }
99
100 fn peek_char(&self) -> Option<char> {
101 self.current_char()
102 }
103
104 fn peek_str(&self, n: usize) -> &str {
105 let end = (self.pos + n).min(self.input.len());
109
110 let mut safe_end = end;
112 while safe_end > self.pos && !self.input.is_char_boundary(safe_end) {
113 safe_end -= 1;
114 }
115
116 &self.input[self.pos..safe_end]
117 }
118
119 fn advance(&mut self) -> Option<char> {
120 if let Some(ch) = self.current_char() {
121 self.pos += ch.len_utf8();
122 if ch == '\n' {
123 self.line += 1;
124 self.column = 1;
125 self.line_start_pos = self.pos;
126 } else {
127 self.column += 1;
128 }
129 Some(ch)
130 } else {
131 None
132 }
133 }
134
135 fn skip_whitespace(&mut self) {
136 while let Some(ch) = self.peek_char() {
137 if ch.is_whitespace() && ch != '\n' {
138 self.advance();
139 } else {
140 break;
141 }
142 }
143 }
144
145 fn skip_line(&mut self) {
146 while let Some(ch) = self.peek_char() {
147 if ch == '\n' {
148 self.advance();
149 break;
150 }
151 self.advance();
152 }
153 }
154
155 fn is_at_end(&self) -> bool {
156 self.pos >= self.input.len()
157 }
158
159 fn error(&self, message: impl Into<String>) -> ParseError {
160 let line_end = self.input[self.line_start_pos..]
161 .find('\n')
162 .map(|i| self.line_start_pos + i)
163 .unwrap_or(self.input.len());
164 let line_text = &self.input[self.line_start_pos..line_end];
165
166 ParseError {
167 message: message.into(),
168 line: self.line,
169 column: self.column,
170 input: LocatedSpan::new(self.line_start_pos, self.column, self.line, line_text.to_string()),
171 code: format!("{:?}", line_text),
172 }
173 }
174
175 fn parse_blocks(&mut self) -> Result<Vec<Block>, ParseError> {
176 let mut blocks = Vec::new();
177
178 while !self.is_at_end() {
179 if let Some(block) = self.parse_block()? {
180 blocks.push(block);
181 }
182 }
183
184 Ok(blocks)
185 }
186
187 fn parse_block(&mut self) -> Result<Option<Block>, ParseError> {
188 let line_number = self.line;
189 let literal_start = self.pos;
190
191 let commands = self.parse_commands()?;
193
194 let literal_end = self.pos;
196 let literal = self.input[literal_start..literal_end].to_string();
197
198 if self.is_at_end() && commands.is_empty() {
200 return Ok(Some(Block {
201 literal,
202 commands,
203 line_number,
204 }));
205 }
206
207 if commands.is_empty() {
209 return Ok(None);
210 }
211
212 if !self.parse_separator()? {
214 return Err(self.error("Expected --- separator"));
215 }
216
217 self.parse_output()?;
219
220 Ok(Some(Block {
221 literal,
222 commands,
223 line_number,
224 }))
225 }
226
227 fn parse_commands(&mut self) -> Result<Vec<Command>, ParseError> {
228 let mut commands = Vec::new();
229
230 loop {
231 if self.skip_empty_or_comment_line() {
233 continue;
234 }
235
236 if self.is_at_end() {
238 break;
239 }
240
241 if self.peek_str(3) == "---" {
243 if !commands.is_empty() {
244 break;
245 }
246 }
247
248 if let Some(ch) = self.peek_char() {
251 if ch.is_whitespace() && ch != '\n' {
252 return Err(self.error("Command cannot start with whitespace"));
253 }
254 }
255
256 match self.parse_command() {
258 Ok(cmd) => commands.push(cmd),
259 Err(e) => {
260 if self.peek_str(3) == "---" && commands.is_empty() {
263 return Err(e);
264 }
265 return Err(e);
266 }
267 }
268 }
269
270 Ok(commands)
271 }
272
273 fn parse_command(&mut self) -> Result<Command, ParseError> {
274 let line_number = self.line;
275
276 let silent = if self.peek_char() == Some('(') {
278 self.advance();
279 self.skip_whitespace();
280 true
281 } else {
282 false
283 };
284
285 let mut tags = HashSet::new();
287 let mut prefix = None;
288
289 let saved_pos = self.pos;
291 self.skip_whitespace();
292 if let Ok(s) = self.parse_string() {
293 self.skip_whitespace();
294 if self.peek_char() == Some(':') {
295 self.advance();
296 self.skip_whitespace();
297 prefix = Some(s);
298 } else {
299 self.pos = saved_pos;
301 }
302 }
303
304 self.skip_whitespace();
306 if let Some(parsed_tags) = self.parse_taglist()? {
307 tags.extend(parsed_tags);
308 }
309 self.skip_whitespace();
310
311 let fail = if self.peek_char() == Some('!') {
313 self.advance();
314 self.skip_whitespace();
315 true
316 } else {
317 false
318 };
319
320 if self.peek_char() == Some('>') {
322 self.advance();
323 self.skip_whitespace();
324 let name = self.parse_line_continuation()?;
325 return Ok(Command {
326 name,
327 args: Vec::new(),
328 tags,
329 prefix,
330 silent,
331 fail,
332 line_number,
333 });
334 }
335
336 self.skip_whitespace();
338 let name = self.parse_string().map_err(|_| self.error("Expected command name"))?;
339
340 let mut args = Vec::new();
342 loop {
343 self.skip_whitespace();
344 if self.peek_char() == Some('[') {
345 if let Some(parsed_tags) = self.parse_taglist()? {
347 tags.extend(parsed_tags);
348 break;
349 }
350 }
351
352 if silent && self.peek_char() == Some(')') {
354 break;
355 }
356
357 if self.peek_char() == Some('#') || self.peek_str(2) == "//" {
358 break;
359 }
360
361 if self.peek_char() == Some('\n') || self.is_at_end() {
362 break;
363 }
364
365 let saved_pos = self.pos;
367 let saved_line = self.line;
368 let saved_column = self.column;
369 let saved_line_start = self.line_start_pos;
370 match self.parse_argument() {
371 Ok(arg) => args.push(arg),
372 Err(_) => {
373 self.pos = saved_pos;
374 self.line = saved_line;
375 self.column = saved_column;
376 self.line_start_pos = saved_line_start;
377 break;
378 }
379 }
380 }
381
382 if silent {
384 self.skip_whitespace();
385 if self.peek_char() != Some(')') {
386 return Err(self.error("Expected closing ) for silent command"));
387 }
388 self.advance();
389 }
390
391 self.skip_whitespace();
393 if self.peek_char() == Some('#') || self.peek_str(2) == "//" {
394 self.skip_line();
395 } else if self.peek_char() == Some('\n') {
396 self.advance();
397 } else if !self.is_at_end() {
398 return Err(self.error("Expected end of line"));
399 }
400
401 Ok(Command {
402 name,
403 args,
404 tags,
405 prefix,
406 silent,
407 fail,
408 line_number,
409 })
410 }
411
/// Parses one argument: either `key=value`, `key=` (empty value) or a bare value.
///
/// A bare value is returned with `key: None`. For `key=...`, the value is the
/// empty string when `=` is directly followed by whitespace, `[`, `)`, `#`,
/// `//` or end of input.
///
/// # Errors
///
/// Returns "Expected argument" when no string can be parsed at the cursor, and
/// "Expected argument value after =" when the text after `=` is not a valid
/// string (subject to the fallback described inline).
fn parse_argument(&mut self) -> Result<Argument, ParseError> {
    // Cursor state at entry, used to rewind before reporting a value error.
    let saved_pos = self.pos;
    let saved_line = self.line;
    let saved_column = self.column;
    let saved_line_start = self.line_start_pos;

    self.skip_whitespace();
    if let Ok(key) = self.parse_string() {
        if self.peek_char() == Some('=') {
            self.advance();
            // `key=` immediately followed by a terminator means an empty value.
            let value = if matches!(self.peek_char(), Some(ch) if ch.is_whitespace())
                || matches!(self.peek_char(), Some('[' | ')' | '#'))
                || self.peek_char().is_none() || self.peek_str(2) == "//"
            {
                String::new()
            } else {
                match self.parse_string() {
                    Ok(v) => v,
                    Err(_) => {
                        // Fallback: the value failed to parse. Record where the
                        // failed attempt left the cursor and try once more from
                        // there; if that yields a string followed by `=`, treat
                        // this value as empty and leave the cursor at the
                        // recorded position.
                        // NOTE(review): the failed attempt may already have
                        // consumed input (e.g. part of a quoted string), so
                        // `check_pos` is not necessarily just after the `=` —
                        // confirm this is intended.
                        let check_pos = self.pos;
                        let check_line = self.line;
                        let check_column = self.column;
                        let check_line_start = self.line_start_pos;

                        if let Ok(_) = self.parse_string() {
                            if self.peek_char() == Some('=') {
                                self.pos = check_pos;
                                self.line = check_line;
                                self.column = check_column;
                                self.line_start_pos = check_line_start;
                                String::new()
                            } else {
                                // Rewind to the start of the argument so the
                                // error points at it.
                                self.pos = saved_pos;
                                self.line = saved_line;
                                self.column = saved_column;
                                self.line_start_pos = saved_line_start;
                                return Err(self.error(
                                    "Expected argument value after =",
                                ));
                            }
                        } else {
                            // Rewind to the start of the argument so the error
                            // points at it.
                            self.pos = saved_pos;
                            self.line = saved_line;
                            self.column = saved_column;
                            self.line_start_pos = saved_line_start;
                            return Err(
                                self.error("Expected argument value after =")
                            );
                        }
                    }
                }
            };
            return Ok(Argument {
                key: Some(key),
                value,
            });
        }
        // No `=` follows: the parsed string is a positional value.
        return Ok(Argument {
            key: None,
            value: key,
        });
    }

    // Only the byte offset is rewound here; line/column keep whatever the
    // failed attempt left behind.
    self.pos = saved_pos;
    Err(self.error("Expected argument"))
}
499
500 fn parse_taglist(&mut self) -> Result<Option<HashSet<String>>, ParseError> {
501 if self.peek_char() != Some('[') {
502 return Ok(None);
503 }
504
505 self.advance();
506 let mut tags = HashSet::new();
507
508 loop {
509 self.skip_whitespace();
510
511 if self.peek_char() == Some(']') {
512 if tags.is_empty() {
514 return Err(self.error("Empty tag list"));
515 }
516 self.advance();
517 break;
518 }
519
520 self.skip_whitespace();
521 let tag = self.parse_string().map_err(|_| self.error("Expected tag name"))?;
522 tags.insert(tag);
523
524 self.skip_whitespace();
525 if self.peek_char() == Some(',') {
526 self.advance();
527 self.skip_whitespace();
528 } else if self.peek_char() == Some(' ') {
529 self.skip_whitespace();
530 }
531 }
532
533 Ok(Some(tags))
534 }
535
536 fn parse_string(&mut self) -> Result<String, ParseError> {
537 match self.peek_char() {
541 Some('\'') => self.parse_quoted_string('\''),
542 Some('"') => self.parse_quoted_string('"'),
543 _ => self.parse_unquoted_string(),
544 }
545 }
546
547 fn parse_unquoted_string(&mut self) -> Result<String, ParseError> {
548 let mut result = String::new();
549
550 match self.peek_char() {
552 Some(ch) if ch.is_alphanumeric() || ch == '_' => {
553 result.push(ch);
554 self.advance();
555 }
556 _ => return Err(self.error("Expected string")),
557 }
558
559 while let Some(ch) = self.peek_char() {
561 if ch.is_alphanumeric() || "_-./@".contains(ch) {
562 result.push(ch);
563 self.advance();
564 } else {
565 break;
566 }
567 }
568
569 Ok(result)
570 }
571
572 fn parse_quoted_string(&mut self, quote: char) -> Result<String, ParseError> {
573 let mut result = String::new();
574
575 if self.peek_char() != Some(quote) {
577 return Err(self.error(format!("Expected {} quote", quote)));
578 }
579 self.advance();
580
581 while let Some(ch) = self.peek_char() {
582 if ch == quote {
583 self.advance();
584 return Ok(result);
585 } else if ch == '\\' {
586 self.advance();
587 match self.peek_char() {
588 Some('\'') => {
589 result.push('\'');
590 self.advance();
591 }
592 Some('"') => {
593 result.push('"');
594 self.advance();
595 }
596 Some('\\') => {
597 result.push('\\');
598 self.advance();
599 }
600 Some('0') => {
601 result.push('\0');
602 self.advance();
603 }
604 Some('n') => {
605 result.push('\n');
606 self.advance();
607 }
608 Some('r') => {
609 result.push('\r');
610 self.advance();
611 }
612 Some('t') => {
613 result.push('\t');
614 self.advance();
615 }
616 Some('x') => {
617 self.advance();
618 let hex = self.parse_hex_digits(2, 2)?;
619 let byte = u8::from_str_radix(&hex, 16)
620 .map_err(|_| self.error("Invalid hex escape"))?;
621 result.push(char::from(byte));
622 }
623 Some('u') => {
624 self.advance();
625 if self.peek_char() != Some('{') {
626 return Err(self.error("Expected { after \\u"));
627 }
628 self.advance();
629 let hex = self.parse_hex_digits(1, 6)?;
630 if self.peek_char() != Some('}') {
631 return Err(self.error("Expected } after unicode escape"));
632 }
633 self.advance();
634 let codepoint = u32::from_str_radix(&hex, 16)
635 .map_err(|_| self.error("Invalid unicode escape"))?;
636 let ch = char::from_u32(codepoint)
637 .ok_or_else(|| self.error("Invalid unicode codepoint"))?;
638 result.push(ch);
639 }
640 _ => {
641 return Err(self.error("Invalid escape sequence"));
642 }
643 }
644 } else {
645 result.push(ch);
646 self.advance();
647 }
648 }
649
650 Err(self.error(format!("Unterminated string (missing {})", quote)))
651 }
652
653 fn parse_hex_digits(&mut self, min: usize, max: usize) -> Result<String, ParseError> {
654 let mut hex = String::new();
655 for i in 0..max {
656 match self.peek_char() {
657 Some(ch) if ch.is_ascii_hexdigit() => {
658 hex.push(ch);
659 self.advance();
660 }
661 _ => {
662 if i < min {
663 return Err(self.error(format!("Expected at least {} hex digits", min)));
664 }
665 break;
666 }
667 }
668 }
669 if hex.len() < min {
670 return Err(self.error(format!("Expected at least {} hex digits", min)));
671 }
672 Ok(hex)
673 }
674
675 fn skip_empty_or_comment_line(&mut self) -> bool {
676 let saved_pos = self.pos;
677
678 self.skip_whitespace();
679
680 if self.peek_char() == Some('#') || self.peek_str(2) == "//" {
682 self.skip_line();
683 return true;
684 }
685
686 if self.peek_char() == Some('\n') {
688 self.advance();
689 return true;
690 }
691
692 self.pos = saved_pos;
694 false
695 }
696
697 fn parse_separator(&mut self) -> Result<bool, ParseError> {
698 if self.peek_str(3) != "---" {
699 return Ok(false);
700 }
701
702 self.advance(); self.advance(); self.advance(); match self.peek_char() {
708 Some('\r') => {
709 self.advance();
710 if self.peek_char() == Some('\n') {
711 self.advance();
712 }
713 Ok(true)
714 }
715 Some('\n') => {
716 self.advance();
717 Ok(true)
718 }
719 None => Ok(true),
720 _ => Err(self.error("Separator must be followed by newline or EOF")),
721 }
722 }
723
724 fn parse_output(&mut self) -> Result<(), ParseError> {
725 if self.peek_char() == Some('\n') || self.is_at_end() {
727 if self.peek_char() == Some('\n') {
728 self.advance();
729 }
730 return Ok(());
731 }
732
733 let mut last_was_newline = false;
735 while !self.is_at_end() {
736 let ch = self.advance().unwrap();
737 if ch == '\n' {
738 if last_was_newline {
739 break;
740 }
741 last_was_newline = true;
742 } else {
743 last_was_newline = false;
744 }
745 }
746
747 Ok(())
748 }
749
750 fn parse_line_continuation(&mut self) -> Result<String, ParseError> {
751 let mut result = String::new();
752
753 loop {
754 while let Some(ch) = self.peek_char() {
756 if ch == '\n' {
757 break;
758 }
759 result.push(ch);
760 self.advance();
761 }
762
763 if result.ends_with('\\') {
765 result.pop(); if self.peek_char() == Some('\n') {
767 self.advance(); continue;
769 }
770 }
771
772 if self.peek_char() == Some('\n') {
774 self.advance();
775 }
776
777 break;
778 }
779
780 Ok(result)
781 }
782}