Skip to main content

cctr_corpus/
lib.rs

1//! Corpus test file parser.
2//!
3//! Parses `.txt` corpus test files into structured test cases using winnow.
4//!
5//! # File Format
6//!
7//! ```text
8//! ===
9//! test name
10//! ===
11//! command to run
12//! ---
13//! expected output
14//!
15//! ===
16//! test with variables
17//! ===
18//! some_command
19//! ---
20//! Completed in {{ time: number }}s
21//! ---
22//! where
23//! * time > 0
24//! * time < 60
25//! ```
26//!
27//! ## Skip Directives
28//!
29//! Tests can be conditionally skipped using `%skip` directives:
30//!
31//! ```text
32//! %skip                           # unconditional skip
33//! %skip(not yet implemented)      # unconditional skip with message
34//! %skip if: test "$OS" = "Win"    # conditional skip
35//! %skip(unix only) if: test ...   # conditional skip with message
36//! ```
37//!
38//! File-level skips go at the top of the file before any tests.
39//! Test-level skips go after the test name, before the closing `===`.
40
41use std::path::{Path, PathBuf};
42use thiserror::Error;
43use winnow::combinator::{alt, opt, repeat};
44use winnow::error::ContextError;
45use winnow::prelude::*;
46use winnow::token::{take_till, take_while};
47
48// ============ Data Types ============
49
/// Type annotation that can follow a placeholder name, as in `{{ n: number }}`.
///
/// The enum is field-less and `Copy`, so it derives `Eq` and `Hash` in addition
/// to `PartialEq`; this lets callers use it as a map/set key and in `Eq`-bounded
/// contexts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VarType {
    /// Spelled `number` in a placeholder.
    Number,
    /// Spelled `string` in a placeholder.
    String,
    /// Spelled `json string` in a placeholder.
    JsonString,
    /// Spelled `json bool` in a placeholder.
    JsonBool,
    /// Spelled `json array` in a placeholder.
    JsonArray,
    /// Spelled `json object` in a placeholder.
    JsonObject,
}
59
60#[derive(Debug, Clone, PartialEq)]
61pub struct VariableDecl {
62    pub name: String,
63    pub var_type: Option<VarType>,
64}
65
/// A parsed `%skip` directive.
///
/// Both fields are optional: `%skip` alone leaves both `None`, `%skip(msg)` sets
/// `message`, and `%skip if: cmd` sets `condition`.
///
/// All fields are `Option<String>`, so the type derives `Eq` alongside
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SkipDirective {
    /// Text written between the parentheses of `%skip(...)`.
    pub message: Option<String>,
    /// Text following `if:` on the directive line, trimmed.
    pub condition: Option<String>,
}
71
72#[derive(Debug, Clone, PartialEq)]
73pub struct TestCase {
74    pub name: String,
75    pub command: String,
76    pub expected_output: String,
77    pub file_path: PathBuf,
78    pub start_line: usize,
79    pub end_line: usize,
80    pub variables: Vec<VariableDecl>,
81    pub constraints: Vec<String>,
82    pub skip: Option<SkipDirective>,
83}
84
85impl TestCase {
86    pub fn variable_names(&self) -> Vec<&str> {
87        self.variables.iter().map(|v| v.name.as_str()).collect()
88    }
89}
90
91#[derive(Debug, Clone, PartialEq)]
92pub struct CorpusFile {
93    pub file_skip: Option<SkipDirective>,
94    pub tests: Vec<TestCase>,
95}
96
97#[derive(Error, Debug)]
98pub enum ParseError {
99    #[error("IO error: {0}")]
100    Io(#[from] std::io::Error),
101    #[error("parse error at line {line}: {message}")]
102    Parse { line: usize, message: String },
103}
104
105// ============ Public API ============
106
107pub fn parse_file(path: &Path) -> Result<CorpusFile, ParseError> {
108    let content = std::fs::read_to_string(path)?;
109    parse_content(&content, path)
110}
111
112pub fn parse_content(content: &str, path: &Path) -> Result<CorpusFile, ParseError> {
113    let mut state = ParseState::new(content, path);
114    match corpus_file(&mut state) {
115        Ok(file) => Ok(file),
116        Err(_) => Err(ParseError::Parse {
117            line: state.current_line,
118            message: "failed to parse corpus file".to_string(),
119        }),
120    }
121}
122
123// ============ Parse State ============
124
/// Mutable state threaded through the top-level parsers.
struct ParseState<'a> {
    /// Input that has not been consumed yet.
    input: &'a str,
    /// Path recorded on every parsed test case.
    path: &'a Path,
    /// 1-based line counter maintained by the parsers.
    current_line: usize,
}

impl<'a> ParseState<'a> {
    /// Creates a state positioned at the start of `input`, on line 1.
    fn new(input: &'a str, path: &'a Path) -> Self {
        let current_line = 1;
        Self {
            input,
            path,
            current_line,
        }
    }
}
140
141// ============ Type Annotation Parsing ============
142
143fn parse_type_annotation(type_str: &str) -> Option<VarType> {
144    match type_str.to_lowercase().as_str() {
145        "number" => Some(VarType::Number),
146        "string" => Some(VarType::String),
147        "json string" => Some(VarType::JsonString),
148        "json bool" => Some(VarType::JsonBool),
149        "json array" => Some(VarType::JsonArray),
150        "json object" => Some(VarType::JsonObject),
151        _ => None,
152    }
153}
154
/// Names that may not be used as placeholder variable names.
#[rustfmt::skip]
const RESERVED_KEYWORDS: &[&str] = &[
    "true", "false", "null",
    "and", "or", "not", "in", "forall",
    "contains", "startswith", "endswith", "matches",
    "len", "type", "keys", "values",
    "sum", "min", "max", "abs",
    "unique", "lower", "upper",
    "number", "string", "bool", "array", "object",
    "env",
];

/// Returns `true` when `name` is exactly one of [`RESERVED_KEYWORDS`].
///
/// The comparison is case-sensitive.
fn is_reserved_keyword(name: &str) -> bool {
    RESERVED_KEYWORDS.iter().any(|keyword| *keyword == name)
}
190
191fn parse_placeholder(content: &str) -> Result<(String, Option<VarType>), String> {
192    let content = content.trim();
193    let (name, var_type) = if let Some(colon_pos) = content.find(':') {
194        let name = content[..colon_pos].trim().to_string();
195        let type_str = content[colon_pos + 1..].trim();
196        (name, parse_type_annotation(type_str))
197    } else {
198        (content.to_string(), None)
199    };
200
201    if is_reserved_keyword(&name) {
202        return Err(format!(
203            "'{}' is a reserved keyword and cannot be used as a variable name",
204            name
205        ));
206    }
207
208    Ok((name, var_type))
209}
210
211fn extract_variables_from_expected(expected: &str) -> Result<Vec<VariableDecl>, String> {
212    let mut variables = Vec::new();
213    let mut seen = std::collections::HashSet::new();
214    let mut remaining = expected;
215
216    while let Some(start) = remaining.find("{{") {
217        if let Some(end) = remaining[start..].find("}}") {
218            let content = &remaining[start + 2..start + end];
219            let (name, var_type) = parse_placeholder(content)?;
220            if !name.is_empty() && seen.insert(name.clone()) {
221                variables.push(VariableDecl { name, var_type });
222            }
223            remaining = &remaining[start + end + 2..];
224        } else {
225            break;
226        }
227    }
228
229    Ok(variables)
230}
231
232// ============ Winnow Parsers ============
233
234fn header_sep(input: &mut &str) -> ModalResult<()> {
235    let line: &str = take_while(1.., '=').parse_next(input)?;
236    if line.len() >= 3 {
237        Ok(())
238    } else {
239        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
240    }
241}
242
243fn dash_sep(input: &mut &str) -> ModalResult<()> {
244    let line: &str = take_while(1.., '-').parse_next(input)?;
245    if line.len() >= 3 {
246        Ok(())
247    } else {
248        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
249    }
250}
251
252fn line_content<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
253    take_till(0.., |c| c == '\n' || c == '\r').parse_next(input)
254}
255
256fn newline(input: &mut &str) -> ModalResult<()> {
257    alt(("\r\n".value(()), "\n".value(()), "\r".value(()))).parse_next(input)
258}
259
260fn opt_newline(input: &mut &str) -> ModalResult<()> {
261    opt(newline).map(|_| ()).parse_next(input)
262}
263
264fn blank_line(input: &mut &str) -> ModalResult<()> {
265    (take_while(0.., ' '), newline)
266        .map(|_| ())
267        .parse_next(input)
268}
269
270fn skip_blank_lines(input: &mut &str) -> ModalResult<()> {
271    repeat(0.., blank_line)
272        .map(|_: Vec<()>| ())
273        .parse_next(input)
274}
275
/// Reports whether `line`, ignoring surrounding whitespace, is a separator:
/// at least three characters long and made up entirely of `=` or entirely of `-`.
fn is_separator_line(line: &str) -> bool {
    let bare = line.trim();
    if bare.len() < 3 {
        return false;
    }
    ['=', '-']
        .iter()
        .any(|&mark| bare.chars().all(|c| c == mark))
}
281
282// ============ Skip Directive Parser ============
283
284fn skip_message(input: &mut &str) -> ModalResult<String> {
285    '('.parse_next(input)?;
286    let msg: &str = take_till(0.., ')').parse_next(input)?;
287    ')'.parse_next(input)?;
288    Ok(msg.to_string())
289}
290
291fn skip_condition(input: &mut &str) -> ModalResult<String> {
292    let _ = take_while(0.., ' ').parse_next(input)?;
293    "if:".parse_next(input)?;
294    let _ = take_while(0.., ' ').parse_next(input)?;
295    let condition = line_content.parse_next(input)?;
296    Ok(condition.trim().to_string())
297}
298
299fn skip_directive(input: &mut &str) -> ModalResult<SkipDirective> {
300    "%skip".parse_next(input)?;
301    let message = opt(skip_message).parse_next(input)?;
302    let condition = opt(skip_condition).parse_next(input)?;
303
304    if message.is_none() && condition.is_none() {
305        let _ = line_content.parse_next(input)?;
306    }
307
308    opt_newline.parse_next(input)?;
309
310    Ok(SkipDirective { message, condition })
311}
312
313fn try_skip_directive(input: &mut &str) -> ModalResult<Option<SkipDirective>> {
314    let _ = take_while(0.., ' ').parse_next(input)?;
315    if input.starts_with("%skip") {
316        Ok(Some(skip_directive.parse_next(input)?))
317    } else {
318        Ok(None)
319    }
320}
321
322// ============ Test Case Parser ============
323
324fn description_line(input: &mut &str) -> ModalResult<String> {
325    let content = line_content.parse_next(input)?;
326    opt_newline.parse_next(input)?;
327    Ok(content.trim().to_string())
328}
329
330fn command_lines(input: &mut &str) -> ModalResult<String> {
331    let mut lines = Vec::new();
332
333    loop {
334        if input.is_empty() {
335            break;
336        }
337
338        let peek_line = input.lines().next().unwrap_or("");
339        if is_separator_line(peek_line) {
340            break;
341        }
342
343        let line = line_content.parse_next(input)?;
344        opt_newline.parse_next(input)?;
345        lines.push(line);
346    }
347
348    while lines.last().is_some_and(|s| s.trim().is_empty()) {
349        lines.pop();
350    }
351
352    Ok(lines.join("\n"))
353}
354
355fn expected_block(input: &mut &str) -> ModalResult<String> {
356    let mut lines = Vec::new();
357
358    loop {
359        if input.is_empty() {
360            break;
361        }
362
363        let peek_line = input.lines().next().unwrap_or("");
364        if is_separator_line(peek_line) {
365            break;
366        }
367
368        let line = line_content.parse_next(input)?;
369        opt_newline.parse_next(input)?;
370        lines.push(line);
371    }
372
373    while lines.last() == Some(&"") {
374        lines.pop();
375    }
376
377    Ok(lines.join("\n"))
378}
379
380fn constraint_line(input: &mut &str) -> ModalResult<String> {
381    let _ = take_while(0.., ' ').parse_next(input)?;
382    let _ = opt('*').parse_next(input)?;
383    let _ = take_while(0.., ' ').parse_next(input)?;
384
385    let content = line_content.parse_next(input)?;
386    opt_newline.parse_next(input)?;
387
388    let trimmed = content.trim();
389    if trimmed.is_empty() || trimmed == "where" {
390        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
391    } else {
392        Ok(trimmed.to_string())
393    }
394}
395
396fn where_section(input: &mut &str) -> ModalResult<Vec<String>> {
397    dash_sep.parse_next(input)?;
398    opt_newline.parse_next(input)?;
399
400    let _ = take_while(0.., ' ').parse_next(input)?;
401    "where".parse_next(input)?;
402    opt_newline.parse_next(input)?;
403
404    let constraints: Vec<String> = repeat(0.., constraint_line).parse_next(input)?;
405    Ok(constraints)
406}
407
408// ============ Main Parsers ============
409
410fn test_case(state: &mut ParseState) -> Result<TestCase, winnow::error::ErrMode<ContextError>> {
411    let input = &mut state.input;
412
413    skip_blank_lines.parse_next(input)?;
414
415    let start_line = state.current_line;
416
417    header_sep.parse_next(input)?;
418    opt_newline.parse_next(input)?;
419    state.current_line += 1;
420
421    let name = description_line.parse_next(input)?;
422    state.current_line += 1;
423
424    let skip = try_skip_directive.parse_next(input)?;
425    if skip.is_some() {
426        state.current_line += 1;
427    }
428
429    header_sep.parse_next(input)?;
430    opt_newline.parse_next(input)?;
431    state.current_line += 1;
432
433    let command_start = state.current_line;
434    let command = command_lines.parse_next(input)?;
435    state.current_line = command_start + command.lines().count().max(1);
436
437    dash_sep.parse_next(input)?;
438    opt_newline.parse_next(input)?;
439    state.current_line += 1;
440
441    let expected_start = state.current_line;
442    let expected_output = expected_block.parse_next(input)?;
443    let expected_lines = expected_output.lines().count();
444    state.current_line =
445        expected_start + expected_lines.max(if expected_output.is_empty() { 0 } else { 1 });
446
447    let constraints = opt(where_section).parse_next(input)?.unwrap_or_default();
448    if !constraints.is_empty() {
449        state.current_line += 2 + constraints.len();
450    }
451
452    skip_blank_lines.parse_next(input)?;
453
454    let end_line = state.current_line;
455
456    let variables = extract_variables_from_expected(&expected_output)
457        .map_err(|_| winnow::error::ErrMode::Backtrack(ContextError::new()))?;
458
459    Ok(TestCase {
460        name,
461        command,
462        expected_output,
463        file_path: state.path.to_path_buf(),
464        start_line,
465        end_line,
466        variables,
467        constraints,
468        skip,
469    })
470}
471
472fn corpus_file(state: &mut ParseState) -> Result<CorpusFile, winnow::error::ErrMode<ContextError>> {
473    let input = &mut state.input;
474
475    skip_blank_lines.parse_next(input)?;
476
477    let file_skip = try_skip_directive.parse_next(input)?;
478    if file_skip.is_some() {
479        state.current_line += 1;
480    }
481
482    skip_blank_lines.parse_next(input)?;
483
484    let mut tests = Vec::new();
485
486    while !state.input.is_empty() {
487        let peeked = state.input.trim_start();
488        if peeked.is_empty() {
489            break;
490        }
491
492        if !peeked.starts_with("===") {
493            break;
494        }
495
496        let tc = test_case(state)?;
497        tests.push(tc);
498    }
499
500    Ok(CorpusFile { file_skip, tests })
501}
502
503#[cfg(test)]
504mod tests {
505    use super::*;
506    use std::io::Write;
507    use tempfile::NamedTempFile;
508
509    fn parse_test(content: &str) -> CorpusFile {
510        parse_content(content, Path::new("<test>")).unwrap()
511    }
512
513    #[test]
514    fn test_parse_single_test() {
515        let content = r#"===
516test name
517===
518echo hello
519---
520hello
521"#;
522        let file = parse_test(content);
523        assert!(file.file_skip.is_none());
524        assert_eq!(file.tests.len(), 1);
525        assert_eq!(file.tests[0].name, "test name");
526        assert_eq!(file.tests[0].command, "echo hello");
527        assert_eq!(file.tests[0].expected_output, "hello");
528        assert!(file.tests[0].variables.is_empty());
529        assert!(file.tests[0].constraints.is_empty());
530        assert!(file.tests[0].skip.is_none());
531    }
532
533    #[test]
534    fn test_parse_multiple_tests() {
535        let content = r#"===
536first test
537===
538echo first
539---
540first
541
542===
543second test
544===
545echo second
546---
547second
548"#;
549        let file = parse_test(content);
550        assert_eq!(file.tests.len(), 2);
551        assert_eq!(file.tests[0].name, "first test");
552        assert_eq!(file.tests[1].name, "second test");
553    }
554
555    #[test]
556    fn test_parse_multiline_output() {
557        let content = r#"===
558multiline test
559===
560echo -e "line1\nline2\nline3"
561---
562line1
563line2
564line3
565"#;
566        let file = parse_test(content);
567        assert_eq!(file.tests.len(), 1);
568        assert_eq!(file.tests[0].expected_output, "line1\nline2\nline3");
569    }
570
571    #[test]
572    fn test_parse_empty_expected() {
573        let content = r#"===
574exit only test
575===
576true
577---
578"#;
579        let file = parse_test(content);
580        assert_eq!(file.tests.len(), 1);
581        assert_eq!(file.tests[0].expected_output, "");
582    }
583
584    #[test]
585    fn test_parse_with_inline_type() {
586        let content = r#"===
587timing test
588===
589time_command
590---
591Completed in {{ n: number }}s
592"#;
593        let file = parse_test(content);
594        assert_eq!(file.tests.len(), 1);
595        assert_eq!(
596            file.tests[0].expected_output,
597            "Completed in {{ n: number }}s"
598        );
599        assert_eq!(file.tests[0].variables.len(), 1);
600        assert_eq!(file.tests[0].variables[0].name, "n");
601        assert_eq!(file.tests[0].variables[0].var_type, Some(VarType::Number));
602    }
603
604    #[test]
605    fn test_parse_with_constraints() {
606        let content = r#"===
607timing test
608===
609time_command
610---
611Completed in {{ n: number }}s
612---
613where
614* n > 0
615* n < 60
616"#;
617        let file = parse_test(content);
618        assert_eq!(file.tests.len(), 1);
619        assert_eq!(file.tests[0].variables.len(), 1);
620        assert_eq!(file.tests[0].constraints.len(), 2);
621        assert_eq!(file.tests[0].constraints[0], "n > 0");
622        assert_eq!(file.tests[0].constraints[1], "n < 60");
623    }
624
625    #[test]
626    fn test_parse_multiple_variables() {
627        let content = r#"===
628multi var test
629===
630some_command
631---
632{{ count: number }} items in {{ time: number }}s: {{ msg: string }}
633---
634where
635* count > 0
636* time < 10
637"#;
638        let file = parse_test(content);
639        assert_eq!(file.tests.len(), 1);
640        assert_eq!(file.tests[0].variables.len(), 3);
641        assert_eq!(file.tests[0].variables[0].name, "count");
642        assert_eq!(file.tests[0].variables[1].name, "time");
643        assert_eq!(file.tests[0].variables[2].name, "msg");
644        assert_eq!(file.tests[0].variables[2].var_type, Some(VarType::String));
645    }
646
647    #[test]
648    fn test_parse_duck_typed_variable() {
649        let content = r#"===
650duck typed
651===
652echo "val: 42"
653---
654val: {{ x }}
655---
656where
657* x > 0
658"#;
659        let file = parse_test(content);
660        assert_eq!(file.tests.len(), 1);
661        assert_eq!(file.tests[0].variables.len(), 1);
662        assert_eq!(file.tests[0].variables[0].name, "x");
663        assert_eq!(file.tests[0].variables[0].var_type, None);
664    }
665
666    #[test]
667    fn test_parse_empty_string_var() {
668        let content = r#"===
669empty string
670===
671echo "val: "
672---
673val: {{ s: string }}
674---
675where
676* len(s) == 0
677"#;
678        let file = parse_test(content);
679        assert_eq!(file.tests.len(), 1);
680        assert_eq!(file.tests[0].name, "empty string");
681        assert_eq!(file.tests[0].expected_output, "val: {{ s: string }}");
682        assert_eq!(file.tests[0].variables.len(), 1);
683        assert_eq!(file.tests[0].variables[0].name, "s");
684        assert_eq!(file.tests[0].variables[0].var_type, Some(VarType::String));
685        assert_eq!(file.tests[0].constraints.len(), 1);
686        assert_eq!(file.tests[0].constraints[0], "len(s) == 0");
687    }
688
689    #[test]
690    fn test_skip_unconditional() {
691        let content = r#"===
692skipped test
693%skip
694===
695echo hello
696---
697hello
698"#;
699        let file = parse_test(content);
700        assert_eq!(file.tests.len(), 1);
701        let skip = file.tests[0].skip.as_ref().unwrap();
702        assert!(skip.message.is_none());
703        assert!(skip.condition.is_none());
704    }
705
706    #[test]
707    fn test_skip_with_message() {
708        let content = r#"===
709skipped test
710%skip(not yet implemented)
711===
712echo hello
713---
714hello
715"#;
716        let file = parse_test(content);
717        assert_eq!(file.tests.len(), 1);
718        let skip = file.tests[0].skip.as_ref().unwrap();
719        assert_eq!(skip.message.as_deref(), Some("not yet implemented"));
720        assert!(skip.condition.is_none());
721    }
722
723    #[test]
724    fn test_skip_with_condition() {
725        let content = r#"===
726unix only test
727%skip if: test "$OS" = "Windows_NT"
728===
729echo hello
730---
731hello
732"#;
733        let file = parse_test(content);
734        assert_eq!(file.tests.len(), 1);
735        let skip = file.tests[0].skip.as_ref().unwrap();
736        assert!(skip.message.is_none());
737        assert_eq!(
738            skip.condition.as_deref(),
739            Some(r#"test "$OS" = "Windows_NT""#)
740        );
741    }
742
743    #[test]
744    fn test_skip_with_message_and_condition() {
745        let content = r#"===
746unix only test
747%skip(requires bash) if: test "$OS" = "Windows_NT"
748===
749echo hello
750---
751hello
752"#;
753        let file = parse_test(content);
754        assert_eq!(file.tests.len(), 1);
755        let skip = file.tests[0].skip.as_ref().unwrap();
756        assert_eq!(skip.message.as_deref(), Some("requires bash"));
757        assert_eq!(
758            skip.condition.as_deref(),
759            Some(r#"test "$OS" = "Windows_NT""#)
760        );
761    }
762
763    #[test]
764    fn test_file_level_skip() {
765        let content = r#"%skip(windows tests) if: test "$OS" != "Windows_NT"
766
767===
768test 1
769===
770echo hello
771---
772hello
773"#;
774        let file = parse_test(content);
775        let file_skip = file.file_skip.as_ref().unwrap();
776        assert_eq!(file_skip.message.as_deref(), Some("windows tests"));
777        assert_eq!(
778            file_skip.condition.as_deref(),
779            Some(r#"test "$OS" != "Windows_NT""#)
780        );
781        assert_eq!(file.tests.len(), 1);
782    }
783
784    #[test]
785    fn test_file_level_skip_unconditional() {
786        let content = r#"%skip(all tests disabled)
787
788===
789test 1
790===
791echo hello
792---
793hello
794"#;
795        let file = parse_test(content);
796        let file_skip = file.file_skip.as_ref().unwrap();
797        assert_eq!(file_skip.message.as_deref(), Some("all tests disabled"));
798        assert!(file_skip.condition.is_none());
799    }
800
801    #[test]
802    fn test_parse_file() {
803        let mut f = NamedTempFile::new().unwrap();
804        write!(f, "===\ntest\n===\necho hi\n---\nhi\n").unwrap();
805
806        let file = parse_file(f.path()).unwrap();
807        assert_eq!(file.tests.len(), 1);
808        assert_eq!(file.tests[0].name, "test");
809        assert_eq!(file.tests[0].file_path, f.path());
810    }
811
812    #[test]
813    fn test_multiline_command() {
814        let content = r#"===
815multiline command
816===
817echo "line 1"
818echo "line 2"
819echo "line 3"
820---
821line 1
822line 2
823line 3
824"#;
825        let file = parse_test(content);
826        assert_eq!(file.tests.len(), 1);
827        assert_eq!(
828            file.tests[0].command,
829            "echo \"line 1\"\necho \"line 2\"\necho \"line 3\""
830        );
831    }
832
833    #[test]
834    fn test_line_numbers() {
835        let content = r#"===
836first test
837===
838echo hello
839---
840hello
841
842===
843second test
844===
845echo world
846---
847world
848"#;
849        let file = parse_test(content);
850        assert_eq!(file.tests.len(), 2);
851        assert_eq!(file.tests[0].start_line, 1);
852        // Just verify we have reasonable line tracking
853        assert!(file.tests[0].start_line < file.tests[0].end_line);
854        assert!(file.tests[1].start_line < file.tests[1].end_line);
855    }
856}