cctr_corpus/
lib.rs

//! Corpus test file parser.
//!
//! Parses `.txt` corpus test files into structured test cases using winnow.
//!
//! # File Format
//!
//! ```text
//! ===
//! test name
//! ===
//! command to run
//! ---
//! expected output
//!
//! ===
//! test with variables
//! ===
//! some_command
//! ---
//! Completed in {{ time: number }}s
//! ---
//! where
//! * time > 0
//! * time < 60
//! ```
//!
//! Types can be specified inline in placeholders: `{{ x }}`, `{{ x: number }}`,
//! `{{ x:string }}`, `{{ x : json object }}`. If no type is given, the type is
//! inferred from the matched value using duck-typing.
30
use std::collections::HashSet;
use std::path::Path;
use thiserror::Error;
use winnow::combinator::{alt, opt, repeat};
use winnow::error::ContextError;
use winnow::prelude::*;
use winnow::token::{take_till, take_while};
37
// ============ Data Types ============
39
40/// A segment of a template string - either literal text or a placeholder.
41#[derive(Debug, Clone, PartialEq)]
42pub enum Segment {
43    Literal(String),
44    /// Placeholder with name and optional type annotation
45    Placeholder {
46        name: String,
47        var_type: Option<VarType>,
48    },
49}
50
/// Variable type for pattern matching.
///
/// Each variant corresponds to one spelling accepted in a placeholder type
/// annotation. `Eq` and `Hash` are derived so the type can be used as a map or
/// set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VarType {
    /// Written as `number`.
    Number,
    /// Written as `string`.
    String,
    /// Written as `json string`.
    JsonString,
    /// Written as `json bool`.
    JsonBool,
    /// Written as `json array`.
    JsonArray,
    /// Written as `json object`.
    JsonObject,
}
61
62/// A declared variable with name and optional type (None means duck-typed).
63#[derive(Debug, Clone, PartialEq)]
64pub struct Variable {
65    pub name: String,
66    pub var_type: Option<VarType>,
67}
68
69/// A single test case parsed from a corpus file.
70#[derive(Debug, Clone, PartialEq)]
71pub struct TestCase {
72    pub description: String,
73    pub command: Vec<Segment>,
74    pub expected: Vec<Segment>,
75    pub variables: Vec<Variable>,
76    pub constraints: Vec<String>,
77    pub start_line: usize,
78    pub end_line: usize,
79}
80
81#[derive(Error, Debug)]
82pub enum ParseError {
83    #[error("IO error: {0}")]
84    Io(#[from] std::io::Error),
85    #[error("parse error at line {line}: {message}")]
86    Parse { line: usize, message: String },
87    #[error("invalid variable type '{0}' (expected 'number' or 'string')")]
88    InvalidVarType(String),
89}
90
// ============ Public API ============
92
93pub fn parse_file(path: &Path) -> Result<Vec<TestCase>, ParseError> {
94    let content = std::fs::read_to_string(path)?;
95    parse_content(&content)
96}
97
98pub fn parse_content(content: &str) -> Result<Vec<TestCase>, ParseError> {
99    let mut input = content;
100    match test_file.parse_next(&mut input) {
101        Ok(tests) => Ok(tests),
102        Err(e) => Err(ParseError::Parse {
103            line: 1,
104            message: format!("{:?}", e),
105        }),
106    }
107}
108
// ============ Segment Parsing ============
110
111/// Parse a type annotation string into a VarType
112fn parse_type_annotation(type_str: &str) -> Option<VarType> {
113    match type_str.to_lowercase().as_str() {
114        "number" => Some(VarType::Number),
115        "string" => Some(VarType::String),
116        "json string" => Some(VarType::JsonString),
117        "json bool" => Some(VarType::JsonBool),
118        "json array" => Some(VarType::JsonArray),
119        "json object" => Some(VarType::JsonObject),
120        _ => None,
121    }
122}
123
124pub fn parse_segments(input: &str) -> Vec<Segment> {
125    let mut result = Vec::new();
126    let mut remaining = input;
127
128    while !remaining.is_empty() {
129        if let Some(start) = remaining.find("{{") {
130            if start > 0 {
131                result.push(Segment::Literal(remaining[..start].to_string()));
132            }
133            if let Some(end) = remaining[start..].find("}}") {
134                let content = remaining[start + 2..start + end].trim();
135                // Check for inline type annotation: "name : type" or "name: type" or "name :type"
136                let (name, var_type) = if let Some(colon_pos) = content.find(':') {
137                    let name = content[..colon_pos].trim().to_string();
138                    let type_str = content[colon_pos + 1..].trim();
139                    (name, parse_type_annotation(type_str))
140                } else {
141                    (content.to_string(), None)
142                };
143                result.push(Segment::Placeholder { name, var_type });
144                remaining = &remaining[start + end + 2..];
145            } else {
146                result.push(Segment::Literal(remaining.to_string()));
147                break;
148            }
149        } else {
150            if !remaining.is_empty() {
151                result.push(Segment::Literal(remaining.to_string()));
152            }
153            break;
154        }
155    }
156
157    result
158}
159
// ============ Winnow Parsers ============
161
162fn header_sep(input: &mut &str) -> ModalResult<()> {
163    let line: &str = take_while(1.., '=').parse_next(input)?;
164    if line.len() >= 3 {
165        Ok(())
166    } else {
167        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
168    }
169}
170
171fn dash_sep(input: &mut &str) -> ModalResult<()> {
172    let line: &str = take_while(1.., '-').parse_next(input)?;
173    if line.len() >= 3 {
174        Ok(())
175    } else {
176        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
177    }
178}
179
180fn line_content<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
181    take_till(0.., |c| c == '\n' || c == '\r').parse_next(input)
182}
183
184fn newline(input: &mut &str) -> ModalResult<()> {
185    alt(("\r\n".value(()), "\n".value(()), "\r".value(()))).parse_next(input)
186}
187
188fn opt_newline(input: &mut &str) -> ModalResult<()> {
189    opt(newline).map(|_| ()).parse_next(input)
190}
191
192fn blank_line(input: &mut &str) -> ModalResult<()> {
193    (take_while(0.., ' '), newline)
194        .map(|_| ())
195        .parse_next(input)
196}
197
198fn skip_blank_lines(input: &mut &str) -> ModalResult<()> {
199    repeat(0.., blank_line)
200        .map(|_: Vec<()>| ())
201        .parse_next(input)
202}
203
204fn description_line(input: &mut &str) -> ModalResult<String> {
205    let content = line_content.parse_next(input)?;
206    opt_newline.parse_next(input)?;
207    Ok(content.trim().to_string())
208}
209
210fn command_line(input: &mut &str) -> ModalResult<String> {
211    let content = line_content.parse_next(input)?;
212    opt_newline.parse_next(input)?;
213    Ok(content.to_string())
214}
215
216fn expected_line<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
217    let content = line_content.parse_next(input)?;
218    opt_newline.parse_next(input)?;
219    Ok(content)
220}
221
/// Returns `true` when `line`, ignoring surrounding whitespace, consists of
/// three or more `=` characters or of three or more `-` characters.
fn is_separator_line(line: &str) -> bool {
    let trimmed = line.trim();
    // Both markers are ASCII, so comparing bytes is equivalent to comparing chars.
    trimmed.len() >= 3
        && (trimmed.bytes().all(|b| b == b'=') || trimmed.bytes().all(|b| b == b'-'))
}
227
228fn expected_block(input: &mut &str) -> ModalResult<String> {
229    let mut lines = Vec::new();
230
231    loop {
232        if input.is_empty() {
233            break;
234        }
235
236        // Peek at current line to check for separators
237        let peek_line = input.lines().next().unwrap_or("");
238        if is_separator_line(peek_line) {
239            break;
240        }
241
242        let line = expected_line.parse_next(input)?;
243        lines.push(line);
244    }
245
246    // Trim trailing empty lines
247    while lines.last() == Some(&"") {
248        lines.pop();
249    }
250
251    Ok(lines.join("\n"))
252}
253
254fn constraint_line(input: &mut &str) -> ModalResult<String> {
255    let _ = take_while(0.., ' ').parse_next(input)?;
256    let _ = opt('*').parse_next(input)?;
257    let _ = take_while(0.., ' ').parse_next(input)?;
258
259    let content = line_content.parse_next(input)?;
260    opt_newline.parse_next(input)?;
261
262    let trimmed = content.trim();
263    if trimmed.is_empty() || trimmed == "where" {
264        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
265    } else {
266        Ok(trimmed.to_string())
267    }
268}
269
270fn where_section(input: &mut &str) -> ModalResult<Vec<String>> {
271    dash_sep.parse_next(input)?;
272    opt_newline.parse_next(input)?;
273
274    // "where" line
275    let _ = take_while(0.., ' ').parse_next(input)?;
276    "where".parse_next(input)?;
277    opt_newline.parse_next(input)?;
278
279    // Constraints
280    let constraints: Vec<String> = repeat(0.., constraint_line).parse_next(input)?;
281
282    Ok(constraints)
283}
284
285/// Extract variables from segments (placeholders with their optional types)
286fn extract_variables(segments: &[Segment]) -> Vec<Variable> {
287    let mut seen = std::collections::HashSet::new();
288    let mut variables = Vec::new();
289
290    for segment in segments {
291        if let Segment::Placeholder { name, var_type } = segment {
292            if seen.insert(name.clone()) {
293                variables.push(Variable {
294                    name: name.clone(),
295                    var_type: *var_type,
296                });
297            }
298        }
299    }
300
301    variables
302}
303
304fn test_case(input: &mut &str) -> ModalResult<TestCase> {
305    skip_blank_lines.parse_next(input)?;
306
307    // Opening ===
308    header_sep.parse_next(input)?;
309    opt_newline.parse_next(input)?;
310
311    // Description
312    let description = description_line.parse_next(input)?;
313
314    // Closing ===
315    header_sep.parse_next(input)?;
316    opt_newline.parse_next(input)?;
317
318    // Command
319    let command_str = command_line.parse_next(input)?;
320
321    // ---
322    dash_sep.parse_next(input)?;
323    opt_newline.parse_next(input)?;
324
325    // Expected output
326    let expected_str = expected_block.parse_next(input)?;
327
328    // Optional where section (constraints only, variables extracted from segments)
329    let constraints = opt(where_section).parse_next(input)?.unwrap_or_default();
330
331    skip_blank_lines.parse_next(input)?;
332
333    let expected = parse_segments(&expected_str);
334    let variables = extract_variables(&expected);
335
336    Ok(TestCase {
337        description,
338        command: parse_segments(&command_str),
339        expected,
340        variables,
341        constraints,
342        start_line: 1, // Would need more work to track accurately
343        end_line: 1,
344    })
345}
346
347fn test_file(input: &mut &str) -> ModalResult<Vec<TestCase>> {
348    skip_blank_lines.parse_next(input)?;
349    let tests: Vec<TestCase> = repeat(0.., test_case).parse_next(input)?;
350    skip_blank_lines.parse_next(input)?;
351    Ok(tests)
352}
353
// Unit tests for segment splitting and for whole-file parsing.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_segments_simple() {
        let segments = parse_segments("hello world");
        assert_eq!(segments, vec![Segment::Literal("hello world".to_string())]);
    }

    #[test]
    fn test_parse_segments_placeholder() {
        let segments = parse_segments("hello {{ name }}");
        assert_eq!(
            segments,
            vec![
                Segment::Literal("hello ".to_string()),
                Segment::Placeholder {
                    name: "name".to_string(),
                    var_type: None
                },
            ]
        );
    }

    #[test]
    fn test_parse_segments_placeholder_with_type() {
        let segments = parse_segments("count: {{ n: number }}");
        assert_eq!(
            segments,
            vec![
                Segment::Literal("count: ".to_string()),
                Segment::Placeholder {
                    name: "n".to_string(),
                    var_type: Some(VarType::Number)
                },
            ]
        );
    }

    #[test]
    fn test_parse_segments_placeholder_type_variations() {
        // No spaces
        let s1 = parse_segments("{{ x:number }}");
        assert_eq!(
            s1,
            vec![Segment::Placeholder {
                name: "x".to_string(),
                var_type: Some(VarType::Number)
            }]
        );

        // Spaces around colon
        let s2 = parse_segments("{{ x : string }}");
        assert_eq!(
            s2,
            vec![Segment::Placeholder {
                name: "x".to_string(),
                var_type: Some(VarType::String)
            }]
        );

        // Json types
        let s3 = parse_segments("{{ data : json object }}");
        assert_eq!(
            s3,
            vec![Segment::Placeholder {
                name: "data".to_string(),
                var_type: Some(VarType::JsonObject)
            }]
        );
    }

    #[test]
    fn test_parse_segments_multiple() {
        let segments = parse_segments("{{ a }} + {{ b }}");
        assert_eq!(
            segments,
            vec![
                Segment::Placeholder {
                    name: "a".to_string(),
                    var_type: None
                },
                Segment::Literal(" + ".to_string()),
                Segment::Placeholder {
                    name: "b".to_string(),
                    var_type: None
                },
            ]
        );
    }

    #[test]
    fn test_parse_simple_test() {
        let content = r#"===
test name
===
echo hello
---
hello
"#;
        let tests = parse_content(content).unwrap();
        assert_eq!(tests.len(), 1);
        assert_eq!(tests[0].description, "test name");
        assert_eq!(
            tests[0].command,
            vec![Segment::Literal("echo hello".to_string())]
        );
        assert_eq!(
            tests[0].expected,
            vec![Segment::Literal("hello".to_string())]
        );
    }

    #[test]
    fn test_parse_with_inline_types() {
        let content = r#"===
timing test
===
time_command
---
Completed in {{ n: number }}s
---
where
* n > 0
* n < 60
"#;
        let tests = parse_content(content).unwrap();
        assert_eq!(tests.len(), 1);
        assert_eq!(
            tests[0].expected,
            vec![
                Segment::Literal("Completed in ".to_string()),
                Segment::Placeholder {
                    name: "n".to_string(),
                    var_type: Some(VarType::Number)
                },
                Segment::Literal("s".to_string()),
            ]
        );
        assert_eq!(tests[0].variables.len(), 1);
        assert_eq!(tests[0].variables[0].name, "n");
        assert_eq!(tests[0].variables[0].var_type, Some(VarType::Number));
        assert_eq!(tests[0].constraints, vec!["n > 0", "n < 60"]);
    }

    #[test]
    fn test_parse_without_type_annotation() {
        let content = r#"===
duck typed
===
some_command
---
value: {{ x }}
---
where
* x > 0
"#;
        let tests = parse_content(content).unwrap();
        assert_eq!(tests.len(), 1);
        assert_eq!(tests[0].variables.len(), 1);
        assert_eq!(tests[0].variables[0].name, "x");
        assert_eq!(tests[0].variables[0].var_type, None); // Duck-typed
    }

    #[test]
    fn test_parse_multiple_tests() {
        let content = r#"===
first
===
echo 1
---
1

===
second
===
echo 2
---
2
"#;
        let tests = parse_content(content).unwrap();
        assert_eq!(tests.len(), 2);
        assert_eq!(tests[0].description, "first");
        assert_eq!(tests[1].description, "second");
    }

    #[test]
    fn test_parse_multiline_expected() {
        let content = r#"===
multiline
===
printf "a\nb\nc"
---
a
b
c
"#;
        let tests = parse_content(content).unwrap();
        assert_eq!(tests.len(), 1);
        assert_eq!(
            tests[0].expected,
            vec![Segment::Literal("a\nb\nc".to_string())]
        );
    }

    #[test]
    fn test_parse_empty_expected() {
        let content = r#"===
exit only
===
true
---
"#;
        let tests = parse_content(content).unwrap();
        assert_eq!(tests.len(), 1);
        assert!(tests[0].expected.is_empty());
    }
}