cctr_corpus/
lib.rs

1//! Corpus test file parser.
2//!
3//! Parses `.txt` corpus test files into structured test cases using winnow.
4//!
5//! # File Format
6//!
7//! ```text
8//! ===
9//! test name
10//! ===
11//! command to run
12//! ---
13//! expected output
14//!
15//! ===
16//! test with variables
17//! ===
18//! some_command
19//! ---
20//! Completed in {{ time }}s
21//! ---
22//! with
23//! * time: number
24//! having
25//! * time > 0
26//! * time < 60
27//! ```
28
29use std::path::Path;
30use thiserror::Error;
31use winnow::combinator::{alt, opt, repeat};
32use winnow::error::ContextError;
33use winnow::prelude::*;
34use winnow::token::{take_till, take_while};
35
36// ============ Data Types ============
37
/// One piece of a template string.
///
/// [`parse_segments`] splits a template into a sequence of these: plain text
/// becomes [`Segment::Literal`], and each `{{ name }}` marker becomes
/// [`Segment::Placeholder`].
///
/// All payloads are `String`s, so the type is fully `Eq` and `Hash` and can be
/// used as a set member or map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Segment {
    /// Text that is carried through verbatim.
    Literal(String),
    /// The name found between `{{` and `}}`, with surrounding whitespace trimmed.
    Placeholder(String),
}
44
/// The type a variable is declared with in a `with` section.
///
/// The enum is fieldless, so besides `Copy` it is also `Eq` and `Hash`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VarType {
    /// Declared with the keyword `number`.
    Number,
    /// Declared with the keyword `string`.
    String,
}
51
52/// A declared variable with name and type.
53#[derive(Debug, Clone, PartialEq)]
54pub struct Variable {
55    pub name: String,
56    pub var_type: VarType,
57}
58
59/// A single test case parsed from a corpus file.
60#[derive(Debug, Clone, PartialEq)]
61pub struct TestCase {
62    pub description: String,
63    pub command: Vec<Segment>,
64    pub expected: Vec<Segment>,
65    pub variables: Vec<Variable>,
66    pub constraints: Vec<String>,
67    pub start_line: usize,
68    pub end_line: usize,
69}
70
71#[derive(Error, Debug)]
72pub enum ParseError {
73    #[error("IO error: {0}")]
74    Io(#[from] std::io::Error),
75    #[error("parse error at line {line}: {message}")]
76    Parse { line: usize, message: String },
77    #[error("invalid variable type '{0}' (expected 'number' or 'string')")]
78    InvalidVarType(String),
79}
80
81// ============ Public API ============
82
83pub fn parse_file(path: &Path) -> Result<Vec<TestCase>, ParseError> {
84    let content = std::fs::read_to_string(path)?;
85    parse_content(&content)
86}
87
88pub fn parse_content(content: &str) -> Result<Vec<TestCase>, ParseError> {
89    let mut input = content;
90    match test_file.parse_next(&mut input) {
91        Ok(tests) => Ok(tests),
92        Err(e) => Err(ParseError::Parse {
93            line: 1,
94            message: format!("{:?}", e),
95        }),
96    }
97}
98
99// ============ Segment Parsing ============
100
101pub fn parse_segments(input: &str) -> Vec<Segment> {
102    let mut result = Vec::new();
103    let mut remaining = input;
104
105    while !remaining.is_empty() {
106        if let Some(start) = remaining.find("{{") {
107            if start > 0 {
108                result.push(Segment::Literal(remaining[..start].to_string()));
109            }
110            if let Some(end) = remaining[start..].find("}}") {
111                let name = remaining[start + 2..start + end].trim().to_string();
112                result.push(Segment::Placeholder(name));
113                remaining = &remaining[start + end + 2..];
114            } else {
115                result.push(Segment::Literal(remaining.to_string()));
116                break;
117            }
118        } else {
119            if !remaining.is_empty() {
120                result.push(Segment::Literal(remaining.to_string()));
121            }
122            break;
123        }
124    }
125
126    result
127}
128
129// ============ Winnow Parsers ============
130
131fn header_sep(input: &mut &str) -> ModalResult<()> {
132    let line: &str = take_while(1.., '=').parse_next(input)?;
133    if line.len() >= 3 {
134        Ok(())
135    } else {
136        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
137    }
138}
139
140fn dash_sep(input: &mut &str) -> ModalResult<()> {
141    let line: &str = take_while(1.., '-').parse_next(input)?;
142    if line.len() >= 3 {
143        Ok(())
144    } else {
145        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
146    }
147}
148
149fn line_content<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
150    take_till(0.., |c| c == '\n' || c == '\r').parse_next(input)
151}
152
153fn newline(input: &mut &str) -> ModalResult<()> {
154    alt(("\r\n".value(()), "\n".value(()), "\r".value(()))).parse_next(input)
155}
156
157fn opt_newline(input: &mut &str) -> ModalResult<()> {
158    opt(newline).map(|_| ()).parse_next(input)
159}
160
161fn blank_line(input: &mut &str) -> ModalResult<()> {
162    (take_while(0.., ' '), newline)
163        .map(|_| ())
164        .parse_next(input)
165}
166
167fn skip_blank_lines(input: &mut &str) -> ModalResult<()> {
168    repeat(0.., blank_line)
169        .map(|_: Vec<()>| ())
170        .parse_next(input)
171}
172
173fn description_line(input: &mut &str) -> ModalResult<String> {
174    let content = line_content.parse_next(input)?;
175    opt_newline.parse_next(input)?;
176    Ok(content.trim().to_string())
177}
178
179fn command_line(input: &mut &str) -> ModalResult<String> {
180    let content = line_content.parse_next(input)?;
181    opt_newline.parse_next(input)?;
182    Ok(content.to_string())
183}
184
185fn expected_line<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
186    let content = line_content.parse_next(input)?;
187    opt_newline.parse_next(input)?;
188    Ok(content)
189}
190
/// Reports whether `line`, ignoring surrounding whitespace, is a separator:
/// at least three characters long and made up entirely of `=` or entirely of `-`.
fn is_separator_line(line: &str) -> bool {
    let bar = line.trim();
    if bar.len() < 3 {
        return false;
    }

    // The first character decides which kind of separator this could be;
    // every remaining character must then repeat it.
    let mut rest = bar.chars();
    match rest.next() {
        Some(first @ ('=' | '-')) => rest.all(|c| c == first),
        _ => false,
    }
}
196
197fn expected_block(input: &mut &str) -> ModalResult<String> {
198    let mut lines = Vec::new();
199
200    loop {
201        if input.is_empty() {
202            break;
203        }
204
205        // Peek at current line to check for separators
206        let peek_line = input.lines().next().unwrap_or("");
207        if is_separator_line(peek_line) {
208            break;
209        }
210
211        let line = expected_line.parse_next(input)?;
212        lines.push(line);
213    }
214
215    // Trim trailing empty lines
216    while lines.last() == Some(&"") {
217        lines.pop();
218    }
219
220    Ok(lines.join("\n"))
221}
222
223fn var_type(input: &mut &str) -> ModalResult<VarType> {
224    alt((
225        "number".value(VarType::Number),
226        "string".value(VarType::String),
227    ))
228    .parse_next(input)
229}
230
231fn variable_decl(input: &mut &str) -> ModalResult<Variable> {
232    let _ = take_while(0.., ' ').parse_next(input)?;
233    let _ = opt('*').parse_next(input)?;
234    let _ = take_while(0.., ' ').parse_next(input)?;
235
236    let name: &str =
237        take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_').parse_next(input)?;
238    let _ = take_while(0.., ' ').parse_next(input)?;
239    ':'.parse_next(input)?;
240    let _ = take_while(0.., ' ').parse_next(input)?;
241    let vtype = var_type.parse_next(input)?;
242    let _ = take_while(0.., ' ').parse_next(input)?;
243    opt_newline.parse_next(input)?;
244
245    Ok(Variable {
246        name: name.to_string(),
247        var_type: vtype,
248    })
249}
250
251fn constraint_line(input: &mut &str) -> ModalResult<String> {
252    let _ = take_while(0.., ' ').parse_next(input)?;
253    let _ = opt('*').parse_next(input)?;
254    let _ = take_while(0.., ' ').parse_next(input)?;
255
256    let content = line_content.parse_next(input)?;
257    opt_newline.parse_next(input)?;
258
259    let trimmed = content.trim();
260    if trimmed.is_empty() || trimmed == "with" || trimmed == "having" {
261        Err(winnow::error::ErrMode::Backtrack(ContextError::new()))
262    } else {
263        Ok(trimmed.to_string())
264    }
265}
266
267fn with_having_section(input: &mut &str) -> ModalResult<(Vec<Variable>, Vec<String>)> {
268    dash_sep.parse_next(input)?;
269    opt_newline.parse_next(input)?;
270
271    // "with" line
272    let _ = take_while(0.., ' ').parse_next(input)?;
273    "with".parse_next(input)?;
274    opt_newline.parse_next(input)?;
275
276    // Variable declarations
277    let variables: Vec<Variable> = repeat(0.., variable_decl).parse_next(input)?;
278
279    // "having" section (optional)
280    let _ = take_while(0.., ' ').parse_next(input)?;
281    let has_having: Option<&str> = opt("having").parse_next(input)?;
282
283    let constraints = if has_having.is_some() {
284        opt_newline.parse_next(input)?;
285        repeat(0.., constraint_line).parse_next(input)?
286    } else {
287        Vec::new()
288    };
289
290    Ok((variables, constraints))
291}
292
293fn test_case(input: &mut &str) -> ModalResult<TestCase> {
294    skip_blank_lines.parse_next(input)?;
295
296    // Opening ===
297    header_sep.parse_next(input)?;
298    opt_newline.parse_next(input)?;
299
300    // Description
301    let description = description_line.parse_next(input)?;
302
303    // Closing ===
304    header_sep.parse_next(input)?;
305    opt_newline.parse_next(input)?;
306
307    // Command
308    let command_str = command_line.parse_next(input)?;
309
310    // ---
311    dash_sep.parse_next(input)?;
312    opt_newline.parse_next(input)?;
313
314    // Expected output
315    let expected_str = expected_block.parse_next(input)?;
316
317    // Optional with/having section
318    let (variables, constraints) = opt(with_having_section)
319        .parse_next(input)?
320        .unwrap_or_default();
321
322    skip_blank_lines.parse_next(input)?;
323
324    Ok(TestCase {
325        description,
326        command: parse_segments(&command_str),
327        expected: parse_segments(&expected_str),
328        variables,
329        constraints,
330        start_line: 1, // Would need more work to track accurately
331        end_line: 1,
332    })
333}
334
335fn test_file(input: &mut &str) -> ModalResult<Vec<TestCase>> {
336    skip_blank_lines.parse_next(input)?;
337    let tests: Vec<TestCase> = repeat(0.., test_case).parse_next(input)?;
338    skip_blank_lines.parse_next(input)?;
339    Ok(tests)
340}
341
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds corpus text from `lines`, ending every line (including the last) with `\n`.
    fn corpus(lines: &[&str]) -> String {
        let mut text = lines.join("\n");
        text.push('\n');
        text
    }

    /// Shorthand for a literal segment.
    fn lit(text: &str) -> Segment {
        Segment::Literal(text.to_string())
    }

    /// Shorthand for a placeholder segment.
    fn var(name: &str) -> Segment {
        Segment::Placeholder(name.to_string())
    }

    #[test]
    fn test_parse_segments_simple() {
        assert_eq!(parse_segments("hello world"), vec![lit("hello world")]);
    }

    #[test]
    fn test_parse_segments_placeholder() {
        assert_eq!(
            parse_segments("hello {{ name }}"),
            vec![lit("hello "), var("name")]
        );
    }

    #[test]
    fn test_parse_segments_multiple() {
        assert_eq!(
            parse_segments("{{ a }} + {{ b }}"),
            vec![var("a"), lit(" + "), var("b")]
        );
    }

    #[test]
    fn test_parse_simple_test() {
        let content = corpus(&["===", "test name", "===", "echo hello", "---", "hello"]);

        let tests = parse_content(&content).unwrap();

        assert_eq!(tests.len(), 1);
        let case = &tests[0];
        assert_eq!(case.description, "test name");
        assert_eq!(case.command, vec![lit("echo hello")]);
        assert_eq!(case.expected, vec![lit("hello")]);
    }

    #[test]
    fn test_parse_with_variables() {
        let content = corpus(&[
            "===",
            "timing test",
            "===",
            "time_command",
            "---",
            "Completed in {{ n }}s",
            "---",
            "with",
            "* n: number",
            "having",
            "* n > 0",
            "* n < 60",
        ]);

        let tests = parse_content(&content).unwrap();

        assert_eq!(tests.len(), 1);
        let case = &tests[0];
        assert_eq!(
            case.expected,
            vec![lit("Completed in "), var("n"), lit("s")]
        );
        assert_eq!(case.variables.len(), 1);
        assert_eq!(case.variables[0].name, "n");
        assert_eq!(case.variables[0].var_type, VarType::Number);
        assert_eq!(case.constraints, vec!["n > 0", "n < 60"]);
    }

    #[test]
    fn test_parse_multiple_tests() {
        let content = corpus(&[
            "===", "first", "===", "echo 1", "---", "1", "", "===", "second", "===", "echo 2",
            "---", "2",
        ]);

        let tests = parse_content(&content).unwrap();

        assert_eq!(tests.len(), 2);
        assert_eq!(tests[0].description, "first");
        assert_eq!(tests[1].description, "second");
    }

    #[test]
    fn test_parse_multiline_expected() {
        let content = corpus(&[
            "===",
            "multiline",
            "===",
            r#"printf "a\nb\nc""#,
            "---",
            "a",
            "b",
            "c",
        ]);

        let tests = parse_content(&content).unwrap();

        assert_eq!(tests.len(), 1);
        assert_eq!(tests[0].expected, vec![lit("a\nb\nc")]);
    }

    #[test]
    fn test_parse_empty_expected() {
        let content = corpus(&["===", "exit only", "===", "true", "---"]);

        let tests = parse_content(&content).unwrap();

        assert_eq!(tests.len(), 1);
        assert!(tests[0].expected.is_empty());
    }
}