// aufbau 0.1.0
//
// Type-aware constrained decoding for LLMs using context-dependent grammars with typing rules
// Documentation
//! Parser Unit Tests
//!
//! Basic functionality tests for the partial parser implementation.
//! These tests verify core parsing behavior including:
//! - Literal matching
//! - Alternative handling
//! - Partial parse states
//! - Binding preservation
//! - Special token handling

use crate::logic::grammar::Grammar;
use crate::logic::partial::parse::Parser;
use crate::logic::partial::structure::Node;
use crate::logic::partial::MetaParser;
use crate::set_debug_level;

/// Feeding the full literal `hello` to a single-literal grammar must produce
/// a parse that reports itself as complete.
#[test]
fn test_simple_literal() {
    let grammar = Grammar::load(
        r#"
    start ::= 'hello'
    "#,
    )
    .unwrap();
    let mut parser = Parser::new(grammar);

    let ast = parser.partial("hello").unwrap();
    assert!(ast.is_complete());
}

/// A strict prefix (`hel`) of the only literal must still parse, but the
/// resulting parse must not be reported as complete.
#[test]
fn test_partial_literal() {
    // Sets the debug level to `Debug` before the grammar is loaded.
    // NOTE(review): this looks like leftover diagnostics — confirm it is still wanted.
    set_debug_level(crate::DebugLevel::Debug);

    let source = r#"
    start ::= 'hello'
    "#;
    let mut parser = Parser::new(Grammar::load(source).unwrap());

    let ast = parser.partial("hel").unwrap();
    assert!(!ast.is_complete());
}

/// With `start ::= A | B`, the input `a` must yield exactly one complete root
/// named `start` whose first child is the nonterminal `A`.
#[test]
fn test_alternatives() {
    let source = r#"
    A ::= 'a'
    B ::= 'b'
    start ::= A | B
    "#;
    let mut parser = Parser::new(Grammar::load(source).unwrap());

    let ast = parser.partial("a").unwrap();
    assert!(ast.is_complete());

    let roots = ast.roots();
    assert_eq!(roots.len(), 1);
    assert_eq!(roots[0].name, "start");

    // Inspect the first child of the single root: it has to be nonterminal A.
    match &roots[0].children[0] {
        Node::NonTerminal(nt) => assert_eq!(nt.name, "A"),
        _ => panic!("Expected NonTerminal A"),
    }
}

/// When two alternatives share the prefix `a`, parsing `a` must keep both
/// candidates available among the roots.
#[test]
fn test_partial_alternatives() {
    let grammar = Grammar::load(
        r#"
    A ::= 'a'
    B ::= 'a' 'b'
    start ::= A | B
    "#,
    )
    .unwrap();
    let mut parser = Parser::new(grammar);

    let ast = parser.partial("a").unwrap();

    // Expected candidates after consuming "a":
    //   - A is complete (its single literal matched)
    //   - B is partial  ('a' matched, 'b' still missing)
    // SPPF expansion may materialize duplicates, hence a lower bound rather
    // than an exact count.
    assert!(ast.roots().len() >= 2);
}

/// A two-literal sequence whose second literal is cut short (`hello wor`)
/// must parse successfully while being reported as incomplete.
#[test]
fn test_partial_at_end() {
    let source = r#"
    start ::= 'hello' 'world'
    "#;
    let grammar = Grammar::load(source).unwrap();
    let mut parser = Parser::new(grammar);

    let ast = parser.partial("hello wor").unwrap();
    assert!(!ast.is_complete());
}

/// Input that does not match the grammar's only literal must be rejected:
/// `partial` has to return an error for `goodbye`.
#[test]
fn test_mismatch_rejection() {
    let grammar = Grammar::load(
        r#"
    start ::= 'hello'
    "#,
    )
    .unwrap();
    let mut parser = Parser::new(grammar);

    // `unwrap_err` panics (failing the test) if the parse unexpectedly succeeds.
    let outcome = parser.partial("goodbye");
    let _rejection = outcome.unwrap_err();
}

/// A recursive expression grammar (numbers joined by `+` / `-`) must accept
/// `1 + 2 - 3` as a complete parse.
#[test]
fn test_complex_grammar() {
    let source = r#"
    Number ::= /[0-9]+/
    Op ::= '+' | '-'
    Expr ::= Number | Number Op Expr
    start ::= Expr
    "#;
    let mut parser = Parser::new(Grammar::load(source).unwrap());

    let ast = parser.partial("1 + 2 - 3").unwrap();
    assert!(ast.is_complete());
}

/// The binding annotation in `Number[x]` must survive parsing: the first child
/// of the first root is a nonterminal carrying the binding `x`.
#[test]
fn test_binding_preservation() {
    let grammar = Grammar::load(
        r#"
    Number ::= /[0-9]+/
    start ::= Number[x]
    "#,
    )
    .unwrap();
    let mut parser = Parser::new(grammar);

    let ast = parser.partial("42").unwrap();
    assert!(ast.is_complete());

    // Only a nonterminal node can carry the binding we are looking for.
    let first_root = &ast.roots()[0];
    match &first_root.children[0] {
        Node::NonTerminal(nt) => {
            let expected = Some(String::from("x"));
            assert_eq!(nt.binding, expected);
        }
        _ => panic!("Expected NonTerminal node"),
    }
}

/// Regression test for tokenizing a partial special token at the end of input:
/// the trailing `-` in `A-` is the beginning of the `->` token, so the input
/// must parse and be reported as incomplete.
#[test]
fn test_partial_special_token_arrow() {
    let grammar = Grammar::load(
        r#"
    Identifier ::= /[A-Za-z]+/
    BaseType ::= Identifier
    Type ::= BaseType '->' Type | BaseType
    start ::= Type
    "#,
    )
    .unwrap();
    let mut parser = Parser::new(grammar);

    // "A-" is a valid prefix because "-" can still grow into "->".
    let ast = match parser.partial("A-") {
        Ok(ast) => ast,
        failure => panic!("Expected Ok, got: {:?}", failure),
    };
    assert!(!ast.is_complete(), "AST should be partial (incomplete)");
}

/// Same partial-arrow scenario inside a small lambda-calculus grammar:
/// `λf:(A-` ends in a `-` that is a prefix of `->`, so it must parse and be
/// reported as incomplete.
#[test]
fn test_partial_lambda_arrow() {
    let source = r#"
    Identifier ::= /[A-Za-z]+/
    Variable ::= Identifier
    BaseType ::= Identifier | '(' Type ')'
    Type ::= BaseType '->' Type | BaseType
    Lambda ::= 'λ' Variable ':' Type '.' Expr
    Expr ::= Variable | Lambda | '(' Expr ')'
    start ::= Expr
    "#;
    let mut parser = Parser::new(Grammar::load(source).unwrap());

    // The input stops in the middle of the parameter's type annotation, right
    // after the first character of the "->" special token.
    let ast = match parser.partial("λf:(A-") {
        Ok(ast) => ast,
        failure => panic!("Expected Ok, got: {:?}", failure),
    };
    assert!(!ast.is_complete(), "AST should be partial (incomplete)");
}

/// Against the `fun` example grammar, the float-operator prefix `1.0 +.` must
/// tokenize into at least one segment and must be accepted by a depth-limited
/// `MetaParser` as a partial parse.
#[test]
fn test_fun_float_operator_prefix_parseability() {
    const INPUT: &str = "1.0 +.";

    let grammar = Grammar::load(include_str!("../../../../examples/fun.auf")).unwrap();

    // Step 1: tokenization on its own has to produce something.
    let segments = grammar.tokenize(INPUT).unwrap();
    let texts = segments
        .iter()
        .map(|segment| segment.text())
        .collect::<Vec<String>>();
    assert!(
        !texts.is_empty(),
        "Tokenizer should produce at least one segment for float op prefix"
    );

    // Step 2: the parser must accept the same prefix.
    let mut parser = MetaParser::new(grammar).with_max_depth(41);
    let parsed = parser.partial(INPUT);
    assert!(
        parsed.is_ok(),
        "Expected structural partial parse for '1.0 +.', got {:?}",
        parsed.err()
    );
}

/// Against the `toy` example grammar, the prefix `beep: Fizz +` must be
/// accepted by a depth-limited `MetaParser` as a partial parse.
#[test]
fn test_toy_concat_prefix_parseability() {
    let grammar = Grammar::load(include_str!("../../../../examples/toy.auf")).unwrap();
    let mut parser = MetaParser::new(grammar).with_max_depth(62);

    let outcome = parser.partial("beep: Fizz +");
    assert!(
        outcome.is_ok(),
        "Expected structural partial parse for 'beep: Fizz +', got {:?}",
        outcome.err()
    );
}

/// Against the `fun` example grammar, the prefix `let ` must partial-parse and
/// at least one root's serialized form must mention a `Let` node, spelled
/// either `(Let` or `Let(`.
#[test]
fn test_fun_let_prefix_contains_let_branch() {
    let spec = include_str!("../../../../examples/fun.auf");
    let g = Grammar::load(spec).unwrap();
    let mut p = MetaParser::new(g).with_max_depth(41);
    let ast = p.partial("let ").expect("partial parse should succeed");

    // Serialize each root once and probe the rendered text for both spellings,
    // instead of re-serializing the same root for the second probe.
    let has_let = ast.roots().iter().any(|root| {
        let rendered = root.serialize();
        rendered.contains("(Let") || rendered.contains("Let(")
    });
    assert!(has_let, "expected a Let branch for prefix 'let '");
}

/// The keyword `let` also matches the `Identifier` regex, but no root of the
/// partial parse for `let` may serialize to something containing `Variable`.
#[test]
fn test_keywords_are_not_matched_by_identifier_regex() {
    let grammar = Grammar::load(
        r#"
    Identifier ::= /[a-z][a-z0-9]*/
    Variable ::= Identifier
    Let ::= 'let' Identifier
    start ::= Let | Variable
    "#,
    )
    .unwrap();
    let mut parser = MetaParser::new(grammar).with_max_depth(27);
    let parsed = parser.partial("let").expect("partial parse should succeed");

    // Reserved keywords must not be swallowed by a broad regex terminal, so
    // every root has to be free of a `Variable` node.
    let variable_free = parsed
        .roots()
        .iter()
        .all(|root| !root.serialize().contains("Variable"));
    assert!(variable_free, "keyword `let` parsed as Variable");
}