aufbau 0.1.0

Type-aware constrained decoding for LLMs using context-dependent grammars with typing rules
Documentation
// `parse` submodule; its `ParseError`, `Parser`, and `PartialParseOutcome`
// items are re-exported from this module.
pub mod parse;
pub use parse::{ParseError, Parser, PartialParseOutcome};

// `meta` submodule; every public item is re-exported through a glob import.
pub mod meta;
pub use meta::*;

// `structure` submodule; only the four named items below are re-exported.
pub mod structure;
pub use structure::{Node, NonTerminal, PartialAST, Terminal};

// `completion` submodule; every public item is re-exported through a glob import.
pub mod completion;
pub use completion::*;

// `synth` submodule; only `Synthesizer` is re-exported.
pub mod synth;
pub use synth::Synthesizer;

// Private `tests` submodule, compiled only when building tests.
#[cfg(test)]
mod tests;

// `serialize` submodule; every public item is re-exported through a glob import.
pub mod serialize;
pub use serialize::*;

// `compact` submodule; only `CompactTree` is re-exported.
pub mod compact;
pub use compact::CompactTree;

// `display` submodule; public, but nothing from it is re-exported here.
pub mod display;

/// Smoke test: with trace-level debugging switched on, loads a small
/// recursive grammar, partially parses a prefix, and prints both the grammar
/// and the resulting partial AST.
///
/// The test contains no assertions; it fails only if loading the grammar or
/// parsing the input panics.
#[test]
fn test_debug() {
    use crate::logic::debug::DebugLevel;
    use crate::logic::grammar::Grammar;

    crate::set_debug_level(DebugLevel::Trace);

    let grammar_src = r#"
U ::= 'barcbarcu'
A ::= 'a'
B ::= 'b' A 'r'
Loop ::= B 'c' Loop | B 'c'
start ::= U | Loop | 't'
    "#;

    // The specification is a fixed literal, so a load failure is a bug in
    // the test itself and may panic.
    let grammar = Grammar::load(grammar_src).unwrap();
    println!("Grammar: {:#?}", grammar);

    // The input is textually a prefix of `U`'s literal and also spells
    // `B 'c'` twice, so it is expected to parse.
    let mut parser = crate::logic::partial::Parser::new(grammar);
    let partial_ast = parser.partial("barcbarc").unwrap();
    println!("Partial AST: {}", partial_ast);
}

/// Checks that, for a fully parsed input, a complete root reports a segment
/// range from `complete_len` whose byte span is the length of the input.
#[test]
fn test_complete_len() {
    use crate::logic::grammar::Grammar;

    let grammar_src = r#"
    A ::= 'hello'
    B ::= 'world'
    start ::= A  B
    "#;

    crate::set_debug_level(crate::DebugLevel::Trace);

    // The specification is a fixed literal; failing to load it is a test bug.
    let grammar = Grammar::load(grammar_src).unwrap();
    // The parser takes its own copy so `grammar` stays usable for tokenizing.
    let mut parser = crate::logic::partial::Parser::new(grammar.clone());

    // Parse the whole input in one go.
    let text = "hello world";
    println!("Input: {}", text);
    let parsed = parser.partial(text).unwrap();
    assert!(parsed.is_complete(), "AST should be complete");

    // Segments come from the grammar's own tokenizer.
    let segments = grammar.tokenize(text).unwrap();

    // Pick the first root that reports itself complete.
    let roots = parsed.roots();
    let complete_root = roots
        .iter()
        .find(|candidate| candidate.is_complete())
        .expect("Expected a complete root");

    // A complete root must yield a segment range.
    let seg_range = complete_root
        .complete_len(&segments)
        .expect("Complete root should have a segment range");

    // Translate the segment range into bytes to check how much it covers.
    let (start_byte, end_byte) = seg_range.to_byte_range(&segments).unwrap();
    let covered = end_byte - start_byte;
    assert_eq!(covered, 11, "Should cover all 11 bytes of 'hello world'");
}

/// Checks that an input covering only the first of two required literals
/// produces no complete root, and that `complete_len` on the first root (if
/// there is one) returns `None`.
#[test]
fn test_complete_len_partial() {
    let grammar_src = r#"
    start ::= 'complete' 'sentence'
    "#;

    // The specification is a fixed literal; failing to load it is a test bug.
    let grammar = crate::logic::grammar::Grammar::load(grammar_src).unwrap();
    let mut parser = crate::logic::partial::Parser::new(grammar.clone());

    // Only the first literal of `start` is supplied.
    let prefix = "complete";
    let parsed = parser.partial(prefix).unwrap();
    let segments = grammar.tokenize(prefix).unwrap();

    // No root may claim to be complete.
    let roots = parsed.roots();
    let any_complete = roots.iter().any(|candidate| candidate.is_complete());
    assert!(!any_complete, "Partial parse should not have complete root");

    // When at least one root exists, asking it for a complete length must
    // produce nothing. If there are no roots this check is skipped.
    if let Some(first_root) = roots.first() {
        assert_eq!(
            first_root.complete_len(&segments),
            None,
            "Partial parse should return None for complete_len"
        );
    }
}

/// Checks `complete_len` when the start rule reaches its literals through
/// two levels of nonterminals (`start` -> `Outer` -> `Inner`).
#[test]
fn test_complete_len_nested() {
    use crate::logic::grammar::Grammar;

    let grammar_src = r#"
    Inner ::= 'foo'
    Outer ::= Inner 'bar'
    start ::= Outer
    "#;

    // The specification is a fixed literal; failing to load it is a test bug.
    let grammar = Grammar::load(grammar_src).unwrap();
    let mut parser = crate::logic::partial::Parser::new(grammar.clone());

    let text = "foobar";
    let parsed = parser.partial(text).unwrap();
    assert!(parsed.is_complete(), "Nested parse should be complete");

    let segments = grammar.tokenize(text).unwrap();

    // Pick the first root that reports itself complete.
    let roots = parsed.roots();
    let complete_root = roots
        .iter()
        .find(|candidate| candidate.is_complete())
        .expect("Expected a complete root");

    // A complete root must yield a segment range.
    let seg_range = complete_root
        .complete_len(&segments)
        .expect("Complete nested alt should have a segment range");

    // Translate the segment range into bytes to check how much it covers.
    let (start_byte, end_byte) = seg_range.to_byte_range(&segments).unwrap();
    let covered = end_byte - start_byte;
    assert_eq!(covered, 6, "Should cover all 6 bytes of 'foobar'");
}

/// Parses a lambda term against the grammar in `examples/stlc.auf`,
/// serializes the complete tree, prints it, and feeds the text back through
/// `NonTerminal::deserialize`.
///
/// The deserialization result is not inspected; the test fails only if one
/// of the earlier steps panics.
#[test]
fn serialize_complex_stlc() {
    let grammar_src = include_str!("../../../examples/stlc.auf");

    // The grammar file is embedded at compile time; failing to load it is a
    // bug in the example or the loader, so panicking is appropriate.
    let grammar = crate::logic::grammar::Grammar::load(grammar_src).unwrap();
    let mut parser = MetaParser::new(grammar.clone());

    let term = "λ f : A -> B -> C -> D . f x y z";
    let parsed = parser.partial(term).unwrap();

    // This particular term is expected to yield a complete tree.
    let complete_tree = parsed.complete().unwrap();

    let encoded = complete_tree.serialize();
    println!("Serialized complete AST: {}", encoded);

    // The outcome is deliberately bound and ignored.
    let _round_trip = NonTerminal::deserialize(&encoded, &grammar);
}