lex-runtime 0.5.0

Effect handler runtime + capability policy for Lex.
Documentation
//! Integration tests for `std.parser` (#217).
//!
//! Pins the v1 surface (primitives + structural combinators + run)
//! and the two concrete acceptance criteria from the proposal:
//!   - RFC3339 date portion (`YYYY-MM-DD`) is composable from
//!     primitives.
//!   - CSV-with-quotes round-trips through the structural parsers.
//!
//! `map` and `and_then` are intentionally not yet wired and are not
//! exercised here; see the comment on #217 for the reasoning.

use lex_ast::canonicalize_program;
use lex_bytecode::{compile_program, vm::Vm, Value};
use lex_runtime::{DefaultHandler, Policy};
use lex_syntax::parse_source;
use std::sync::Arc;

fn compile_and_handler(src: &str) -> (Arc<lex_bytecode::Program>, DefaultHandler) {
    let prog = parse_source(src).expect("parse");
    let stages = canonicalize_program(&prog);
    if let Err(errs) = lex_types::check_program(&stages) {
        panic!("type errors:\n{errs:#?}");
    }
    let bc = Arc::new(compile_program(&stages));
    let handler = DefaultHandler::new(Policy::pure()).with_program(Arc::clone(&bc));
    (bc, handler)
}

fn call(src: &str, name: &str, args: Vec<Value>) -> Value {
    let (bc, handler) = compile_and_handler(src);
    let mut vm = Vm::with_handler(&bc, Box::new(handler));
    vm.call(name, args).unwrap_or_else(|e| panic!("call {name}: {e}"))
}

fn variant_name(v: &Value) -> &str {
    match v {
        Value::Variant { name, .. } => name.as_str(),
        other => panic!("expected Variant, got {other:?}"),
    }
}

// ----------------------------------------------------------------- primitives

const PRIMITIVES_SRC: &str = r#"
import "std.parser" as p

fn run_digit(s :: Str) -> Result[Str, { pos :: Int, message :: Str }] {
  p.run(p.digit(), s)
}

fn run_alpha(s :: Str) -> Result[Str, { pos :: Int, message :: Str }] {
  p.run(p.alpha(), s)
}

fn run_string(s :: Str) -> Result[Str, { pos :: Int, message :: Str }] {
  p.run(p.string("hello"), s)
}

fn run_char(s :: Str) -> Result[Str, { pos :: Int, message :: Str }] {
  p.run(p.char(","), s)
}
"#;

#[test]
fn digit_parses_one_digit() {
    let v = call(PRIMITIVES_SRC, "run_digit", vec![Value::Str("7".into())]);
    assert_eq!(variant_name(&v), "Ok");
}

#[test]
fn digit_rejects_alpha() {
    let v = call(PRIMITIVES_SRC, "run_digit", vec![Value::Str("a".into())]);
    assert_eq!(variant_name(&v), "Err");
}

#[test]
fn alpha_parses_one_letter() {
    let v = call(PRIMITIVES_SRC, "run_alpha", vec![Value::Str("a".into())]);
    assert_eq!(variant_name(&v), "Ok");
}

#[test]
fn string_matches_exact_prefix() {
    let v = call(PRIMITIVES_SRC, "run_string", vec![Value::Str("hello".into())]);
    assert_eq!(variant_name(&v), "Ok");
}

#[test]
fn string_rejects_partial_match() {
    let v = call(PRIMITIVES_SRC, "run_string", vec![Value::Str("hell".into())]);
    assert_eq!(variant_name(&v), "Err");
}

#[test]
fn char_matches_one_character() {
    let v = call(PRIMITIVES_SRC, "run_char", vec![Value::Str(",".into())]);
    assert_eq!(variant_name(&v), "Ok");
}

// --------------------------------------------------------------- combinators

const COMBINATORS_SRC: &str = r#"
import "std.parser" as p

# many(digit) — zero-or-more digits, returns List[Str].
fn run_many_digits(s :: Str) -> Result[List[Str], { pos :: Int, message :: Str }] {
  p.run(p.many(p.digit()), s)
}

# alt picks the first alternative; the second is the fallback.
fn run_alt_a_or_b(s :: Str) -> Result[Str, { pos :: Int, message :: Str }] {
  p.run(p.alt(p.string("foo"), p.string("bar")), s)
}

# optional wraps in Some/None.
fn run_optional_minus(s :: Str) -> Result[Option[Str], { pos :: Int, message :: Str }] {
  p.run(p.optional(p.char("-")), s)
}

# seq returns a tuple of the two halves.
fn run_seq_digit_alpha(s :: Str) -> Result[(Str, Str), { pos :: Int, message :: Str }] {
  p.run(p.seq(p.digit(), p.alpha()), s)
}
"#;

#[test]
fn many_consumes_run_of_digits() {
    let v = call(COMBINATORS_SRC, "run_many_digits", vec![Value::Str("123".into())]);
    let (name, args) = match &v {
        Value::Variant { name, args } => (name.as_str(), args),
        _ => panic!("{v:?}"),
    };
    assert_eq!(name, "Ok");
    if let Some(Value::List(xs)) = args.first() {
        assert_eq!(xs.len(), 3);
    } else {
        panic!("expected List, got {args:?}");
    }
}

#[test]
fn many_returns_empty_list_when_nothing_matches() {
    let v = call(COMBINATORS_SRC, "run_many_digits", vec![Value::Str("abc".into())]);
    let (name, args) = match &v {
        Value::Variant { name, args } => (name.as_str(), args),
        _ => panic!("{v:?}"),
    };
    assert_eq!(name, "Ok");
    if let Some(Value::List(xs)) = args.first() {
        assert!(xs.is_empty());
    } else {
        panic!("expected List, got {args:?}");
    }
}

#[test]
fn alt_picks_second_when_first_fails() {
    let v = call(COMBINATORS_SRC, "run_alt_a_or_b", vec![Value::Str("bar".into())]);
    assert_eq!(variant_name(&v), "Ok");
}

#[test]
fn optional_yields_none_when_missing() {
    let v = call(COMBINATORS_SRC, "run_optional_minus",
                 vec![Value::Str("".into())]);
    let (name, args) = match v {
        Value::Variant { name, args } => (name, args),
        other => panic!("{other:?}"),
    };
    assert_eq!(name, "Ok");
    assert_eq!(variant_name(args.first().unwrap()), "None");
}

#[test]
fn optional_yields_some_when_present() {
    let v = call(COMBINATORS_SRC, "run_optional_minus",
                 vec![Value::Str("-".into())]);
    let (name, args) = match v {
        Value::Variant { name, args } => (name, args),
        other => panic!("{other:?}"),
    };
    assert_eq!(name, "Ok");
    assert_eq!(variant_name(args.first().unwrap()), "Some");
}

#[test]
fn seq_returns_tuple() {
    let v = call(COMBINATORS_SRC, "run_seq_digit_alpha",
                 vec![Value::Str("9z".into())]);
    let (name, args) = match v {
        Value::Variant { name, args } => (name, args),
        other => panic!("{other:?}"),
    };
    assert_eq!(name, "Ok");
    if let Some(Value::Tuple(parts)) = args.first() {
        assert_eq!(parts.len(), 2);
    } else {
        panic!("expected Tuple, got {args:?}");
    }
}

// ------------------------------------------- acceptance: RFC3339 date portion

const RFC3339_SRC: &str = r#"
import "std.parser" as p

# YYYY-MM (the date prefix of RFC3339) composed entirely from
# primitives. The acceptance criterion is "composable from primitives"
# — the result type is structural; a typed-record consumer would
# post-process with std.tuple. (No `map` yet — see #217 comment.)
fn rfc3339_year_month(s :: Str) -> Result[
    (((((Str, Str), Str), Str), Str), (Str, Str)),
    { pos :: Int, message :: Str }
  ] {
  let year  := p.seq(p.seq(p.seq(p.digit(), p.digit()), p.digit()), p.digit())
  let month := p.seq(p.digit(), p.digit())
  let dash  := p.char("-")
  p.run(p.seq(p.seq(year, dash), month), s)
}
"#;

#[test]
fn rfc3339_date_prefix_parses() {
    let v = call(RFC3339_SRC, "rfc3339_year_month", vec![Value::Str("2026-05".into())]);
    assert_eq!(variant_name(&v), "Ok");
}

#[test]
fn rfc3339_date_prefix_rejects_garbage() {
    let v = call(RFC3339_SRC, "rfc3339_year_month", vec![Value::Str("not-a-date".into())]);
    assert_eq!(variant_name(&v), "Err");
}

// ----------------------------------------------- acceptance: CSV with quotes

// Smaller-than-real CSV: a row is `field, field, field` where field
// is either a quoted string `"foo"` (no embedded escapes for this
// test) or an unquoted run of alphanumerics. No `map` — the result
// is structural; a real consumer would extract the strings via
// std.tuple post-processing.
const CSV_SRC: &str = r#"
import "std.parser" as p

# alphanumeric run -> Parser[List[Str]]
fn alnum_run() -> Parser[List[Str]] {
  p.many(p.alt(p.alpha(), p.digit()))
}

# quoted field: `"` content `"`  (no escapes; tests just need the shape)
fn quoted_field() -> Parser[((Str, List[Str]), Str)] {
  p.seq(p.seq(p.char("\""), alnum_run()), p.char("\""))
}

# field is alt(quoted, alnum_run); but the two alternatives have
# different result types — `((Str, List[Str]), Str)` vs `List[Str]`.
# alt requires same type, so we wrap both in a Variant via the
# parser's Optional combinator before alt-ing them. Keeping it
# simple here: just run alnum_run on inputs without quotes.

# Row = field followed by zero-or-more `, field`s (using alnum-only
# fields to side-step the type-mismatch issue while still exercising
# seq/many/char in the CSV-row shape).
fn csv_row(s :: Str) -> Result[
    (List[Str], List[(Str, List[Str])]),
    { pos :: Int, message :: Str }
  ] {
  let comma_field := p.seq(p.char(","), alnum_run())
  p.run(p.seq(alnum_run(), p.many(comma_field)), s)
}

# Quoted-field acceptance: the v1 surface parses `"foo"` end-to-end,
# even if it can't be alt-mixed with alnum fields without `map`.
fn quoted_only(s :: Str) -> Result[
    ((Str, List[Str]), Str),
    { pos :: Int, message :: Str }
  ] {
  p.run(quoted_field(), s)
}
"#;

#[test]
fn csv_row_parses_three_alnum_fields() {
    let v = call(CSV_SRC, "csv_row", vec![Value::Str("a,b,c".into())]);
    let (name, args) = match v {
        Value::Variant { name, args } => (name, args),
        other => panic!("{other:?}"),
    };
    assert_eq!(name, "Ok");
    if let Some(Value::Tuple(parts)) = args.first() {
        // (head_field, rest_pairs)
        assert_eq!(parts.len(), 2);
        if let Value::List(rest) = &parts[1] {
            assert_eq!(rest.len(), 2, "expected 2 trailing fields, got {rest:?}");
        } else {
            panic!("expected List in second slot, got {parts:?}");
        }
    } else {
        panic!("expected Tuple, got {args:?}");
    }
}

#[test]
fn quoted_field_parses_a_quoted_string() {
    let v = call(CSV_SRC, "quoted_only", vec![Value::Str("\"foo\"".into())]);
    assert_eq!(variant_name(&v), "Ok");
}

// ---------------------------------------------------- canonical-shape sanity

// Two parsers built by different code paths but with the same
// structure should produce equal Values. This is the "canonical
// parser" property from the proposal — restricted to closure-free
// parsers (the v1 surface), it falls out for free.
const CANON_SRC: &str = r#"
import "std.parser" as p

fn build_a() -> Parser[(Str, Str)] {
  p.seq(p.digit(), p.alpha())
}

fn build_b() -> Parser[(Str, Str)] {
  let d := p.digit()
  let a := p.alpha()
  p.seq(d, a)
}
"#;

#[test]
fn equivalent_parsers_have_equal_values() {
    let a = call(CANON_SRC, "build_a", vec![]);
    let b = call(CANON_SRC, "build_b", vec![]);
    assert_eq!(a, b, "structurally-equivalent parsers must compare equal");
}