Skip to main content

lex_bytecode/
parser_runtime.rs

1//! Parser combinator interpreter (#221).
2//!
3//! Lives in `lex-bytecode` rather than `lex-runtime` because it needs
4//! to invoke `Value::Closure` values from `Map` / `AndThen` nodes,
5//! which requires VM-level access. The structural primitives are
6//! constructed by `lex-runtime::builtins`; only the recursive
7//! interpretation step (`parser.run`) is here.
8//!
9//! Calling convention:
10//!   - The Vm intercepts `("parser", "run")` effect dispatch before
11//!     invoking the handler and routes the args to `run_parser`,
12//!     passing itself as the `ClosureCaller`.
13//!   - `Map(p, f)` and `AndThen(p, f)` AST nodes carry a closure
14//!     value `f`; the interpreter calls back via `caller.call_closure`
15//!     to invoke it on the parsed result.
16
17use crate::Value;
18
19/// Trait the parser interpreter uses to invoke captured closures
20/// during the recursive walk. The Vm implements it via
21/// `Vm::invoke_closure_value`, but the interpreter is generic over
22/// the implementation so `lex-bytecode` doesn't need to depend on
23/// any higher-level runtime concepts.
24pub trait ClosureCaller {
25    fn call_closure(&mut self, closure: Value, args: Vec<Value>) -> Result<Value, String>;
26}
27
28/// Walk a parser AST. Returns `(value, end_pos)` on success or
29/// `(failure_pos, message)` on failure. The interpreter is the same
30/// shape as the original (in `lex-runtime::builtins`) plus the
31/// `Map` and `AndThen` cases that need closure invocation.
32pub fn run_parser(
33    node: &Value,
34    input: &str,
35    pos: usize,
36    caller: &mut dyn ClosureCaller,
37) -> Result<(Value, usize), (usize, String)> {
38    let rec = match node {
39        Value::Record(r) => r,
40        _ => return Err((pos, "parser: expected Parser node".into())),
41    };
42    let kind = match rec.get("kind") {
43        Some(Value::Str(s)) => s.as_str(),
44        _ => return Err((pos, "parser: malformed node (no kind)".into())),
45    };
46    let bytes = input.as_bytes();
47    match kind {
48        "Char" => {
49            let want = match rec.get("ch") {
50                Some(Value::Str(s)) => s,
51                _ => return Err((pos, "char: missing ch".into())),
52            };
53            let want_bytes = want.as_bytes();
54            if pos + want_bytes.len() > bytes.len() {
55                return Err((pos, format!("expected {want:?}, got EOF")));
56            }
57            if &bytes[pos..pos + want_bytes.len()] == want_bytes {
58                Ok((Value::Str(want.clone()), pos + want_bytes.len()))
59            } else {
60                Err((pos, format!("expected {want:?}")))
61            }
62        }
63        "String" => {
64            let want = match rec.get("s") {
65                Some(Value::Str(s)) => s,
66                _ => return Err((pos, "string: missing s".into())),
67            };
68            if input[pos..].starts_with(want.as_str()) {
69                Ok((Value::Str(want.clone()), pos + want.len()))
70            } else {
71                Err((pos, format!("expected {want:?}")))
72            }
73        }
74        "Digit" => {
75            if let Some(&b) = bytes.get(pos) {
76                if b.is_ascii_digit() {
77                    return Ok((Value::Str((b as char).to_string()), pos + 1));
78                }
79            }
80            Err((pos, "expected digit".into()))
81        }
82        "Alpha" => {
83            if let Some(&b) = bytes.get(pos) {
84                if b.is_ascii_alphabetic() {
85                    return Ok((Value::Str((b as char).to_string()), pos + 1));
86                }
87            }
88            Err((pos, "expected alpha".into()))
89        }
90        "Whitespace" => {
91            if let Some(&b) = bytes.get(pos) {
92                if b.is_ascii_whitespace() {
93                    return Ok((Value::Str((b as char).to_string()), pos + 1));
94                }
95            }
96            Err((pos, "expected whitespace".into()))
97        }
98        "Eof" => {
99            if pos == bytes.len() {
100                Ok((Value::Unit, pos))
101            } else {
102                Err((pos, "expected EOF".into()))
103            }
104        }
105        "Seq" => {
106            let a = rec.get("a").ok_or((pos, "seq: missing a".to_string()))?;
107            let b = rec.get("b").ok_or((pos, "seq: missing b".to_string()))?;
108            let (va, p1) = run_parser(a, input, pos, caller)?;
109            let (vb, p2) = run_parser(b, input, p1, caller)?;
110            Ok((Value::Tuple(vec![va, vb]), p2))
111        }
112        "Alt" => {
113            let a = rec.get("a").ok_or((pos, "alt: missing a".to_string()))?;
114            let b = rec.get("b").ok_or((pos, "alt: missing b".to_string()))?;
115            match run_parser(a, input, pos, caller) {
116                Ok(r) => Ok(r),
117                Err(_) => run_parser(b, input, pos, caller),
118            }
119        }
120        "Many" => {
121            let p = rec.get("p").ok_or((pos, "many: missing p".to_string()))?;
122            let mut cur = pos;
123            let mut out = Vec::new();
124            loop {
125                match run_parser(p, input, cur, caller) {
126                    Ok((v, np)) if np > cur => { out.push(v); cur = np; }
127                    _ => break,
128                }
129            }
130            Ok((Value::List(out), cur))
131        }
132        "Optional" => {
133            let p = rec.get("p").ok_or((pos, "optional: missing p".to_string()))?;
134            match run_parser(p, input, pos, caller) {
135                Ok((v, np)) => Ok((
136                    Value::Variant { name: "Some".into(), args: vec![v] },
137                    np,
138                )),
139                Err(_) => Ok((
140                    Value::Variant { name: "None".into(), args: vec![] },
141                    pos,
142                )),
143            }
144        }
145        "Map" => {
146            // Map(p, f): run p, then call f on the result. The new
147            // value replaces the parsed one; the position advances by
148            // whatever p consumed.
149            let p = rec.get("p").ok_or((pos, "map: missing p".to_string()))?;
150            let f = rec.get("f").cloned().ok_or((pos, "map: missing f".to_string()))?;
151            let (v, np) = run_parser(p, input, pos, caller)?;
152            let mapped = caller.call_closure(f, vec![v])
153                .map_err(|e| (pos, format!("map: closure failed: {e}")))?;
154            Ok((mapped, np))
155        }
156        "AndThen" => {
157            // AndThen(p, f): run p; call f with the result, which
158            // returns *another parser*; run that parser starting at
159            // the position p left off. Monadic bind.
160            let p = rec.get("p").ok_or((pos, "and_then: missing p".to_string()))?;
161            let f = rec.get("f").cloned()
162                .ok_or((pos, "and_then: missing f".to_string()))?;
163            let (v, np) = run_parser(p, input, pos, caller)?;
164            let next_parser = caller.call_closure(f, vec![v])
165                .map_err(|e| (np, format!("and_then: closure failed: {e}")))?;
166            run_parser(&next_parser, input, np, caller)
167        }
168        other => Err((pos, format!("unknown parser kind: {other:?}"))),
169    }
170}