Skip to main content

lex_bytecode/
parser_runtime.rs

1//! Parser combinator interpreter (#221).
2//!
3//! Lives in `lex-bytecode` rather than `lex-runtime` because it needs
4//! to invoke `Value::Closure` values from `Map` / `AndThen` nodes,
5//! which requires VM-level access. The structural primitives are
6//! constructed by `lex-runtime::builtins`; only the recursive
7//! interpretation step (`parser.run`) is here.
8//!
9//! Calling convention:
10//!   - The Vm intercepts `("parser", "run")` effect dispatch before
11//!     invoking the handler and routes the args to `run_parser`,
12//!     passing itself as the `ClosureCaller`.
13//!   - `Map(p, f)` and `AndThen(p, f)` AST nodes carry a closure
14//!     value `f`; the interpreter calls back via `caller.call_closure`
15//!     to invoke it on the parsed result.
16
17use crate::Value;
18use smol_str::SmolStr;
19
20/// Trait the parser interpreter uses to invoke captured closures
21/// during the recursive walk. The Vm implements it via
22/// `Vm::invoke_closure_value`, but the interpreter is generic over
23/// the implementation so `lex-bytecode` doesn't need to depend on
24/// any higher-level runtime concepts.
25pub trait ClosureCaller {
26    fn call_closure(&mut self, closure: Value, args: Vec<Value>) -> Result<Value, String>;
27}
28
29/// Walk a parser AST. Returns `(value, end_pos)` on success or
30/// `(failure_pos, message)` on failure. The interpreter is the same
31/// shape as the original (in `lex-runtime::builtins`) plus the
32/// `Map` and `AndThen` cases that need closure invocation.
33pub fn run_parser(
34    node: &Value,
35    input: &str,
36    pos: usize,
37    caller: &mut dyn ClosureCaller,
38) -> Result<(Value, usize), (usize, String)> {
39    let rec = match node {
40        Value::Record(r) => r,
41        _ => return Err((pos, "parser: expected Parser node".into())),
42    };
43    let kind = match rec.get("kind") {
44        Some(Value::Str(s)) => s.as_str(),
45        _ => return Err((pos, "parser: malformed node (no kind)".into())),
46    };
47    let bytes = input.as_bytes();
48    match kind {
49        "Char" => {
50            let want = match rec.get("ch") {
51                Some(Value::Str(s)) => s,
52                _ => return Err((pos, "char: missing ch".into())),
53            };
54            let want_bytes = want.as_bytes();
55            if pos + want_bytes.len() > bytes.len() {
56                return Err((pos, format!("expected {want:?}, got EOF")));
57            }
58            if &bytes[pos..pos + want_bytes.len()] == want_bytes {
59                Ok((Value::Str(want.clone()), pos + want_bytes.len()))
60            } else {
61                Err((pos, format!("expected {want:?}")))
62            }
63        }
64        "String" => {
65            let want = match rec.get("s") {
66                Some(Value::Str(s)) => s,
67                _ => return Err((pos, "string: missing s".into())),
68            };
69            if input[pos..].starts_with(want.as_str()) {
70                Ok((Value::Str(want.clone()), pos + want.len()))
71            } else {
72                Err((pos, format!("expected {want:?}")))
73            }
74        }
75        "Digit" => {
76            if let Some(&b) = bytes.get(pos) {
77                if b.is_ascii_digit() {
78                    return Ok((Value::Str(SmolStr::new_inline(&(b as char).to_string())), pos + 1));
79                }
80            }
81            Err((pos, "expected digit".into()))
82        }
83        "Alpha" => {
84            if let Some(&b) = bytes.get(pos) {
85                if b.is_ascii_alphabetic() {
86                    return Ok((Value::Str(SmolStr::new_inline(&(b as char).to_string())), pos + 1));
87                }
88            }
89            Err((pos, "expected alpha".into()))
90        }
91        "Whitespace" => {
92            if let Some(&b) = bytes.get(pos) {
93                if b.is_ascii_whitespace() {
94                    return Ok((Value::Str(SmolStr::new_inline(&(b as char).to_string())), pos + 1));
95                }
96            }
97            Err((pos, "expected whitespace".into()))
98        }
99        "Eof" => {
100            if pos == bytes.len() {
101                Ok((Value::Unit, pos))
102            } else {
103                Err((pos, "expected EOF".into()))
104            }
105        }
106        "Seq" => {
107            let a = rec.get("a").ok_or((pos, "seq: missing a".to_string()))?;
108            let b = rec.get("b").ok_or((pos, "seq: missing b".to_string()))?;
109            let (va, p1) = run_parser(a, input, pos, caller)?;
110            let (vb, p2) = run_parser(b, input, p1, caller)?;
111            Ok((Value::Tuple(vec![va, vb]), p2))
112        }
113        "Alt" => {
114            let a = rec.get("a").ok_or((pos, "alt: missing a".to_string()))?;
115            let b = rec.get("b").ok_or((pos, "alt: missing b".to_string()))?;
116            match run_parser(a, input, pos, caller) {
117                Ok(r) => Ok(r),
118                Err(_) => run_parser(b, input, pos, caller),
119            }
120        }
121        "Many" => {
122            let p = rec.get("p").ok_or((pos, "many: missing p".to_string()))?;
123            let mut cur = pos;
124            let mut out = Vec::new();
125            loop {
126                match run_parser(p, input, cur, caller) {
127                    Ok((v, np)) if np > cur => { out.push(v); cur = np; }
128                    _ => break,
129                }
130            }
131            Ok((Value::List(out.into()), cur))
132        }
133        "Optional" => {
134            let p = rec.get("p").ok_or((pos, "optional: missing p".to_string()))?;
135            match run_parser(p, input, pos, caller) {
136                Ok((v, np)) => Ok((
137                    Value::Variant { name: "Some".into(), args: vec![v] },
138                    np,
139                )),
140                Err(_) => Ok((
141                    Value::Variant { name: "None".into(), args: vec![] },
142                    pos,
143                )),
144            }
145        }
146        "Map" => {
147            // Map(p, f): run p, then call f on the result. The new
148            // value replaces the parsed one; the position advances by
149            // whatever p consumed.
150            let p = rec.get("p").ok_or((pos, "map: missing p".to_string()))?;
151            let f = rec.get("f").cloned().ok_or((pos, "map: missing f".to_string()))?;
152            let (v, np) = run_parser(p, input, pos, caller)?;
153            let mapped = caller.call_closure(f, vec![v])
154                .map_err(|e| (pos, format!("map: closure failed: {e}")))?;
155            Ok((mapped, np))
156        }
157        "AndThen" => {
158            // AndThen(p, f): run p; call f with the result, which
159            // returns *another parser*; run that parser starting at
160            // the position p left off. Monadic bind.
161            let p = rec.get("p").ok_or((pos, "and_then: missing p".to_string()))?;
162            let f = rec.get("f").cloned()
163                .ok_or((pos, "and_then: missing f".to_string()))?;
164            let (v, np) = run_parser(p, input, pos, caller)?;
165            let next_parser = caller.call_closure(f, vec![v])
166                .map_err(|e| (np, format!("and_then: closure failed: {e}")))?;
167            run_parser(&next_parser, input, np, caller)
168        }
169        other => Err((pos, format!("unknown parser kind: {other:?}"))),
170    }
171}