Skip to main content

harn_opcode_macros/
lib.rs

1//! `define_opcodes!` function-like proc-macro.
2//!
3//! Single source of truth for the VM's opcode set. One invocation in
4//! `harn-vm/src/chunk.rs` emits the `Op` enum, the byte-to-variant mapping,
5//! the sync and async dispatch tables, the disassembly renderer, and the
6//! classification helpers (`op_reads_outer_name`, `is_adaptive_binary_op`).
7//! Adding or renaming an opcode is a one-line edit; coverage drift across
8//! the previously hand-maintained tables is now impossible.
9
10extern crate proc_macro;
11
12use proc_macro::TokenStream;
13use proc_macro2::TokenStream as TokenStream2;
14use quote::{format_ident, quote};
15use syn::parse::{Parse, ParseStream};
16use syn::punctuated::Punctuated;
17use syn::{braced, bracketed, parenthesized, parse_macro_input, Expr, Ident, LitStr, Token};
18
19/// Parse a single opcode entry. Syntax:
20///
21/// ```ignore
22/// VariantName {
23///     <kind>(<expr>[, <expr>]),
24///     disasm: <helper>("<LABEL>"),
25///     flags: [<flag>, ...]   // optional
26/// };
27/// ```
28///
29/// where `<kind>` is one of:
30/// - `sync(expr)` — `expr: Result<(), VmError>`
31/// - `sync_void(expr)` — `expr: ()`, wrapped to `Ok(())`
32/// - `sync_return(expr)` — `expr: VmError`, wrapped to `return Some(Err(expr))`
33/// - `split(sync_expr, async_expr)` — sync path returns `Option<Result>`,
34///   async path returns `Result<(), VmError>`
35/// - `async_op(expr)` — `expr: Result<(), VmError>`, only dispatched in async
36///   table; sync table emits `return None`. Named `async_op` instead of
37///   `async` to avoid the reserved-keyword friction even though `syn` would
38///   tolerate the raw identifier.
39struct OpcodeEntry {
40    variant: Ident,
41    dispatch: Dispatch,
42    disasm: Disasm,
43    flags: Vec<Ident>,
44}
45
46enum Dispatch {
47    Sync(Expr),
48    SyncVoid(Expr),
49    SyncReturn(Expr),
50    Split { sync: Expr, async_: Expr },
51    Async(Expr),
52}
53
54struct Disasm {
55    helper: Ident,
56    label: LitStr,
57}
58
59impl Parse for OpcodeEntry {
60    fn parse(input: ParseStream) -> syn::Result<Self> {
61        let variant: Ident = input.parse()?;
62        let body;
63        braced!(body in input);
64
65        let kind: Ident = body.parse()?;
66        let kind_args;
67        parenthesized!(kind_args in body);
68
69        let dispatch = match kind.to_string().as_str() {
70            "sync" => {
71                let expr: Expr = kind_args.parse()?;
72                Dispatch::Sync(expr)
73            }
74            "sync_void" => {
75                let expr: Expr = kind_args.parse()?;
76                Dispatch::SyncVoid(expr)
77            }
78            "sync_return" => {
79                let expr: Expr = kind_args.parse()?;
80                Dispatch::SyncReturn(expr)
81            }
82            "split" => {
83                let sync: Expr = kind_args.parse()?;
84                kind_args.parse::<Token![,]>()?;
85                let async_: Expr = kind_args.parse()?;
86                Dispatch::Split { sync, async_ }
87            }
88            "async_op" => {
89                let expr: Expr = kind_args.parse()?;
90                Dispatch::Async(expr)
91            }
92            other => {
93                return Err(syn::Error::new(
94                    kind.span(),
95                    format!(
96                        "unknown opcode kind `{other}` — expected one of: \
97                         sync, sync_void, sync_return, split, async_op"
98                    ),
99                ));
100            }
101        };
102
103        body.parse::<Token![,]>()?;
104        let disasm_kw: Ident = body.parse()?;
105        if disasm_kw != "disasm" {
106            return Err(syn::Error::new(
107                disasm_kw.span(),
108                "expected `disasm:` after dispatch kind",
109            ));
110        }
111        body.parse::<Token![:]>()?;
112        let helper: Ident = body.parse()?;
113        let helper_args;
114        parenthesized!(helper_args in body);
115        let label: LitStr = helper_args.parse()?;
116        let disasm = Disasm { helper, label };
117
118        let mut flags = Vec::new();
119        if body.peek(Token![,]) {
120            body.parse::<Token![,]>()?;
121            if !body.is_empty() {
122                let flags_kw: Ident = body.parse()?;
123                if flags_kw != "flags" {
124                    return Err(syn::Error::new(
125                        flags_kw.span(),
126                        "expected `flags: [...]` after `disasm`",
127                    ));
128                }
129                body.parse::<Token![:]>()?;
130                let flags_inner;
131                bracketed!(flags_inner in body);
132                let parsed: Punctuated<Ident, Token![,]> =
133                    Punctuated::parse_terminated(&flags_inner)?;
134                flags = parsed.into_iter().collect();
135            }
136        }
137
138        Ok(Self {
139            variant,
140            dispatch,
141            disasm,
142            flags,
143        })
144    }
145}
146
147struct Opcodes {
148    entries: Vec<OpcodeEntry>,
149}
150
151impl Parse for Opcodes {
152    fn parse(input: ParseStream) -> syn::Result<Self> {
153        let mut entries = Vec::new();
154        while !input.is_empty() {
155            let entry: OpcodeEntry = input.parse()?;
156            input.parse::<Token![;]>()?;
157            entries.push(entry);
158        }
159        Ok(Self { entries })
160    }
161}
162
163/// Emit the VM opcode definitions from a centralized declarative table.
164///
165/// See the crate-level doc for syntax. Generated outputs:
166/// - `pub enum Op { ... }` (`#[repr(u8)]`, `Debug + Clone + Copy + Eq`)
167/// - `impl Op { const ALL: &[Self]; const COUNT: usize; fn from_byte(...) -> Option<Self> }`
168/// - `impl crate::vm::Vm { fn execute_op_sync(...); async fn execute_op_async(...) }`
169/// - `impl crate::chunk::Chunk { fn disassemble_op(...) }`
170/// - One free `pub(crate) fn <flag>(op: Op) -> bool` per declared flag.
171///   The well-known flags `reads_outer_name` and `adaptive_binary` map
172///   to `op_reads_outer_name` and `is_adaptive_binary_op` respectively
173///   so existing call sites need no renames.
174#[proc_macro]
175pub fn define_opcodes(input: TokenStream) -> TokenStream {
176    let opcodes = parse_macro_input!(input as Opcodes);
177    match expand(&opcodes) {
178        Ok(ts) => ts.into(),
179        Err(e) => e.to_compile_error().into(),
180    }
181}
182
183fn expand(opcodes: &Opcodes) -> syn::Result<TokenStream2> {
184    if opcodes.entries.is_empty() {
185        return Err(syn::Error::new(
186            proc_macro2::Span::call_site(),
187            "define_opcodes! requires at least one opcode entry",
188        ));
189    }
190
191    let variants = opcodes.entries.iter().map(|e| &e.variant);
192    let all_variants: Vec<_> = opcodes.entries.iter().map(|e| &e.variant).collect();
193
194    // Sync dispatch arms.
195    let sync_arms = opcodes.entries.iter().map(|e| {
196        let v = &e.variant;
197        match &e.dispatch {
198            Dispatch::Sync(expr) => quote!(Op::#v => #expr,),
199            Dispatch::SyncVoid(expr) => quote!(Op::#v => { #expr; Ok(()) },),
200            Dispatch::SyncReturn(expr) => quote!(Op::#v => return ::core::option::Option::Some(::core::result::Result::Err(#expr)),),
201            Dispatch::Split { sync, .. } => quote!(Op::#v => return #sync,),
202            Dispatch::Async(_) => quote!(Op::#v => return ::core::option::Option::None,),
203        }
204    });
205
206    // Async dispatch arms — only `async` and `split` kinds appear here.
207    // Sync-only opcodes hit a debug_assert!-guarded panic to flag any
208    // out-of-sync caller (the hot loop never calls execute_op_async for
209    // a sync opcode because execute_op_sync returns Some(...) for them).
210    let async_arms = opcodes.entries.iter().filter_map(|e| {
211        let v = &e.variant;
212        match &e.dispatch {
213            Dispatch::Async(expr) => Some(quote!(Op::#v => #expr,)),
214            Dispatch::Split { async_, .. } => Some(quote!(Op::#v => #async_,)),
215            _ => None,
216        }
217    });
218
219    // Disasm arms — call the per-opcode helper with the label.
220    // Helpers live in `crate::chunk` as `pub(crate) fn disasm_<helper>(...)`
221    // and take `(&Chunk, &mut usize, &str)`. Fully-qualifying the path
222    // means this dispatch table can be emitted into any module without
223    // forcing a `use crate::chunk::*` at the call site.
224    let disasm_arms = opcodes.entries.iter().map(|e| {
225        let v = &e.variant;
226        let helper = format_ident!("disasm_{}", e.disasm.helper);
227        let label = &e.disasm.label;
228        quote!(Op::#v => crate::chunk::#helper(self, ip, #label),)
229    });
230
231    // Flag-derived classification helpers. Collect the set of distinct
232    // flag names declared across all entries so each generated predicate
233    // has a contributor list and zero-cost `matches!` macro expansion.
234    let mut flag_groups: std::collections::BTreeMap<String, Vec<&Ident>> =
235        std::collections::BTreeMap::new();
236    for entry in &opcodes.entries {
237        for flag in &entry.flags {
238            flag_groups
239                .entry(flag.to_string())
240                .or_default()
241                .push(&entry.variant);
242        }
243    }
244
245    // Each flag becomes a free `fn` at module scope, named for the flag.
246    // Hand-rolled mapping ties into the existing call sites (chunk.rs,
247    // arithmetic.rs) so the macro replaces the predicate definitions
248    // without forcing every caller to switch namespaces.
249    let flag_fns = flag_groups.iter().map(|(name, variants)| {
250        let fn_ident = match name.as_str() {
251            "reads_outer_name" => format_ident!("op_reads_outer_name"),
252            "adaptive_binary" => format_ident!("is_adaptive_binary_op"),
253            other => format_ident!("op_has_flag_{}", other),
254        };
255        quote!(
256            #[inline]
257            pub(crate) fn #fn_ident(op: Op) -> bool {
258                matches!(op, #(Op::#variants)|*)
259            }
260        )
261    });
262
263    let opcode_count = all_variants.len();
264
265    let out = quote! {
266        /// Bytecode opcodes for the Harn VM. Defined by
267        /// `define_opcodes!`; the `u8` representation is the on-disk
268        /// bytecode encoding.
269        #[derive(::core::fmt::Debug, ::core::clone::Clone, ::core::marker::Copy, ::core::cmp::PartialEq, ::core::cmp::Eq)]
270        #[repr(u8)]
271        pub enum Op {
272            #(#variants),*
273        }
274
275        impl Op {
276            /// Every opcode in declaration order. The index is the
277            /// canonical `u8` representation; `from_byte` is the
278            /// inverse mapping.
279            pub(crate) const ALL: &'static [Self] = &[#(Op::#all_variants),*];
280
281            /// Number of declared opcodes (== `ALL.len()`).
282            pub(crate) const COUNT: usize = #opcode_count;
283
284            #[inline]
285            pub(crate) fn from_byte(byte: u8) -> ::core::option::Option<Self> {
286                Self::ALL.get(byte as usize).copied()
287            }
288        }
289
290        impl crate::vm::Vm {
291            /// Sync dispatch table. Returns `Some(Ok(()))` / `Some(Err(_))`
292            /// when the opcode completed synchronously and `None` when
293            /// the caller must fall through to [`Self::execute_op_async`].
294            /// See the per-arm comments in `define_opcodes!` for what
295            /// each variant does on dispatch.
296            pub(super) fn execute_op_sync(&mut self, op: Op) -> ::core::option::Option<::core::result::Result<(), crate::value::VmError>> {
297                let result: ::core::result::Result<(), crate::value::VmError> = match op {
298                    #(#sync_arms)*
299                };
300                ::core::option::Option::Some(result)
301            }
302
303            /// Async dispatch table. The caller must have observed `None`
304            /// from [`Self::execute_op_sync`] for this opcode; reaching the
305            /// catch-all is a coverage bug between the two halves.
306            pub(super) async fn execute_op_async(&mut self, op: Op) -> ::core::result::Result<(), crate::value::VmError> {
307                match op {
308                    #(#async_arms)*
309                    sync_op => {
310                        debug_assert!(
311                            false,
312                            "execute_op_async called with sync opcode {sync_op:?} \
313                             — define_opcodes! kept the two halves aligned"
314                        );
315                        ::core::result::Result::Err(crate::value::VmError::Runtime(format!(
316                            "internal VM dispatch error: {sync_op:?} is not an async opcode"
317                        )))
318                    }
319                }
320            }
321        }
322
323        impl crate::chunk::Chunk {
324            /// Render a single opcode at `ip` (positioned immediately
325            /// after the opcode byte) into a human-readable disassembly
326            /// line. Each helper advances `ip` past the operand bytes
327            /// it consumes. Delegated from
328            /// [`crate::chunk::Chunk::disassemble`].
329            #[allow(clippy::too_many_lines)]
330            pub(crate) fn disassemble_op(&self, op: Op, ip: &mut usize, out: &mut String) {
331                let line = match op {
332                    #(#disasm_arms)*
333                };
334                out.push_str(&line);
335                out.push('\n');
336            }
337        }
338
339        #(#flag_fns)*
340    };
341
342    Ok(out)
343}