Skip to main content

shape_vm/executor/
mod.rs

1//! Virtual machine executor for Shape bytecode
2
3// Opcode category implementations (split into submodules)
4mod additional;
5mod arithmetic;
6mod async_ops;
7mod builtins;
8mod call_convention;
9mod comparison;
10mod control_flow;
11mod dispatch;
12mod exceptions;
13pub(crate) mod ic_fast_paths;
14mod jit_ops;
15mod logical;
16mod loops;
17mod objects;
18mod osr;
19mod resume;
20mod snapshot;
21mod stack_ops;
22pub mod state_builtins;
23pub mod time_travel;
24mod trait_object_ops;
25mod variables;
26pub(crate) mod vm_state_snapshot;
27mod window_join;
28
29// VM infrastructure modules
30pub mod debugger_integration;
31pub mod gc_integration;
32pub mod module_registry;
33pub mod printing;
34pub mod task_scheduler;
35pub mod typed_object_ops;
36pub mod utils;
37
38// Test module
39#[cfg(test)]
40mod tests;
41
42// Re-export async types for external use
43pub use async_ops::{AsyncExecutionResult, SuspensionInfo, WaitType};
44pub use control_flow::foreign_marshal;
45pub use control_flow::native_abi;
46pub use task_scheduler::{TaskScheduler, TaskStatus};
47
/// Reserved future ID used to signal a snapshot suspension.
///
/// The value is `u64::MAX`, the largest representable future ID.
// NOTE(review): presumably chosen so it never collides with IDs produced by
// the VM's incrementing future-ID counter — confirm against the allocator.
pub const SNAPSHOT_FUTURE_ID: u64 = u64::MAX;
50
/// Error returned when a program requires permissions not granted by the host.
///
/// The `Display` output of `InsufficientPermissions` lists only the `missing`
/// set; `required` and `granted` are carried so callers can inspect the full
/// picture programmatically.
#[derive(Debug, Clone)]
pub enum PermissionError {
    /// The program requires permissions not in the granted set.
    InsufficientPermissions {
        /// All permissions the program requires.
        required: shape_abi_v1::PermissionSet,
        /// Permissions the host granted.
        granted: shape_abi_v1::PermissionSet,
        /// Permissions required but not granted.
        missing: shape_abi_v1::PermissionSet,
    },
    /// Linking failed before permission checking could occur.
    /// The payload is the link error message.
    LinkError(String),
}
66
67impl std::fmt::Display for PermissionError {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        match self {
70            PermissionError::InsufficientPermissions { missing, .. } => {
71                let names: Vec<&str> = missing.iter().map(|p| p.name()).collect();
72                write!(
73                    f,
74                    "program requires permissions not granted: {}",
75                    names.join(", ")
76                )
77            }
78            PermissionError::LinkError(msg) => write!(f, "link error: {msg}"),
79        }
80    }
81}
82
// Empty impl: no `Error` methods are overridden, so the trait's default
// method implementations apply.
impl std::error::Error for PermissionError {}
84
/// Result of VM execution.
///
/// Either the program ran to completion and produced a value, or it stopped
/// at a suspension point and reports what it is waiting on and where to resume.
#[derive(Debug, Clone)]
pub enum ExecutionResult {
    /// Execution completed normally with a ValueWord value
    Completed(ValueWord),
    /// Execution suspended waiting for a future to resolve
    Suspended {
        /// The future ID that needs to be resolved
        // NOTE(review): presumably `SNAPSHOT_FUTURE_ID` when the suspension is
        // a snapshot request rather than a real future — confirm with callers.
        future_id: u64,
        /// The instruction pointer to resume at
        resume_ip: usize,
    },
}
98
99use std::collections::HashMap;
100use std::sync::Arc;
101use std::sync::atomic::AtomicU8;
102
103use crate::{
104    bytecode::{
105        BuiltinFunction, BytecodeProgram, FunctionBlob, FunctionHash, Instruction, Operand,
106    },
107    debugger::VMDebugger,
108    memory::{GCConfig, GarbageCollector},
109    tier::TierManager,
110};
111use shape_ast::data::Timeframe;
112
113use crate::constants::{DEFAULT_GC_TRIGGER_THRESHOLD, MAX_CALL_STACK_DEPTH, MAX_STACK_SIZE};
114use shape_value::heap_value::HeapValue;
115use shape_value::{VMError, ValueSlot, ValueWord};
/// VM configuration.
///
/// All fields are public; start from `VMConfig::default()` and override the
/// ones that need to differ. The defaults noted on each field are the values
/// set by the `Default` implementation.
#[derive(Debug, Clone)]
pub struct VMConfig {
    /// Maximum stack size. Defaults to `MAX_STACK_SIZE`.
    pub max_stack_size: usize,
    /// Maximum call depth. Defaults to `MAX_CALL_STACK_DEPTH`.
    pub max_call_depth: usize,
    /// Enable debug mode. Defaults to `false`.
    pub debug_mode: bool,
    /// Enable instruction tracing. Defaults to `false`.
    pub trace_execution: bool,
    /// Garbage collection configuration. Defaults to `GCConfig::default()`.
    pub gc_config: GCConfig,
    /// Enable automatic garbage collection. Defaults to `true`.
    pub auto_gc: bool,
    /// GC trigger threshold (instructions between collections).
    /// Defaults to `DEFAULT_GC_TRIGGER_THRESHOLD`.
    pub gc_trigger_threshold: usize,
    /// Enable VM metrics collection (counters, tier/GC event ring buffers, histograms).
    /// When false (default), `VirtualMachine.metrics` is `None` for zero overhead.
    pub metrics_enabled: bool,
    /// When true, automatically initialise the tracing GC heap (`shape-gc`) on
    /// VM creation instead of relying on Arc reference counting.
    ///
    /// Requires the `gc` crate feature to be compiled in; otherwise this flag
    /// is silently ignored. Defaults to `false`.
    pub use_tracing_gc: bool,
}
143
144impl Default for VMConfig {
145    fn default() -> Self {
146        Self {
147            max_stack_size: MAX_STACK_SIZE,
148            max_call_depth: MAX_CALL_STACK_DEPTH,
149            debug_mode: false,
150            trace_execution: false,
151            gc_config: GCConfig::default(),
152            auto_gc: true,
153            gc_trigger_threshold: DEFAULT_GC_TRIGGER_THRESHOLD,
154            metrics_enabled: false,
155            use_tracing_gc: false,
156        }
157    }
158}
159
/// Call frame for function calls.
///
/// One frame records where to return to, where the callee's locals live on
/// the unified value stack, and which function (and closure environment, if
/// any) is executing.
#[derive(Debug)]
pub struct CallFrame {
    /// Return address
    pub return_ip: usize,
    /// Base pointer into the unified value stack where this frame's locals start
    pub base_pointer: usize,
    /// Number of locals
    pub locals_count: usize,
    /// Function index
    // NOTE(review): the meaning of `None` is not visible here — presumably a
    // frame that does not belong to a numbered function; confirm at the push sites.
    pub function_id: Option<u16>,
    /// Upvalues captured by this closure (None for regular functions)
    pub upvalues: Option<Vec<shape_value::Upvalue>>,
    /// Content hash of the function blob being executed (for content-addressed state capture).
    /// `None` for programs compiled without content-addressed metadata.
    pub blob_hash: Option<FunctionHash>,
}
177
/// Function pointer type for JIT-compiled functions.
///
/// Arguments, in order:
/// * `ctx` (`*mut u8`) — mutable pointer to VM execution context (e.g., stack base).
/// * `args` (`*const u8`) — pointer to the argument buffer.
///
/// Returns a NaN-boxed result as raw u64 bits.
///
/// The pointer is `unsafe extern "C"`: it uses the C calling convention and
/// every call must be made from an `unsafe` context.
#[cfg(feature = "jit")]
pub type JitFnPtr = unsafe extern "C" fn(*mut u8, *const u8) -> u64;
184
185/// Linked foreign-function handles.
186///
187/// Dynamic language runtimes are compiled/invoked through extension plugins.
188/// Native ABI entries (`extern "C"`) are linked directly through the VM's
189/// internal C ABI path.
190#[derive(Clone)]
191pub(crate) enum ForeignFunctionHandle {
192    Runtime {
193        runtime: std::sync::Arc<shape_runtime::plugins::language_runtime::PluginLanguageRuntime>,
194        compiled: shape_runtime::plugins::language_runtime::CompiledForeignFunction,
195    },
196    Native(std::sync::Arc<control_flow::native_abi::NativeLinkedFunction>),
197}
198
/// The Shape virtual machine.
///
/// Owns the loaded bytecode program together with all mutable execution
/// state. The fields fall into a few groups:
///
/// * core interpreter state — instruction pointer, unified value stack and
///   stack pointer, module bindings, call/loop/timeframe/exception stacks;
/// * error reporting — last error line/file and the last uncaught exception;
/// * extension support — module registry, module function table, extension
///   method intrinsics, and linked foreign-function handles;
/// * content-addressed identity — per-function hashes, entry points, and the
///   hash → function ID reverse lookup;
/// * async support — future ID counter, async scope stack, task scheduler;
/// * optional subsystems — debugger, time-travel, resource tracking, metrics,
///   tracing GC heap (`gc` feature), JIT dispatch (`jit` feature), tiered
///   compilation, inline-cache feedback, and the megamorphic cache.
pub struct VirtualMachine {
    /// Configuration
    config: VMConfig,

    /// The program being executed
    pub(crate) program: BytecodeProgram,

    /// Instruction pointer
    ip: usize,

    /// Unified value stack (pre-allocated, NaN-boxed: 8 bytes per slot).
    /// Locals live in register windows on this stack.
    stack: Vec<ValueWord>,

    /// Stack pointer — logical top of the value stack.
    /// `stack[0..sp]` are live values; `stack[sp..]` is pre-allocated dead space.
    pub(crate) sp: usize,

    /// ModuleBinding variables (NaN-boxed for compact storage)
    pub(crate) module_bindings: Vec<ValueWord>,

    /// Call stack
    call_stack: Vec<CallFrame>,

    /// Loop stack for break/continue
    loop_stack: Vec<LoopContext>,
    /// Timeframe stack for timeframe context
    timeframe_stack: Vec<Option<Timeframe>>,

    /// Integrated debugger
    debugger: Option<VMDebugger>,

    /// Garbage collector
    gc: GarbageCollector,

    /// Instruction counter (used for interrupt checking)
    instruction_count: usize,

    /// Exception handler stack for try/catch blocks
    exception_handlers: Vec<ExceptionHandler>,

    /// Builtin schema IDs for fixed-layout runtime objects (AnyError, TraceFrame, etc.)
    pub(crate) builtin_schemas: shape_runtime::type_schema::BuiltinSchemaIds,

    /// Last error location (line number) for LSP integration.
    /// Set by enrich_error_with_location when an error occurs.
    last_error_line: Option<u32>,

    /// Last error file path for LSP integration.
    /// Set by enrich_error_with_location when an error occurs.
    last_error_file: Option<String>,

    /// Uncaught exception payload captured at VM boundary.
    ///
    /// Set when an exception escapes with no handler so hosts can render
    /// structured AnyError output without reparsing plain strings.
    last_uncaught_exception: Option<ValueWord>,

    /// Whether module-level initialization code has been executed.
    /// Used by `execute_function_by_name` to ensure module bindings
    /// are initialized before calling the target function.
    module_init_done: bool,

    /// Output capture buffer for testing.
    /// When Some, print output is captured here instead of going to stdout.
    output_buffer: Option<Vec<String>>,

    /// Extension module registry — single source of truth for all extension modules.
    /// Used by extension dispatch, auto-available module_bindings, and LSP completions.
    module_registry: shape_runtime::module_exports::ModuleExportRegistry,

    /// Table of ModuleFn closures indexed by usize ID.
    /// ValueWord::ModuleFunction(id) references this table for dispatch.
    module_fn_table: Vec<shape_runtime::module_exports::ModuleFn>,

    /// Runtime function name → index lookup for UFCS dispatch.
    /// Populated after program load. Used by handle_object_method to find
    /// type-scoped impl methods (e.g., "DuckDbQuery::filter") at runtime.
    function_name_index: HashMap<String, u16>,

    /// Extension method intrinsics for fast dispatch on typed Objects.
    /// Populated from ModuleExports.method_intrinsics during register_extension().
    /// Checked in handle_object_method() after built-in methods, before UFCS.
    // NOTE(review): presumably keyed by type name, then method name — confirm
    // against register_extension().
    extension_methods: HashMap<String, HashMap<String, shape_runtime::module_exports::ModuleFn>>,

    /// Cache of resolved merged schemas: (left_id, right_id) → merged_id
    merged_schema_cache: HashMap<(u32, u32), u32>,

    /// Interrupt flag set by Ctrl+C handler (0 = none, >0 = interrupted)
    interrupt: Arc<AtomicU8>,

    /// Counter for generating unique future IDs (for SpawnTask)
    future_id_counter: u64,

    /// Stack of async scopes for structured concurrency.
    /// Each entry is a list of Future IDs spawned within that scope.
    /// AsyncScopeEnter pushes a new Vec; AsyncScopeExit pops and cancels.
    async_scope_stack: Vec<Vec<u64>>,

    /// Task scheduler for async host runtime.
    /// Stores spawned callables and tracks their completion status.
    pub(crate) task_scheduler: task_scheduler::TaskScheduler,

    /// Compiled foreign function handles (linked at pre-execution time).
    /// Index corresponds to program.foreign_functions index.
    pub(crate) foreign_fn_handles: Vec<Option<ForeignFunctionHandle>>,

    /// Content hashes for each function, indexed by function_id.
    /// Populated from `BytecodeProgram.content_addressed` or `LinkedProgram`.
    /// `None` entries mean the function has no content-addressed metadata.
    function_hashes: Vec<Option<FunctionHash>>,

    /// Raw byte representation of `function_hashes` for passing to `ModuleContext`.
    /// Kept in sync with `function_hashes`; avoids per-call allocation when
    /// constructing `ModuleContext` (which uses `[u8; 32]` to avoid a dependency
    /// on `FunctionHash`).
    function_hash_raw: Vec<Option<[u8; 32]>>,

    /// Reverse lookup for hash-first execution identity.
    /// Maps function blob hash -> runtime function ID.
    function_id_by_hash: HashMap<FunctionHash, u16>,

    /// Entry points for each function, indexed by function_id.
    /// Used to compute `local_ip = ip - function_entry_points[function_id]`
    /// for content-addressed snapshot frames.
    function_entry_points: Vec<usize>,

    /// Effective execution entry IP for the currently loaded program.
    /// Normal bytecode starts at 0; linked content-addressed programs start
    /// at the entry function's `entry_point`.
    program_entry_ip: usize,

    /// Optional resource usage tracker for sandboxed execution.
    /// When set, the dispatch loop calls `tick_instruction()` each cycle.
    pub resource_usage: Option<crate::resource_limits::ResourceUsage>,

    /// Time-travel debugger for recording and navigating VM state history.
    /// `None` when time-travel debugging is not active.
    pub(crate) time_travel: Option<time_travel::TimeTravel>,

    /// GC heap (only present when `gc` feature is enabled).
    #[cfg(feature = "gc")]
    gc_heap: Option<shape_gc::GcHeap>,

    /// Whether selective JIT compilation has been applied to the loaded program.
    #[cfg(feature = "jit")]
    jit_compiled: bool,

    /// JIT dispatch table: function_id → extern "C" function pointer.
    /// Populated by external JIT compilers (e.g., shape-jit) via `register_jit_function`.
    #[cfg(feature = "jit")]
    jit_dispatch_table: std::collections::HashMap<u16, JitFnPtr>,

    /// Tiered compilation manager. Tracks per-function call counts and
    /// coordinates background JIT compilation via channels.
    /// `None` when tiered compilation is disabled.
    tier_manager: Option<TierManager>,

    /// Pending resume snapshot. Set by `state.resume()` stdlib function via
    /// the `set_pending_resume` callback on `ModuleContext`. Consumed by the
    /// dispatch loop after the current instruction completes.
    pub(crate) pending_resume: Option<ValueWord>,

    /// Pending single-frame resume data. Set by `state.resume_frame()` to
    /// override IP and locals after function invocation sets up the call frame.
    pub(crate) pending_frame_resume: Option<FrameResumeData>,

    /// Optional VM metrics collector. `None` when `VMConfig.metrics_enabled`
    /// is false (the default), giving zero per-instruction overhead.
    pub metrics: Option<crate::metrics::VmMetrics>,

    /// Per-function feedback vectors for inline cache profiling.
    /// Indexed by function_id. None means no feedback collected for that function.
    /// Only populated when tiered compilation is enabled.
    feedback_vectors: Vec<Option<crate::feedback::FeedbackVector>>,

    /// Megamorphic property lookup cache. Used when a property access site has
    /// seen too many different schemas (>4 targets) and IC state is Megamorphic.
    megamorphic_cache: crate::megamorphic_cache::MegamorphicCache,
}
380
/// Data for resuming a single call frame mid-function.
///
/// Stored in `VirtualMachine::pending_frame_resume`, whose documentation says
/// it is set by `state.resume_frame()` to override IP and locals after
/// function invocation sets up the call frame.
pub(crate) struct FrameResumeData {
    /// IP offset within the function to resume at.
    // NOTE(review): presumably relative to the function's entry point — confirm
    // where this is applied.
    pub ip_offset: usize,
    /// Locals to restore in the resumed frame.
    pub locals: Vec<ValueWord>,
}
388
/// Exception handler for try/catch blocks.
///
/// Entries live on `VirtualMachine::exception_handlers`. Each one records the
/// catch target plus the value-stack size and call-stack depth captured when
/// the handler was set up.
#[derive(Debug, Clone)]
struct ExceptionHandler {
    /// Instruction pointer to jump to on exception
    catch_ip: usize,
    /// Stack size when handler was set up (for unwinding)
    stack_size: usize,
    /// Call stack depth when handler was set up
    call_depth: usize,
}
399
/// Loop context for break/continue.
///
/// Entries live on `VirtualMachine::loop_stack`.
// NOTE(review): `start` and `end` are presumably instruction pointers into the
// program's bytecode — confirm against the loop opcode handlers.
#[derive(Debug)]
struct LoopContext {
    /// Start of loop body (for continue)
    start: usize,
    /// End of loop (for break)
    end: usize,
}
408
/// Debug VM state snapshot for the debugger.
///
/// A small, plain-data view of the VM: just the instruction pointer and the
/// call stack depth.
#[derive(Debug)]
pub struct DebugVMState {
    /// Current instruction pointer
    pub ip: usize,
    /// Call stack depth
    pub call_stack_depth: usize,
}
417
418mod vm_impl_part1;
419mod vm_impl_part2;
420mod vm_impl_part3;
421mod vm_impl_part4;
422
423/// Replace the active wire transport provider used by VM transport builtins.
424pub fn set_transport_provider(
425    provider: std::sync::Arc<dyn builtins::transport_provider::WireTransportProvider>,
426) {
427    builtins::transport_provider::set_transport_provider(provider);
428}
429
/// Restore the default shape-wire transport provider.
///
/// Public façade that forwards to
/// `builtins::transport_provider::reset_transport_provider`.
pub fn reset_transport_provider() {
    builtins::transport_provider::reset_transport_provider();
}
434
/// Configure global QUIC settings used by `transport.quic()`.
///
/// Only compiled when the `quic` feature is enabled. All arguments are
/// forwarded unchanged to
/// `builtins::transport_provider::configure_quic_transport`.
///
/// * `server_name` — server name to use for QUIC connections.
/// * `root_certs_der` — root certificates, one byte buffer per certificate.
///   NOTE(review): presumably DER-encoded, per the parameter name — confirm.
/// * `connect_timeout` — optional connection timeout; `None` leaves the
///   choice to the underlying provider.
#[cfg(feature = "quic")]
pub fn configure_quic_transport(
    server_name: String,
    root_certs_der: Vec<Vec<u8>>,
    connect_timeout: Option<std::time::Duration>,
) {
    builtins::transport_provider::configure_quic_transport(
        server_name,
        root_certs_der,
        connect_timeout,
    );
}
448
/// Clear global QUIC settings used by `transport.quic()`.
///
/// Only compiled when the `quic` feature is enabled. Forwards to
/// `builtins::transport_provider::clear_quic_transport_config`.
#[cfg(feature = "quic")]
pub fn clear_quic_transport_config() {
    builtins::transport_provider::clear_quic_transport_config();
}
454
/// Create the VM-backed `transport` module exports.
///
/// Crate-internal shim that returns whatever
/// `builtins::transport_builtins::create_transport_module` builds.
pub(crate) fn create_transport_module_exports() -> shape_runtime::module_exports::ModuleExports {
    builtins::transport_builtins::create_transport_module()
}
459
460/// Remap constant and string pool indices in a single instruction operand after
461/// a hot-patch splice. `const_offset` and `string_offset` are the starting
462/// indices in the global pools where the blob's local pools were appended.
463fn remap_operand(operand: &mut Option<Operand>, const_offset: usize, string_offset: usize) {
464    let Some(op) = operand.as_mut() else {
465        return;
466    };
467    match op {
468        Operand::Const(idx) => {
469            *idx = (*idx as usize + const_offset) as u16;
470        }
471        Operand::Property(idx) => {
472            *idx = (*idx as usize + string_offset) as u16;
473        }
474        Operand::Name(sid) => {
475            sid.0 = (sid.0 as usize + string_offset) as u32;
476        }
477        Operand::MethodCall { name, .. } => {
478            name.0 = (name.0 as usize + string_offset) as u32;
479        }
480        Operand::TypedMethodCall { string_id, .. } => {
481            *string_id = (*string_id as usize + string_offset) as u16;
482        }
483        // Other operands (Local, ModuleBinding, Offset, Function, Builtin,
484        // Count, ColumnIndex, TypedField, TypedObjectAlloc, TypedMerge,
485        // ColumnAccess, ForeignFunction) don't reference the constant or
486        // string pools.
487        _ => {}
488    }
489}