shape_vm/executor/mod.rs
1//! Virtual machine executor for Shape bytecode
2
3// Opcode category implementations (split into submodules)
4mod additional;
5mod arithmetic;
6mod async_ops;
7mod builtins;
8mod call_convention;
9mod comparison;
10mod control_flow;
11mod dispatch;
12mod exceptions;
13pub(crate) mod ic_fast_paths;
14mod jit_ops;
15mod logical;
16mod loops;
17mod objects;
18mod osr;
19mod resume;
20mod snapshot;
21mod stack_ops;
22pub mod state_builtins;
23pub mod time_travel;
24mod trait_object_ops;
25mod variables;
26pub(crate) mod vm_state_snapshot;
27mod window_join;
28
29// VM infrastructure modules
30pub mod debugger_integration;
31pub mod gc_integration;
32pub mod module_registry;
33pub mod printing;
34pub mod task_scheduler;
35pub mod typed_object_ops;
36pub mod utils;
37
38// Test module
39#[cfg(test)]
40mod tests;
41
42// Re-export async types for external use
43pub use async_ops::{AsyncExecutionResult, SuspensionInfo, WaitType};
44pub use control_flow::foreign_marshal;
45pub use control_flow::native_abi;
46pub use task_scheduler::{TaskScheduler, TaskStatus};
47
/// Reserved future ID used to signal a snapshot suspension.
///
/// The value is `u64::MAX`, the largest representable `u64`.
// NOTE(review): presumably ordinary future IDs never reach this value, so it
// cannot collide with a real future — confirm against the ID allocator.
pub const SNAPSHOT_FUTURE_ID: u64 = u64::MAX;
50
/// Error returned when a program requires permissions not granted by the host.
///
/// Two failure modes are distinguished: the permission check itself failed
/// ([`PermissionError::InsufficientPermissions`]), or linking failed before
/// the check could run ([`PermissionError::LinkError`]).
#[derive(Debug, Clone)]
pub enum PermissionError {
    /// The program requires permissions not in the granted set.
    InsufficientPermissions {
        /// All permissions the program requires.
        required: shape_abi_v1::PermissionSet,
        /// Permissions the host granted.
        granted: shape_abi_v1::PermissionSet,
        /// Permissions required but not granted.
        missing: shape_abi_v1::PermissionSet,
    },
    /// Linking failed before permission checking could occur.
    /// The payload is the link failure message.
    LinkError(String),
}
66
67impl std::fmt::Display for PermissionError {
68 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69 match self {
70 PermissionError::InsufficientPermissions { missing, .. } => {
71 let names: Vec<&str> = missing.iter().map(|p| p.name()).collect();
72 write!(
73 f,
74 "program requires permissions not granted: {}",
75 names.join(", ")
76 )
77 }
78 PermissionError::LinkError(msg) => write!(f, "link error: {msg}"),
79 }
80 }
81}
82
// `PermissionError` wraps no underlying error value (its variants hold
// permission sets or a message string), so the default `Error` methods are
// used as-is.
impl std::error::Error for PermissionError {}
84
/// Result of VM execution.
///
/// Execution either runs to completion and yields a value, or stops early
/// because it is waiting on a future.
#[derive(Debug, Clone)]
pub enum ExecutionResult {
    /// Execution completed normally with a ValueWord value.
    Completed(ValueWord),
    /// Execution suspended waiting for a future to resolve.
    Suspended {
        /// The future ID that needs to be resolved.
        future_id: u64,
        /// The instruction pointer to resume at.
        resume_ip: usize,
    },
}
98
99use std::collections::HashMap;
100use std::sync::Arc;
101use std::sync::atomic::AtomicU8;
102
103use crate::{
104 bytecode::{
105 BuiltinFunction, BytecodeProgram, FunctionBlob, FunctionHash, Instruction, Operand,
106 },
107 debugger::VMDebugger,
108 memory::{GCConfig, GarbageCollector},
109 tier::TierManager,
110};
111use shape_ast::data::Timeframe;
112
113use crate::constants::{DEFAULT_GC_TRIGGER_THRESHOLD, MAX_CALL_STACK_DEPTH, MAX_STACK_SIZE};
114use shape_value::heap_value::HeapValue;
115use shape_value::{VMError, ValueSlot, ValueWord};
/// VM configuration.
///
/// Groups the tunables of a VM instance: stack and call-depth limits,
/// debugging/tracing switches, garbage-collection settings, and metrics
/// collection. The `Default` implementation takes the limits and the GC
/// trigger threshold from crate-level constants and leaves every optional
/// switch except `auto_gc` turned off.
#[derive(Debug, Clone)]
pub struct VMConfig {
    /// Maximum stack size
    pub max_stack_size: usize,
    /// Maximum call depth
    pub max_call_depth: usize,
    /// Enable debug mode
    pub debug_mode: bool,
    /// Enable instruction tracing
    pub trace_execution: bool,
    /// Garbage collection configuration
    pub gc_config: GCConfig,
    /// Enable automatic garbage collection
    pub auto_gc: bool,
    /// GC trigger threshold (instructions between collections)
    pub gc_trigger_threshold: usize,
    /// Enable VM metrics collection (counters, tier/GC event ring buffers, histograms).
    /// When false (default), `VirtualMachine.metrics` is `None` for zero overhead.
    pub metrics_enabled: bool,
    /// When true, automatically initialise the tracing GC heap (`shape-gc`) on
    /// VM creation instead of relying on Arc reference counting.
    ///
    /// Requires the `gc` crate feature to be compiled in; otherwise this flag
    /// is silently ignored.
    pub use_tracing_gc: bool,
}
143
144impl Default for VMConfig {
145 fn default() -> Self {
146 Self {
147 max_stack_size: MAX_STACK_SIZE,
148 max_call_depth: MAX_CALL_STACK_DEPTH,
149 debug_mode: false,
150 trace_execution: false,
151 gc_config: GCConfig::default(),
152 auto_gc: true,
153 gc_trigger_threshold: DEFAULT_GC_TRIGGER_THRESHOLD,
154 metrics_enabled: false,
155 use_tracing_gc: false,
156 }
157 }
158}
159
/// Call frame for function calls.
///
/// One frame records where to return to, where the callee's locals live on
/// the unified value stack, and which function (by index and, optionally, by
/// content hash) is being executed.
#[derive(Debug)]
pub struct CallFrame {
    /// Return address
    pub return_ip: usize,
    /// Base pointer into the unified value stack where this frame's locals start
    pub base_pointer: usize,
    /// Number of locals
    pub locals_count: usize,
    /// Function index (`None` when the frame carries no function index)
    pub function_id: Option<u16>,
    /// Upvalues captured by this closure (None for regular functions)
    pub upvalues: Option<Vec<shape_value::Upvalue>>,
    /// Content hash of the function blob being executed (for content-addressed state capture).
    /// `None` for programs compiled without content-addressed metadata.
    pub blob_hash: Option<FunctionHash>,
}
177
/// Function pointer type for JIT-compiled functions.
///
/// Only defined when the `jit` feature is enabled. The pointee uses the C
/// calling convention and is `unsafe` to call.
///
/// * First argument (`ctx`): mutable pointer to VM execution context
///   (e.g., stack base).
/// * Second argument (`args`): pointer to the argument buffer.
///
/// Returns a NaN-boxed result as raw u64 bits.
#[cfg(feature = "jit")]
pub type JitFnPtr = unsafe extern "C" fn(*mut u8, *const u8) -> u64;
184
185/// Linked foreign-function handles.
186///
187/// Dynamic language runtimes are compiled/invoked through extension plugins.
188/// Native ABI entries (`extern "C"`) are linked directly through the VM's
189/// internal C ABI path.
190#[derive(Clone)]
191pub(crate) enum ForeignFunctionHandle {
192 Runtime {
193 runtime: std::sync::Arc<shape_runtime::plugins::language_runtime::PluginLanguageRuntime>,
194 compiled: shape_runtime::plugins::language_runtime::CompiledForeignFunction,
195 },
196 Native(std::sync::Arc<control_flow::native_abi::NativeLinkedFunction>),
197}
198
/// The Shape virtual machine.
///
/// Holds everything needed to execute a loaded [`BytecodeProgram`]:
///
/// * execution state — instruction pointer, unified value stack, call /
///   loop / timeframe / exception-handler stacks, module bindings;
/// * runtime services — debugger, garbage collector, task scheduler,
///   extension module registry, foreign function handles;
/// * function identity tables — name index, content hashes, entry points;
/// * optional facilities — resource tracking, time-travel debugging, tiered
///   compilation, metrics, and (behind the `gc` / `jit` features) the tracing
///   GC heap and the JIT dispatch table.
pub struct VirtualMachine {
    /// Configuration
    config: VMConfig,

    /// The program being executed
    pub(crate) program: BytecodeProgram,

    /// Instruction pointer
    ip: usize,

    /// Unified value stack (pre-allocated, NaN-boxed: 8 bytes per slot).
    /// Locals live in register windows on this stack.
    stack: Vec<ValueWord>,

    /// Stack pointer — logical top of the value stack.
    /// `stack[0..sp]` are live values; `stack[sp..]` is pre-allocated dead space.
    pub(crate) sp: usize,

    /// ModuleBinding variables (NaN-boxed for compact storage)
    pub(crate) module_bindings: Vec<ValueWord>,

    /// Call stack
    call_stack: Vec<CallFrame>,

    /// Loop stack for break/continue
    loop_stack: Vec<LoopContext>,
    /// Timeframe stack for timeframe context
    timeframe_stack: Vec<Option<Timeframe>>,

    /// Integrated debugger
    debugger: Option<VMDebugger>,

    /// Garbage collector
    gc: GarbageCollector,

    /// Instruction counter (used for interrupt checking)
    instruction_count: usize,

    /// Exception handler stack for try/catch blocks
    exception_handlers: Vec<ExceptionHandler>,

    /// Builtin schema IDs for fixed-layout runtime objects (AnyError, TraceFrame, etc.)
    pub(crate) builtin_schemas: shape_runtime::type_schema::BuiltinSchemaIds,

    /// Last error location (line number) for LSP integration.
    /// Set by enrich_error_with_location when an error occurs.
    last_error_line: Option<u32>,

    /// Last error file path for LSP integration.
    /// Set by enrich_error_with_location when an error occurs.
    last_error_file: Option<String>,

    /// Uncaught exception payload captured at VM boundary.
    ///
    /// Set when an exception escapes with no handler so hosts can render
    /// structured AnyError output without reparsing plain strings.
    last_uncaught_exception: Option<ValueWord>,

    /// Whether module-level initialization code has been executed.
    /// Used by `execute_function_by_name` to ensure module bindings
    /// are initialized before calling the target function.
    module_init_done: bool,

    /// Output capture buffer for testing.
    /// When Some, print output is captured here instead of going to stdout.
    output_buffer: Option<Vec<String>>,

    /// Extension module registry — single source of truth for all extension modules.
    /// Used by extension dispatch, auto-available module_bindings, and LSP completions.
    module_registry: shape_runtime::module_exports::ModuleExportRegistry,

    /// Table of ModuleFn closures indexed by usize ID.
    /// ValueWord::ModuleFunction(id) references this table for dispatch.
    module_fn_table: Vec<shape_runtime::module_exports::ModuleFn>,

    /// Runtime function name → index lookup for UFCS dispatch.
    /// Populated after program load. Used by handle_object_method to find
    /// type-scoped impl methods (e.g., "DuckDbQuery::filter") at runtime.
    function_name_index: HashMap<String, u16>,

    /// Extension method intrinsics for fast dispatch on typed Objects.
    /// Populated from ModuleExports.method_intrinsics during register_extension().
    /// Checked in handle_object_method() after built-in methods, before UFCS.
    // NOTE(review): the nested map is presumably keyed by type name, then by
    // method name — confirm against register_extension().
    extension_methods: HashMap<String, HashMap<String, shape_runtime::module_exports::ModuleFn>>,

    /// Cache of resolved merged schemas: (left_id, right_id) → merged_id
    merged_schema_cache: HashMap<(u32, u32), u32>,

    /// Interrupt flag set by Ctrl+C handler (0 = none, >0 = interrupted)
    interrupt: Arc<AtomicU8>,

    /// Counter for generating unique future IDs (for SpawnTask)
    future_id_counter: u64,

    /// Stack of async scopes for structured concurrency.
    /// Each entry is a list of Future IDs spawned within that scope.
    /// AsyncScopeEnter pushes a new Vec; AsyncScopeExit pops and cancels.
    async_scope_stack: Vec<Vec<u64>>,

    /// Task scheduler for async host runtime.
    /// Stores spawned callables and tracks their completion status.
    pub(crate) task_scheduler: task_scheduler::TaskScheduler,

    /// Compiled foreign function handles (linked at pre-execution time).
    /// Index corresponds to program.foreign_functions index.
    pub(crate) foreign_fn_handles: Vec<Option<ForeignFunctionHandle>>,

    /// Content hashes for each function, indexed by function_id.
    /// Populated from `BytecodeProgram.content_addressed` or `LinkedProgram`.
    /// `None` entries mean the function has no content-addressed metadata.
    function_hashes: Vec<Option<FunctionHash>>,

    /// Raw byte representation of `function_hashes` for passing to `ModuleContext`.
    /// Kept in sync with `function_hashes`; avoids per-call allocation when
    /// constructing `ModuleContext` (which uses `[u8; 32]` to avoid a dependency
    /// on `FunctionHash`).
    function_hash_raw: Vec<Option<[u8; 32]>>,

    /// Reverse lookup for hash-first execution identity.
    /// Maps function blob hash -> runtime function ID.
    function_id_by_hash: HashMap<FunctionHash, u16>,

    /// Entry points for each function, indexed by function_id.
    /// Used to compute `local_ip = ip - function_entry_points[function_id]`
    /// for content-addressed snapshot frames.
    function_entry_points: Vec<usize>,

    /// Effective execution entry IP for the currently loaded program.
    /// Normal bytecode starts at 0; linked content-addressed programs start
    /// at the entry function's `entry_point`.
    program_entry_ip: usize,

    /// Optional resource usage tracker for sandboxed execution.
    /// When set, the dispatch loop calls `tick_instruction()` each cycle.
    pub resource_usage: Option<crate::resource_limits::ResourceUsage>,

    /// Time-travel debugger for recording and navigating VM state history.
    /// `None` when time-travel debugging is not active.
    pub(crate) time_travel: Option<time_travel::TimeTravel>,

    /// GC heap (only present when `gc` feature is enabled).
    #[cfg(feature = "gc")]
    gc_heap: Option<shape_gc::GcHeap>,

    /// Whether selective JIT compilation has been applied to the loaded program.
    #[cfg(feature = "jit")]
    jit_compiled: bool,

    /// JIT dispatch table: function_id → extern "C" function pointer.
    /// Populated by external JIT compilers (e.g., shape-jit) via `register_jit_function`.
    #[cfg(feature = "jit")]
    jit_dispatch_table: std::collections::HashMap<u16, JitFnPtr>,

    /// Tiered compilation manager. Tracks per-function call counts and
    /// coordinates background JIT compilation via channels.
    /// `None` when tiered compilation is disabled.
    tier_manager: Option<TierManager>,

    /// Pending resume snapshot. Set by `state.resume()` stdlib function via
    /// the `set_pending_resume` callback on `ModuleContext`. Consumed by the
    /// dispatch loop after the current instruction completes.
    pub(crate) pending_resume: Option<ValueWord>,

    /// Pending single-frame resume data. Set by `state.resume_frame()` to
    /// override IP and locals after function invocation sets up the call frame.
    pub(crate) pending_frame_resume: Option<FrameResumeData>,

    /// Optional VM metrics collector. `None` when `VMConfig.metrics_enabled`
    /// is false (the default), giving zero per-instruction overhead.
    pub metrics: Option<crate::metrics::VmMetrics>,

    /// Per-function feedback vectors for inline cache profiling.
    /// Indexed by function_id. None means no feedback collected for that function.
    /// Only populated when tiered compilation is enabled.
    feedback_vectors: Vec<Option<crate::feedback::FeedbackVector>>,

    /// Megamorphic property lookup cache. Used when a property access site has
    /// seen too many different schemas (>4 targets) and IC state is Megamorphic.
    megamorphic_cache: crate::megamorphic_cache::MegamorphicCache,
}
380
/// Data for resuming a single call frame mid-function.
///
/// Stored in `VirtualMachine::pending_frame_resume` until it is applied.
pub(crate) struct FrameResumeData {
    /// IP offset within the function to resume at.
    pub ip_offset: usize,
    /// Locals to restore in the resumed frame.
    pub locals: Vec<ValueWord>,
}
388
/// Exception handler for try/catch blocks.
///
/// Records where to jump when an exception is caught, plus the value-stack
/// size and call-stack depth captured when the handler was installed so both
/// can be unwound to that point.
#[derive(Debug, Clone)]
struct ExceptionHandler {
    /// Instruction pointer to jump to on exception
    catch_ip: usize,
    /// Stack size when handler was set up (for unwinding)
    stack_size: usize,
    /// Call stack depth when handler was set up
    call_depth: usize,
}
399
/// Loop context for break/continue.
///
/// One entry is kept per active loop on `VirtualMachine::loop_stack`.
// NOTE(review): both fields are presumably instruction pointers into the
// program's bytecode — confirm against the loop opcode handlers.
#[derive(Debug)]
struct LoopContext {
    /// Start of loop body (for continue)
    start: usize,
    /// End of loop (for break)
    end: usize,
}
408
/// Debug VM state snapshot for the debugger.
///
/// A small, plain-data view of the VM: the current instruction pointer and
/// how deep the call stack is.
#[derive(Debug)]
pub struct DebugVMState {
    /// Current instruction pointer
    pub ip: usize,
    /// Call stack depth
    pub call_stack_depth: usize,
}
417
418mod vm_impl_part1;
419mod vm_impl_part2;
420mod vm_impl_part3;
421mod vm_impl_part4;
422
423/// Replace the active wire transport provider used by VM transport builtins.
424pub fn set_transport_provider(
425 provider: std::sync::Arc<dyn builtins::transport_provider::WireTransportProvider>,
426) {
427 builtins::transport_provider::set_transport_provider(provider);
428}
429
/// Restore the default shape-wire transport provider.
///
/// Public counterpart to [`set_transport_provider`]; forwards to
/// `builtins::transport_provider::reset_transport_provider`.
pub fn reset_transport_provider() {
    builtins::transport_provider::reset_transport_provider();
}
434
/// Configure global QUIC settings used by `transport.quic()`.
///
/// Only available when the `quic` feature is enabled. All three arguments
/// are forwarded unchanged to
/// `builtins::transport_provider::configure_quic_transport`.
///
/// * `server_name` — server name to use for QUIC connections.
/// * `root_certs_der` — root certificates, one byte vector per certificate
///   (presumably DER-encoded, as the name suggests — TODO confirm).
/// * `connect_timeout` — optional connection timeout; `None` leaves the
///   choice to the transport provider.
#[cfg(feature = "quic")]
pub fn configure_quic_transport(
    server_name: String,
    root_certs_der: Vec<Vec<u8>>,
    connect_timeout: Option<std::time::Duration>,
) {
    builtins::transport_provider::configure_quic_transport(
        server_name,
        root_certs_der,
        connect_timeout,
    );
}
448
/// Clear global QUIC settings used by `transport.quic()`.
///
/// Only available when the `quic` feature is enabled. Forwards to
/// `builtins::transport_provider::clear_quic_transport_config`.
#[cfg(feature = "quic")]
pub fn clear_quic_transport_config() {
    builtins::transport_provider::clear_quic_transport_config();
}
454
/// Create the VM-backed `transport` module exports.
///
/// Crate-internal accessor that returns whatever
/// `builtins::transport_builtins::create_transport_module` builds, so code
/// outside this module does not need access to the private `builtins` module.
pub(crate) fn create_transport_module_exports() -> shape_runtime::module_exports::ModuleExports {
    builtins::transport_builtins::create_transport_module()
}
459
460/// Remap constant and string pool indices in a single instruction operand after
461/// a hot-patch splice. `const_offset` and `string_offset` are the starting
462/// indices in the global pools where the blob's local pools were appended.
463fn remap_operand(operand: &mut Option<Operand>, const_offset: usize, string_offset: usize) {
464 let Some(op) = operand.as_mut() else {
465 return;
466 };
467 match op {
468 Operand::Const(idx) => {
469 *idx = (*idx as usize + const_offset) as u16;
470 }
471 Operand::Property(idx) => {
472 *idx = (*idx as usize + string_offset) as u16;
473 }
474 Operand::Name(sid) => {
475 sid.0 = (sid.0 as usize + string_offset) as u32;
476 }
477 Operand::MethodCall { name, .. } => {
478 name.0 = (name.0 as usize + string_offset) as u32;
479 }
480 Operand::TypedMethodCall { string_id, .. } => {
481 *string_id = (*string_id as usize + string_offset) as u16;
482 }
483 // Other operands (Local, ModuleBinding, Offset, Function, Builtin,
484 // Count, ColumnIndex, TypedField, TypedObjectAlloc, TypedMerge,
485 // ColumnAccess, ForeignFunction) don't reference the constant or
486 // string pools.
487 _ => {}
488 }
489}