Skip to main content

shape_vm/executor/
vm_impl_part2.rs

1use super::*;
2
3impl VirtualMachine {
4    pub(super) fn pop_builtin_args(&mut self) -> Result<Vec<ValueWord>, VMError> {
5        // Pop arg count (top of stack)
6        let count_nb = self.pop_vw()?;
7        let count = count_nb.as_number_coerce().ok_or_else(|| {
8            VMError::RuntimeError(format!(
9                "Expected numeric arg count, got {:?}",
10                count_nb.type_name()
11            ))
12        })? as usize;
13
14        // Pop args in reverse order (stack is LIFO) then reverse to get correct order
15        let mut args = Vec::with_capacity(count);
16        for _ in 0..count {
17            args.push(self.pop_vw()?);
18        }
19        args.reverse();
20        Ok(args)
21    }
22    // ========================================================================
23
24    /// Enable output capture for testing
25    /// When enabled, print output goes to an internal buffer instead of stdout
26    pub fn enable_output_capture(&mut self) {
27        self.output_buffer = Some(Vec::new());
28    }
29
30    /// Get captured output (returns empty vec if capture not enabled)
31    pub fn get_captured_output(&self) -> Vec<String> {
32        self.output_buffer.clone().unwrap_or_default()
33    }
34
35    /// Clear captured output
36    pub fn clear_captured_output(&mut self) {
37        if let Some(ref mut buf) = self.output_buffer {
38            buf.clear();
39        }
40    }
41
42    /// Write to output (either buffer or stdout)
43    pub(crate) fn write_output(&mut self, text: &str) {
44        if let Some(ref mut buf) = self.output_buffer {
45            buf.push(text.to_string());
46        } else {
47            println!("{}", text);
48        }
49    }
50
51    /// Set a module_binding variable by name using ValueWord directly.
52    pub(crate) fn set_module_binding_by_name_nb(&mut self, name: &str, value: ValueWord) {
53        if let Some(idx) = self
54            .program
55            .module_binding_names
56            .iter()
57            .position(|n| n == name)
58        {
59            if idx < self.module_bindings.len() {
60                // BARRIER: heap write site — overwrites module binding by name
61                self.module_bindings[idx] = value;
62            } else {
63                self.module_bindings.resize_with(idx + 1, ValueWord::none);
64                // BARRIER: heap write site — overwrites module binding by name (after resize)
65                self.module_bindings[idx] = value;
66            }
67        }
68    }
69
70    /// Load a program into the VM
71    pub fn load_program(&mut self, program: BytecodeProgram) {
72        // Content-addressed bytecode is the canonical runtime format.
73        // Do not silently fall back to the flat instruction stream if linking fails.
74        if let Some(ref ca_program) = program.content_addressed {
75            let linked = crate::linker::link(ca_program).unwrap_or_else(|e| {
76                panic!(
77                    "content-addressed linker failed ({} function blobs): {}",
78                    ca_program.function_store.len(),
79                    e
80                )
81            });
82            self.load_linked_program(linked);
83            return;
84        }
85
86        self.program = program;
87        if shape_runtime::type_schema::builtin_schemas::resolve_builtin_schema_ids(
88            &self.program.type_schema_registry,
89        )
90        .is_none()
91        {
92            // Programs built manually in tests may omit builtin schemas.
93            // Merge the static stdlib registry (includes builtin fixed schemas)
94            // without synthesizing any dynamic runtime schemas.
95            let (stdlib_registry, _) =
96                shape_runtime::type_schema::TypeSchemaRegistry::with_stdlib_types_and_builtin_ids();
97            self.program.type_schema_registry.merge(stdlib_registry);
98        }
99        self.builtin_schemas =
100            shape_runtime::type_schema::builtin_schemas::resolve_builtin_schema_ids(
101                &self.program.type_schema_registry,
102            )
103            .expect(
104                "compiled program is missing builtin schemas (__AnyError, __TraceFrame, ...); \
105             schema registry must include static builtin schemas",
106            );
107        // Reserve schema IDs above the compiled program registry.
108        let max_program_id = self
109            .program
110            .type_schema_registry
111            .max_schema_id()
112            .unwrap_or(0);
113        shape_runtime::type_schema::ensure_next_schema_id_above(max_program_id);
114        self.rebuild_function_name_index();
115        self.populate_content_addressed_metadata();
116        self.program_entry_ip = 0;
117        self.module_init_done = false;
118        self.feedback_vectors
119            .resize_with(self.program.functions.len(), || None);
120        self.reset();
121
122        // Bytecode verification: ensure trusted opcodes have valid FrameDescriptors.
123        #[cfg(debug_assertions)]
124        {
125            if let Err(errors) = crate::bytecode::verifier::verify_trusted_opcodes(&self.program) {
126                eprintln!(
127                    "Bytecode verification warning: {} violation(s) found",
128                    errors.len()
129                );
130                for e in &errors {
131                    eprintln!("  - {}", e);
132                }
133            }
134        }
135
136        #[cfg(not(debug_assertions))]
137        {
138            if let Err(errors) = crate::bytecode::verifier::verify_trusted_opcodes(&self.program) {
139                eprintln!(
140                    "Bytecode verification failed: {} violation(s)",
141                    errors.len()
142                );
143                for e in &errors {
144                    eprintln!("  - {}", e);
145                }
146            }
147        }
148    }
149
150    /// Load a `LinkedProgram` into the VM, extracting content-addressed metadata
151    /// directly from the linked function table.
152    ///
153    /// This converts the `LinkedProgram` into the flat `BytecodeProgram` layout that
154    /// the executor expects, then populates `function_hashes` and `function_entry_points`
155    /// from the linked function metadata.
156    pub fn load_linked_program(&mut self, linked: crate::bytecode::LinkedProgram) {
157        let entry_function_id = linked
158            .hash_to_id
159            .get(&linked.entry)
160            .copied()
161            .or_else(|| linked.functions.iter().position(|f| f.name == "__main__"))
162            .unwrap_or(0);
163        let entry_ip = linked
164            .functions
165            .get(entry_function_id)
166            .map(|f| f.entry_point)
167            .unwrap_or(0);
168
169        // Extract hash metadata before converting
170        let hashes: Vec<Option<FunctionHash>> = linked
171            .functions
172            .iter()
173            .map(|lf| {
174                if lf.blob_hash == FunctionHash::ZERO {
175                    None
176                } else {
177                    Some(lf.blob_hash)
178                }
179            })
180            .collect();
181        let entry_points: Vec<usize> = linked.functions.iter().map(|lf| lf.entry_point).collect();
182
183        // Convert LinkedProgram functions to BytecodeProgram functions
184        let functions: Vec<crate::bytecode::Function> = linked
185            .functions
186            .iter()
187            .map(|lf| crate::bytecode::Function {
188                name: lf.name.clone(),
189                arity: lf.arity,
190                param_names: lf.param_names.clone(),
191                locals_count: lf.locals_count,
192                entry_point: lf.entry_point,
193                body_length: lf.body_length,
194                is_closure: lf.is_closure,
195                captures_count: lf.captures_count,
196                is_async: lf.is_async,
197                ref_params: lf.ref_params.clone(),
198                ref_mutates: lf.ref_mutates.clone(),
199                mutable_captures: lf.mutable_captures.clone(),
200                frame_descriptor: lf.frame_descriptor.clone(),
201                osr_entry_points: Vec::new(),
202            })
203            .collect();
204
205        let program = BytecodeProgram {
206            instructions: linked.instructions,
207            constants: linked.constants,
208            strings: linked.strings,
209            functions,
210            debug_info: linked.debug_info,
211            data_schema: linked.data_schema,
212            module_binding_names: linked.module_binding_names,
213            top_level_locals_count: linked.top_level_locals_count,
214            top_level_local_storage_hints: linked.top_level_local_storage_hints,
215            type_schema_registry: linked.type_schema_registry,
216            module_binding_storage_hints: linked.module_binding_storage_hints,
217            function_local_storage_hints: linked.function_local_storage_hints,
218            trait_method_symbols: linked.trait_method_symbols,
219            foreign_functions: linked.foreign_functions,
220            native_struct_layouts: linked.native_struct_layouts,
221            function_blob_hashes: entry_points
222                .iter()
223                .enumerate()
224                .map(|(idx, _)| hashes.get(idx).copied().flatten())
225                .collect(),
226            ..BytecodeProgram::default()
227        };
228
229        // Load the program normally (handles schema resolution, function name index, etc.)
230        self.load_program(program);
231
232        // Override the content-addressed metadata with the linked data
233        // (load_program calls populate_content_addressed_metadata which won't find
234        // content_addressed since we didn't set it — override here)
235        self.function_hashes = hashes;
236        self.function_hash_raw = self
237            .function_hashes
238            .iter()
239            .map(|opt| opt.map(|fh| fh.0))
240            .collect();
241        self.function_id_by_hash.clear();
242        for (idx, maybe_hash) in self.function_hashes.iter().enumerate() {
243            if let Some(hash) = maybe_hash {
244                self.function_id_by_hash.entry(*hash).or_insert(idx as u16);
245            }
246        }
247        self.function_entry_points = entry_points;
248        self.program_entry_ip = entry_ip;
249        self.reset();
250    }
251
252    /// Hot-patch a single function in the loaded program with a new blob.
253    ///
254    /// The new blob's instructions, constants, and strings replace the existing
255    /// function's bytecode in-place. The function's metadata (arity, param names,
256    /// locals count, etc.) is also updated. The content hash is recorded so that
257    /// in-flight frames referencing the old hash remain valid (they execute from
258    /// their saved IP which is now stale, but callers that resolve by function ID
259    /// will pick up the new code on the next call).
260    ///
261    /// Returns `Ok(old_hash)` on success (the previous content hash, if any),
262    /// or `Err(msg)` if the function ID is out of range.
263    pub fn patch_function(
264        &mut self,
265        fn_id: u16,
266        new_blob: FunctionBlob,
267    ) -> Result<Option<FunctionHash>, String> {
268        let idx = fn_id as usize;
269
270        if idx >= self.program.functions.len() {
271            return Err(format!(
272                "patch_function: fn_id {} out of range (program has {} functions)",
273                fn_id,
274                self.program.functions.len()
275            ));
276        }
277
278        // Capture the old hash before overwriting.
279        let old_hash = self.function_hashes.get(idx).copied().flatten();
280
281        let func = &mut self.program.functions[idx];
282        let old_entry = func.entry_point;
283
284        // Compute instruction splice range: from this function's entry point
285        // to the next function's entry point (or end of instructions).
286        let next_entry = self
287            .program
288            .functions
289            .get(idx + 1)
290            .map(|f| f.entry_point)
291            .unwrap_or(self.program.instructions.len());
292
293        let old_len = next_entry - old_entry;
294        let new_len = new_blob.instructions.len();
295
296        // Splice instructions.
297        self.program.instructions.splice(
298            old_entry..old_entry + old_len,
299            new_blob.instructions.iter().cloned(),
300        );
301
302        // If the new function has a different instruction count, shift all
303        // subsequent function entry points.
304        if new_len != old_len {
305            let delta = new_len as isize - old_len as isize;
306            for subsequent in self.program.functions.iter_mut().skip(idx + 1) {
307                subsequent.entry_point = (subsequent.entry_point as isize + delta) as usize;
308            }
309            // Also update function_entry_points mirror.
310            for ep in self.function_entry_points.iter_mut().skip(idx + 1) {
311                *ep = (*ep as isize + delta) as usize;
312            }
313        }
314
315        // Append new constants and strings to the program pools.
316        // The blob's Operand indices reference its local pools, so we need to
317        // remap them to the global pool offsets.
318        let const_offset = self.program.constants.len();
319        let string_offset = self.program.strings.len();
320        self.program
321            .constants
322            .extend(new_blob.constants.iter().cloned());
323        self.program
324            .strings
325            .extend(new_blob.strings.iter().cloned());
326
327        // Remap operands in the spliced instructions to use global pool offsets.
328        let instr_slice = &mut self.program.instructions[old_entry..old_entry + new_len];
329        for instr in instr_slice.iter_mut() {
330            remap_operand(&mut instr.operand, const_offset, string_offset);
331        }
332
333        // Update function metadata.
334        let func = &mut self.program.functions[idx];
335        func.name = new_blob.name;
336        func.arity = new_blob.arity;
337        func.param_names = new_blob.param_names;
338        func.locals_count = new_blob.locals_count;
339        func.is_closure = new_blob.is_closure;
340        func.captures_count = new_blob.captures_count;
341        func.is_async = new_blob.is_async;
342        func.ref_params = new_blob.ref_params;
343        func.ref_mutates = new_blob.ref_mutates;
344        func.mutable_captures = new_blob.mutable_captures;
345
346        // Update content hash metadata.
347        let new_hash = new_blob.content_hash;
348        if idx < self.function_hashes.len() {
349            self.function_hashes[idx] = Some(new_hash);
350        }
351        if idx < self.function_hash_raw.len() {
352            self.function_hash_raw[idx] = Some(new_hash.0);
353        }
354        self.function_id_by_hash.entry(new_hash).or_insert(fn_id);
355
356        // Update function_entry_points for this function.
357        if idx < self.function_entry_points.len() {
358            self.function_entry_points[idx] = old_entry;
359        }
360
361        // Rebuild function name index so UFCS dispatch picks up renames.
362        self.rebuild_function_name_index();
363
364        Ok(old_hash)
365    }
366
367    /// Load a content-addressed `Program` with permission checking.
368    ///
369    /// Links the program, checks that `total_required_permissions` is a subset of
370    /// `granted`, and loads normally if the check passes. Returns an error listing
371    /// the missing permissions if the check fails.
372    pub fn load_program_with_permissions(
373        &mut self,
374        program: crate::bytecode::Program,
375        granted: &shape_abi_v1::PermissionSet,
376    ) -> Result<(), PermissionError> {
377        let linked =
378            crate::linker::link(&program).map_err(|e| PermissionError::LinkError(e.to_string()))?;
379        if !linked.total_required_permissions.is_subset(granted) {
380            let missing = linked.total_required_permissions.difference(granted);
381            return Err(PermissionError::InsufficientPermissions {
382                required: linked.total_required_permissions.clone(),
383                granted: granted.clone(),
384                missing,
385            });
386        }
387        self.load_linked_program(linked);
388        Ok(())
389    }
390
391    /// Load a `LinkedProgram` with permission checking.
392    ///
393    /// Checks that `total_required_permissions` is a subset of `granted`, then
394    /// loads normally. Returns an error listing the missing permissions if the
395    /// check fails.
396    pub fn load_linked_program_with_permissions(
397        &mut self,
398        linked: crate::bytecode::LinkedProgram,
399        granted: &shape_abi_v1::PermissionSet,
400    ) -> Result<(), PermissionError> {
401        if !linked.total_required_permissions.is_subset(granted) {
402            let missing = linked.total_required_permissions.difference(granted);
403            return Err(PermissionError::InsufficientPermissions {
404                required: linked.total_required_permissions.clone(),
405                granted: granted.clone(),
406                missing,
407            });
408        }
409        self.load_linked_program(linked);
410        Ok(())
411    }
412
413    /// Populate `function_hashes` and `function_entry_points` from the loaded program.
414    ///
415    /// If the program was compiled with content-addressed metadata (`content_addressed`
416    /// is `Some`), we extract blob hashes by matching function names/entry points.
417    /// Otherwise both vectors remain empty and `CallFrame::blob_hash` will be `None`.
418    pub(super) fn populate_content_addressed_metadata(&mut self) {
419        let func_count = self.program.functions.len();
420        self.function_entry_points = self
421            .program
422            .functions
423            .iter()
424            .map(|f| f.entry_point)
425            .collect();
426
427        if self.program.function_blob_hashes.len() == func_count {
428            self.function_hashes = self.program.function_blob_hashes.clone();
429        } else if let Some(ref ca_program) = self.program.content_addressed {
430            // Build a lookup from function name -> blob hash from the Program's function_store
431            let mut name_to_hash: HashMap<String, FunctionHash> =
432                HashMap::with_capacity(ca_program.function_store.len());
433            for (hash, blob) in &ca_program.function_store {
434                name_to_hash.insert(blob.name.clone(), *hash);
435            }
436
437            self.function_hashes = Vec::with_capacity(func_count);
438            for func in &self.program.functions {
439                self.function_hashes
440                    .push(name_to_hash.get(&func.name).copied());
441            }
442        } else {
443            self.function_hashes = vec![None; func_count];
444        }
445
446        // Build the raw byte mirror for ModuleContext.
447        self.function_hash_raw = self
448            .function_hashes
449            .iter()
450            .map(|opt| opt.map(|fh| fh.0))
451            .collect();
452        self.function_id_by_hash.clear();
453        for (idx, maybe_hash) in self.function_hashes.iter().enumerate() {
454            if let Some(hash) = maybe_hash {
455                self.function_id_by_hash.entry(*hash).or_insert(idx as u16);
456            }
457        }
458    }
459
460    /// Build the function name → index map for runtime UFCS dispatch.
461    /// Called after program load or merge to enable type-scoped method resolution
462    /// (e.g., "DbTable::filter" looked up when calling .filter() on an Object with __type "DbTable").
463    pub(super) fn rebuild_function_name_index(&mut self) {
464        self.function_name_index.clear();
465        for (i, func) in self.program.functions.iter().enumerate() {
466            self.function_name_index.insert(func.name.clone(), i as u16);
467        }
468    }
469
470    /// Reset VM state
471    pub fn reset(&mut self) {
472        self.ip = self.program_entry_ip;
473        for i in 0..self.sp {
474            self.stack[i] = ValueWord::none();
475        }
476        // Advance sp past top-level locals so expression evaluation
477        // doesn't overlap with local variable storage in register windows.
478        let tl = self.program.top_level_locals_count as usize;
479        self.sp = tl;
480        self.call_stack.clear();
481        self.loop_stack.clear();
482        self.timeframe_stack.clear();
483        self.exception_handlers.clear();
484        self.instruction_count = 0;
485        self.last_error_line = None;
486        self.last_error_file = None;
487        self.last_uncaught_exception = None;
488    }
489
490    /// Reset stack only (for reusing compiled program across iterations)
491    /// Keeps program, module_bindings, and GC state intact - only clears execution state
492    pub fn reset_stack(&mut self) {
493        self.ip = self.program_entry_ip;
494        for i in 0..self.sp {
495            self.stack[i] = ValueWord::none();
496        }
497        let tl = self.program.top_level_locals_count as usize;
498        self.sp = tl;
499        self.call_stack.clear();
500        self.loop_stack.clear();
501        self.timeframe_stack.clear();
502        self.exception_handlers.clear();
503        self.last_error_line = None;
504        self.last_error_file = None;
505        self.last_uncaught_exception = None;
506    }
507
508    /// Minimal reset for hot loops - only clears essential state
509    /// Use this when you know the function doesn't create GC objects or use exceptions
510    #[inline]
511    pub fn reset_minimal(&mut self) {
512        self.ip = self.program_entry_ip;
513        for i in 0..self.sp {
514            self.stack[i] = ValueWord::none();
515        }
516        let tl = self.program.top_level_locals_count as usize;
517        self.sp = tl;
518        self.call_stack.clear();
519        self.last_error_line = None;
520        self.last_error_file = None;
521        self.last_uncaught_exception = None;
522    }
523
524    /// Get the line number of the last error (for LSP integration)
525    pub fn last_error_line(&self) -> Option<u32> {
526        self.last_error_line
527    }
528
529    /// Get the file path of the last error (for LSP integration)
530    pub fn last_error_file(&self) -> Option<&str> {
531        self.last_error_file.as_deref()
532    }
533
534    /// Capture an uncaught exception payload for host-side rendering.
535    pub(crate) fn set_last_uncaught_exception(&mut self, value: ValueWord) {
536        self.last_uncaught_exception = Some(value);
537    }
538
539    /// Clear any previously captured uncaught exception payload.
540    pub(crate) fn clear_last_uncaught_exception(&mut self) {
541        self.last_uncaught_exception = None;
542    }
543
544    /// Take the last uncaught exception payload if present.
545    pub fn take_last_uncaught_exception(&mut self) -> Option<ValueWord> {
546        self.last_uncaught_exception.take()
547    }
548
549    // execute(), execute_with_suspend(), execute_fast(), execute_instruction(),
550    // execute_until_call_depth(), enrich_error_with_location() moved to dispatch module.
551
552    // execute_function_by_name/id(), execute_closure(), execute_function_fast(),
553    // execute_function_with_named_args(), resume(), execute_with_async(),
554    // resolve_spawned_task(), call_function_with_nb_args(), call_closure_with_nb_args(),
555    // call_value_immediate_nb(), call_function_from_stack()
556    // moved to call_convention module.
557
558    // snapshot(), from_snapshot() moved to snapshot module.
559
560    // handle_window_functions(), handle_join_execute(), handle_eval_datetime_expr(),
561    // exec_bind_schema(), exec_load_col() moved to window_join module.
562
563    // ===== Stack Operations =====
564
565    /// Push a value onto the stack (public, for testing and host integration)
566    pub fn push_value(&mut self, value: ValueWord) {
567        if self.sp >= self.stack.len() {
568            self.stack.resize_with(self.sp * 2 + 1, ValueWord::none);
569        }
570        self.stack[self.sp] = value;
571        self.sp += 1;
572    }
573
574    /// Get function ID for fast repeated calls (avoids name lookup in hot loops)
575    pub fn get_function_id(&self, name: &str) -> Option<u16> {
576        self.program
577            .functions
578            .iter()
579            .position(|f| f.name == name)
580            .map(|id| id as u16)
581    }
582}