Skip to main content

shape_vm/executor/vm_impl/
program.rs

1use super::super::*;
2
3impl VirtualMachine {
4    /// Load a program into the VM
5    pub fn load_program(&mut self, program: BytecodeProgram) {
6        // Content-addressed bytecode is the canonical runtime format.
7        // Do not silently fall back to the flat instruction stream if linking fails.
8        if let Some(ref ca_program) = program.content_addressed {
9            let linked = crate::linker::link(ca_program).unwrap_or_else(|e| {
10                panic!(
11                    "content-addressed linker failed ({} function blobs): {}",
12                    ca_program.function_store.len(),
13                    e
14                )
15            });
16            self.load_linked_program(linked);
17            return;
18        }
19
20        self.program = program;
21        if shape_runtime::type_schema::builtin_schemas::resolve_builtin_schema_ids(
22            &self.program.type_schema_registry,
23        )
24        .is_none()
25        {
26            // Programs built manually in tests may omit builtin schemas.
27            // Merge the static stdlib registry (includes builtin fixed schemas)
28            // without synthesizing any dynamic runtime schemas.
29            let (stdlib_registry, _) =
30                shape_runtime::type_schema::TypeSchemaRegistry::with_stdlib_types_and_builtin_ids();
31            self.program.type_schema_registry.merge(stdlib_registry);
32        }
33        self.builtin_schemas =
34            shape_runtime::type_schema::builtin_schemas::resolve_builtin_schema_ids(
35                &self.program.type_schema_registry,
36            )
37            .expect(
38                "compiled program is missing builtin schemas (__AnyError, __TraceFrame, ...); \
39             schema registry must include static builtin schemas",
40            );
41        // Reserve schema IDs above the compiled program registry.
42        let max_program_id = self
43            .program
44            .type_schema_registry
45            .max_schema_id()
46            .unwrap_or(0);
47        shape_runtime::type_schema::ensure_next_schema_id_above(max_program_id);
48        self.rebuild_function_name_index();
49        self.populate_content_addressed_metadata();
50        self.program_entry_ip = 0;
51        self.module_init_done = false;
52        self.feedback_vectors
53            .resize_with(self.program.functions.len(), || None);
54        self.reset();
55
56        // Bytecode verification: ensure trusted opcodes have valid FrameDescriptors.
57        #[cfg(debug_assertions)]
58        {
59            if let Err(errors) = crate::bytecode::verifier::verify_trusted_opcodes(&self.program) {
60                eprintln!(
61                    "Bytecode verification warning: {} violation(s) found",
62                    errors.len()
63                );
64                for e in &errors {
65                    eprintln!("  - {}", e);
66                }
67            }
68        }
69
70        #[cfg(not(debug_assertions))]
71        {
72            if let Err(errors) = crate::bytecode::verifier::verify_trusted_opcodes(&self.program) {
73                eprintln!(
74                    "Bytecode verification failed: {} violation(s)",
75                    errors.len()
76                );
77                for e in &errors {
78                    eprintln!("  - {}", e);
79                }
80            }
81        }
82    }
83
84    /// Load a `LinkedProgram` into the VM, extracting content-addressed metadata
85    /// directly from the linked function table.
86    ///
87    /// This converts the `LinkedProgram` into the flat `BytecodeProgram` layout that
88    /// the executor expects, then populates `function_hashes` and `function_entry_points`
89    /// from the linked function metadata.
90    pub fn load_linked_program(&mut self, linked: crate::bytecode::LinkedProgram) {
91        let entry_function_id = linked
92            .hash_to_id
93            .get(&linked.entry)
94            .copied()
95            .or_else(|| linked.functions.iter().position(|f| f.name == "__main__"))
96            .unwrap_or(0);
97        let entry_ip = linked
98            .functions
99            .get(entry_function_id)
100            .map(|f| f.entry_point)
101            .unwrap_or(0);
102
103        // Extract hash metadata before converting
104        let hashes: Vec<Option<FunctionHash>> = linked
105            .functions
106            .iter()
107            .map(|lf| {
108                if lf.blob_hash == FunctionHash::ZERO {
109                    None
110                } else {
111                    Some(lf.blob_hash)
112                }
113            })
114            .collect();
115        let entry_points: Vec<usize> = linked.functions.iter().map(|lf| lf.entry_point).collect();
116
117        // Convert LinkedProgram functions to BytecodeProgram functions
118        let functions: Vec<crate::bytecode::Function> = linked
119            .functions
120            .iter()
121            .map(|lf| crate::bytecode::Function {
122                name: lf.name.clone(),
123                arity: lf.arity,
124                param_names: lf.param_names.clone(),
125                locals_count: lf.locals_count,
126                entry_point: lf.entry_point,
127                body_length: lf.body_length,
128                is_closure: lf.is_closure,
129                captures_count: lf.captures_count,
130                is_async: lf.is_async,
131                ref_params: lf.ref_params.clone(),
132                ref_mutates: lf.ref_mutates.clone(),
133                mutable_captures: lf.mutable_captures.clone(),
134                frame_descriptor: lf.frame_descriptor.clone(),
135                osr_entry_points: Vec::new(),
136            })
137            .collect();
138
139        let program = BytecodeProgram {
140            instructions: linked.instructions,
141            constants: linked.constants,
142            strings: linked.strings,
143            functions,
144            debug_info: linked.debug_info,
145            data_schema: linked.data_schema,
146            module_binding_names: linked.module_binding_names,
147            top_level_locals_count: linked.top_level_locals_count,
148            top_level_local_storage_hints: linked.top_level_local_storage_hints,
149            type_schema_registry: linked.type_schema_registry,
150            module_binding_storage_hints: linked.module_binding_storage_hints,
151            function_local_storage_hints: linked.function_local_storage_hints,
152            trait_method_symbols: linked.trait_method_symbols,
153            foreign_functions: linked.foreign_functions,
154            native_struct_layouts: linked.native_struct_layouts,
155            function_blob_hashes: entry_points
156                .iter()
157                .enumerate()
158                .map(|(idx, _)| hashes.get(idx).copied().flatten())
159                .collect(),
160            ..BytecodeProgram::default()
161        };
162
163        // Load the program normally (handles schema resolution, function name index, etc.)
164        self.load_program(program);
165
166        // Override the content-addressed metadata with the linked data
167        // (load_program calls populate_content_addressed_metadata which won't find
168        // content_addressed since we didn't set it — override here)
169        self.function_hashes = hashes;
170        self.function_hash_raw = self
171            .function_hashes
172            .iter()
173            .map(|opt| opt.map(|fh| fh.0))
174            .collect();
175        self.function_id_by_hash.clear();
176        for (idx, maybe_hash) in self.function_hashes.iter().enumerate() {
177            if let Some(hash) = maybe_hash {
178                self.function_id_by_hash.entry(*hash).or_insert(idx as u16);
179            }
180        }
181        self.function_entry_points = entry_points;
182        self.program_entry_ip = entry_ip;
183        self.reset();
184    }
185
186    /// Hot-patch a single function in the loaded program with a new blob.
187    ///
188    /// The new blob's instructions, constants, and strings replace the existing
189    /// function's bytecode in-place. The function's metadata (arity, param names,
190    /// locals count, etc.) is also updated. The content hash is recorded so that
191    /// in-flight frames referencing the old hash remain valid (they execute from
192    /// their saved IP which is now stale, but callers that resolve by function ID
193    /// will pick up the new code on the next call).
194    ///
195    /// Returns `Ok(old_hash)` on success (the previous content hash, if any),
196    /// or `Err(msg)` if the function ID is out of range.
197    pub fn patch_function(
198        &mut self,
199        fn_id: u16,
200        new_blob: FunctionBlob,
201    ) -> Result<Option<FunctionHash>, String> {
202        let idx = fn_id as usize;
203
204        if idx >= self.program.functions.len() {
205            return Err(format!(
206                "patch_function: fn_id {} out of range (program has {} functions)",
207                fn_id,
208                self.program.functions.len()
209            ));
210        }
211
212        // Capture the old hash before overwriting.
213        let old_hash = self.function_hashes.get(idx).copied().flatten();
214
215        let func = &mut self.program.functions[idx];
216        let old_entry = func.entry_point;
217
218        // Compute instruction splice range: from this function's entry point
219        // to the next function's entry point (or end of instructions).
220        let next_entry = self
221            .program
222            .functions
223            .get(idx + 1)
224            .map(|f| f.entry_point)
225            .unwrap_or(self.program.instructions.len());
226
227        let old_len = next_entry - old_entry;
228        let new_len = new_blob.instructions.len();
229
230        // Splice instructions.
231        self.program.instructions.splice(
232            old_entry..old_entry + old_len,
233            new_blob.instructions.iter().cloned(),
234        );
235
236        // If the new function has a different instruction count, shift all
237        // subsequent function entry points.
238        if new_len != old_len {
239            let delta = new_len as isize - old_len as isize;
240            for subsequent in self.program.functions.iter_mut().skip(idx + 1) {
241                subsequent.entry_point = (subsequent.entry_point as isize + delta) as usize;
242            }
243            // Also update function_entry_points mirror.
244            for ep in self.function_entry_points.iter_mut().skip(idx + 1) {
245                *ep = (*ep as isize + delta) as usize;
246            }
247        }
248
249        // Append new constants and strings to the program pools.
250        // The blob's Operand indices reference its local pools, so we need to
251        // remap them to the global pool offsets.
252        let const_offset = self.program.constants.len();
253        let string_offset = self.program.strings.len();
254        self.program
255            .constants
256            .extend(new_blob.constants.iter().cloned());
257        self.program
258            .strings
259            .extend(new_blob.strings.iter().cloned());
260
261        // Remap operands in the spliced instructions to use global pool offsets.
262        let instr_slice = &mut self.program.instructions[old_entry..old_entry + new_len];
263        for instr in instr_slice.iter_mut() {
264            remap_operand(&mut instr.operand, const_offset, string_offset);
265        }
266
267        // Update function metadata.
268        let func = &mut self.program.functions[idx];
269        func.name = new_blob.name;
270        func.arity = new_blob.arity;
271        func.param_names = new_blob.param_names;
272        func.locals_count = new_blob.locals_count;
273        func.is_closure = new_blob.is_closure;
274        func.captures_count = new_blob.captures_count;
275        func.is_async = new_blob.is_async;
276        func.ref_params = new_blob.ref_params;
277        func.ref_mutates = new_blob.ref_mutates;
278        func.mutable_captures = new_blob.mutable_captures;
279
280        // Update content hash metadata.
281        let new_hash = new_blob.content_hash;
282        if idx < self.function_hashes.len() {
283            self.function_hashes[idx] = Some(new_hash);
284        }
285        if idx < self.function_hash_raw.len() {
286            self.function_hash_raw[idx] = Some(new_hash.0);
287        }
288        self.function_id_by_hash.entry(new_hash).or_insert(fn_id);
289
290        // Update function_entry_points for this function.
291        if idx < self.function_entry_points.len() {
292            self.function_entry_points[idx] = old_entry;
293        }
294
295        // Rebuild function name index so UFCS dispatch picks up renames.
296        self.rebuild_function_name_index();
297
298        Ok(old_hash)
299    }
300
301    /// Load a content-addressed `Program` with permission checking.
302    ///
303    /// Links the program, checks that `total_required_permissions` is a subset of
304    /// `granted`, and loads normally if the check passes. Returns an error listing
305    /// the missing permissions if the check fails.
306    pub fn load_program_with_permissions(
307        &mut self,
308        program: crate::bytecode::Program,
309        granted: &shape_abi_v1::PermissionSet,
310    ) -> Result<(), PermissionError> {
311        let linked =
312            crate::linker::link(&program).map_err(|e| PermissionError::LinkError(e.to_string()))?;
313        if !linked.total_required_permissions.is_subset(granted) {
314            let missing = linked.total_required_permissions.difference(granted);
315            return Err(PermissionError::InsufficientPermissions {
316                required: linked.total_required_permissions.clone(),
317                granted: granted.clone(),
318                missing,
319            });
320        }
321        self.load_linked_program(linked);
322        Ok(())
323    }
324
325    /// Load a `LinkedProgram` with permission checking.
326    ///
327    /// Checks that `total_required_permissions` is a subset of `granted`, then
328    /// loads normally. Returns an error listing the missing permissions if the
329    /// check fails.
330    pub fn load_linked_program_with_permissions(
331        &mut self,
332        linked: crate::bytecode::LinkedProgram,
333        granted: &shape_abi_v1::PermissionSet,
334    ) -> Result<(), PermissionError> {
335        if !linked.total_required_permissions.is_subset(granted) {
336            let missing = linked.total_required_permissions.difference(granted);
337            return Err(PermissionError::InsufficientPermissions {
338                required: linked.total_required_permissions.clone(),
339                granted: granted.clone(),
340                missing,
341            });
342        }
343        self.load_linked_program(linked);
344        Ok(())
345    }
346
347    /// Populate `function_hashes` and `function_entry_points` from the loaded program.
348    ///
349    /// If the program was compiled with content-addressed metadata (`content_addressed`
350    /// is `Some`), we extract blob hashes by matching function names/entry points.
351    /// Otherwise both vectors remain empty and `CallFrame::blob_hash` will be `None`.
352    pub(crate) fn populate_content_addressed_metadata(&mut self) {
353        let func_count = self.program.functions.len();
354        self.function_entry_points = self
355            .program
356            .functions
357            .iter()
358            .map(|f| f.entry_point)
359            .collect();
360
361        if self.program.function_blob_hashes.len() == func_count {
362            self.function_hashes = self.program.function_blob_hashes.clone();
363        } else if let Some(ref ca_program) = self.program.content_addressed {
364            // Build a lookup from function name -> blob hash from the Program's function_store
365            let mut name_to_hash: HashMap<String, FunctionHash> =
366                HashMap::with_capacity(ca_program.function_store.len());
367            for (hash, blob) in &ca_program.function_store {
368                name_to_hash.insert(blob.name.clone(), *hash);
369            }
370
371            self.function_hashes = Vec::with_capacity(func_count);
372            for func in &self.program.functions {
373                self.function_hashes
374                    .push(name_to_hash.get(&func.name).copied());
375            }
376        } else {
377            self.function_hashes = vec![None; func_count];
378        }
379
380        // Build the raw byte mirror for ModuleContext.
381        self.function_hash_raw = self
382            .function_hashes
383            .iter()
384            .map(|opt| opt.map(|fh| fh.0))
385            .collect();
386        self.function_id_by_hash.clear();
387        for (idx, maybe_hash) in self.function_hashes.iter().enumerate() {
388            if let Some(hash) = maybe_hash {
389                self.function_id_by_hash.entry(*hash).or_insert(idx as u16);
390            }
391        }
392    }
393
394    /// Build the function name → index map for runtime UFCS dispatch.
395    /// Called after program load or merge to enable type-scoped method resolution
396    /// (e.g., "DbTable::filter" looked up when calling .filter() on an Object with __type "DbTable").
397    pub(crate) fn rebuild_function_name_index(&mut self) {
398        self.function_name_index.clear();
399        for (i, func) in self.program.functions.iter().enumerate() {
400            self.function_name_index.insert(func.name.clone(), i as u16);
401        }
402    }
403
404    /// Reset VM state
405    pub fn reset(&mut self) {
406        self.ip = self.program_entry_ip;
407        for i in 0..self.sp {
408            self.stack[i] = ValueWord::none();
409        }
410        // Advance sp past top-level locals so expression evaluation
411        // doesn't overlap with local variable storage in register windows.
412        let tl = self.program.top_level_locals_count as usize;
413        self.sp = tl;
414        self.call_stack.clear();
415        self.loop_stack.clear();
416        self.timeframe_stack.clear();
417        self.exception_handlers.clear();
418        self.instruction_count = 0;
419        self.last_error_line = None;
420        self.last_error_file = None;
421        self.last_uncaught_exception = None;
422    }
423
424    /// Reset stack only (for reusing compiled program across iterations)
425    /// Keeps program, module_bindings, and GC state intact - only clears execution state
426    pub fn reset_stack(&mut self) {
427        self.ip = self.program_entry_ip;
428        for i in 0..self.sp {
429            self.stack[i] = ValueWord::none();
430        }
431        let tl = self.program.top_level_locals_count as usize;
432        self.sp = tl;
433        self.call_stack.clear();
434        self.loop_stack.clear();
435        self.timeframe_stack.clear();
436        self.exception_handlers.clear();
437        self.last_error_line = None;
438        self.last_error_file = None;
439        self.last_uncaught_exception = None;
440    }
441
442    /// Minimal reset for hot loops - only clears essential state
443    /// Use this when you know the function doesn't create GC objects or use exceptions
444    #[inline]
445    pub fn reset_minimal(&mut self) {
446        self.ip = self.program_entry_ip;
447        for i in 0..self.sp {
448            self.stack[i] = ValueWord::none();
449        }
450        let tl = self.program.top_level_locals_count as usize;
451        self.sp = tl;
452        self.call_stack.clear();
453        self.last_error_line = None;
454        self.last_error_file = None;
455        self.last_uncaught_exception = None;
456    }
457
458    /// Push a value onto the stack (public, for testing and host integration)
459    pub fn push_value(&mut self, value: ValueWord) {
460        if self.sp >= self.stack.len() {
461            self.stack.resize_with(self.sp * 2 + 1, ValueWord::none);
462        }
463        self.stack[self.sp] = value;
464        self.sp += 1;
465    }
466}