Skip to main content

shape_vm/executor/vm_impl/
modules.rs

1use super::super::*;
2
3impl VirtualMachine {
4    /// Register a built-in stdlib module into the VM's module registry.
5    /// Delegates to `register_extension` — this is a semantic alias to
6    /// distinguish VM-native stdlib modules from user-installed extension plugins.
7    pub fn register_stdlib_module(&mut self, module: shape_runtime::module_exports::ModuleExports) {
8        self.register_extension(module);
9    }
10
11    /// Register an external/user extension module (e.g. loaded from a .so plugin)
12    /// into the VM's module registry.
13    /// Also merges any method intrinsics for fast Object dispatch.
14    pub fn register_extension(&mut self, module: shape_runtime::module_exports::ModuleExports) {
15        // Merge method intrinsics
16        for (type_name, methods) in &module.method_intrinsics {
17            let entry = self.extension_methods.entry(type_name.clone()).or_default();
18            for (method_name, func) in methods {
19                entry.insert(method_name.clone(), func.clone());
20            }
21        }
22        // Expose module exports as methods on the module object type so
23        // `module.fn(...)` dispatches via CallMethod without UFCS rewrites.
24        // Register under the canonical type name only (`__mod_std::core::json`).
25        let canonical_type_name = format!("__mod_{}", module.name);
26
27        let mut sync_methods: Vec<(String, shape_runtime::module_exports::ModuleFn)> = Vec::new();
28        for (export_name, func) in &module.exports {
29            sync_methods.push((export_name.clone(), func.clone()));
30        }
31        for (export_name, async_fn) in &module.async_exports {
32            let async_fn = async_fn.clone();
33            let wrapped: shape_runtime::module_exports::ModuleFn = Arc::new(
34                move |args: &[ValueWord], _ctx: &shape_runtime::module_exports::ModuleContext| {
35                    let future = async_fn(args);
36                    tokio::task::block_in_place(|| {
37                        tokio::runtime::Handle::current().block_on(future)
38                    })
39                },
40            );
41            sync_methods.push((export_name.clone(), wrapped));
42        }
43
44        let canonical_entry = self
45            .extension_methods
46            .entry(canonical_type_name)
47            .or_default();
48        for (name, func) in &sync_methods {
49            canonical_entry.insert(name.clone(), func.clone());
50        }
51
52        self.module_registry.register(module);
53    }
54
55    /// Register a ModuleFn in the table and return its ID (for ValueWord::ModuleFunction).
56    pub fn register_module_fn(&mut self, f: shape_runtime::module_exports::ModuleFn) -> usize {
57        let id = self.module_fn_table.len();
58        self.module_fn_table.push(f);
59        id
60    }
61
    /// Invoke a registered module function with a scoped `ModuleContext`.
    ///
    /// The context provides access to the type schema registry, a callable
    /// invoker closure, and a raw invoker that extensions can capture in
    /// long-lived structs (e.g., CFFI callback userdata). It also carries a
    /// snapshot of VM state, the function hash table (only when non-empty),
    /// and two setters through which the module function can store a pending
    /// resume or pending frame resume on the VM.
    ///
    /// # Parameters
    ///
    /// * `module_fn` - the host function to call.
    /// * `args` - arguments forwarded unchanged to `module_fn`.
    ///
    /// # Errors
    ///
    /// * `VMError::ResumeRequested` if `pending_resume` is set once the call
    ///   has returned. This check takes precedence: the module function's own
    ///   result (success or error) is discarded in that case.
    /// * `VMError::RuntimeError` wrapping the error returned by `module_fn`.
    pub(crate) fn invoke_module_fn(
        &mut self,
        module_fn: &shape_runtime::module_exports::ModuleFn,
        args: &[ValueWord],
    ) -> Result<ValueWord, VMError> {
        // SAFETY: The module function is called synchronously and the VM pointer
        // remains valid for the duration of the call.  We use a raw pointer so
        // that: (a) the callable invoker can re-enter the VM, and (b) we can
        // simultaneously borrow the schema registry.
        // NOTE(review): `raw_invoker` hands the same pointer to the extension;
        // nothing in this function keeps it valid after the call returns, so an
        // extension that retains it relies on the VM outliving that use — confirm
        // against the extension contract.
        unsafe {
            let vm_ptr = self as *mut VirtualMachine;

            // Closure form of the invoker: re-enters the VM through `vm_ptr` and
            // flattens VM errors into strings for the extension side.
            let invoker =
                |callable: &ValueWord, call_args: &[ValueWord]| -> Result<ValueWord, String> {
                    (*vm_ptr)
                        .call_value_immediate_nb(callable, call_args, None)
                        .map_err(|e| e.to_string())
                };

            // Plain-function form of the same invoker. It receives the VM as an
            // opaque `c_void` pointer so it can be stored in
            // `RawCallableInvoker` without borrowing from this stack frame.
            // Callers must pass a `ctx` that points to a live `VirtualMachine`.
            unsafe fn vm_callable_invoker(
                ctx: *mut std::ffi::c_void,
                callable: &ValueWord,
                args: &[ValueWord],
            ) -> Result<ValueWord, String> {
                let vm = unsafe { &mut *(ctx as *mut VirtualMachine) };
                vm.call_value_immediate_nb(callable, args, None)
                    .map_err(|err| err.to_string())
            }

            // Capture a read-only snapshot of VM state before dispatching.
            // The snapshot lives on the stack and is referenced by ModuleContext
            // for the duration of this synchronous call.
            let vm_snapshot = (*vm_ptr).capture_vm_state();

            let ctx = shape_runtime::module_exports::ModuleContext {
                schemas: &(*vm_ptr).program.type_schema_registry,
                invoke_callable: Some(&invoker),
                raw_invoker: Some(shape_runtime::module_exports::RawCallableInvoker {
                    ctx: vm_ptr as *mut std::ffi::c_void,
                    invoke: vm_callable_invoker,
                }),
                // An empty hash table is reported as "no hashes" rather than as
                // an empty slice.
                function_hashes: if (*vm_ptr).function_hash_raw.is_empty() {
                    None
                } else {
                    Some(&(*vm_ptr).function_hash_raw)
                },
                vm_state: Some(&vm_snapshot),
                granted_permissions: None,
                scope_constraints: None,
                set_pending_resume: Some(&|snapshot| {
                    // vm_ptr is valid for the duration of the module function call
                    // (outer unsafe block covers this).
                    // The stored snapshot is what the post-call check below looks for.
                    (*vm_ptr).pending_resume = Some(snapshot);
                }),
                set_pending_frame_resume: Some(&|ip_offset, locals| {
                    // vm_ptr is valid for the duration of the module function call
                    // (outer unsafe block covers this).
                    // Only stored here; this function does not inspect it afterwards.
                    (*vm_ptr).pending_frame_resume = Some(FrameResumeData { ip_offset, locals });
                }),
            };

            // Set thread-local program reference so remote.__call() can access it.
            // NOTE(review): the matching clear below is a plain statement, not a
            // drop guard, so it is skipped if `module_fn` unwinds — confirm that
            // is acceptable.
            crate::executor::builtins::remote_builtins::set_current_program(&(*vm_ptr).program);
            let result = module_fn(args, &ctx).map_err(VMError::RuntimeError);
            crate::executor::builtins::remote_builtins::clear_current_program();

            // Check if the module function requested a VM state resume.
            // If so, return a special error that the dispatch loop intercepts.
            // `result` is intentionally dropped on this path.
            if (*vm_ptr).pending_resume.is_some() {
                return Err(VMError::ResumeRequested);
            }

            result
        }
    }
142
143    /// Populate extension module objects as module_bindings (json, duckdb, etc.).
144    /// These are used by extension Shape code (e.g., `duckdb.query(...)`).
145    /// Call this after load_program().
146    pub fn populate_module_objects(&mut self) {
147        // Collect module data first to avoid borrow conflicts
148        let module_data: Vec<(
149            String,
150            Vec<(String, shape_runtime::module_exports::ModuleFn)>,
151            Vec<(String, shape_runtime::module_exports::AsyncModuleFn)>,
152            Vec<String>,
153        )> = self
154            .module_registry
155            .module_names()
156            .iter()
157            .filter_map(|name| {
158                let module = self.module_registry.get(name)?;
159                let sync_exports: Vec<_> = module
160                    .exports
161                    .iter()
162                    .map(|(k, v)| (k.clone(), v.clone()))
163                    .collect();
164                let async_exports: Vec<_> = module
165                    .async_exports
166                    .iter()
167                    .map(|(k, v)| (k.clone(), v.clone()))
168                    .collect();
169                let mut source_exports = Vec::new();
170                for artifact in &module.module_artifacts {
171                    if artifact.module_path != *name {
172                        continue;
173                    }
174                    let Some(source) = artifact.source.as_deref() else {
175                        continue;
176                    };
177                    if let Ok(exports) =
178                        shape_runtime::module_loader::collect_exported_function_names_from_source(
179                            &artifact.module_path,
180                            source,
181                        )
182                    {
183                        source_exports.extend(exports);
184                    }
185                }
186                source_exports.sort();
187                source_exports.dedup();
188                Some((
189                    name.to_string(),
190                    sync_exports,
191                    async_exports,
192                    source_exports,
193                ))
194            })
195            .collect();
196
197        for (module_name, sync_exports, async_exports, source_exports) in module_data {
198            // Find the module_binding index for this module name.
199            // Prefer the hidden native binding (`__imported_module__::X`) when it exists,
200            // so that compiled artifact code referencing the hidden binding gets the
201            // native module object. The plain binding is filled by the compiled module
202            // declaration at runtime.
203            let hidden_name =
204                crate::compiler::BytecodeCompiler::hidden_native_module_binding_name(&module_name);
205            let binding_idx = self
206                .program
207                .module_binding_names
208                .iter()
209                .position(|binding_name| binding_name == &hidden_name)
210                .or_else(|| {
211                    self.program
212                        .module_binding_names
213                        .iter()
214                        .position(|binding_name| binding_name == &module_name)
215                });
216
217            if let Some(idx) = binding_idx {
218                let mut obj = HashMap::new();
219
220                // Register sync exports directly
221                for (export_name, module_fn) in sync_exports {
222                    let fn_id = self.register_module_fn(module_fn);
223                    obj.insert(export_name, ValueWord::from_module_function(fn_id as u32));
224                }
225
226                // Wrap async exports: block_in_place + block_on at call time
227                for (export_name, async_fn) in async_exports {
228                    let wrapped: shape_runtime::module_exports::ModuleFn =
229                        Arc::new(move |args: &[ValueWord], _ctx: &shape_runtime::module_exports::ModuleContext| {
230                            let future = async_fn(args);
231                            tokio::task::block_in_place(|| {
232                                tokio::runtime::Handle::current().block_on(future)
233                            })
234                        });
235                    let fn_id = self.register_module_fn(wrapped);
236                    obj.insert(export_name, ValueWord::from_module_function(fn_id as u32));
237                }
238
239                // Add Shape-source exported functions (compiled into bytecode).
240                // These are regular VM functions, not host module functions.
241                for export_name in source_exports {
242                    if obj.contains_key(&export_name) {
243                        continue;
244                    }
245                    if let Some(&func_id) = self.function_name_index.get(&export_name) {
246                        obj.insert(export_name, ValueWord::from_function(func_id));
247                    }
248                }
249
250                // Module object schemas must be predeclared at compile time.
251                // Use the canonical module name only.
252                let cache_name = format!("__mod_{}", module_name);
253                let schema_id =
254                    if let Some(schema) = self.lookup_schema_by_name(&cache_name) {
255                        schema.id
256                    } else {
257                        // Keep execution predictable: no runtime schema synthesis.
258                        // Missing module schema means compiler/loader setup is incomplete.
259                        continue;
260                    };
261
262                // Look up schema to get field ordering
263                let Some(schema) = self.lookup_schema(schema_id) else {
264                    continue;
265                };
266                let field_order: Vec<String> =
267                    schema.fields.iter().map(|f| f.name.clone()).collect();
268
269                let mut slots = Vec::with_capacity(field_order.len());
270                let mut heap_mask: u64 = 0;
271                for (i, field_name) in field_order.iter().enumerate() {
272                    let nb_val = obj.get(field_name).cloned().unwrap_or_else(ValueWord::none);
273                    let (slot, is_heap) =
274                        crate::executor::objects::object_creation::nb_to_slot_with_field_type(
275                            &nb_val, None,
276                        );
277                    slots.push(slot);
278                    if is_heap {
279                        heap_mask |= 1u64 << i;
280                    }
281                }
282
283                let typed_nb = ValueWord::from_heap_value(HeapValue::TypedObject {
284                    schema_id: schema_id as u64,
285                    slots: slots.into_boxed_slice(),
286                    heap_mask,
287                });
288                if idx >= self.module_bindings.len() {
289                    self.module_bindings.resize_with(idx + 1, ValueWord::none);
290                }
291                // BARRIER: heap write site — overwrites module binding during typed object initialization
292                self.module_bindings[idx] = typed_nb;
293            }
294        }
295    }
296}