Skip to main content

shape_runtime/
module_exports.rs

1//! Runtime module export bindings for Shape extensions.
2//!
3//! This module defines the in-process representation used by VM/LSP/CLI after
4//! a plugin has been loaded through the ABI capability interfaces.
5
6use crate::type_schema::{TypeSchema, TypeSchemaRegistry};
7use shape_value::ValueWord;
8use std::collections::HashMap;
9use std::ffi::c_void;
10use std::future::Future;
11use std::pin::Pin;
12use std::sync::Arc;
13
/// Raw callable invoker as a function pointer + opaque context.
///
/// This is the lifetime-free (`'static`-capable), `Copy` form of
/// `invoke_callable` that extensions (e.g., CFFI) can store in long-lived
/// structs like callback userdata.  The context pointer is valid only for the
/// duration of the originating module function call.
///
/// NOTE(review): because `ctx` is a raw pointer, this type is not
/// automatically `Send`/`Sync`; no `unsafe impl` is visible in this file, so
/// confirm before moving a value of this type across threads.
#[derive(Clone, Copy)]
pub struct RawCallableInvoker {
    /// Opaque context handed back verbatim as the first argument of `invoke`.
    pub ctx: *mut c_void,
    /// Function pointer that performs the call: `(ctx, callable, args)`.
    pub invoke: unsafe fn(*mut c_void, &ValueWord, &[ValueWord]) -> Result<ValueWord, String>,
}
25
26impl RawCallableInvoker {
27    /// Invoke a Shape callable through this raw invoker.
28    ///
29    /// # Safety
30    /// The caller must ensure `self.ctx` is still valid (i.e., the originating
31    /// VM module call is still on the stack).
32    pub unsafe fn call(
33        &self,
34        callable: &ValueWord,
35        args: &[ValueWord],
36    ) -> Result<ValueWord, String> {
37        unsafe { (self.invoke)(self.ctx, callable, args) }
38    }
39}
40
/// Information about a single VM call frame, captured at a point in time.
///
/// All values are owned copies, so a `FrameInfo` does not borrow from the VM.
#[derive(Debug, Clone)]
pub struct FrameInfo {
    /// Numeric ID of the function, when one is available.
    pub function_id: Option<u16>,
    /// Name of the function executing in this frame.
    pub function_name: String,
    /// 32-byte hash associated with the function — presumably the
    /// content-addressed blob hash; `None` when not available.
    pub blob_hash: Option<[u8; 32]>,
    /// Instruction pointer — presumably relative to the function start
    /// (TODO confirm against the VM).
    pub local_ip: usize,
    /// Local variable values for this frame.
    pub locals: Vec<ValueWord>,
    /// Captured upvalues, if any — presumably `None` for non-closures.
    pub upvalues: Option<Vec<ValueWord>>,
    /// Argument values for this frame.
    pub args: Vec<ValueWord>,
}
52
/// Trait providing read access to VM state for state module functions.
///
/// Every method takes `&self` and returns owned data, so implementors hand
/// out snapshots rather than references into the VM.
pub trait VmStateAccessor: Send + Sync {
    /// Snapshot of the current call frame, if there is one.
    fn current_frame(&self) -> Option<FrameInfo>;
    /// Snapshots of all call frames (ordering is implementation-defined here;
    /// verify against the implementor).
    fn all_frames(&self) -> Vec<FrameInfo>;
    /// Snapshot of the caller's frame, if there is one.
    fn caller_frame(&self) -> Option<FrameInfo>;
    /// Argument values of the current frame.
    fn current_args(&self) -> Vec<ValueWord>;
    /// Local variables of the current frame as `(name, value)` pairs.
    fn current_locals(&self) -> Vec<(String, ValueWord)>;
    /// Module-level bindings as `(name, value)` pairs.
    fn module_bindings(&self) -> Vec<(String, ValueWord)>;
    /// Total instruction count at the time of capture. Default impl for compat.
    ///
    /// The default returns `0`, so implementors that do not track the count
    /// need not override it.
    fn instruction_count(&self) -> usize {
        0
    }
}
66
/// Execution context available to module functions during a VM call.
///
/// The VM constructs this before each module function dispatch and passes
/// it by reference. Every borrowed field is tied to the lifetime `'a`, so
/// the context cannot outlive the data it points at.
pub struct ModuleContext<'a> {
    /// Type schema registry — lookup types by name or ID.
    pub schemas: &'a TypeSchemaRegistry,

    /// Invoke a Shape callable (function/closure) from host code.
    /// `None` when no invoker is available for this call.
    pub invoke_callable: Option<&'a dyn Fn(&ValueWord, &[ValueWord]) -> Result<ValueWord, String>>,

    /// Raw invoker for extensions that need to capture a callable invoker
    /// beyond the borrow lifetime (e.g., CFFI callback userdata).
    /// Valid only for the duration of the current module function call.
    pub raw_invoker: Option<RawCallableInvoker>,

    /// Content-addressed function hashes indexed by function ID.
    /// Provided by the VM when content-addressed metadata is available.
    /// Uses raw `[u8; 32]` to avoid a dependency on `shape-vm`'s `FunctionHash`.
    pub function_hashes: Option<&'a [Option<[u8; 32]>]>,

    /// Read-only access to VM state (call frames, locals, etc.).
    /// Provided by the VM when state introspection is needed.
    pub vm_state: Option<&'a dyn VmStateAccessor>,

    /// Permissions granted to the current execution context.
    /// When `Some`, module functions check this before performing I/O.
    /// When `None`, all operations are allowed (backwards compatible).
    pub granted_permissions: Option<shape_abi_v1::PermissionSet>,

    /// Scope constraints for the current execution context.
    /// Narrows permissions to specific paths, hosts, etc.
    /// When `None`, no scope narrowing is applied by the checks in this file.
    pub scope_constraints: Option<shape_abi_v1::ScopeConstraints>,

    /// Callback for `state.resume()` to request full VM state restoration.
    /// The module function stores the snapshot; the dispatch loop applies it
    /// after the current instruction completes.
    pub set_pending_resume: Option<&'a dyn Fn(ValueWord)>,

    /// Callback for `state.resume_frame()` to request mid-function resume.
    /// Stores (ip_offset, locals) so the dispatch loop can override the
    /// call frame set up by invoke_callable.
    pub set_pending_frame_resume: Option<&'a dyn Fn(usize, Vec<ValueWord>)>,
}
111
112/// Check whether the current execution context has a required permission.
113///
114/// If `granted_permissions` is `None`, all operations are allowed (backwards
115/// compatible with code that predates the permission system). If `Some`, the
116/// specific permission must be present in the set.
117pub fn check_permission(
118    ctx: &ModuleContext,
119    permission: shape_abi_v1::Permission,
120) -> Result<(), String> {
121    if let Some(ref granted) = ctx.granted_permissions {
122        if !granted.contains(&permission) {
123            return Err(format!(
124                "Permission denied: {} ({})",
125                permission.description(),
126                permission.name()
127            ));
128        }
129    }
130    Ok(())
131}
132
133/// Check permission and enforce filesystem path scope constraints.
134///
135/// After verifying the base permission (`FsRead`, `FsWrite`, or `FsScoped`),
136/// checks `ScopeConstraints::allowed_paths` when present. If the scope
137/// constraints list paths, the target path must match at least one (prefix
138/// match). An empty `allowed_paths` list means all paths are permitted.
139pub fn check_fs_permission(
140    ctx: &ModuleContext,
141    permission: shape_abi_v1::Permission,
142    path: &str,
143) -> Result<(), String> {
144    check_permission(ctx, permission)?;
145
146    if let Some(ref constraints) = ctx.scope_constraints {
147        if !constraints.allowed_paths.is_empty() {
148            let target = std::path::Path::new(path);
149            let allowed = constraints.allowed_paths.iter().any(|pattern| {
150                // Support glob-style prefix matching: "/data/**" matches
151                // anything under /data/, and "/tmp/*" matches direct children.
152                let pattern = pattern.trim_end_matches("**").trim_end_matches('*');
153                let prefix = std::path::Path::new(pattern.trim_end_matches('/'));
154                target.starts_with(prefix)
155            });
156            if !allowed {
157                return Err(format!(
158                    "Scope constraint denied: path '{}' is not in allowed paths",
159                    path
160                ));
161            }
162        }
163    }
164    Ok(())
165}
166
167/// Check permission and enforce network host scope constraints.
168///
169/// After verifying the base permission (`NetConnect`, `NetListen`, or
170/// `NetScoped`), checks `ScopeConstraints::allowed_hosts` when present.
171/// If the scope constraints list hosts, the target address must match at
172/// least one (supports `host:port` and `*.domain.com` wildcards).
173pub fn check_net_permission(
174    ctx: &ModuleContext,
175    permission: shape_abi_v1::Permission,
176    address: &str,
177) -> Result<(), String> {
178    check_permission(ctx, permission)?;
179
180    if let Some(ref constraints) = ctx.scope_constraints {
181        if !constraints.allowed_hosts.is_empty() {
182            // Extract host (and optional port) from the address.
183            let target_host = address.split(':').next().unwrap_or(address);
184            let allowed = constraints.allowed_hosts.iter().any(|pattern| {
185                let pattern_host = pattern.split(':').next().unwrap_or(pattern);
186                // Wildcard: *.example.com matches sub.example.com
187                if let Some(suffix) = pattern_host.strip_prefix("*.") {
188                    target_host.ends_with(suffix) && target_host.len() > suffix.len()
189                } else {
190                    // Exact host match (port part is ignored for scope check)
191                    target_host == pattern_host
192                }
193            });
194            if !allowed {
195                return Err(format!(
196                    "Scope constraint denied: address '{}' is not in allowed hosts",
197                    address
198                ));
199            }
200        }
201    }
202    Ok(())
203}
204
/// A module function callable from Shape (synchronous).
///
/// Takes a slice of ValueWord arguments plus a `ModuleContext` that provides
/// access to the type schema registry and a callable invoker, and returns
/// either the result value or an error message.
/// The function must be Send + Sync for thread safety; it is held in an
/// `Arc`, so cloning the handle does not clone the function.
pub type ModuleFn = Arc<
    dyn for<'ctx> Fn(&[ValueWord], &ModuleContext<'ctx>) -> Result<ValueWord, String> + Send + Sync,
>;
213
/// An async module function callable from Shape.
///
/// Returns a boxed, pinned, `Send` future that resolves to a ValueWord result
/// or an error message.
/// The VM executor awaits this using the current tokio runtime.
///
/// Note: async functions do not receive a `ModuleContext` because the context
/// borrows from the VM and cannot be sent across await points.
pub type AsyncModuleFn = Arc<
    dyn Fn(&[ValueWord]) -> Pin<Box<dyn Future<Output = Result<ValueWord, String>> + Send>>
        + Send
        + Sync,
>;
226
/// Visibility policy for one extension export.
///
/// The default is [`ModuleExportVisibility::Public`], which is what an export
/// gets when no explicit visibility has been recorded for it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ModuleExportVisibility {
    /// Normal module API: available in runtime + comptime contexts.
    #[default]
    Public,
    /// Only callable from comptime contexts.
    ComptimeOnly,
    /// Internal helper: callable but hidden from normal user-facing discovery.
    Internal,
}
243
/// Description of one parameter accepted by a module function.
///
/// Consumed by the LSP for completions and by validation for type checking.
#[derive(Debug, Clone)]
pub struct ModuleParam {
    /// Parameter name.
    pub name: String,
    /// Name of the parameter's type; `"any"` by default.
    pub type_name: String,
    /// Whether the parameter must be supplied.
    pub required: bool,
    /// Human-readable description.
    pub description: String,
    /// Optional snippet text for the default value.
    pub default_snippet: Option<String>,
    /// Optional closed list of accepted values.
    pub allowed_values: Option<Vec<String>>,
    /// Optional schemas of nested parameters.
    pub nested_params: Option<Vec<ModuleParam>>,
}

impl Default for ModuleParam {
    /// An optional, unnamed, undocumented parameter of type `"any"` with no
    /// snippet, value list, or nested parameters.
    fn default() -> Self {
        let type_name = String::from("any");
        Self {
            name: String::default(),
            type_name,
            required: false,
            description: String::default(),
            default_snippet: None,
            allowed_values: None,
            nested_params: None,
        }
    }
}
270
/// Schema for a module function — describes parameters and return type.
/// Used by LSP for completions, hover, and signature help.
#[derive(Debug, Clone)]
pub struct ModuleFunction {
    /// Human-readable description of the function.
    pub description: String,
    /// Parameter schemas, in declaration order.
    pub params: Vec<ModuleParam>,
    /// Name of the return type, when declared.
    pub return_type: Option<String>,
}
279
/// Bundled module artifact from an extension.
///
/// An artifact may carry Shape source, a precompiled payload, or both; the
/// type itself does not require either to be present.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ModuleArtifact {
    /// Import path for this module (e.g. "duckdb", "duckdb.query")
    pub module_path: String,
    /// Optional Shape source payload.
    pub source: Option<String>,
    /// Optional precompiled payload (opaque host format).
    pub compiled: Option<Vec<u8>>,
}
290
/// A Rust-implemented module, identified by its `name`.
///
/// Holds the callable exports (sync and async), their schemas and visibility,
/// plus any Shape sources, artifacts, method intrinsics and type schemas the
/// extension ships.
#[derive(Clone)]
pub struct ModuleExports {
    /// Module name (e.g., "csv", "json", "duckdb")
    pub name: String,
    /// Human-readable description of this module
    pub description: String,
    /// Exported sync functions: name → implementation
    pub exports: HashMap<String, ModuleFn>,
    /// Exported async functions: name → implementation
    pub async_exports: HashMap<String, AsyncModuleFn>,
    /// Function schemas for LSP + validation: name → schema
    pub schemas: HashMap<String, ModuleFunction>,
    /// Export visibility controls: name → visibility.
    /// Names without an entry are treated as `Public`.
    pub export_visibility: HashMap<String, ModuleExportVisibility>,
    /// Shape source files bundled with this extension.
    /// Compiled and merged with core stdlib at startup.
    /// Vec of (filename, source_code) pairs.
    ///
    /// Legacy compatibility field. New code should use `module_artifacts`.
    pub shape_sources: Vec<(String, String)>,
    /// Bundled module artifacts (source/compiled/both).
    pub module_artifacts: Vec<ModuleArtifact>,
    /// Method intrinsics for fast dispatch on typed Objects.
    /// Outer key: type name (e.g., "DuckDbQuery")
    /// Inner key: method name (e.g., "build_sql")
    /// Dispatched BEFORE callable-property and UFCS fallback.
    pub method_intrinsics: HashMap<String, HashMap<String, ModuleFn>>,
    /// Type schemas to register in the VM's runtime TypeSchemaRegistry.
    /// Extensions can use this to declare types that the runtime can use
    /// for TypedObject creation and field validation.
    pub type_schemas: Vec<TypeSchema>,
}
324
325impl ModuleExports {
326    /// Create a new extension module.
327    pub fn new(name: impl Into<String>) -> Self {
328        Self {
329            name: name.into(),
330            description: String::new(),
331            exports: HashMap::new(),
332            async_exports: HashMap::new(),
333            schemas: HashMap::new(),
334            export_visibility: HashMap::new(),
335            shape_sources: Vec::new(),
336            module_artifacts: Vec::new(),
337            method_intrinsics: HashMap::new(),
338            type_schemas: Vec::new(),
339        }
340    }
341
342    /// Register an exported function.
343    pub fn add_function<F>(&mut self, name: impl Into<String>, f: F) -> &mut Self
344    where
345        F: for<'ctx> Fn(&[ValueWord], &ModuleContext<'ctx>) -> Result<ValueWord, String>
346            + Send
347            + Sync
348            + 'static,
349    {
350        let name = name.into();
351        self.exports.insert(name.clone(), Arc::new(f));
352        self.export_visibility.entry(name).or_default();
353        self
354    }
355
356    /// Register an exported function with its schema.
357    pub fn add_function_with_schema<F>(
358        &mut self,
359        name: impl Into<String>,
360        f: F,
361        schema: ModuleFunction,
362    ) -> &mut Self
363    where
364        F: for<'ctx> Fn(&[ValueWord], &ModuleContext<'ctx>) -> Result<ValueWord, String>
365            + Send
366            + Sync
367            + 'static,
368    {
369        let name = name.into();
370        self.exports.insert(name.clone(), Arc::new(f));
371        self.schemas.insert(name.clone(), schema);
372        self.export_visibility.entry(name).or_default();
373        self
374    }
375
376    /// Register an async exported function.
377    pub fn add_async_function<F, Fut>(&mut self, name: impl Into<String>, f: F) -> &mut Self
378    where
379        F: Fn(Vec<ValueWord>) -> Fut + Send + Sync + 'static,
380        Fut: Future<Output = Result<ValueWord, String>> + Send + 'static,
381    {
382        let name = name.into();
383        self.async_exports.insert(
384            name.clone(),
385            Arc::new(move |args: &[ValueWord]| {
386                let owned_args = args.to_vec();
387                Box::pin(f(owned_args))
388            }),
389        );
390        self.export_visibility.entry(name).or_default();
391        self
392    }
393
394    /// Register an async exported function with its schema.
395    pub fn add_async_function_with_schema<F, Fut>(
396        &mut self,
397        name: impl Into<String>,
398        f: F,
399        schema: ModuleFunction,
400    ) -> &mut Self
401    where
402        F: Fn(Vec<ValueWord>) -> Fut + Send + Sync + 'static,
403        Fut: Future<Output = Result<ValueWord, String>> + Send + 'static,
404    {
405        let name = name.into();
406        self.async_exports.insert(
407            name.clone(),
408            Arc::new(move |args: &[ValueWord]| {
409                let owned_args = args.to_vec();
410                Box::pin(f(owned_args))
411            }),
412        );
413        self.schemas.insert(name.clone(), schema);
414        self.export_visibility.entry(name).or_default();
415        self
416    }
417
418    /// Set visibility for one export name.
419    pub fn set_export_visibility(
420        &mut self,
421        name: impl Into<String>,
422        visibility: ModuleExportVisibility,
423    ) -> &mut Self {
424        self.export_visibility.insert(name.into(), visibility);
425        self
426    }
427
428    /// Resolve visibility for one export (defaults to Public).
429    pub fn export_visibility(&self, name: &str) -> ModuleExportVisibility {
430        self.export_visibility
431            .get(name)
432            .copied()
433            .unwrap_or_default()
434    }
435
436    /// Return true when the export can be called in the current compiler mode.
437    pub fn is_export_available(&self, name: &str, comptime_mode: bool) -> bool {
438        match self.export_visibility(name) {
439            ModuleExportVisibility::Public => true,
440            ModuleExportVisibility::ComptimeOnly => comptime_mode,
441            ModuleExportVisibility::Internal => true,
442        }
443    }
444
445    /// Return true when the export should appear in user-facing completion/hover surfaces.
446    pub fn is_export_public_surface(&self, name: &str, comptime_mode: bool) -> bool {
447        match self.export_visibility(name) {
448            ModuleExportVisibility::Public => true,
449            ModuleExportVisibility::ComptimeOnly => comptime_mode,
450            ModuleExportVisibility::Internal => false,
451        }
452    }
453
454    /// List exports available for the requested mode (sync + async).
455    pub fn export_names_available(&self, comptime_mode: bool) -> Vec<&str> {
456        self.export_names()
457            .into_iter()
458            .filter(|name| self.is_export_available(name, comptime_mode))
459            .collect()
460    }
461
462    /// List user-facing exports for completion/hover (sync + async).
463    pub fn export_names_public_surface(&self, comptime_mode: bool) -> Vec<&str> {
464        self.export_names()
465            .into_iter()
466            .filter(|name| self.is_export_public_surface(name, comptime_mode))
467            .collect()
468    }
469
470    /// Bundle a Shape source file with this extension.
471    /// The source will be compiled and merged with stdlib at startup.
472    pub fn add_shape_source(&mut self, filename: &str, source: &str) -> &mut Self {
473        self.module_artifacts.push(ModuleArtifact {
474            module_path: filename.to_string(),
475            source: Some(source.to_string()),
476            compiled: None,
477        });
478        self.shape_sources
479            .push((filename.to_string(), source.to_string()));
480        self
481    }
482
483    /// Register a bundled module artifact (source/compiled/both).
484    pub fn add_shape_artifact(
485        &mut self,
486        module_path: impl Into<String>,
487        source: Option<String>,
488        compiled: Option<Vec<u8>>,
489    ) -> &mut Self {
490        self.module_artifacts.push(ModuleArtifact {
491            module_path: module_path.into(),
492            source,
493            compiled,
494        });
495        self
496    }
497
498    /// Register a method intrinsic for fast dispatch on typed Objects.
499    /// Called before callable-property and UFCS fallback in handle_object_method().
500    pub fn add_intrinsic<F>(&mut self, type_name: &str, method_name: &str, f: F) -> &mut Self
501    where
502        F: for<'ctx> Fn(&[ValueWord], &ModuleContext<'ctx>) -> Result<ValueWord, String>
503            + Send
504            + Sync
505            + 'static,
506    {
507        self.method_intrinsics
508            .entry(type_name.to_string())
509            .or_default()
510            .insert(method_name.to_string(), Arc::new(f));
511        self
512    }
513
514    /// Register a type schema that the VM will add to its runtime registry.
515    /// Returns the schema ID for reference.
516    pub fn add_type_schema(&mut self, schema: TypeSchema) -> crate::type_schema::SchemaId {
517        let id = schema.id;
518        self.type_schemas.push(schema);
519        id
520    }
521
522    /// Check if this module exports a given name (sync or async).
523    pub fn has_export(&self, name: &str) -> bool {
524        self.exports.contains_key(name) || self.async_exports.contains_key(name)
525    }
526
527    /// Get a sync exported function by name.
528    pub fn get_export(&self, name: &str) -> Option<&ModuleFn> {
529        self.exports.get(name)
530    }
531
532    /// Get an async exported function by name.
533    pub fn get_async_export(&self, name: &str) -> Option<&AsyncModuleFn> {
534        self.async_exports.get(name)
535    }
536
537    /// Check if a function is async.
538    pub fn is_async(&self, name: &str) -> bool {
539        self.async_exports.contains_key(name)
540    }
541
542    /// Get the schema for an exported function.
543    pub fn get_schema(&self, name: &str) -> Option<&ModuleFunction> {
544        self.schemas.get(name)
545    }
546
547    /// List all export names (sync + async).
548    pub fn export_names(&self) -> Vec<&str> {
549        let mut names: Vec<&str> = self
550            .exports
551            .keys()
552            .chain(self.async_exports.keys())
553            .map(|s| s.as_str())
554            .collect();
555        names.sort_unstable();
556        names.dedup();
557        names
558    }
559
560    /// Convert this module's schema to a `ParsedModuleSchema` for the semantic
561    /// analyzer, mirroring the conversion in `BytecodeExecutor::module_schemas()`.
562    pub fn to_parsed_schema(&self) -> crate::extensions::ParsedModuleSchema {
563        let functions = self
564            .schemas
565            .iter()
566            .filter(|(name, _)| self.is_export_public_surface(name, false))
567            .map(|(name, schema)| crate::extensions::ParsedModuleFunction {
568                name: name.clone(),
569                description: schema.description.clone(),
570                params: schema.params.iter().map(|p| p.type_name.clone()).collect(),
571                return_type: schema.return_type.clone(),
572            })
573            .collect();
574        crate::extensions::ParsedModuleSchema {
575            module_name: self.name.clone(),
576            functions,
577            artifacts: Vec::new(),
578        }
579    }
580
581    /// Return `ParsedModuleSchema` entries for all shipped native stdlib modules.
582    /// Used during engine initialization to make these globals visible at compile time.
583    pub fn stdlib_module_schemas() -> Vec<crate::extensions::ParsedModuleSchema> {
584        crate::stdlib::all_stdlib_modules()
585            .into_iter()
586            .map(|m| m.to_parsed_schema())
587            .collect()
588    }
589}
590
591impl std::fmt::Debug for ModuleExports {
592    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
593        f.debug_struct("ModuleExports")
594            .field("name", &self.name)
595            .field("description", &self.description)
596            .field("exports", &self.exports.keys().collect::<Vec<_>>())
597            .field(
598                "async_exports",
599                &self.async_exports.keys().collect::<Vec<_>>(),
600            )
601            .field("schemas", &self.schemas.keys().collect::<Vec<_>>())
602            .field(
603                "shape_sources",
604                &self
605                    .shape_sources
606                    .iter()
607                    .map(|(f, _)| f)
608                    .collect::<Vec<_>>(),
609            )
610            .field(
611                "method_intrinsics",
612                &self.method_intrinsics.keys().collect::<Vec<_>>(),
613            )
614            .finish()
615    }
616}
617
/// Registry of all extension modules.
///
/// Created at startup and populated from loaded plugin capabilities.
/// Lookup is by canonical path only (e.g. `"std::core::json"`); the key used
/// is each module's own `name`.
#[derive(Default)]
pub struct ModuleExportRegistry {
    /// Registered modules keyed by module name.
    modules: HashMap<String, ModuleExports>,
}
626
627impl ModuleExportRegistry {
628    /// Create a new empty registry.
629    pub fn new() -> Self {
630        Self {
631            modules: HashMap::new(),
632        }
633    }
634
635    /// Register a extension module.
636    pub fn register(&mut self, module: ModuleExports) {
637        let canonical = module.name.clone();
638        self.modules.insert(canonical, module);
639    }
640
641    /// Get a module by canonical name.
642    pub fn get(&self, name: &str) -> Option<&ModuleExports> {
643        self.modules.get(name)
644    }
645
646    /// Check if a module exists by canonical name.
647    pub fn has(&self, name: &str) -> bool {
648        self.get(name).is_some()
649    }
650
651    /// List all registered module names.
652    pub fn module_names(&self) -> Vec<&str> {
653        self.modules.keys().map(|s| s.as_str()).collect()
654    }
655
656    /// Get all registered modules.
657    pub fn modules(&self) -> &HashMap<String, ModuleExports> {
658        &self.modules
659    }
660}
661
662impl std::fmt::Debug for ModuleExportRegistry {
663    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
664        f.debug_struct("ModuleExportRegistry")
665            .field("modules", &self.modules.keys().collect::<Vec<_>>())
666            .finish()
667    }
668}
669
670#[cfg(test)]
671#[path = "module_exports_tests.rs"]
672mod tests;