Skip to main content

harn_vm/vm/
modules.rs

1use std::cell::RefCell;
2use std::collections::{BTreeMap, HashSet};
3use std::future::Future;
4use std::hash::{Hash, Hasher};
5use std::path::{Path, PathBuf};
6use std::pin::Pin;
7use std::rc::Rc;
8use std::sync::{Arc, Mutex, OnceLock};
9
10use crate::bytecode_cache;
11use crate::chunk::{Chunk, CompiledFunction};
12use crate::module_artifact::{compile_module_artifact_from_source, ModuleArtifact};
13use crate::value::{ModuleFunctionRegistry, VmClosure, VmEnv, VmError, VmValue};
14
15use super::{ScopeSpan, Vm};
16
17static STDLIB_MODULE_ARTIFACT_CACHE: OnceLock<Mutex<BTreeMap<String, Arc<ModuleArtifact>>>> =
18    OnceLock::new();
19
20fn stdlib_module_artifact_cache() -> &'static Mutex<BTreeMap<String, Arc<ModuleArtifact>>> {
21    STDLIB_MODULE_ARTIFACT_CACHE.get_or_init(|| Mutex::new(BTreeMap::new()))
22}
23
/// Test-only helper: removes every entry from the process-wide stdlib
/// artifact cache.
#[cfg(test)]
fn reset_stdlib_module_artifact_cache() {
    let mut table = stdlib_module_artifact_cache().lock().unwrap();
    table.clear();
}
28
/// Test-only helper: returns the address of the cached artifact for
/// `(module, source)` as an integer, or `None` when nothing is cached
/// under that key. Tests compare the address to check that the same
/// allocation is reused.
#[cfg(test)]
fn stdlib_module_artifact_cache_ptr(module: &str, source: &str) -> Option<usize> {
    let cache_key = stdlib_artifact_cache_key(module, source);
    let table = stdlib_module_artifact_cache().lock().unwrap();
    let artifact = table.get(&cache_key)?;
    Some(Arc::as_ptr(artifact) as usize)
}
38
/// The result of instantiating a module in this VM: the closures it
/// makes available and the subset of names it marks as public.
#[derive(Clone)]
pub(crate) struct LoadedModule {
    /// Closures available from the module, keyed by function name. Holds
    /// the module's own functions plus any it re-exports.
    pub(crate) functions: BTreeMap<String, Rc<VmClosure>>,
    /// Names marked public. When this set is empty, the import and
    /// export code in this file treats every entry of `functions` as
    /// exported.
    pub(crate) public_names: HashSet<String>,
}
44
45pub fn resolve_module_import_path(base: &Path, path: &str) -> PathBuf {
46    let synthetic_current_file = base.join("__harn_import_base__.harn");
47    if let Some(resolved) = harn_modules::resolve_import_path(&synthetic_current_file, path) {
48        return resolved;
49    }
50
51    let mut file_path = base.join(path);
52
53    if !file_path.exists() && file_path.extension().is_none() {
54        file_path.set_extension("harn");
55    }
56
57    file_path
58}
59
/// Builds the in-memory cache key for a stdlib module: the module name,
/// a colon, and a 16-digit lowercase hex hash of the module name
/// followed by its source text.
fn stdlib_artifact_cache_key(module: &str, source: &str) -> String {
    use std::collections::hash_map::DefaultHasher;

    let mut state = DefaultHasher::new();
    module.hash(&mut state);
    source.hash(&mut state);
    let digest = state.finish();

    format!("{module}:{digest:016x}")
}
66
67fn stdlib_module_artifact(
68    module: &str,
69    synthetic: &Path,
70    source: &'static str,
71) -> Result<Arc<ModuleArtifact>, VmError> {
72    let key = stdlib_artifact_cache_key(module, source);
73    {
74        let cache = stdlib_module_artifact_cache().lock().unwrap();
75        if let Some(cached) = cache.get(&key) {
76            return Ok(Arc::clone(cached));
77        }
78    }
79
80    // Stdlib modules are embedded in the binary so their content cannot
81    // legitimately change between processes; that means the disk cache
82    // for stdlib can use a synthetic source_path. The harn_version field
83    // of the cache key gates correctness across releases.
84    let lookup = bytecode_cache::load_module(synthetic, source);
85    let artifact = if let Some(artifact) = lookup.artifact {
86        artifact
87    } else {
88        let compiled = compile_module_artifact_from_source(synthetic, source)?;
89        if let Err(err) = bytecode_cache::store_module(&lookup.key, &compiled) {
90            if std::env::var_os("HARN_BYTECODE_CACHE_DEBUG").is_some() {
91                eprintln!("[harn] stdlib module cache write skipped for {module}: {err}");
92            }
93        }
94        compiled
95    };
96
97    let compiled = Arc::new(artifact);
98    let mut cache = stdlib_module_artifact_cache().lock().unwrap();
99    if let Some(cached) = cache.get(&key) {
100        return Ok(Arc::clone(cached));
101    }
102    cache.insert(key, Arc::clone(&compiled));
103    Ok(compiled)
104}
105
106impl Vm {
107    async fn load_module_from_source(
108        &mut self,
109        synthetic: PathBuf,
110        source: &str,
111    ) -> Result<LoadedModule, VmError> {
112        if let Some(loaded) = self.module_cache.get(&synthetic).cloned() {
113            return Ok(loaded);
114        }
115        Rc::make_mut(&mut self.source_cache).insert(synthetic.clone(), source.to_string());
116
117        let artifact = compile_module_artifact_from_source(&synthetic, source)?;
118
119        self.imported_paths.push(synthetic.clone());
120        let loaded = self.instantiate_module(None, &artifact).await?;
121        self.imported_paths.pop();
122        Rc::make_mut(&mut self.module_cache).insert(synthetic, loaded.clone());
123        Ok(loaded)
124    }
125
126    async fn load_stdlib_module_from_source(
127        &mut self,
128        module: &str,
129        synthetic: PathBuf,
130        source: &'static str,
131    ) -> Result<LoadedModule, VmError> {
132        if let Some(loaded) = self.module_cache.get(&synthetic).cloned() {
133            return Ok(loaded);
134        }
135        Rc::make_mut(&mut self.source_cache).insert(synthetic.clone(), source.to_string());
136
137        let artifact = stdlib_module_artifact(module, &synthetic, source)?;
138        self.imported_paths.push(synthetic.clone());
139        let loaded = self.instantiate_stdlib_module(artifact.as_ref()).await?;
140        self.imported_paths.pop();
141        Rc::make_mut(&mut self.module_cache).insert(synthetic, loaded.clone());
142        Ok(loaded)
143    }
144
145    async fn instantiate_stdlib_module(
146        &mut self,
147        artifact: &ModuleArtifact,
148    ) -> Result<LoadedModule, VmError> {
149        self.instantiate_module(None, artifact).await
150    }
151
152    /// Instantiate a previously-compiled [`ModuleArtifact`] into a
153    /// [`LoadedModule`]. Re-runs nested imports, replays the init chunk
154    /// into a fresh module env, mints a [`VmClosure`] for each compiled
155    /// function (stamped with `module_source_dir` so imports from inside
156    /// those functions resolve against the originating file), and
157    /// applies the re-export pass. Used by both stdlib and user-import
158    /// code paths.
159    async fn instantiate_module(
160        &mut self,
161        module_source_dir: Option<PathBuf>,
162        artifact: &ModuleArtifact,
163    ) -> Result<LoadedModule, VmError> {
164        let caller_env = self.env.clone();
165        let old_source_dir = self.source_dir.clone();
166        self.env = VmEnv::new();
167        self.source_dir = module_source_dir.clone();
168
169        for import in &artifact.imports {
170            self.execute_import(&import.path, import.selected_names.as_deref())
171                .await?;
172        }
173
174        let module_state: crate::value::ModuleState = {
175            let mut init_env = self.env.clone();
176            if let Some(init_chunk) = &artifact.init_chunk {
177                let fresh_init_chunk = Chunk::from_cached(init_chunk);
178                let saved_env = std::mem::replace(&mut self.env, init_env);
179                let saved_frames = std::mem::take(&mut self.frames);
180                let saved_handlers = std::mem::take(&mut self.exception_handlers);
181                let saved_iterators = std::mem::take(&mut self.iterators);
182                let saved_deadlines = std::mem::take(&mut self.deadlines);
183                // STEP_STACK / PERSONA_STACK are thread-locals shared with
184                // the calling frame. Emptying `self.frames` above means
185                // any `prune_below_frame(0)` triggered while the init
186                // chunk's bytecode runs — including the inevitable
187                // frame-pop prune at end-of-chunk — would wipe active
188                // steps owned by the *caller* (e.g., a `@step`-decorated
189                // function whose body lazily imports a module). Snapshot
190                // the persona/step context here and restore it after init
191                // so module loading is invisible to the step-tracking
192                // surface.
193                let active_context = crate::step_runtime::take_active_context();
194                let init_result = self.run_chunk(&fresh_init_chunk).await;
195                crate::step_runtime::restore_active_context(active_context);
196                init_env = std::mem::replace(&mut self.env, saved_env);
197                self.frames = saved_frames;
198                self.exception_handlers = saved_handlers;
199                self.iterators = saved_iterators;
200                self.deadlines = saved_deadlines;
201                init_result?;
202            }
203            Rc::new(RefCell::new(init_env))
204        };
205
206        let module_env = self.env.clone();
207        let registry: ModuleFunctionRegistry = Rc::new(RefCell::new(BTreeMap::new()));
208        let mut functions: BTreeMap<String, Rc<VmClosure>> = BTreeMap::new();
209        let mut public_names = artifact.public_names.clone();
210
211        for (name, compiled) in &artifact.functions {
212            let closure = Rc::new(VmClosure {
213                func: Rc::new(CompiledFunction::from_cached(compiled)),
214                env: module_env.clone(),
215                source_dir: module_source_dir.clone(),
216                module_functions: Some(Rc::clone(&registry)),
217                module_state: Some(Rc::clone(&module_state)),
218            });
219            registry
220                .borrow_mut()
221                .insert(name.clone(), Rc::clone(&closure));
222            self.env
223                .define(name, VmValue::Closure(Rc::clone(&closure)), false)?;
224            module_state
225                .borrow_mut()
226                .define(name, VmValue::Closure(Rc::clone(&closure)), false)?;
227            functions.insert(name.clone(), Rc::clone(&closure));
228        }
229
230        for import in artifact.imports.iter().filter(|import| import.is_pub) {
231            let cache_key = self.cache_key_for_import(&import.path);
232            let Some(loaded) = self.module_cache.get(&cache_key).cloned() else {
233                return Err(VmError::Runtime(format!(
234                    "Re-export error: imported module '{}' was not loaded",
235                    import.path
236                )));
237            };
238            let names_to_reexport: Vec<String> = match &import.selected_names {
239                Some(names) => names.clone(),
240                None => {
241                    if loaded.public_names.is_empty() {
242                        loaded.functions.keys().cloned().collect()
243                    } else {
244                        loaded.public_names.iter().cloned().collect()
245                    }
246                }
247            };
248            for name in names_to_reexport {
249                let Some(closure) = loaded.functions.get(&name) else {
250                    return Err(VmError::Runtime(format!(
251                        "Re-export error: '{name}' is not exported by '{}'",
252                        import.path
253                    )));
254                };
255                if let Some(existing) = functions.get(&name) {
256                    if !Rc::ptr_eq(existing, closure) {
257                        return Err(VmError::Runtime(format!(
258                            "Re-export collision: '{name}' is defined here and also \
259                             re-exported from '{}'",
260                            import.path
261                        )));
262                    }
263                }
264                functions.insert(name.clone(), Rc::clone(closure));
265                public_names.insert(name);
266            }
267        }
268
269        self.env = caller_env;
270        self.source_dir = old_source_dir;
271
272        Ok(LoadedModule {
273            functions,
274            public_names,
275        })
276    }
277
278    fn export_loaded_module(
279        &mut self,
280        module_path: &Path,
281        loaded: &LoadedModule,
282        selected_names: Option<&[String]>,
283    ) -> Result<(), VmError> {
284        let export_names: Vec<String> = if let Some(names) = selected_names {
285            names.to_vec()
286        } else if !loaded.public_names.is_empty() {
287            loaded.public_names.iter().cloned().collect()
288        } else {
289            loaded.functions.keys().cloned().collect()
290        };
291
292        let module_name = module_path.display().to_string();
293        for name in export_names {
294            let Some(closure) = loaded.functions.get(&name) else {
295                return Err(VmError::Runtime(format!(
296                    "Import error: '{name}' is not defined in {module_name}"
297                )));
298            };
299            if let Some(VmValue::Closure(_)) = self.env.get(&name) {
300                return Err(VmError::Runtime(format!(
301                    "Import collision: '{name}' is already defined when importing {module_name}. \
302                     Use selective imports to disambiguate: import {{ {name} }} from \"...\""
303                )));
304            }
305            self.env
306                .define(&name, VmValue::Closure(Rc::clone(closure)), false)?;
307        }
308        Ok(())
309    }
310
311    /// Execute an import, reading and running the file's declarations.
312    pub(super) fn execute_import<'a>(
313        &'a mut self,
314        path: &'a str,
315        selected_names: Option<&'a [String]>,
316    ) -> Pin<Box<dyn Future<Output = Result<(), VmError>> + 'a>> {
317        Box::pin(async move {
318            let _import_span = ScopeSpan::new(crate::tracing::SpanKind::Import, path.to_string());
319
320            let stdlib_module = path
321                .strip_prefix("std/")
322                .or_else(|| (path == "observability").then_some("observability"));
323            if let Some(module) = stdlib_module {
324                if let Some(source) = crate::stdlib_modules::get_stdlib_source(module) {
325                    let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
326                    if self.imported_paths.contains(&synthetic) {
327                        return Ok(());
328                    }
329                    let loaded = self
330                        .load_stdlib_module_from_source(module, synthetic.clone(), source)
331                        .await?;
332                    self.export_loaded_module(&synthetic, &loaded, selected_names)?;
333                    return Ok(());
334                }
335                return Err(VmError::Runtime(format!(
336                    "Unknown stdlib module: std/{module}"
337                )));
338            }
339
340            let base = self
341                .source_dir
342                .clone()
343                .unwrap_or_else(|| PathBuf::from("."));
344            let file_path = resolve_module_import_path(&base, path);
345
346            let canonical = file_path
347                .canonicalize()
348                .unwrap_or_else(|_| file_path.clone());
349            if self.imported_paths.contains(&canonical) {
350                return Ok(());
351            }
352            if let Some(loaded) = self.module_cache.get(&canonical).cloned() {
353                return self.export_loaded_module(&canonical, &loaded, selected_names);
354            }
355            self.imported_paths.push(canonical.clone());
356
357            let source = std::fs::read_to_string(&file_path).map_err(|e| {
358                VmError::Runtime(format!(
359                    "Import error: cannot read '{}': {e}",
360                    file_path.display()
361                ))
362            })?;
363            Rc::make_mut(&mut self.source_cache).insert(canonical.clone(), source.clone());
364            Rc::make_mut(&mut self.source_cache).insert(file_path.clone(), source.clone());
365
366            // Disk cache first: hits skip parse + compile for the imported
367            // module's whole function pool, not just the entry pipeline.
368            let lookup = bytecode_cache::load_module(&file_path, &source);
369            let artifact = if let Some(artifact) = lookup.artifact {
370                artifact
371            } else {
372                let compiled = compile_module_artifact_from_source(&file_path, &source)?;
373                if let Err(err) = bytecode_cache::store_module(&lookup.key, &compiled) {
374                    if std::env::var_os("HARN_BYTECODE_CACHE_DEBUG").is_some() {
375                        eprintln!(
376                            "[harn] module cache write skipped for {}: {err}",
377                            file_path.display()
378                        );
379                    }
380                }
381                compiled
382            };
383
384            let module_source_dir = file_path.parent().map(|p| p.to_path_buf());
385            let loaded = self
386                .instantiate_module(module_source_dir, &artifact)
387                .await?;
388            self.imported_paths.pop();
389            Rc::make_mut(&mut self.module_cache).insert(canonical.clone(), loaded.clone());
390            self.export_loaded_module(&canonical, &loaded, selected_names)?;
391
392            Ok(())
393        })
394    }
395
396    /// Return the path key that `execute_import` would use to cache the
397    /// LoadedModule for this import string. Used by the re-export pass to
398    /// look up the already-loaded source module after `execute_import`
399    /// has populated [`Vm::module_cache`].
400    fn cache_key_for_import(&self, path: &str) -> PathBuf {
401        if let Some(module) = path
402            .strip_prefix("std/")
403            .or_else(|| (path == "observability").then_some("observability"))
404        {
405            return PathBuf::from(format!("<stdlib>/{module}.harn"));
406        }
407        let base = self
408            .source_dir
409            .clone()
410            .unwrap_or_else(|| PathBuf::from("."));
411        let file_path = resolve_module_import_path(&base, path);
412        file_path.canonicalize().unwrap_or(file_path)
413    }
414
415    /// Load a module file and return the exported function closures that
416    /// would be visible to a wildcard import.
417    pub async fn load_module_exports(
418        &mut self,
419        path: &Path,
420    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
421        let path_str = path.to_string_lossy().into_owned();
422        self.execute_import(&path_str, None).await?;
423
424        let mut file_path = if path.is_absolute() {
425            path.to_path_buf()
426        } else {
427            self.source_dir
428                .clone()
429                .unwrap_or_else(|| PathBuf::from("."))
430                .join(path)
431        };
432        if !file_path.exists() && file_path.extension().is_none() {
433            file_path.set_extension("harn");
434        }
435
436        let canonical = file_path
437            .canonicalize()
438            .unwrap_or_else(|_| file_path.clone());
439        let loaded = self.module_cache.get(&canonical).cloned().ok_or_else(|| {
440            VmError::Runtime(format!(
441                "Import error: failed to cache loaded module '{}'",
442                canonical.display()
443            ))
444        })?;
445
446        let export_names: Vec<String> = if loaded.public_names.is_empty() {
447            loaded.functions.keys().cloned().collect()
448        } else {
449            loaded.public_names.iter().cloned().collect()
450        };
451
452        let mut exports = BTreeMap::new();
453        for name in export_names {
454            let Some(closure) = loaded.functions.get(&name) else {
455                return Err(VmError::Runtime(format!(
456                    "Import error: exported function '{name}' is missing from {}",
457                    canonical.display()
458                )));
459            };
460            exports.insert(name, Rc::clone(closure));
461        }
462
463        Ok(exports)
464    }
465
466    /// Load synthetic source keyed by a synthetic module path and return
467    /// the exported function closures that a wildcard import would expose.
468    pub async fn load_module_exports_from_source(
469        &mut self,
470        source_key: impl Into<PathBuf>,
471        source: &str,
472    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
473        let synthetic = source_key.into();
474        let loaded = self
475            .load_module_from_source(synthetic.clone(), source)
476            .await?;
477        let export_names: Vec<String> = if loaded.public_names.is_empty() {
478            loaded.functions.keys().cloned().collect()
479        } else {
480            loaded.public_names.iter().cloned().collect()
481        };
482
483        let mut exports = BTreeMap::new();
484        for name in export_names {
485            let Some(closure) = loaded.functions.get(&name) else {
486                return Err(VmError::Runtime(format!(
487                    "Import error: exported function '{name}' is missing from {}",
488                    synthetic.display()
489                )));
490            };
491            exports.insert(name, Rc::clone(closure));
492        }
493
494        Ok(exports)
495    }
496
497    /// Load a module by import path (`std/foo`, relative module path, or
498    /// package import) and return the exported function closures that a
499    /// wildcard import would expose.
500    pub async fn load_module_exports_from_import(
501        &mut self,
502        import_path: &str,
503    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
504        self.execute_import(import_path, None).await?;
505
506        if let Some(module) = import_path
507            .strip_prefix("std/")
508            .or_else(|| (import_path == "observability").then_some("observability"))
509        {
510            let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
511            let loaded = self.module_cache.get(&synthetic).cloned().ok_or_else(|| {
512                VmError::Runtime(format!(
513                    "Import error: failed to cache loaded module '{}'",
514                    synthetic.display()
515                ))
516            })?;
517            let mut exports = BTreeMap::new();
518            let export_names: Vec<String> = if loaded.public_names.is_empty() {
519                loaded.functions.keys().cloned().collect()
520            } else {
521                loaded.public_names.iter().cloned().collect()
522            };
523            for name in export_names {
524                let Some(closure) = loaded.functions.get(&name) else {
525                    return Err(VmError::Runtime(format!(
526                        "Import error: exported function '{name}' is missing from {}",
527                        synthetic.display()
528                    )));
529                };
530                exports.insert(name, Rc::clone(closure));
531            }
532            return Ok(exports);
533        }
534
535        let base = self
536            .source_dir
537            .clone()
538            .unwrap_or_else(|| PathBuf::from("."));
539        let file_path = resolve_module_import_path(&base, import_path);
540        self.load_module_exports(&file_path).await
541    }
542}
543
#[cfg(test)]
mod tests {
    use std::rc::Rc;
    use std::sync::{Mutex, MutexGuard, OnceLock, PoisonError};

    use super::*;

    /// Serialises the tests in this module: they all reset and inspect
    /// the same process-wide stdlib artifact cache.
    static CACHE_TEST_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

    /// Takes the serialisation lock. A poisoned lock is recovered rather
    /// than unwrapped: the mutex guards `()`, so there is no state to
    /// corrupt, and recovering keeps one failing test from making the
    /// other fail with an unrelated `PoisonError`.
    fn cache_test_guard() -> MutexGuard<'static, ()> {
        CACHE_TEST_LOCK
            .get_or_init(|| Mutex::new(()))
            .lock()
            .unwrap_or_else(PoisonError::into_inner)
    }

    /// Address of the cached artifact for stdlib `module`, if any. Reads
    /// the source through the same accessor `execute_import` uses, so the
    /// cache key is computed from the text the VM hashed.
    fn cached_stdlib_module_ptr(module: &str) -> Option<usize> {
        let source = crate::stdlib_modules::get_stdlib_source(module)
            .expect("stdlib module source exists");
        stdlib_module_artifact_cache_ptr(module, source)
    }

    /// Two fresh VMs importing the same stdlib module share one cached
    /// artifact but get independent closures, functions, chunks and
    /// module state.
    #[test]
    fn stdlib_artifact_cache_reuses_compilation_with_fresh_vm_state() {
        let _guard = cache_test_guard();
        reset_stdlib_module_artifact_cache();
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("runtime builds");

        let first_exports = runtime.block_on(async {
            let mut first_vm = Vm::new();
            first_vm
                .load_module_exports_from_import("std/agent/prompts")
                .await
                .expect("first stdlib import succeeds")
        });
        let first_cached =
            cached_stdlib_module_ptr("agent/prompts").expect("first import cached stdlib artifact");

        let second_exports = runtime.block_on(async {
            let mut second_vm = Vm::new();
            second_vm
                .load_module_exports_from_import("std/agent/prompts")
                .await
                .expect("second stdlib import succeeds")
        });
        // The second import must not have replaced the cached artifact.
        assert_eq!(
            cached_stdlib_module_ptr("agent/prompts"),
            Some(first_cached)
        );

        let first = first_exports
            .get("render_agent_prompt")
            .expect("first export exists");
        let second = second_exports
            .get("render_agent_prompt")
            .expect("second export exists");

        assert!(!Rc::ptr_eq(first, second));
        assert!(!Rc::ptr_eq(&first.func, &second.func));
        assert!(!Rc::ptr_eq(&first.func.chunk, &second.func.chunk));
        assert!(!Rc::ptr_eq(
            first.module_state.as_ref().expect("first module state"),
            second.module_state.as_ref().expect("second module state")
        ));
    }

    /// An artifact cached by an import on a spawned thread is the one
    /// still cached after an import on the main thread.
    #[test]
    fn stdlib_artifact_cache_is_process_wide_across_threads() {
        let _guard = cache_test_guard();
        reset_stdlib_module_artifact_cache();

        let handle = std::thread::spawn(|| {
            let runtime = tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
                .expect("runtime builds");
            runtime.block_on(async {
                let mut vm = Vm::new();
                vm.load_module_exports_from_import("std/agent/prompts")
                    .await
                    .expect("thread stdlib import succeeds");
            });
        });
        handle.join().expect("thread joins");
        let thread_cached = cached_stdlib_module_ptr("agent/prompts")
            .expect("thread import cached stdlib artifact");

        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("runtime builds");
        runtime.block_on(async {
            let mut vm = Vm::new();
            vm.load_module_exports_from_import("std/agent/prompts")
                .await
                .expect("main-thread stdlib import succeeds");
        });
        assert_eq!(
            cached_stdlib_module_ptr("agent/prompts"),
            Some(thread_cached)
        );
    }
}