Skip to main content

harn_vm/vm/
modules.rs

1use std::cell::RefCell;
2use std::collections::{BTreeMap, HashSet};
3use std::future::Future;
4use std::hash::{Hash, Hasher};
5use std::path::{Path, PathBuf};
6use std::pin::Pin;
7use std::rc::Rc;
8use std::sync::{Arc, Mutex, OnceLock};
9
10use crate::bytecode_cache;
11use crate::chunk::{Chunk, CompiledFunction};
12use crate::module_artifact::{compile_module_artifact_from_source, ModuleArtifact};
13use crate::value::{ModuleFunctionRegistry, VmClosure, VmEnv, VmError, VmValue};
14
15use super::{ScopeSpan, Vm};
16
17static STDLIB_MODULE_ARTIFACT_CACHE: OnceLock<Mutex<BTreeMap<String, Arc<ModuleArtifact>>>> =
18    OnceLock::new();
19
20fn stdlib_module_artifact_cache() -> &'static Mutex<BTreeMap<String, Arc<ModuleArtifact>>> {
21    STDLIB_MODULE_ARTIFACT_CACHE.get_or_init(|| Mutex::new(BTreeMap::new()))
22}
23
/// Test-only helper: empties the process-wide stdlib artifact cache so a test
/// can start from a cold cache.
#[cfg(test)]
fn reset_stdlib_module_artifact_cache() {
    let mut artifacts = stdlib_module_artifact_cache().lock().unwrap();
    artifacts.clear();
}
28
/// Test-only helper: returns the address of the cached artifact for
/// `module`/`source` as an integer, or `None` when nothing is cached under
/// that key. Lets tests check that two lookups observe the same allocation.
#[cfg(test)]
fn stdlib_module_artifact_cache_ptr(module: &str, source: &str) -> Option<usize> {
    let cache_key = stdlib_artifact_cache_key(module, source);
    let artifacts = stdlib_module_artifact_cache().lock().unwrap();
    let artifact = artifacts.get(&cache_key)?;
    Some(Arc::as_ptr(artifact) as usize)
}
38
/// An instantiated module as stored in the VM's module cache: the closures it
/// provides plus the set of names it exposes publicly.
#[derive(Clone)]
pub(crate) struct LoadedModule {
    /// Closures available from this module, keyed by function name. Includes
    /// both functions defined by the module and any it re-exports.
    pub(crate) functions: BTreeMap<String, Rc<VmClosure>>,
    /// Names exposed to wildcard imports. When this set is empty, the import
    /// code in this file falls back to exposing every entry in `functions`.
    pub(crate) public_names: HashSet<String>,
}
44
45pub fn resolve_module_import_path(base: &Path, path: &str) -> PathBuf {
46    let synthetic_current_file = base.join("__harn_import_base__.harn");
47    if let Some(resolved) = harn_modules::resolve_import_path(&synthetic_current_file, path) {
48        return resolved;
49    }
50
51    let mut file_path = base.join(path);
52
53    if !file_path.exists() && file_path.extension().is_none() {
54        file_path.set_extension("harn");
55    }
56
57    file_path
58}
59
/// Builds the in-memory cache key for a stdlib module artifact.
///
/// The key has the form `"{module}:{digest}"`, where `digest` is the 16-digit,
/// zero-padded, lowercase hex rendering of a `DefaultHasher` fed the module
/// name followed by the source text.
fn stdlib_artifact_cache_key(module: &str, source: &str) -> String {
    use std::collections::hash_map::DefaultHasher;

    let mut state = DefaultHasher::new();
    for part in [module, source] {
        part.hash(&mut state);
    }
    let digest = state.finish();
    format!("{module}:{digest:016x}")
}
66
67fn stdlib_module_artifact(
68    module: &str,
69    synthetic: &Path,
70    source: &'static str,
71) -> Result<Arc<ModuleArtifact>, VmError> {
72    let key = stdlib_artifact_cache_key(module, source);
73    {
74        let cache = stdlib_module_artifact_cache().lock().unwrap();
75        if let Some(cached) = cache.get(&key) {
76            return Ok(Arc::clone(cached));
77        }
78    }
79
80    // Stdlib modules are embedded in the binary so their content cannot
81    // legitimately change between processes; that means the disk cache
82    // for stdlib can use a synthetic source_path. The harn_version field
83    // of the cache key gates correctness across releases.
84    let lookup = bytecode_cache::load_module(synthetic, source);
85    let artifact = if let Some(artifact) = lookup.artifact {
86        artifact
87    } else {
88        let compiled = compile_module_artifact_from_source(synthetic, source)?;
89        if let Err(err) = bytecode_cache::store_module(&lookup.key, &compiled) {
90            if std::env::var_os("HARN_BYTECODE_CACHE_DEBUG").is_some() {
91                eprintln!("[harn] stdlib module cache write skipped for {module}: {err}");
92            }
93        }
94        compiled
95    };
96
97    let compiled = Arc::new(artifact);
98    let mut cache = stdlib_module_artifact_cache().lock().unwrap();
99    if let Some(cached) = cache.get(&key) {
100        return Ok(Arc::clone(cached));
101    }
102    cache.insert(key, Arc::clone(&compiled));
103    Ok(compiled)
104}
105
106impl Vm {
107    async fn load_module_from_source(
108        &mut self,
109        synthetic: PathBuf,
110        source: &str,
111    ) -> Result<LoadedModule, VmError> {
112        if let Some(loaded) = self.module_cache.get(&synthetic).cloned() {
113            return Ok(loaded);
114        }
115        Rc::make_mut(&mut self.source_cache).insert(synthetic.clone(), source.to_string());
116
117        let artifact = compile_module_artifact_from_source(&synthetic, source)?;
118
119        self.imported_paths.push(synthetic.clone());
120        let loaded = self.instantiate_module(None, &artifact).await?;
121        self.imported_paths.pop();
122        Rc::make_mut(&mut self.module_cache).insert(synthetic, loaded.clone());
123        Ok(loaded)
124    }
125
126    async fn load_stdlib_module_from_source(
127        &mut self,
128        module: &str,
129        synthetic: PathBuf,
130        source: &'static str,
131    ) -> Result<LoadedModule, VmError> {
132        if let Some(loaded) = self.module_cache.get(&synthetic).cloned() {
133            return Ok(loaded);
134        }
135        Rc::make_mut(&mut self.source_cache).insert(synthetic.clone(), source.to_string());
136
137        let artifact = stdlib_module_artifact(module, &synthetic, source)?;
138        self.imported_paths.push(synthetic.clone());
139        let loaded = self.instantiate_stdlib_module(artifact.as_ref()).await?;
140        self.imported_paths.pop();
141        Rc::make_mut(&mut self.module_cache).insert(synthetic, loaded.clone());
142        Ok(loaded)
143    }
144
145    async fn instantiate_stdlib_module(
146        &mut self,
147        artifact: &ModuleArtifact,
148    ) -> Result<LoadedModule, VmError> {
149        self.instantiate_module(None, artifact).await
150    }
151
152    /// Instantiate a previously-compiled [`ModuleArtifact`] into a
153    /// [`LoadedModule`]. Re-runs nested imports, replays the init chunk
154    /// into a fresh module env, mints a [`VmClosure`] for each compiled
155    /// function (stamped with `module_source_dir` so imports from inside
156    /// those functions resolve against the originating file), and
157    /// applies the re-export pass. Used by both stdlib and user-import
158    /// code paths.
159    async fn instantiate_module(
160        &mut self,
161        module_source_dir: Option<PathBuf>,
162        artifact: &ModuleArtifact,
163    ) -> Result<LoadedModule, VmError> {
164        let caller_env = self.env.clone();
165        let old_source_dir = self.source_dir.clone();
166        self.env = VmEnv::new();
167        self.source_dir = module_source_dir.clone();
168
169        for import in &artifact.imports {
170            self.execute_import(&import.path, import.selected_names.as_deref())
171                .await?;
172        }
173
174        let module_state: crate::value::ModuleState = {
175            let mut init_env = self.env.clone();
176            if let Some(init_chunk) = &artifact.init_chunk {
177                let fresh_init_chunk = Chunk::from_cached(init_chunk);
178                let saved_env = std::mem::replace(&mut self.env, init_env);
179                let saved_frames = std::mem::take(&mut self.frames);
180                let saved_handlers = std::mem::take(&mut self.exception_handlers);
181                let saved_iterators = std::mem::take(&mut self.iterators);
182                let saved_deadlines = std::mem::take(&mut self.deadlines);
183                let init_result = self.run_chunk(&fresh_init_chunk).await;
184                init_env = std::mem::replace(&mut self.env, saved_env);
185                self.frames = saved_frames;
186                self.exception_handlers = saved_handlers;
187                self.iterators = saved_iterators;
188                self.deadlines = saved_deadlines;
189                init_result?;
190            }
191            Rc::new(RefCell::new(init_env))
192        };
193
194        let module_env = self.env.clone();
195        let registry: ModuleFunctionRegistry = Rc::new(RefCell::new(BTreeMap::new()));
196        let mut functions: BTreeMap<String, Rc<VmClosure>> = BTreeMap::new();
197        let mut public_names = artifact.public_names.clone();
198
199        for (name, compiled) in &artifact.functions {
200            let closure = Rc::new(VmClosure {
201                func: Rc::new(CompiledFunction::from_cached(compiled)),
202                env: module_env.clone(),
203                source_dir: module_source_dir.clone(),
204                module_functions: Some(Rc::clone(&registry)),
205                module_state: Some(Rc::clone(&module_state)),
206            });
207            registry
208                .borrow_mut()
209                .insert(name.clone(), Rc::clone(&closure));
210            self.env
211                .define(name, VmValue::Closure(Rc::clone(&closure)), false)?;
212            module_state
213                .borrow_mut()
214                .define(name, VmValue::Closure(Rc::clone(&closure)), false)?;
215            functions.insert(name.clone(), Rc::clone(&closure));
216        }
217
218        for import in artifact.imports.iter().filter(|import| import.is_pub) {
219            let cache_key = self.cache_key_for_import(&import.path);
220            let Some(loaded) = self.module_cache.get(&cache_key).cloned() else {
221                return Err(VmError::Runtime(format!(
222                    "Re-export error: imported module '{}' was not loaded",
223                    import.path
224                )));
225            };
226            let names_to_reexport: Vec<String> = match &import.selected_names {
227                Some(names) => names.clone(),
228                None => {
229                    if loaded.public_names.is_empty() {
230                        loaded.functions.keys().cloned().collect()
231                    } else {
232                        loaded.public_names.iter().cloned().collect()
233                    }
234                }
235            };
236            for name in names_to_reexport {
237                let Some(closure) = loaded.functions.get(&name) else {
238                    return Err(VmError::Runtime(format!(
239                        "Re-export error: '{name}' is not exported by '{}'",
240                        import.path
241                    )));
242                };
243                if let Some(existing) = functions.get(&name) {
244                    if !Rc::ptr_eq(existing, closure) {
245                        return Err(VmError::Runtime(format!(
246                            "Re-export collision: '{name}' is defined here and also \
247                             re-exported from '{}'",
248                            import.path
249                        )));
250                    }
251                }
252                functions.insert(name.clone(), Rc::clone(closure));
253                public_names.insert(name);
254            }
255        }
256
257        self.env = caller_env;
258        self.source_dir = old_source_dir;
259
260        Ok(LoadedModule {
261            functions,
262            public_names,
263        })
264    }
265
266    fn export_loaded_module(
267        &mut self,
268        module_path: &Path,
269        loaded: &LoadedModule,
270        selected_names: Option<&[String]>,
271    ) -> Result<(), VmError> {
272        let export_names: Vec<String> = if let Some(names) = selected_names {
273            names.to_vec()
274        } else if !loaded.public_names.is_empty() {
275            loaded.public_names.iter().cloned().collect()
276        } else {
277            loaded.functions.keys().cloned().collect()
278        };
279
280        let module_name = module_path.display().to_string();
281        for name in export_names {
282            let Some(closure) = loaded.functions.get(&name) else {
283                return Err(VmError::Runtime(format!(
284                    "Import error: '{name}' is not defined in {module_name}"
285                )));
286            };
287            if let Some(VmValue::Closure(_)) = self.env.get(&name) {
288                return Err(VmError::Runtime(format!(
289                    "Import collision: '{name}' is already defined when importing {module_name}. \
290                     Use selective imports to disambiguate: import {{ {name} }} from \"...\""
291                )));
292            }
293            self.env
294                .define(&name, VmValue::Closure(Rc::clone(closure)), false)?;
295        }
296        Ok(())
297    }
298
299    /// Execute an import, reading and running the file's declarations.
300    pub(super) fn execute_import<'a>(
301        &'a mut self,
302        path: &'a str,
303        selected_names: Option<&'a [String]>,
304    ) -> Pin<Box<dyn Future<Output = Result<(), VmError>> + 'a>> {
305        Box::pin(async move {
306            let _import_span = ScopeSpan::new(crate::tracing::SpanKind::Import, path.to_string());
307
308            let stdlib_module = path
309                .strip_prefix("std/")
310                .or_else(|| (path == "observability").then_some("observability"));
311            if let Some(module) = stdlib_module {
312                if let Some(source) = crate::stdlib_modules::get_stdlib_source(module) {
313                    let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
314                    if self.imported_paths.contains(&synthetic) {
315                        return Ok(());
316                    }
317                    let loaded = self
318                        .load_stdlib_module_from_source(module, synthetic.clone(), source)
319                        .await?;
320                    self.export_loaded_module(&synthetic, &loaded, selected_names)?;
321                    return Ok(());
322                }
323                return Err(VmError::Runtime(format!(
324                    "Unknown stdlib module: std/{module}"
325                )));
326            }
327
328            let base = self
329                .source_dir
330                .clone()
331                .unwrap_or_else(|| PathBuf::from("."));
332            let file_path = resolve_module_import_path(&base, path);
333
334            let canonical = file_path
335                .canonicalize()
336                .unwrap_or_else(|_| file_path.clone());
337            if self.imported_paths.contains(&canonical) {
338                return Ok(());
339            }
340            if let Some(loaded) = self.module_cache.get(&canonical).cloned() {
341                return self.export_loaded_module(&canonical, &loaded, selected_names);
342            }
343            self.imported_paths.push(canonical.clone());
344
345            let source = std::fs::read_to_string(&file_path).map_err(|e| {
346                VmError::Runtime(format!(
347                    "Import error: cannot read '{}': {e}",
348                    file_path.display()
349                ))
350            })?;
351            Rc::make_mut(&mut self.source_cache).insert(canonical.clone(), source.clone());
352            Rc::make_mut(&mut self.source_cache).insert(file_path.clone(), source.clone());
353
354            // Disk cache first: hits skip parse + compile for the imported
355            // module's whole function pool, not just the entry pipeline.
356            let lookup = bytecode_cache::load_module(&file_path, &source);
357            let artifact = if let Some(artifact) = lookup.artifact {
358                artifact
359            } else {
360                let compiled = compile_module_artifact_from_source(&file_path, &source)?;
361                if let Err(err) = bytecode_cache::store_module(&lookup.key, &compiled) {
362                    if std::env::var_os("HARN_BYTECODE_CACHE_DEBUG").is_some() {
363                        eprintln!(
364                            "[harn] module cache write skipped for {}: {err}",
365                            file_path.display()
366                        );
367                    }
368                }
369                compiled
370            };
371
372            let module_source_dir = file_path.parent().map(|p| p.to_path_buf());
373            let loaded = self
374                .instantiate_module(module_source_dir, &artifact)
375                .await?;
376            self.imported_paths.pop();
377            Rc::make_mut(&mut self.module_cache).insert(canonical.clone(), loaded.clone());
378            self.export_loaded_module(&canonical, &loaded, selected_names)?;
379
380            Ok(())
381        })
382    }
383
384    /// Return the path key that `execute_import` would use to cache the
385    /// LoadedModule for this import string. Used by the re-export pass to
386    /// look up the already-loaded source module after `execute_import`
387    /// has populated [`Vm::module_cache`].
388    fn cache_key_for_import(&self, path: &str) -> PathBuf {
389        if let Some(module) = path
390            .strip_prefix("std/")
391            .or_else(|| (path == "observability").then_some("observability"))
392        {
393            return PathBuf::from(format!("<stdlib>/{module}.harn"));
394        }
395        let base = self
396            .source_dir
397            .clone()
398            .unwrap_or_else(|| PathBuf::from("."));
399        let file_path = resolve_module_import_path(&base, path);
400        file_path.canonicalize().unwrap_or(file_path)
401    }
402
403    /// Load a module file and return the exported function closures that
404    /// would be visible to a wildcard import.
405    pub async fn load_module_exports(
406        &mut self,
407        path: &Path,
408    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
409        let path_str = path.to_string_lossy().into_owned();
410        self.execute_import(&path_str, None).await?;
411
412        let mut file_path = if path.is_absolute() {
413            path.to_path_buf()
414        } else {
415            self.source_dir
416                .clone()
417                .unwrap_or_else(|| PathBuf::from("."))
418                .join(path)
419        };
420        if !file_path.exists() && file_path.extension().is_none() {
421            file_path.set_extension("harn");
422        }
423
424        let canonical = file_path
425            .canonicalize()
426            .unwrap_or_else(|_| file_path.clone());
427        let loaded = self.module_cache.get(&canonical).cloned().ok_or_else(|| {
428            VmError::Runtime(format!(
429                "Import error: failed to cache loaded module '{}'",
430                canonical.display()
431            ))
432        })?;
433
434        let export_names: Vec<String> = if loaded.public_names.is_empty() {
435            loaded.functions.keys().cloned().collect()
436        } else {
437            loaded.public_names.iter().cloned().collect()
438        };
439
440        let mut exports = BTreeMap::new();
441        for name in export_names {
442            let Some(closure) = loaded.functions.get(&name) else {
443                return Err(VmError::Runtime(format!(
444                    "Import error: exported function '{name}' is missing from {}",
445                    canonical.display()
446                )));
447            };
448            exports.insert(name, Rc::clone(closure));
449        }
450
451        Ok(exports)
452    }
453
454    /// Load synthetic source keyed by a synthetic module path and return
455    /// the exported function closures that a wildcard import would expose.
456    pub async fn load_module_exports_from_source(
457        &mut self,
458        source_key: impl Into<PathBuf>,
459        source: &str,
460    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
461        let synthetic = source_key.into();
462        let loaded = self
463            .load_module_from_source(synthetic.clone(), source)
464            .await?;
465        let export_names: Vec<String> = if loaded.public_names.is_empty() {
466            loaded.functions.keys().cloned().collect()
467        } else {
468            loaded.public_names.iter().cloned().collect()
469        };
470
471        let mut exports = BTreeMap::new();
472        for name in export_names {
473            let Some(closure) = loaded.functions.get(&name) else {
474                return Err(VmError::Runtime(format!(
475                    "Import error: exported function '{name}' is missing from {}",
476                    synthetic.display()
477                )));
478            };
479            exports.insert(name, Rc::clone(closure));
480        }
481
482        Ok(exports)
483    }
484
485    /// Load a module by import path (`std/foo`, relative module path, or
486    /// package import) and return the exported function closures that a
487    /// wildcard import would expose.
488    pub async fn load_module_exports_from_import(
489        &mut self,
490        import_path: &str,
491    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
492        self.execute_import(import_path, None).await?;
493
494        if let Some(module) = import_path
495            .strip_prefix("std/")
496            .or_else(|| (import_path == "observability").then_some("observability"))
497        {
498            let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
499            let loaded = self.module_cache.get(&synthetic).cloned().ok_or_else(|| {
500                VmError::Runtime(format!(
501                    "Import error: failed to cache loaded module '{}'",
502                    synthetic.display()
503                ))
504            })?;
505            let mut exports = BTreeMap::new();
506            let export_names: Vec<String> = if loaded.public_names.is_empty() {
507                loaded.functions.keys().cloned().collect()
508            } else {
509                loaded.public_names.iter().cloned().collect()
510            };
511            for name in export_names {
512                let Some(closure) = loaded.functions.get(&name) else {
513                    return Err(VmError::Runtime(format!(
514                        "Import error: exported function '{name}' is missing from {}",
515                        synthetic.display()
516                    )));
517                };
518                exports.insert(name, Rc::clone(closure));
519            }
520            return Ok(exports);
521        }
522
523        let base = self
524            .source_dir
525            .clone()
526            .unwrap_or_else(|| PathBuf::from("."));
527        let file_path = resolve_module_import_path(&base, import_path);
528        self.load_module_exports(&file_path).await
529    }
530}
531
#[cfg(test)]
mod tests {
    use std::rc::Rc;
    use std::sync::{Mutex, MutexGuard, OnceLock};

    use super::*;

    /// Import string and stdlib module name exercised by every test here.
    const PROMPTS_IMPORT: &str = "std/agent/prompts";
    const PROMPTS_MODULE: &str = "agent/prompts";

    /// Serialises tests that reset and inspect the process-wide cache.
    static CACHE_TEST_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

    fn cache_test_guard() -> MutexGuard<'static, ()> {
        let lock = CACHE_TEST_LOCK.get_or_init(|| Mutex::new(()));
        lock.lock().unwrap()
    }

    /// Address of the cached artifact for `module`, if one is cached.
    fn cached_stdlib_module_ptr(module: &str) -> Option<usize> {
        let source = harn_stdlib::get_stdlib_source(module).expect("stdlib module source exists");
        stdlib_module_artifact_cache_ptr(module, source)
    }

    fn current_thread_runtime() -> tokio::runtime::Runtime {
        tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("runtime builds")
    }

    /// Imports the prompts module into a brand-new VM on `runtime` and
    /// returns its exports, panicking with `failure` if the import fails.
    fn import_prompts_in_fresh_vm(
        runtime: &tokio::runtime::Runtime,
        failure: &str,
    ) -> BTreeMap<String, Rc<VmClosure>> {
        runtime.block_on(async {
            let mut vm = Vm::new();
            vm.load_module_exports_from_import(PROMPTS_IMPORT)
                .await
                .expect(failure)
        })
    }

    #[test]
    fn stdlib_artifact_cache_reuses_compilation_with_fresh_vm_state() {
        let _guard = cache_test_guard();
        reset_stdlib_module_artifact_cache();
        let runtime = current_thread_runtime();

        let first_exports = import_prompts_in_fresh_vm(&runtime, "first stdlib import succeeds");
        let first_cached = cached_stdlib_module_ptr(PROMPTS_MODULE)
            .expect("first import cached stdlib artifact");

        let second_exports = import_prompts_in_fresh_vm(&runtime, "second stdlib import succeeds");
        // The second VM must have reused the artifact the first one cached.
        assert_eq!(cached_stdlib_module_ptr(PROMPTS_MODULE), Some(first_cached));

        let first = first_exports
            .get("render_agent_prompt")
            .expect("first export exists");
        let second = second_exports
            .get("render_agent_prompt")
            .expect("second export exists");

        // The artifact is shared, but everything instantiated from it is
        // private to each VM.
        assert!(!Rc::ptr_eq(first, second));
        assert!(!Rc::ptr_eq(&first.func, &second.func));
        assert!(!Rc::ptr_eq(&first.func.chunk, &second.func.chunk));
        let first_state = first.module_state.as_ref().expect("first module state");
        let second_state = second.module_state.as_ref().expect("second module state");
        assert!(!Rc::ptr_eq(first_state, second_state));
    }

    #[test]
    fn stdlib_artifact_cache_is_process_wide_across_threads() {
        let _guard = cache_test_guard();
        reset_stdlib_module_artifact_cache();

        // Populate the cache from a separate thread with its own runtime.
        let worker = std::thread::spawn(|| {
            let runtime = current_thread_runtime();
            import_prompts_in_fresh_vm(&runtime, "thread stdlib import succeeds");
        });
        worker.join().expect("thread joins");
        let thread_cached = cached_stdlib_module_ptr(PROMPTS_MODULE)
            .expect("thread import cached stdlib artifact");

        // Importing on this thread must observe the very same artifact.
        let runtime = current_thread_runtime();
        import_prompts_in_fresh_vm(&runtime, "main-thread stdlib import succeeds");
        assert_eq!(
            cached_stdlib_module_ptr(PROMPTS_MODULE),
            Some(thread_cached)
        );
    }
}