1use std::collections::{BTreeMap, HashSet};
2use std::future::Future;
3use std::hash::{Hash, Hasher};
4use std::path::{Path, PathBuf};
5use std::pin::Pin;
6use std::sync::{Arc, Mutex, OnceLock};
7
8use crate::bytecode_cache;
9use crate::chunk::{Chunk, CompiledFunction};
10use crate::module_artifact::{compile_module_artifact_from_source, ModuleArtifact};
11use crate::value::{ModuleFunctionRegistry, VmClosure, VmEnv, VmError, VmValue};
12
13use super::{ScopeSpan, Vm};
14
/// Shared cache of compiled stdlib module artifacts.
///
/// Keys are the strings produced by `stdlib_artifact_cache_key` (module name
/// plus a hash of the module source). The map is created lazily the first time
/// `stdlib_module_artifact_cache` is called.
static STDLIB_MODULE_ARTIFACT_CACHE: OnceLock<Mutex<BTreeMap<String, Arc<ModuleArtifact>>>> =
    OnceLock::new();
17
18fn stdlib_module_artifact_cache() -> &'static Mutex<BTreeMap<String, Arc<ModuleArtifact>>> {
19 STDLIB_MODULE_ARTIFACT_CACHE.get_or_init(|| Mutex::new(BTreeMap::new()))
20}
21
/// Test-only: removes every entry from the shared stdlib artifact cache.
///
/// Panics if the cache mutex is poisoned.
#[cfg(test)]
fn reset_stdlib_module_artifact_cache() {
    let mut cache = stdlib_module_artifact_cache().lock().unwrap();
    cache.clear();
}
26
/// Test-only: address of the cached artifact for `module` / `source`, if any.
///
/// The address is returned as a `usize` so tests can compare artifact identity
/// without holding on to the `Arc`. Panics if the cache mutex is poisoned.
#[cfg(test)]
fn stdlib_module_artifact_cache_ptr(module: &str, source: &str) -> Option<usize> {
    let key = stdlib_artifact_cache_key(module, source);
    let cache = stdlib_module_artifact_cache().lock().unwrap();
    let artifact = cache.get(&key)?;
    Some(Arc::as_ptr(artifact) as usize)
}
36
/// The result of instantiating a module inside a `Vm`.
///
/// Besides the callable functions, it owns the strong references to the
/// function registry and module state: each `VmClosure` built in
/// `instantiate_module` only stores `Arc::downgrade` handles to them.
#[derive(Clone)]
pub(crate) struct LoadedModule {
    /// Closures by name: the module's own functions plus anything it
    /// re-exports from `pub` imports.
    pub(crate) functions: BTreeMap<String, Arc<VmClosure>>,
    /// Names exported by default. When this set is empty, importers fall back
    /// to exporting every key of `functions`.
    pub(crate) public_names: HashSet<String>,
    /// Strong owner of the per-module function registry that closures
    /// reference weakly.
    pub(crate) _module_functions: crate::value::ModuleFunctionRegistry,
    /// Strong owner of the module state (the environment left by the module's
    /// init chunk) that closures reference weakly.
    pub(crate) _module_state: crate::value::ModuleState,
}
44
45pub fn resolve_module_import_path(base: &Path, path: &str) -> PathBuf {
46 let synthetic_current_file = base.join("__harn_import_base__.harn");
47 if let Some(resolved) = harn_modules::resolve_import_path(&synthetic_current_file, path) {
48 return resolved;
49 }
50
51 let mut file_path = base.join(path);
52
53 if !file_path.exists() && file_path.extension().is_none() {
54 file_path.set_extension("harn");
55 }
56
57 file_path
58}
59
/// Builds the in-memory cache key for a stdlib module.
///
/// The key is `"{module}:{digest}"`, where `digest` is the 16-digit lowercase
/// hex form of a `DefaultHasher` fed the module name and then the source text.
fn stdlib_artifact_cache_key(module: &str, source: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    for part in [module, source] {
        part.hash(&mut state);
    }
    let digest = state.finish();
    format!("{module}:{digest:016x}")
}
66
67fn stdlib_module_artifact(
68 module: &str,
69 synthetic: &Path,
70 source: &'static str,
71) -> Result<Arc<ModuleArtifact>, VmError> {
72 let key = stdlib_artifact_cache_key(module, source);
73 {
74 let cache = stdlib_module_artifact_cache().lock().unwrap();
75 if let Some(cached) = cache.get(&key) {
76 return Ok(Arc::clone(cached));
77 }
78 }
79
80 let lookup = bytecode_cache::load_module(synthetic, source);
85 let artifact = if let Some(artifact) = lookup.artifact {
86 artifact
87 } else {
88 let compiled = compile_module_artifact_from_source(synthetic, source)?;
89 if let Err(err) = bytecode_cache::store_module(&lookup.key, &compiled) {
90 if std::env::var_os("HARN_BYTECODE_CACHE_DEBUG").is_some() {
91 eprintln!("[harn] stdlib module cache write skipped for {module}: {err}");
92 }
93 }
94 compiled
95 };
96
97 let compiled = Arc::new(artifact);
98 let mut cache = stdlib_module_artifact_cache().lock().unwrap();
99 if let Some(cached) = cache.get(&key) {
100 return Ok(Arc::clone(cached));
101 }
102 cache.insert(key, Arc::clone(&compiled));
103 Ok(compiled)
104}
105
106impl Vm {
107 async fn load_module_from_source(
108 &mut self,
109 synthetic: PathBuf,
110 source: &str,
111 ) -> Result<LoadedModule, VmError> {
112 if let Some(loaded) = self.module_cache.get(&synthetic).cloned() {
113 return Ok(loaded);
114 }
115 Arc::make_mut(&mut self.source_cache).insert(synthetic.clone(), source.to_string());
116
117 let artifact = compile_module_artifact_from_source(&synthetic, source)?;
118
119 self.imported_paths.push(synthetic.clone());
120 let loaded = self.instantiate_module(None, &artifact).await?;
121 self.imported_paths.pop();
122 Arc::make_mut(&mut self.module_cache).insert(synthetic, loaded.clone());
123 Ok(loaded)
124 }
125
126 async fn load_stdlib_module_from_source(
127 &mut self,
128 module: &str,
129 synthetic: PathBuf,
130 source: &'static str,
131 ) -> Result<LoadedModule, VmError> {
132 if let Some(loaded) = self.module_cache.get(&synthetic).cloned() {
133 return Ok(loaded);
134 }
135 Arc::make_mut(&mut self.source_cache).insert(synthetic.clone(), source.to_string());
136
137 let artifact = stdlib_module_artifact(module, &synthetic, source)?;
138 self.imported_paths.push(synthetic.clone());
139 let loaded = self.instantiate_stdlib_module(artifact.as_ref()).await?;
140 self.imported_paths.pop();
141 Arc::make_mut(&mut self.module_cache).insert(synthetic, loaded.clone());
142 Ok(loaded)
143 }
144
145 async fn instantiate_stdlib_module(
146 &mut self,
147 artifact: &ModuleArtifact,
148 ) -> Result<LoadedModule, VmError> {
149 self.instantiate_module(None, artifact).await
150 }
151
152 async fn instantiate_module(
160 &mut self,
161 module_source_dir: Option<PathBuf>,
162 artifact: &ModuleArtifact,
163 ) -> Result<LoadedModule, VmError> {
164 let caller_env = self.env.clone();
165 let old_source_dir = self.source_dir.clone();
166 self.env = VmEnv::new();
167 self.source_dir = module_source_dir.clone();
168
169 for import in &artifact.imports {
170 self.execute_import(&import.path, import.selected_names.as_deref())
171 .await?;
172 }
173
174 let module_state: crate::value::ModuleState = {
175 let mut init_env = self.env.clone();
176 if let Some(init_chunk) = &artifact.init_chunk {
177 let fresh_init_chunk = Chunk::from_cached(init_chunk);
178 let saved_env = std::mem::replace(&mut self.env, init_env);
179 let saved_frames = std::mem::take(&mut self.frames);
180 let saved_handlers = std::mem::take(&mut self.exception_handlers);
181 let saved_iterators = std::mem::take(&mut self.iterators);
182 let saved_deadlines = std::mem::take(&mut self.deadlines);
183 let active_context = crate::step_runtime::take_active_context();
194 let init_result = self.run_chunk(&fresh_init_chunk).await;
195 crate::step_runtime::restore_active_context(active_context);
196 init_env = std::mem::replace(&mut self.env, saved_env);
197 self.frames = saved_frames;
198 self.exception_handlers = saved_handlers;
199 self.iterators = saved_iterators;
200 self.deadlines = saved_deadlines;
201 init_result?;
202 }
203 Arc::new(crate::value::VmMutex::new(init_env))
204 };
205
206 let module_env = self.env.clone();
207 let registry: ModuleFunctionRegistry =
208 Arc::new(crate::value::VmMutex::new(BTreeMap::new()));
209 let mut functions: BTreeMap<String, Arc<VmClosure>> = BTreeMap::new();
210 let mut public_names = artifact.public_names.clone();
211
212 for (name, compiled) in &artifact.functions {
213 let closure = Arc::new(VmClosure {
214 func: Arc::new(CompiledFunction::from_cached(compiled)),
215 env: module_env.clone(),
216 source_dir: module_source_dir.clone(),
217 module_functions: Some(Arc::downgrade(®istry)),
218 module_state: Some(Arc::downgrade(&module_state)),
219 });
220 registry.lock().insert(name.clone(), Arc::clone(&closure));
221 self.env
222 .define(name, VmValue::Closure(Arc::clone(&closure)), false)?;
223 module_state
224 .lock()
225 .define(name, VmValue::Closure(Arc::clone(&closure)), false)?;
226 functions.insert(name.clone(), Arc::clone(&closure));
227 }
228
229 for import in artifact.imports.iter().filter(|import| import.is_pub) {
230 let cache_key = self.cache_key_for_import(&import.path);
231 let Some(loaded) = self.module_cache.get(&cache_key).cloned() else {
232 return Err(VmError::Runtime(format!(
233 "Re-export error: imported module '{}' was not loaded",
234 import.path
235 )));
236 };
237 let names_to_reexport: Vec<String> = match &import.selected_names {
238 Some(names) => names.clone(),
239 None => {
240 if loaded.public_names.is_empty() {
241 loaded.functions.keys().cloned().collect()
242 } else {
243 loaded.public_names.iter().cloned().collect()
244 }
245 }
246 };
247 for name in names_to_reexport {
248 let Some(closure) = loaded.functions.get(&name) else {
249 return Err(VmError::Runtime(format!(
250 "Re-export error: '{name}' is not exported by '{}'",
251 import.path
252 )));
253 };
254 if let Some(existing) = functions.get(&name) {
255 if !Arc::ptr_eq(existing, closure) {
256 return Err(VmError::Runtime(format!(
257 "Re-export collision: '{name}' is defined here and also \
258 re-exported from '{}'",
259 import.path
260 )));
261 }
262 }
263 functions.insert(name.clone(), Arc::clone(closure));
264 public_names.insert(name);
265 }
266 }
267
268 self.env = caller_env;
269 self.source_dir = old_source_dir;
270
271 Ok(LoadedModule {
272 functions,
273 public_names,
274 _module_functions: registry,
275 _module_state: module_state,
276 })
277 }
278
279 fn export_loaded_module(
280 &mut self,
281 module_path: &Path,
282 loaded: &LoadedModule,
283 selected_names: Option<&[String]>,
284 ) -> Result<(), VmError> {
285 let export_names: Vec<String> = if let Some(names) = selected_names {
286 names.to_vec()
287 } else if !loaded.public_names.is_empty() {
288 loaded.public_names.iter().cloned().collect()
289 } else {
290 loaded.functions.keys().cloned().collect()
291 };
292
293 let module_name = module_path.display().to_string();
294 for name in export_names {
295 let Some(closure) = loaded.functions.get(&name) else {
296 return Err(VmError::Runtime(format!(
297 "Import error: '{name}' is not defined in {module_name}"
298 )));
299 };
300 if let Some(VmValue::Closure(_)) = self.env.get(&name) {
301 return Err(VmError::Runtime(format!(
302 "Import collision: '{name}' is already defined when importing {module_name}. \
303 Use selective imports to disambiguate: import {{ {name} }} from \"...\""
304 )));
305 }
306 self.env
307 .define(&name, VmValue::Closure(Arc::clone(closure)), false)?;
308 }
309 Ok(())
310 }
311
312 pub(super) fn execute_import<'a>(
314 &'a mut self,
315 path: &'a str,
316 selected_names: Option<&'a [String]>,
317 ) -> Pin<Box<dyn Future<Output = Result<(), VmError>> + Send + 'a>> {
318 Box::pin(async move {
319 let _import_span = ScopeSpan::new(crate::tracing::SpanKind::Import, path.to_string());
320
321 let stdlib_module = path
322 .strip_prefix("std/")
323 .or_else(|| (path == "observability").then_some("observability"));
324 if let Some(module) = stdlib_module {
325 if let Some(source) = crate::stdlib_modules::get_stdlib_source(module) {
326 let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
327 if self.imported_paths.contains(&synthetic) {
328 return Ok(());
329 }
330 let loaded = self
331 .load_stdlib_module_from_source(module, synthetic.clone(), source)
332 .await?;
333 self.export_loaded_module(&synthetic, &loaded, selected_names)?;
334 return Ok(());
335 }
336 return Err(VmError::Runtime(format!(
337 "Unknown stdlib module: std/{module}"
338 )));
339 }
340
341 let base = self
342 .source_dir
343 .clone()
344 .unwrap_or_else(|| PathBuf::from("."));
345 let file_path = resolve_module_import_path(&base, path);
346
347 let canonical = file_path
348 .canonicalize()
349 .unwrap_or_else(|_| file_path.clone());
350 if self.imported_paths.contains(&canonical) {
351 return Ok(());
352 }
353 if let Some(loaded) = self.module_cache.get(&canonical).cloned() {
354 return self.export_loaded_module(&canonical, &loaded, selected_names);
355 }
356 self.imported_paths.push(canonical.clone());
357
358 let source = std::fs::read_to_string(&file_path).map_err(|e| {
359 VmError::Runtime(format!(
360 "Import error: cannot read '{}': {e}",
361 file_path.display()
362 ))
363 })?;
364 Arc::make_mut(&mut self.source_cache).insert(canonical.clone(), source.clone());
365 Arc::make_mut(&mut self.source_cache).insert(file_path.clone(), source.clone());
366
367 let lookup = bytecode_cache::load_module(&file_path, &source);
370 let artifact = if let Some(artifact) = lookup.artifact {
371 artifact
372 } else {
373 let compiled = compile_module_artifact_from_source(&file_path, &source)?;
374 if let Err(err) = bytecode_cache::store_module(&lookup.key, &compiled) {
375 if std::env::var_os("HARN_BYTECODE_CACHE_DEBUG").is_some() {
376 eprintln!(
377 "[harn] module cache write skipped for {}: {err}",
378 file_path.display()
379 );
380 }
381 }
382 compiled
383 };
384
385 let module_source_dir = file_path.parent().map(|p| p.to_path_buf());
386 let loaded = self
387 .instantiate_module(module_source_dir, &artifact)
388 .await?;
389 self.imported_paths.pop();
390 Arc::make_mut(&mut self.module_cache).insert(canonical.clone(), loaded.clone());
391 self.export_loaded_module(&canonical, &loaded, selected_names)?;
392
393 Ok(())
394 })
395 }
396
397 fn cache_key_for_import(&self, path: &str) -> PathBuf {
402 if let Some(module) = path
403 .strip_prefix("std/")
404 .or_else(|| (path == "observability").then_some("observability"))
405 {
406 return PathBuf::from(format!("<stdlib>/{module}.harn"));
407 }
408 let base = self
409 .source_dir
410 .clone()
411 .unwrap_or_else(|| PathBuf::from("."));
412 let file_path = resolve_module_import_path(&base, path);
413 file_path.canonicalize().unwrap_or(file_path)
414 }
415
416 pub async fn load_module_exports(
419 &mut self,
420 path: &Path,
421 ) -> Result<BTreeMap<String, Arc<VmClosure>>, VmError> {
422 let path_str = path.to_string_lossy().into_owned();
423 self.execute_import(&path_str, None).await?;
424
425 let mut file_path = if path.is_absolute() {
426 path.to_path_buf()
427 } else {
428 self.source_dir
429 .clone()
430 .unwrap_or_else(|| PathBuf::from("."))
431 .join(path)
432 };
433 if !file_path.exists() && file_path.extension().is_none() {
434 file_path.set_extension("harn");
435 }
436
437 let canonical = file_path
438 .canonicalize()
439 .unwrap_or_else(|_| file_path.clone());
440 let loaded = self.module_cache.get(&canonical).cloned().ok_or_else(|| {
441 VmError::Runtime(format!(
442 "Import error: failed to cache loaded module '{}'",
443 canonical.display()
444 ))
445 })?;
446
447 let export_names: Vec<String> = if loaded.public_names.is_empty() {
448 loaded.functions.keys().cloned().collect()
449 } else {
450 loaded.public_names.iter().cloned().collect()
451 };
452
453 let mut exports = BTreeMap::new();
454 for name in export_names {
455 let Some(closure) = loaded.functions.get(&name) else {
456 return Err(VmError::Runtime(format!(
457 "Import error: exported function '{name}' is missing from {}",
458 canonical.display()
459 )));
460 };
461 exports.insert(name, Arc::clone(closure));
462 }
463
464 Ok(exports)
465 }
466
467 pub async fn load_module_exports_from_source(
470 &mut self,
471 source_key: impl Into<PathBuf>,
472 source: &str,
473 ) -> Result<BTreeMap<String, Arc<VmClosure>>, VmError> {
474 let synthetic = source_key.into();
475 let loaded = self
476 .load_module_from_source(synthetic.clone(), source)
477 .await?;
478 let export_names: Vec<String> = if loaded.public_names.is_empty() {
479 loaded.functions.keys().cloned().collect()
480 } else {
481 loaded.public_names.iter().cloned().collect()
482 };
483
484 let mut exports = BTreeMap::new();
485 for name in export_names {
486 let Some(closure) = loaded.functions.get(&name) else {
487 return Err(VmError::Runtime(format!(
488 "Import error: exported function '{name}' is missing from {}",
489 synthetic.display()
490 )));
491 };
492 exports.insert(name, Arc::clone(closure));
493 }
494
495 Ok(exports)
496 }
497
498 pub async fn load_module_exports_from_import(
502 &mut self,
503 import_path: &str,
504 ) -> Result<BTreeMap<String, Arc<VmClosure>>, VmError> {
505 self.execute_import(import_path, None).await?;
506
507 if let Some(module) = import_path
508 .strip_prefix("std/")
509 .or_else(|| (import_path == "observability").then_some("observability"))
510 {
511 let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
512 let loaded = self.module_cache.get(&synthetic).cloned().ok_or_else(|| {
513 VmError::Runtime(format!(
514 "Import error: failed to cache loaded module '{}'",
515 synthetic.display()
516 ))
517 })?;
518 let mut exports = BTreeMap::new();
519 let export_names: Vec<String> = if loaded.public_names.is_empty() {
520 loaded.functions.keys().cloned().collect()
521 } else {
522 loaded.public_names.iter().cloned().collect()
523 };
524 for name in export_names {
525 let Some(closure) = loaded.functions.get(&name) else {
526 return Err(VmError::Runtime(format!(
527 "Import error: exported function '{name}' is missing from {}",
528 synthetic.display()
529 )));
530 };
531 exports.insert(name, Arc::clone(closure));
532 }
533 return Ok(exports);
534 }
535
536 let base = self
537 .source_dir
538 .clone()
539 .unwrap_or_else(|| PathBuf::from("."));
540 let file_path = resolve_module_import_path(&base, import_path);
541 self.load_module_exports(&file_path).await
542 }
543}
544
545#[cfg(test)]
546mod tests {
547
548 use std::sync::{Mutex, MutexGuard, OnceLock};
549
550 use super::*;
551
552 static CACHE_TEST_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
553
554 fn cache_test_guard() -> MutexGuard<'static, ()> {
555 CACHE_TEST_LOCK
556 .get_or_init(|| Mutex::new(()))
557 .lock()
558 .unwrap()
559 }
560
561 fn cached_stdlib_module_ptr(module: &str) -> Option<usize> {
562 let source = harn_stdlib::get_stdlib_source(module).expect("stdlib module source exists");
563 stdlib_module_artifact_cache_ptr(module, source)
564 }
565
566 #[test]
567 fn stdlib_artifact_cache_reuses_compilation_with_fresh_vm_state() {
568 let _guard = cache_test_guard();
569 reset_stdlib_module_artifact_cache();
570 let runtime = tokio::runtime::Builder::new_current_thread()
571 .enable_all()
572 .build()
573 .expect("runtime builds");
574
575 let (first_exports, second_exports, first_state_weak, second_state_weak) = runtime
576 .block_on(async {
577 let mut first_vm = Vm::new();
578 let first_exports = first_vm
579 .load_module_exports_from_import("std/agent/prompts")
580 .await
581 .expect("first stdlib import succeeds");
582 let first_state = first_exports
583 .get("render_agent_prompt")
584 .expect("first export exists")
585 .module_state()
586 .expect("first module state stays live while VM owns module");
587 let first_state_weak = Arc::downgrade(&first_state);
588 let first_state_ptr = Arc::as_ptr(&first_state);
589
590 let mut second_vm = Vm::new();
591 let second_exports = second_vm
592 .load_module_exports_from_import("std/agent/prompts")
593 .await
594 .expect("second stdlib import succeeds");
595 let second_state = second_exports
596 .get("render_agent_prompt")
597 .expect("second export exists")
598 .module_state()
599 .expect("second module state stays live while VM owns module");
600 let second_state_weak = Arc::downgrade(&second_state);
601
602 assert_ne!(first_state_ptr, Arc::as_ptr(&second_state));
603 (
604 first_exports,
605 second_exports,
606 first_state_weak,
607 second_state_weak,
608 )
609 });
610 let first_cached =
611 cached_stdlib_module_ptr("agent/prompts").expect("first import cached stdlib artifact");
612 assert_eq!(
613 cached_stdlib_module_ptr("agent/prompts"),
614 Some(first_cached)
615 );
616
617 let first = first_exports
618 .get("render_agent_prompt")
619 .expect("first export exists");
620 let second = second_exports
621 .get("render_agent_prompt")
622 .expect("second export exists");
623
624 assert!(!Arc::ptr_eq(first, second));
625 assert!(!Arc::ptr_eq(&first.func, &second.func));
626 assert!(!Arc::ptr_eq(&first.func.chunk, &second.func.chunk));
627 assert!(first.module_state().is_none());
628 assert!(second.module_state().is_none());
629 assert!(first_state_weak.upgrade().is_none());
630 assert!(second_state_weak.upgrade().is_none());
631 }
632
633 #[test]
634 fn stdlib_artifact_cache_is_process_wide_across_threads() {
635 let _guard = cache_test_guard();
636 reset_stdlib_module_artifact_cache();
637
638 let handle = std::thread::spawn(|| {
639 let runtime = tokio::runtime::Builder::new_current_thread()
640 .enable_all()
641 .build()
642 .expect("runtime builds");
643 runtime.block_on(async {
644 let mut vm = Vm::new();
645 vm.load_module_exports_from_import("std/agent/prompts")
646 .await
647 .expect("thread stdlib import succeeds");
648 });
649 });
650 handle.join().expect("thread joins");
651 let thread_cached = cached_stdlib_module_ptr("agent/prompts")
652 .expect("thread import cached stdlib artifact");
653
654 let runtime = tokio::runtime::Builder::new_current_thread()
655 .enable_all()
656 .build()
657 .expect("runtime builds");
658 runtime.block_on(async {
659 let mut vm = Vm::new();
660 vm.load_module_exports_from_import("std/agent/prompts")
661 .await
662 .expect("main-thread stdlib import succeeds");
663 });
664 assert_eq!(
665 cached_stdlib_module_ptr("agent/prompts"),
666 Some(thread_cached)
667 );
668 }
669
670 #[test]
671 fn module_closures_release_state_after_vm_drop() {
672 let runtime = tokio::runtime::Builder::new_current_thread()
673 .enable_all()
674 .build()
675 .expect("runtime builds");
676
677 let (closure_weak, registry_weak, state_weak) = runtime.block_on(async {
678 let mut vm = Vm::new();
679 let loaded = vm
680 .load_module_from_source(
681 PathBuf::from("<test>/module_cycle.harn"),
682 r#"
683var payload = "x" * 1024
684
685pub fn touch() {
686 return len(payload)
687}
688"#,
689 )
690 .await
691 .expect("module loads");
692 let closure = Arc::clone(loaded.functions.get("touch").expect("touch export exists"));
693 let closure_weak = Arc::downgrade(&closure);
694 let registry_weak = Arc::downgrade(&loaded._module_functions);
695 let state_weak = Arc::downgrade(&loaded._module_state);
696
697 drop(closure);
698 drop(loaded);
699 drop(vm);
700
701 (closure_weak, registry_weak, state_weak)
702 });
703
704 assert!(
705 closure_weak.upgrade().is_none(),
706 "module closure should drop with its VM"
707 );
708 assert!(
709 registry_weak.upgrade().is_none(),
710 "module function registry should drop with its VM"
711 );
712 assert!(
713 state_weak.upgrade().is_none(),
714 "module state should drop with its VM"
715 );
716 }
717}