Skip to main content

harn_vm/vm/
modules.rs

1use std::cell::RefCell;
2use std::collections::{BTreeMap, HashSet};
3use std::future::Future;
4use std::hash::{Hash, Hasher};
5use std::path::{Path, PathBuf};
6use std::pin::Pin;
7use std::rc::Rc;
8use std::sync::{Arc, Mutex, OnceLock};
9
10use crate::bytecode_cache;
11use crate::chunk::{Chunk, CompiledFunction};
12use crate::module_artifact::{compile_module_artifact_from_source, ModuleArtifact};
13use crate::value::{ModuleFunctionRegistry, VmClosure, VmEnv, VmError, VmValue};
14
15use super::{ScopeSpan, Vm};
16
/// Process-wide, lazily-initialised cache of compiled stdlib module
/// artifacts. Entries are keyed by the string produced by
/// `stdlib_artifact_cache_key` (module name plus a hash of the module name
/// and its source) and shared between callers via `Arc`.
static STDLIB_MODULE_ARTIFACT_CACHE: OnceLock<Mutex<BTreeMap<String, Arc<ModuleArtifact>>>> =
    OnceLock::new();
19
20fn stdlib_module_artifact_cache() -> &'static Mutex<BTreeMap<String, Arc<ModuleArtifact>>> {
21    STDLIB_MODULE_ARTIFACT_CACHE.get_or_init(|| Mutex::new(BTreeMap::new()))
22}
23
/// Test-only helper: drops every entry from the process-wide stdlib
/// artifact cache so a test starts from a cold cache.
#[cfg(test)]
fn reset_stdlib_module_artifact_cache() {
    let mut cache = stdlib_module_artifact_cache().lock().unwrap();
    cache.clear();
}
28
/// Test-only helper: returns the address of the cached artifact for
/// `module`/`source`, or `None` when nothing is cached under that key.
/// Comparing two returned addresses tells a test whether the same `Arc`
/// allocation is still in the cache.
#[cfg(test)]
fn stdlib_module_artifact_cache_ptr(module: &str, source: &str) -> Option<usize> {
    let key = stdlib_artifact_cache_key(module, source);
    let cache = stdlib_module_artifact_cache().lock().unwrap();
    let artifact = cache.get(&key)?;
    Some(Arc::as_ptr(artifact) as usize)
}
38
/// Result of instantiating a module: the closures it makes available and
/// the set of names it marks as public.
#[derive(Clone)]
pub(crate) struct LoadedModule {
    /// Closures by name: one per function compiled from the module, plus
    /// any closures the module re-exports from its `pub` imports.
    pub(crate) functions: BTreeMap<String, Rc<VmClosure>>,
    /// Names exposed to wildcard imports. When this set is empty, importers
    /// fall back to exporting every key of `functions`.
    pub(crate) public_names: HashSet<String>,
}
44
45pub fn resolve_module_import_path(base: &Path, path: &str) -> PathBuf {
46    let synthetic_current_file = base.join("__harn_import_base__.harn");
47    if let Some(resolved) = harn_modules::resolve_import_path(&synthetic_current_file, path) {
48        return resolved;
49    }
50
51    let mut file_path = base.join(path);
52
53    if !file_path.exists() && file_path.extension().is_none() {
54        file_path.set_extension("harn");
55    }
56
57    file_path
58}
59
/// Builds the in-process cache key for a stdlib module: the module name,
/// a colon, and a 16-digit lowercase hex hash computed over the module
/// name followed by its source text.
fn stdlib_artifact_cache_key(module: &str, source: &str) -> String {
    use std::collections::hash_map::DefaultHasher;

    let digest = {
        let mut state = DefaultHasher::new();
        module.hash(&mut state);
        source.hash(&mut state);
        state.finish()
    };
    format!("{module}:{digest:016x}")
}
66
67fn stdlib_module_artifact(
68    module: &str,
69    synthetic: &Path,
70    source: &'static str,
71) -> Result<Arc<ModuleArtifact>, VmError> {
72    let key = stdlib_artifact_cache_key(module, source);
73    {
74        let cache = stdlib_module_artifact_cache().lock().unwrap();
75        if let Some(cached) = cache.get(&key) {
76            return Ok(Arc::clone(cached));
77        }
78    }
79
80    // Stdlib modules are embedded in the binary so their content cannot
81    // legitimately change between processes; that means the disk cache
82    // for stdlib can use a synthetic source_path. The harn_version field
83    // of the cache key gates correctness across releases.
84    let lookup = bytecode_cache::load_module(synthetic, source);
85    let artifact = if let Some(artifact) = lookup.artifact {
86        artifact
87    } else {
88        let compiled = compile_module_artifact_from_source(synthetic, source)?;
89        if let Err(err) = bytecode_cache::store_module(&lookup.key, &compiled) {
90            if std::env::var_os("HARN_BYTECODE_CACHE_DEBUG").is_some() {
91                eprintln!("[harn] stdlib module cache write skipped for {module}: {err}");
92            }
93        }
94        compiled
95    };
96
97    let compiled = Arc::new(artifact);
98    let mut cache = stdlib_module_artifact_cache().lock().unwrap();
99    if let Some(cached) = cache.get(&key) {
100        return Ok(Arc::clone(cached));
101    }
102    cache.insert(key, Arc::clone(&compiled));
103    Ok(compiled)
104}
105
106impl Vm {
107    async fn load_module_from_source(
108        &mut self,
109        synthetic: PathBuf,
110        source: &str,
111    ) -> Result<LoadedModule, VmError> {
112        if let Some(loaded) = self.module_cache.get(&synthetic).cloned() {
113            return Ok(loaded);
114        }
115        Rc::make_mut(&mut self.source_cache).insert(synthetic.clone(), source.to_string());
116
117        let artifact = compile_module_artifact_from_source(&synthetic, source)?;
118
119        self.imported_paths.push(synthetic.clone());
120        let loaded = self.instantiate_module(None, &artifact).await?;
121        self.imported_paths.pop();
122        Rc::make_mut(&mut self.module_cache).insert(synthetic, loaded.clone());
123        Ok(loaded)
124    }
125
126    async fn load_stdlib_module_from_source(
127        &mut self,
128        module: &str,
129        synthetic: PathBuf,
130        source: &'static str,
131    ) -> Result<LoadedModule, VmError> {
132        if let Some(loaded) = self.module_cache.get(&synthetic).cloned() {
133            return Ok(loaded);
134        }
135        Rc::make_mut(&mut self.source_cache).insert(synthetic.clone(), source.to_string());
136
137        let artifact = stdlib_module_artifact(module, &synthetic, source)?;
138        self.imported_paths.push(synthetic.clone());
139        let loaded = self.instantiate_stdlib_module(artifact.as_ref()).await?;
140        self.imported_paths.pop();
141        Rc::make_mut(&mut self.module_cache).insert(synthetic, loaded.clone());
142        Ok(loaded)
143    }
144
145    async fn instantiate_stdlib_module(
146        &mut self,
147        artifact: &ModuleArtifact,
148    ) -> Result<LoadedModule, VmError> {
149        self.instantiate_module(None, artifact).await
150    }
151
152    /// Instantiate a previously-compiled [`ModuleArtifact`] into a
153    /// [`LoadedModule`]. Re-runs nested imports, replays the init chunk
154    /// into a fresh module env, mints a [`VmClosure`] for each compiled
155    /// function (stamped with `module_source_dir` so imports from inside
156    /// those functions resolve against the originating file), and
157    /// applies the re-export pass. Used by both stdlib and user-import
158    /// code paths.
159    async fn instantiate_module(
160        &mut self,
161        module_source_dir: Option<PathBuf>,
162        artifact: &ModuleArtifact,
163    ) -> Result<LoadedModule, VmError> {
164        let caller_env = self.env.clone();
165        let old_source_dir = self.source_dir.clone();
166        self.env = VmEnv::new();
167        self.source_dir = module_source_dir.clone();
168
169        for import in &artifact.imports {
170            self.execute_import(&import.path, import.selected_names.as_deref())
171                .await?;
172        }
173
174        let module_state: crate::value::ModuleState = {
175            let mut init_env = self.env.clone();
176            if let Some(init_chunk) = &artifact.init_chunk {
177                let fresh_init_chunk = Chunk::from_cached(init_chunk);
178                let saved_env = std::mem::replace(&mut self.env, init_env);
179                let saved_frames = std::mem::take(&mut self.frames);
180                let saved_handlers = std::mem::take(&mut self.exception_handlers);
181                let saved_iterators = std::mem::take(&mut self.iterators);
182                let saved_deadlines = std::mem::take(&mut self.deadlines);
183                let init_result = self.run_chunk(&fresh_init_chunk).await;
184                init_env = std::mem::replace(&mut self.env, saved_env);
185                self.frames = saved_frames;
186                self.exception_handlers = saved_handlers;
187                self.iterators = saved_iterators;
188                self.deadlines = saved_deadlines;
189                init_result?;
190            }
191            Rc::new(RefCell::new(init_env))
192        };
193
194        let module_env = self.env.clone();
195        let registry: ModuleFunctionRegistry = Rc::new(RefCell::new(BTreeMap::new()));
196        let mut functions: BTreeMap<String, Rc<VmClosure>> = BTreeMap::new();
197        let mut public_names = artifact.public_names.clone();
198
199        for (name, compiled) in &artifact.functions {
200            let closure = Rc::new(VmClosure {
201                func: Rc::new(CompiledFunction::from_cached(compiled)),
202                env: module_env.clone(),
203                source_dir: module_source_dir.clone(),
204                module_functions: Some(Rc::clone(&registry)),
205                module_state: Some(Rc::clone(&module_state)),
206            });
207            registry
208                .borrow_mut()
209                .insert(name.clone(), Rc::clone(&closure));
210            self.env
211                .define(name, VmValue::Closure(Rc::clone(&closure)), false)?;
212            module_state
213                .borrow_mut()
214                .define(name, VmValue::Closure(Rc::clone(&closure)), false)?;
215            functions.insert(name.clone(), Rc::clone(&closure));
216        }
217
218        for import in artifact.imports.iter().filter(|import| import.is_pub) {
219            let cache_key = self.cache_key_for_import(&import.path);
220            let Some(loaded) = self.module_cache.get(&cache_key).cloned() else {
221                return Err(VmError::Runtime(format!(
222                    "Re-export error: imported module '{}' was not loaded",
223                    import.path
224                )));
225            };
226            let names_to_reexport: Vec<String> = match &import.selected_names {
227                Some(names) => names.clone(),
228                None => {
229                    if loaded.public_names.is_empty() {
230                        loaded.functions.keys().cloned().collect()
231                    } else {
232                        loaded.public_names.iter().cloned().collect()
233                    }
234                }
235            };
236            for name in names_to_reexport {
237                let Some(closure) = loaded.functions.get(&name) else {
238                    return Err(VmError::Runtime(format!(
239                        "Re-export error: '{name}' is not exported by '{}'",
240                        import.path
241                    )));
242                };
243                if let Some(existing) = functions.get(&name) {
244                    if !Rc::ptr_eq(existing, closure) {
245                        return Err(VmError::Runtime(format!(
246                            "Re-export collision: '{name}' is defined here and also \
247                             re-exported from '{}'",
248                            import.path
249                        )));
250                    }
251                }
252                functions.insert(name.clone(), Rc::clone(closure));
253                public_names.insert(name);
254            }
255        }
256
257        self.env = caller_env;
258        self.source_dir = old_source_dir;
259
260        Ok(LoadedModule {
261            functions,
262            public_names,
263        })
264    }
265
266    fn export_loaded_module(
267        &mut self,
268        module_path: &Path,
269        loaded: &LoadedModule,
270        selected_names: Option<&[String]>,
271    ) -> Result<(), VmError> {
272        let export_names: Vec<String> = if let Some(names) = selected_names {
273            names.to_vec()
274        } else if !loaded.public_names.is_empty() {
275            loaded.public_names.iter().cloned().collect()
276        } else {
277            loaded.functions.keys().cloned().collect()
278        };
279
280        let module_name = module_path.display().to_string();
281        for name in export_names {
282            let Some(closure) = loaded.functions.get(&name) else {
283                return Err(VmError::Runtime(format!(
284                    "Import error: '{name}' is not defined in {module_name}"
285                )));
286            };
287            if let Some(VmValue::Closure(_)) = self.env.get(&name) {
288                return Err(VmError::Runtime(format!(
289                    "Import collision: '{name}' is already defined when importing {module_name}. \
290                     Use selective imports to disambiguate: import {{ {name} }} from \"...\""
291                )));
292            }
293            self.env
294                .define(&name, VmValue::Closure(Rc::clone(closure)), false)?;
295        }
296        Ok(())
297    }
298
299    /// Execute an import, reading and running the file's declarations.
300    pub(super) fn execute_import<'a>(
301        &'a mut self,
302        path: &'a str,
303        selected_names: Option<&'a [String]>,
304    ) -> Pin<Box<dyn Future<Output = Result<(), VmError>> + 'a>> {
305        Box::pin(async move {
306            let _import_span = ScopeSpan::new(crate::tracing::SpanKind::Import, path.to_string());
307
308            if let Some(module) = path.strip_prefix("std/") {
309                if let Some(source) = crate::stdlib_modules::get_stdlib_source(module) {
310                    let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
311                    if self.imported_paths.contains(&synthetic) {
312                        return Ok(());
313                    }
314                    let loaded = self
315                        .load_stdlib_module_from_source(module, synthetic.clone(), source)
316                        .await?;
317                    self.export_loaded_module(&synthetic, &loaded, selected_names)?;
318                    return Ok(());
319                }
320                return Err(VmError::Runtime(format!(
321                    "Unknown stdlib module: std/{module}"
322                )));
323            }
324
325            let base = self
326                .source_dir
327                .clone()
328                .unwrap_or_else(|| PathBuf::from("."));
329            let file_path = resolve_module_import_path(&base, path);
330
331            let canonical = file_path
332                .canonicalize()
333                .unwrap_or_else(|_| file_path.clone());
334            if self.imported_paths.contains(&canonical) {
335                return Ok(());
336            }
337            if let Some(loaded) = self.module_cache.get(&canonical).cloned() {
338                return self.export_loaded_module(&canonical, &loaded, selected_names);
339            }
340            self.imported_paths.push(canonical.clone());
341
342            let source = std::fs::read_to_string(&file_path).map_err(|e| {
343                VmError::Runtime(format!(
344                    "Import error: cannot read '{}': {e}",
345                    file_path.display()
346                ))
347            })?;
348            Rc::make_mut(&mut self.source_cache).insert(canonical.clone(), source.clone());
349            Rc::make_mut(&mut self.source_cache).insert(file_path.clone(), source.clone());
350
351            // Disk cache first: hits skip parse + compile for the imported
352            // module's whole function pool, not just the entry pipeline.
353            let lookup = bytecode_cache::load_module(&file_path, &source);
354            let artifact = if let Some(artifact) = lookup.artifact {
355                artifact
356            } else {
357                let compiled = compile_module_artifact_from_source(&file_path, &source)?;
358                if let Err(err) = bytecode_cache::store_module(&lookup.key, &compiled) {
359                    if std::env::var_os("HARN_BYTECODE_CACHE_DEBUG").is_some() {
360                        eprintln!(
361                            "[harn] module cache write skipped for {}: {err}",
362                            file_path.display()
363                        );
364                    }
365                }
366                compiled
367            };
368
369            let module_source_dir = file_path.parent().map(|p| p.to_path_buf());
370            let loaded = self
371                .instantiate_module(module_source_dir, &artifact)
372                .await?;
373            self.imported_paths.pop();
374            Rc::make_mut(&mut self.module_cache).insert(canonical.clone(), loaded.clone());
375            self.export_loaded_module(&canonical, &loaded, selected_names)?;
376
377            Ok(())
378        })
379    }
380
381    /// Return the path key that `execute_import` would use to cache the
382    /// LoadedModule for this import string. Used by the re-export pass to
383    /// look up the already-loaded source module after `execute_import`
384    /// has populated [`Vm::module_cache`].
385    fn cache_key_for_import(&self, path: &str) -> PathBuf {
386        if let Some(module) = path.strip_prefix("std/") {
387            return PathBuf::from(format!("<stdlib>/{module}.harn"));
388        }
389        let base = self
390            .source_dir
391            .clone()
392            .unwrap_or_else(|| PathBuf::from("."));
393        let file_path = resolve_module_import_path(&base, path);
394        file_path.canonicalize().unwrap_or(file_path)
395    }
396
397    /// Load a module file and return the exported function closures that
398    /// would be visible to a wildcard import.
399    pub async fn load_module_exports(
400        &mut self,
401        path: &Path,
402    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
403        let path_str = path.to_string_lossy().into_owned();
404        self.execute_import(&path_str, None).await?;
405
406        let mut file_path = if path.is_absolute() {
407            path.to_path_buf()
408        } else {
409            self.source_dir
410                .clone()
411                .unwrap_or_else(|| PathBuf::from("."))
412                .join(path)
413        };
414        if !file_path.exists() && file_path.extension().is_none() {
415            file_path.set_extension("harn");
416        }
417
418        let canonical = file_path
419            .canonicalize()
420            .unwrap_or_else(|_| file_path.clone());
421        let loaded = self.module_cache.get(&canonical).cloned().ok_or_else(|| {
422            VmError::Runtime(format!(
423                "Import error: failed to cache loaded module '{}'",
424                canonical.display()
425            ))
426        })?;
427
428        let export_names: Vec<String> = if loaded.public_names.is_empty() {
429            loaded.functions.keys().cloned().collect()
430        } else {
431            loaded.public_names.iter().cloned().collect()
432        };
433
434        let mut exports = BTreeMap::new();
435        for name in export_names {
436            let Some(closure) = loaded.functions.get(&name) else {
437                return Err(VmError::Runtime(format!(
438                    "Import error: exported function '{name}' is missing from {}",
439                    canonical.display()
440                )));
441            };
442            exports.insert(name, Rc::clone(closure));
443        }
444
445        Ok(exports)
446    }
447
448    /// Load synthetic source keyed by a synthetic module path and return
449    /// the exported function closures that a wildcard import would expose.
450    pub async fn load_module_exports_from_source(
451        &mut self,
452        source_key: impl Into<PathBuf>,
453        source: &str,
454    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
455        let synthetic = source_key.into();
456        let loaded = self
457            .load_module_from_source(synthetic.clone(), source)
458            .await?;
459        let export_names: Vec<String> = if loaded.public_names.is_empty() {
460            loaded.functions.keys().cloned().collect()
461        } else {
462            loaded.public_names.iter().cloned().collect()
463        };
464
465        let mut exports = BTreeMap::new();
466        for name in export_names {
467            let Some(closure) = loaded.functions.get(&name) else {
468                return Err(VmError::Runtime(format!(
469                    "Import error: exported function '{name}' is missing from {}",
470                    synthetic.display()
471                )));
472            };
473            exports.insert(name, Rc::clone(closure));
474        }
475
476        Ok(exports)
477    }
478
479    /// Load a module by import path (`std/foo`, relative module path, or
480    /// package import) and return the exported function closures that a
481    /// wildcard import would expose.
482    pub async fn load_module_exports_from_import(
483        &mut self,
484        import_path: &str,
485    ) -> Result<BTreeMap<String, Rc<VmClosure>>, VmError> {
486        self.execute_import(import_path, None).await?;
487
488        if let Some(module) = import_path.strip_prefix("std/") {
489            let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
490            let loaded = self.module_cache.get(&synthetic).cloned().ok_or_else(|| {
491                VmError::Runtime(format!(
492                    "Import error: failed to cache loaded module '{}'",
493                    synthetic.display()
494                ))
495            })?;
496            let mut exports = BTreeMap::new();
497            let export_names: Vec<String> = if loaded.public_names.is_empty() {
498                loaded.functions.keys().cloned().collect()
499            } else {
500                loaded.public_names.iter().cloned().collect()
501            };
502            for name in export_names {
503                let Some(closure) = loaded.functions.get(&name) else {
504                    return Err(VmError::Runtime(format!(
505                        "Import error: exported function '{name}' is missing from {}",
506                        synthetic.display()
507                    )));
508                };
509                exports.insert(name, Rc::clone(closure));
510            }
511            return Ok(exports);
512        }
513
514        let base = self
515            .source_dir
516            .clone()
517            .unwrap_or_else(|| PathBuf::from("."));
518        let file_path = resolve_module_import_path(&base, import_path);
519        self.load_module_exports(&file_path).await
520    }
521}
522
#[cfg(test)]
mod tests {
    use std::rc::Rc;
    use std::sync::{Mutex, MutexGuard, OnceLock};

    use super::*;

    /// Serialises the tests below: they all reset and inspect the same
    /// process-wide stdlib artifact cache.
    static CACHE_TEST_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

    fn cache_test_guard() -> MutexGuard<'static, ()> {
        let lock = CACHE_TEST_LOCK.get_or_init(|| Mutex::new(()));
        lock.lock().unwrap()
    }

    /// Address of the cached artifact for a stdlib module, if any.
    fn cached_stdlib_module_ptr(module: &str) -> Option<usize> {
        let source = harn_stdlib::get_stdlib_source(module).expect("stdlib module source exists");
        stdlib_module_artifact_cache_ptr(module, source)
    }

    fn current_thread_runtime() -> tokio::runtime::Runtime {
        tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("runtime builds")
    }

    /// Imports `std/agent/prompts` on a brand-new VM and returns its
    /// wildcard exports, panicking with `failure` if the import fails.
    fn import_prompts_on_fresh_vm(
        runtime: &tokio::runtime::Runtime,
        failure: &str,
    ) -> BTreeMap<String, Rc<VmClosure>> {
        runtime.block_on(async {
            let mut vm = Vm::new();
            vm.load_module_exports_from_import("std/agent/prompts")
                .await
                .expect(failure)
        })
    }

    #[test]
    fn stdlib_artifact_cache_reuses_compilation_with_fresh_vm_state() {
        let _guard = cache_test_guard();
        reset_stdlib_module_artifact_cache();
        let runtime = current_thread_runtime();

        let first_exports = import_prompts_on_fresh_vm(&runtime, "first stdlib import succeeds");
        let first_cached =
            cached_stdlib_module_ptr("agent/prompts").expect("first import cached stdlib artifact");

        let second_exports = import_prompts_on_fresh_vm(&runtime, "second stdlib import succeeds");
        // The second VM must have reused the artifact cached by the first.
        assert_eq!(
            cached_stdlib_module_ptr("agent/prompts"),
            Some(first_cached)
        );

        let first = first_exports
            .get("render_agent_prompt")
            .expect("first export exists");
        let second = second_exports
            .get("render_agent_prompt")
            .expect("second export exists");

        // Shared artifact, but every VM gets its own closure, function,
        // chunk and module state.
        assert!(!Rc::ptr_eq(first, second));
        assert!(!Rc::ptr_eq(&first.func, &second.func));
        assert!(!Rc::ptr_eq(&first.func.chunk, &second.func.chunk));
        assert!(!Rc::ptr_eq(
            first.module_state.as_ref().expect("first module state"),
            second.module_state.as_ref().expect("second module state")
        ));
    }

    #[test]
    fn stdlib_artifact_cache_is_process_wide_across_threads() {
        let _guard = cache_test_guard();
        reset_stdlib_module_artifact_cache();

        // Populate the cache from another thread; the exports are dropped
        // on that thread and never cross the thread boundary.
        let worker = std::thread::spawn(|| {
            let runtime = current_thread_runtime();
            drop(import_prompts_on_fresh_vm(
                &runtime,
                "thread stdlib import succeeds",
            ));
        });
        worker.join().expect("thread joins");
        let thread_cached = cached_stdlib_module_ptr("agent/prompts")
            .expect("thread import cached stdlib artifact");

        let runtime = current_thread_runtime();
        drop(import_prompts_on_fresh_vm(
            &runtime,
            "main-thread stdlib import succeeds",
        ));
        assert_eq!(
            cached_stdlib_module_ptr("agent/prompts"),
            Some(thread_cached)
        );
    }
}
627}