Skip to main content

lean_rs/module/
library.rs

1//! RAII handle over a Lake-built Lean shared object.
2//!
3//! [`LeanLibrary`] owns a [`libloading::Library`] for the duration of its
4//! scope and exposes a single safe operation that callers actually want:
5//! initialize a named Lean module out of it. The dlopen step, the Lake
6//! symbol-mangling convention, the `IO Unit` decoding, and the
7//! `builtin` flag policy are hidden inside the implementation; the
8//! interface mentions only the human-readable package and module names.
9//!
10//! Construction requires a `&'lean LeanRuntime` borrow. Use-before-init
11//! is therefore structurally impossible: a caller cannot build a
12//! [`LeanLibrary`] without holding the proof that
13//! [`crate::LeanRuntime::init`] has succeeded.
14//!
15//! ## Symbol-table walk at `open`
16//!
17//! Lean compiles `def x : T := constant` — a nullary export whose body
18//! reduces to a constant — to a persistent `lean_object*` global variable
19//! (`lean_mark_persistent` at module init), not a callable function.
20//! Calling such a symbol as a function pointer SIGBUSes. To make the
21//! distinction invisible to callers, [`LeanLibrary::open`] reads the
22//! dylib's bytes once, walks the export table with the [`object`] crate,
23//! and records the names of data-section exports as `globals`.
24//! [`LeanModule::exported`](super::loaded::LeanModule::exported) consults
25//! the set to dispatch function-vs-global at call time.
26
27// SAFETY DOC: every `unsafe { ... }` block in this file carries its own
28// `// SAFETY:` comment. The blanket allow exists because this is the
29// single `pub` doorway into the dlopen/dlsym path; per
30// `docs/architecture/01-safety-model.md` the opt-out lives at the
31// smallest scope that compiles.
32#![allow(unsafe_code)]
33
34use std::collections::HashSet;
35use std::ffi::c_void;
36use std::path::{Path, PathBuf};
37
38use lean_rs_sys::lean_object;
39use object::{Object, ObjectSection, ObjectSymbol, SectionKind, SymbolSection};
40
41use super::initializer::{InitializerName, RawInitializer, call_initializer};
42use super::loaded::LeanModule;
43#[cfg(doc)]
44use crate::error::HostStage;
45use crate::error::{LeanError, LeanResult};
46use crate::runtime::LeanRuntime;
47
48/// A loaded native Lean shared object.
49///
50/// Wraps a [`libloading::Library`] and hands out initialized module
51/// handles via [`LeanLibrary::initialize_module`]. The `'lean` lifetime
52/// parameter ties the library to the witnessing
53/// [`crate::LeanRuntime`] borrow; the resulting [`LeanModule`] handles
54/// borrow from `self`, so they cannot outlive the library that hosts
55/// them.
56///
57/// Neither [`Send`] nor [`Sync`]: the `&'lean LeanRuntime` field
58/// inherits the runtime's per-thread restriction (`LeanRuntime: !Sync`
59/// implies `&LeanRuntime: !Send`, and `&LeanRuntime: Sync` iff
60/// `LeanRuntime: Sync`). A negative-bound compile-time assertion in
61/// the test module fails if either auto-trait is ever implemented.
62pub struct LeanLibrary<'lean> {
63    library: libloading::Library,
64    path: PathBuf,
65    runtime: &'lean LeanRuntime,
66    /// Names of data-section exports (Lean nullary-constant globals),
67    /// normalised to what [`libloading::Library::get`] resolves with
68    /// (Mach-O leading underscore stripped). Computed once at [`open`]
69    /// and consulted by
70    /// [`LeanModule::exported`](super::loaded::LeanModule::exported) to
71    /// dispatch function-vs-global at call time.
72    ///
73    /// [`open`]: Self::open
74    globals: HashSet<String>,
75}
76
77impl<'lean> LeanLibrary<'lean> {
78    /// Load a Lake-built Lean shared object from `path`.
79    ///
80    /// Reads the file's symbol table once to classify each exported
81    /// symbol as a function (text/code section) or a Lean
82    /// nullary-constant global (data/rodata/bss section). The
83    /// classification is consulted by
84    /// [`LeanModule::exported`](super::loaded::LeanModule::exported) so
85    /// callers never write the function-vs-global distinction at the
86    /// call site. The walk is cheap (a single `std::fs::read` plus an
87    /// in-memory parse) and amortised across every later lookup.
88    ///
89    /// The `runtime` borrow is the type-level proof that the Lean runtime
90    /// is up; it is retained for the symbol-initialization step but
91    /// otherwise unused. Module initialization happens later through
92    /// [`LeanLibrary::initialize_module`].
93    ///
94    /// # Errors
95    ///
96    /// Returns [`LeanError::Host`] with stage [`HostStage::Load`] if:
97    ///
98    /// - the file cannot be read (missing, permission denied),
99    /// - the bytes do not parse as a recognised object format (Mach-O,
100    ///   ELF, PE),
101    /// - the dynamic linker fails to open the file (missing transitive
102    ///   dependency, architecture mismatch, …).
103    ///
104    /// The diagnostic embeds the path and the underlying error message.
105    pub fn open(runtime: &'lean LeanRuntime, path: impl AsRef<Path>) -> LeanResult<Self> {
106        let path = path.as_ref();
107        let _span = tracing::debug_span!(
108            target: "lean_rs",
109            "lean_rs.module.library.open",
110            path = %crate::error::redact::short_path(path),
111        )
112        .entered();
113        let globals = classify_globals(path)?;
114        // SAFETY: `Library::new` runs the platform dynamic loader. Lake
115        // does not emit constructor-style initializers for Lean
116        // libraries (the per-module `initialize_*` functions are
117        // explicit, not C constructors), so the load is side-effect-free
118        // from Rust's perspective; the resulting handle releases the
119        // library on drop.
120        let library = unsafe { libloading::Library::new(path) }.map_err(|err| {
121            LeanError::module_init(format!("failed to open Lean library '{}': {err}", path.display()))
122        })?;
123        Ok(Self {
124            library,
125            path: path.to_path_buf(),
126            runtime,
127            globals,
128        })
129    }
130
131    /// Open a Lake-built Lean shared object with **globally visible
132    /// symbols** (POSIX `RTLD_GLOBAL` on Unix; the Windows side stays
133    /// on the default loader since DLL symbols are already global).
134    ///
135    /// The same contract as [`LeanLibrary::open`], plus: symbols
136    /// defined by this dylib become visible to the dynamic linker's
137    /// global namespace, so any subsequently `dlopen`ed dylib whose
138    /// initializer chain references them resolves correctly.
139    ///
140    /// The motivating case is the `lean-rs-host` shim load: the host shim
141    /// package imports the generic interop shim package, so opening the
142    /// generic dylib globally first lets the host shim initializer resolve
143    /// the generated `LeanRsInterop.*` references normally.
144    ///
145    /// # Errors
146    ///
147    /// Same as [`LeanLibrary::open`].
148    pub fn open_globally(runtime: &'lean LeanRuntime, path: impl AsRef<Path>) -> LeanResult<Self> {
149        let path = path.as_ref();
150        let _span = tracing::debug_span!(
151            target: "lean_rs",
152            "lean_rs.module.library.open_globally",
153            path = %crate::error::redact::short_path(path),
154        )
155        .entered();
156        let globals = classify_globals(path)?;
157        let library = open_with_global_visibility(path)?;
158        Ok(Self {
159            library,
160            path: path.to_path_buf(),
161            runtime,
162            globals,
163        })
164    }
165
166    /// Initialize the Lean module identified by `(package, module)`.
167    ///
168    /// Resolves the Lake-mangled initializer symbol against this
169    /// library, invokes it under a panic boundary with the runtime
170    /// `builtin` flag, and decodes the resulting `IO Unit`. Idempotent:
171    /// the Lean-emitted initializer body short-circuits to `IO.ok(())`
172    /// on its second and later calls, so repeated invocations are safe
173    /// and cheap.
174    ///
175    /// # Errors
176    ///
177    /// Returns [`LeanError::Host`] with:
178    ///
179    /// - [`HostStage::Link`] if `package` or `module` is not a valid
180    ///   Lake identifier, or the mangled initializer symbol is not
181    ///   exported by this library.
182    /// - [`HostStage::Load`] if the initializer panics or returns
183    ///   `IO.error`.
184    pub fn initialize_module<'lib>(&'lib self, package: &str, module: &str) -> LeanResult<LeanModule<'lean, 'lib>> {
185        let _span = tracing::debug_span!(
186            target: "lean_rs",
187            "lean_rs.module.library.initialize",
188            package = package,
189            module = module,
190        )
191        .entered();
192        let name = InitializerName::from_lake_names(package, module)?;
193        let init = self.lookup_initializer(&name)?;
194        call_initializer(self.runtime, init, &name)?;
195        Ok(LeanModule::new(self, name))
196    }
197
198    /// Names of data-section exports for this library, normalised to the
199    /// form [`libloading::Library::get`] resolves with.
200    pub(crate) fn globals(&self) -> &HashSet<String> {
201        &self.globals
202    }
203
204    /// The runtime borrow this library was opened with.
205    pub(crate) fn runtime(&self) -> &'lean LeanRuntime {
206        self.runtime
207    }
208
209    /// On-disk path the library was opened from. Used only for
210    /// diagnostics.
211    pub(crate) fn path(&self) -> &Path {
212        &self.path
213    }
214
215    /// Resolve `name` as a function-pointer symbol (text section).
216    ///
217    /// Returns the raw symbol address. The caller is responsible for
218    /// casting to the right `unsafe extern "C" fn(...) -> ...`
219    /// signature, typically by handing the address to
220    /// [`LeanExported::from_function_address`].
221    ///
222    /// # Errors
223    ///
224    /// Returns [`LeanError::Host`] with stage [`HostStage::Link`] if
225    /// the symbol is not exported by this library. The diagnostic
226    /// embeds the symbol name and the library path.
227    ///
228    /// [`LeanExported::from_function_address`]: crate::module::LeanExported::from_function_address
229    pub fn resolve_function_symbol(&self, name: &str) -> LeanResult<*mut c_void> {
230        // SAFETY: `libloading::Library::get::<*mut c_void>` is the raw
231        // address lookup; the returned `Symbol<'_, *mut c_void>` borrows
232        // from `self.library`, so dereferencing it inside this scope is
233        // valid. We copy the address out via `*symbol` — the same idiom
234        // `lookup_initializer` uses.
235        let symbol: libloading::Symbol<'_, *mut c_void> =
236            unsafe { self.library.get(name.as_bytes()) }.map_err(|err| {
237                LeanError::symbol_lookup(format!(
238                    "unknown exported symbol '{}' in '{}': {err}",
239                    name,
240                    self.path.display()
241                ))
242            })?;
243        Ok(*symbol)
244    }
245
246    /// Resolve `name` as a function-pointer symbol, tolerating a missing
247    /// symbol.
248    ///
249    /// Returns `Some(address)` when the symbol resolves, `None` when it
250    /// is absent from this library. The opinionated `lean-rs-host` stack
251    /// uses this to pre-resolve *optional* capability symbols at
252    /// `LeanCapabilities::new` time without failing capability load when
253    /// a fixture omits a service.
254    ///
255    /// Symbol resolution at the dlsym level only fails with "symbol not
256    /// present" — the dynamic linker has already accepted the library at
257    /// [`Self::open`], so a per-symbol lookup error is either "missing"
258    /// or a `\0` in the name (which a sane caller never passes). Both
259    /// are collapsed into the `None` branch by design.
260    pub fn resolve_optional_function_symbol(&self, name: &str) -> Option<*mut c_void> {
261        // SAFETY: identical to `resolve_function_symbol`; the symbol
262        // borrow does not escape this scope.
263        match unsafe { self.library.get::<*mut c_void>(name.as_bytes()) } {
264            Ok(symbol) => Some(*symbol),
265            Err(_) => None,
266        }
267    }
268
269    /// Resolve `name` as a Lean nullary-constant global symbol
270    /// (data section). The returned pointer addresses the storage
271    /// holding the persistent `lean_object*` value; the caller reads
272    /// `*returned` to get the Lean object pointer itself.
273    ///
274    /// # Errors
275    ///
276    /// Returns [`LeanError::Host`] with stage [`HostStage::Link`] if
277    /// the symbol is not exported by this library.
278    pub(crate) fn resolve_global_symbol(&self, name: &str) -> LeanResult<*mut *mut lean_object> {
279        // SAFETY: `libloading::Library::get::<T>` returns the symbol's
280        // address typed as `T`. For data symbols the address is the
281        // location of the variable, so the parameterised type spells a
282        // pointer to the variable's value. The borrow lifetime ends
283        // when this function returns; we copy the address out.
284        let symbol: libloading::Symbol<'_, *mut *mut lean_object> = unsafe { self.library.get(name.as_bytes()) }
285            .map_err(|err| {
286                LeanError::symbol_lookup(format!(
287                    "unknown global symbol '{}' in '{}': {err}",
288                    name,
289                    self.path.display()
290                ))
291            })?;
292        Ok(*symbol)
293    }
294
295    /// Resolve the module initializer by `dlsym`, trying both the modern
296    /// (Lean ≥ 4.27) and legacy (Lean ≤ 4.26) Lake symbol shapes. The
297    /// diagnostic on failure names both candidates so the operator can
298    /// see what was searched.
299    fn lookup_initializer(&self, name: &InitializerName) -> LeanResult<RawInitializer> {
300        // SAFETY: the type parameter spells the canonical Lake
301        // initializer signature (`(u8) -> *mut lean_object`), verified
302        // against the Lake-emitted C in `fixtures/lean/.lake/build/ir/`
303        // for every supported Lean version. `libloading::Library::get`
304        // returns a borrowed `Symbol<'_, T>`; copying the raw function
305        // pointer out of it via deref is the standard pattern when the
306        // caller does not need to retain the borrow.
307        let modern: Result<libloading::Symbol<'_, RawInitializer>, _> =
308            unsafe { self.library.get(name.symbol_bytes()) };
309        if let Ok(symbol) = modern {
310            return Ok(*symbol);
311        }
312        // SAFETY: same as above; the legacy symbol shape is the only
313        // exported form for Lean ≤ 4.26 dylibs.
314        let legacy: Result<libloading::Symbol<'_, RawInitializer>, _> =
315            unsafe { self.library.get(name.legacy_symbol_bytes()) };
316        match legacy {
317            Ok(symbol) => Ok(*symbol),
318            Err(err) => Err(LeanError::linking(format!(
319                "missing initializer symbol in '{}': tried '{}' and '{}': {err}",
320                self.path.display(),
321                name.symbol_str(),
322                name.legacy_symbol_str(),
323            ))),
324        }
325    }
326}
327
328impl std::fmt::Debug for LeanLibrary<'_> {
329    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330        f.debug_struct("LeanLibrary")
331            .field("path", &self.path)
332            .field("globals_count", &self.globals.len())
333            .finish()
334    }
335}
336
337/// Read `path` and collect the names of its data-section exports
338/// (Lean nullary-constant globals).
339///
340/// Mach-O export names carry a leading `_` that `libloading` strips at
341/// lookup; we strip here so the set keys match what
342/// [`LeanLibrary::resolve_global_symbol`] queries with. ELF and PE
343/// symbol names are stored without leading `_`, so no normalisation is
344/// needed on those platforms.
345///
346/// Symbols whose section can't be classified (undefined, absolute,
347/// common) are skipped: they cannot be the Lean-compiled persistent
348/// globals we care about.
349fn classify_globals(path: &Path) -> LeanResult<HashSet<String>> {
350    let bytes = std::fs::read(path)
351        .map_err(|err| LeanError::module_init(format!("failed to read Lean library '{}': {err}", path.display())))?;
352    let file = object::File::parse(&*bytes)
353        .map_err(|err| LeanError::module_init(format!("failed to parse object file '{}': {err}", path.display())))?;
354
355    let strip_underscore = matches!(file.format(), object::BinaryFormat::MachO | object::BinaryFormat::Wasm);
356
357    let mut globals = HashSet::new();
358    // `symbols()` covers both ELF `.symtab` (when present) and Mach-O
359    // `LC_SYMTAB` external defs; `dynamic_symbols()` on Mach-O omits
360    // data-section externals such as Lean nullary-constant globals,
361    // which is exactly what we need to classify.
362    for symbol in file.symbols() {
363        if !symbol.is_global() || symbol.is_undefined() {
364            continue;
365        }
366        let SymbolSection::Section(section_index) = symbol.section() else {
367            continue;
368        };
369        let Ok(section) = file.section_by_index(section_index) else {
370            continue;
371        };
372        if !is_data_section(section.kind()) {
373            continue;
374        }
375        let Ok(name) = symbol.name() else {
376            continue;
377        };
378        let normalised = if strip_underscore {
379            name.strip_prefix('_').unwrap_or(name)
380        } else {
381            name
382        };
383        if normalised.is_empty() {
384            continue;
385        }
386        globals.insert(normalised.to_owned());
387    }
388    Ok(globals)
389}
390
391/// Open `path` with the dynamic loader's *global* symbol-visibility
392/// flag set so any subsequently loaded dylib can resolve symbols
393/// defined here.
394///
395/// On Unix this means `RTLD_LAZY | RTLD_GLOBAL`. On Windows the
396/// platform loader publishes module symbols globally by default, so
397/// the standard [`libloading::Library::new`] path is sufficient.
398fn open_with_global_visibility(path: &Path) -> LeanResult<libloading::Library> {
399    #[cfg(unix)]
400    {
401        // SAFETY: identical contract to `Library::new` — runs the
402        // platform dynamic loader against `path`. The added
403        // `RTLD_GLOBAL` flag only affects symbol-table visibility for
404        // later loads; it does not change initializer-execution
405        // semantics (Lake-built Lean dylibs have no constructor-style
406        // init).
407        let unix_library = unsafe {
408            libloading::os::unix::Library::open(
409                Some(path),
410                libloading::os::unix::RTLD_LAZY | libloading::os::unix::RTLD_GLOBAL,
411            )
412        }
413        .map_err(|err| {
414            LeanError::module_init(format!(
415                "failed to open Lean library '{}' with RTLD_GLOBAL: {err}",
416                path.display()
417            ))
418        })?;
419        Ok(unix_library.into())
420    }
421    #[cfg(not(unix))]
422    {
423        // SAFETY: same as `Library::new`. See unix branch.
424        let library = unsafe { libloading::Library::new(path) }.map_err(|err| {
425            LeanError::module_init(format!("failed to open Lean library '{}': {err}", path.display()))
426        })?;
427        Ok(library)
428    }
429}
430
431/// Section kinds that hold runtime data (Lean nullary-constant
432/// `lean_object*` pointers live in `__data` on Mach-O, `.data` /
433/// `.rodata` / `.bss` on ELF).
434fn is_data_section(kind: SectionKind) -> bool {
435    matches!(
436        kind,
437        SectionKind::Data
438            | SectionKind::ReadOnlyData
439            | SectionKind::ReadOnlyDataWithRel
440            | SectionKind::UninitializedData
441            | SectionKind::Common
442            | SectionKind::Tls
443            | SectionKind::UninitializedTls
444    )
445}