lean_rs/module/library.rs
1//! RAII handle over a Lake-built Lean shared object.
2//!
3//! [`LeanLibrary`] owns a [`libloading::Library`] for the duration of its
4//! scope and exposes a single safe operation that callers actually want:
5//! initialize a named Lean module out of it. The dlopen step, the Lake
6//! symbol-mangling convention, the `IO Unit` decoding, and the
7//! `builtin` flag policy are hidden inside the implementation; the
8//! interface mentions only the human-readable package and module names.
9//!
10//! Construction requires a `&'lean LeanRuntime` borrow. Use-before-init
11//! is therefore structurally impossible: a caller cannot build a
12//! [`LeanLibrary`] without holding the proof that
13//! [`crate::LeanRuntime::init`] has succeeded.
14//!
15//! ## Symbol-table walk at `open`
16//!
17//! Lean compiles `def x : T := constant` — a nullary export whose body
18//! reduces to a constant — to a persistent `lean_object*` global variable
19//! (`lean_mark_persistent` at module init), not a callable function.
20//! Calling such a symbol as a function pointer SIGBUSes. To make the
21//! distinction invisible to callers, [`LeanLibrary::open`] reads the
22//! dylib's bytes once, walks the export table with the [`object`] crate,
23//! and records the names of data-section exports as `globals`.
24//! [`LeanModule::exported`](super::loaded::LeanModule::exported) consults
25//! the set to dispatch function-vs-global at call time.
26
27// SAFETY DOC: every `unsafe { ... }` block in this file carries its own
28// `// SAFETY:` comment. The blanket allow exists because this is the
29// single `pub` doorway into the dlopen/dlsym path; per
30// `docs/architecture/01-safety-model.md` the opt-out lives at the
31// smallest scope that compiles.
32#![allow(unsafe_code)]
33
34use std::collections::HashSet;
35use std::ffi::c_void;
36use std::path::{Path, PathBuf};
37
38use lean_rs_sys::lean_object;
39use object::{Object, ObjectSection, ObjectSymbol, SectionKind, SymbolSection};
40
41use super::initializer::{InitializerName, RawInitializer, call_initializer};
42use super::loaded::LeanModule;
43#[cfg(doc)]
44use crate::error::HostStage;
45use crate::error::{LeanError, LeanResult};
46use crate::runtime::LeanRuntime;
47
48/// A loaded native Lean shared object.
49///
50/// Wraps a [`libloading::Library`] and hands out initialized module
51/// handles via [`LeanLibrary::initialize_module`]. The `'lean` lifetime
52/// parameter ties the library to the witnessing
53/// [`crate::LeanRuntime`] borrow; the resulting [`LeanModule`] handles
54/// borrow from `self`, so they cannot outlive the library that hosts
55/// them.
56///
57/// Neither [`Send`] nor [`Sync`]: the `&'lean LeanRuntime` field
58/// inherits the runtime's per-thread restriction (`LeanRuntime: !Sync`
59/// implies `&LeanRuntime: !Send`, and `&LeanRuntime: Sync` iff
60/// `LeanRuntime: Sync`). A negative-bound compile-time assertion in
61/// the test module fails if either auto-trait is ever implemented.
62pub struct LeanLibrary<'lean> {
63 library: libloading::Library,
64 path: PathBuf,
65 runtime: &'lean LeanRuntime,
66 /// Names of data-section exports (Lean nullary-constant globals),
67 /// normalised to what [`libloading::Library::get`] resolves with
68 /// (Mach-O leading underscore stripped). Computed once at [`open`]
69 /// and consulted by
70 /// [`LeanModule::exported`](super::loaded::LeanModule::exported) to
71 /// dispatch function-vs-global at call time.
72 ///
73 /// [`open`]: Self::open
74 globals: HashSet<String>,
75}
76
77impl<'lean> LeanLibrary<'lean> {
78 /// Load a Lake-built Lean shared object from `path`.
79 ///
80 /// Reads the file's symbol table once to classify each exported
81 /// symbol as a function (text/code section) or a Lean
82 /// nullary-constant global (data/rodata/bss section). The
83 /// classification is consulted by
84 /// [`LeanModule::exported`](super::loaded::LeanModule::exported) so
85 /// callers never write the function-vs-global distinction at the
86 /// call site. The walk is cheap (a single `std::fs::read` plus an
87 /// in-memory parse) and amortised across every later lookup.
88 ///
89 /// The `runtime` borrow is the type-level proof that the Lean runtime
90 /// is up; it is retained for the symbol-initialization step but
91 /// otherwise unused. Module initialization happens later through
92 /// [`LeanLibrary::initialize_module`].
93 ///
94 /// # Errors
95 ///
96 /// Returns [`LeanError::Host`] with stage [`HostStage::Load`] if:
97 ///
98 /// - the file cannot be read (missing, permission denied),
99 /// - the bytes do not parse as a recognised object format (Mach-O,
100 /// ELF, PE),
101 /// - the dynamic linker fails to open the file (missing transitive
102 /// dependency, architecture mismatch, …).
103 ///
104 /// The diagnostic embeds the path and the underlying error message.
105 pub fn open(runtime: &'lean LeanRuntime, path: impl AsRef<Path>) -> LeanResult<Self> {
106 let path = path.as_ref();
107 let _span = tracing::debug_span!(
108 target: "lean_rs",
109 "lean_rs.module.library.open",
110 path = %crate::error::redact::short_path(path),
111 )
112 .entered();
113 let globals = classify_globals(path)?;
114 // SAFETY: `Library::new` runs the platform dynamic loader. Lake
115 // does not emit constructor-style initializers for Lean
116 // libraries (the per-module `initialize_*` functions are
117 // explicit, not C constructors), so the load is side-effect-free
118 // from Rust's perspective; the resulting handle releases the
119 // library on drop.
120 let library = unsafe { libloading::Library::new(path) }.map_err(|err| {
121 LeanError::module_init(format!("failed to open Lean library '{}': {err}", path.display()))
122 })?;
123 Ok(Self {
124 library,
125 path: path.to_path_buf(),
126 runtime,
127 globals,
128 })
129 }
130
131 /// Open a Lake-built Lean shared object with **globally visible
132 /// symbols** (POSIX `RTLD_GLOBAL` on Unix; the Windows side stays
133 /// on the default loader since DLL symbols are already global).
134 ///
135 /// The same contract as [`LeanLibrary::open`], plus: symbols
136 /// defined by this dylib become visible to the dynamic linker's
137 /// global namespace, so any subsequently `dlopen`ed dylib whose
138 /// initializer chain references them resolves correctly.
139 ///
140 /// The motivating case is the `lean-rs-host` shim load: the host shim
141 /// package imports the generic interop shim package, so opening the
142 /// generic dylib globally first lets the host shim initializer resolve
143 /// the generated `LeanRsInterop.*` references normally.
144 ///
145 /// # Errors
146 ///
147 /// Same as [`LeanLibrary::open`].
148 pub fn open_globally(runtime: &'lean LeanRuntime, path: impl AsRef<Path>) -> LeanResult<Self> {
149 let path = path.as_ref();
150 let _span = tracing::debug_span!(
151 target: "lean_rs",
152 "lean_rs.module.library.open_globally",
153 path = %crate::error::redact::short_path(path),
154 )
155 .entered();
156 let globals = classify_globals(path)?;
157 let library = open_with_global_visibility(path)?;
158 Ok(Self {
159 library,
160 path: path.to_path_buf(),
161 runtime,
162 globals,
163 })
164 }
165
166 /// Initialize the Lean module identified by `(package, module)`.
167 ///
168 /// Resolves the Lake-mangled initializer symbol against this
169 /// library, invokes it under a panic boundary with the runtime
170 /// `builtin` flag, and decodes the resulting `IO Unit`. Idempotent:
171 /// the Lean-emitted initializer body short-circuits to `IO.ok(())`
172 /// on its second and later calls, so repeated invocations are safe
173 /// and cheap.
174 ///
175 /// # Errors
176 ///
177 /// Returns [`LeanError::Host`] with:
178 ///
179 /// - [`HostStage::Link`] if `package` or `module` is not a valid
180 /// Lake identifier, or the mangled initializer symbol is not
181 /// exported by this library.
182 /// - [`HostStage::Load`] if the initializer panics or returns
183 /// `IO.error`.
184 pub fn initialize_module<'lib>(&'lib self, package: &str, module: &str) -> LeanResult<LeanModule<'lean, 'lib>> {
185 let _span = tracing::debug_span!(
186 target: "lean_rs",
187 "lean_rs.module.library.initialize",
188 package = package,
189 module = module,
190 )
191 .entered();
192 let name = InitializerName::from_lake_names(package, module)?;
193 let init = self.lookup_initializer(&name)?;
194 call_initializer(self.runtime, init, &name)?;
195 Ok(LeanModule::new(self, name))
196 }
197
198 /// Names of data-section exports for this library, normalised to the
199 /// form [`libloading::Library::get`] resolves with.
200 pub(crate) fn globals(&self) -> &HashSet<String> {
201 &self.globals
202 }
203
204 /// The runtime borrow this library was opened with.
205 pub(crate) fn runtime(&self) -> &'lean LeanRuntime {
206 self.runtime
207 }
208
209 /// On-disk path the library was opened from. Used only for
210 /// diagnostics.
211 pub(crate) fn path(&self) -> &Path {
212 &self.path
213 }
214
215 /// Resolve `name` as a function-pointer symbol (text section).
216 ///
217 /// Returns the raw symbol address. The caller is responsible for
218 /// casting to the right `unsafe extern "C" fn(...) -> ...`
219 /// signature, typically by handing the address to
220 /// [`LeanExported::from_function_address`].
221 ///
222 /// # Errors
223 ///
224 /// Returns [`LeanError::Host`] with stage [`HostStage::Link`] if
225 /// the symbol is not exported by this library. The diagnostic
226 /// embeds the symbol name and the library path.
227 ///
228 /// [`LeanExported::from_function_address`]: crate::module::LeanExported::from_function_address
229 pub fn resolve_function_symbol(&self, name: &str) -> LeanResult<*mut c_void> {
230 // SAFETY: `libloading::Library::get::<*mut c_void>` is the raw
231 // address lookup; the returned `Symbol<'_, *mut c_void>` borrows
232 // from `self.library`, so dereferencing it inside this scope is
233 // valid. We copy the address out via `*symbol` — the same idiom
234 // `lookup_initializer` uses.
235 let symbol: libloading::Symbol<'_, *mut c_void> =
236 unsafe { self.library.get(name.as_bytes()) }.map_err(|err| {
237 LeanError::symbol_lookup(format!(
238 "unknown exported symbol '{}' in '{}': {err}",
239 name,
240 self.path.display()
241 ))
242 })?;
243 Ok(*symbol)
244 }
245
246 /// Resolve `name` as a function-pointer symbol, tolerating a missing
247 /// symbol.
248 ///
249 /// Returns `Some(address)` when the symbol resolves, `None` when it
250 /// is absent from this library. The opinionated `lean-rs-host` stack
251 /// uses this to pre-resolve *optional* capability symbols at
252 /// `LeanCapabilities::new` time without failing capability load when
253 /// a fixture omits a service.
254 ///
255 /// Symbol resolution at the dlsym level only fails with "symbol not
256 /// present" — the dynamic linker has already accepted the library at
257 /// [`Self::open`], so a per-symbol lookup error is either "missing"
258 /// or a `\0` in the name (which a sane caller never passes). Both
259 /// are collapsed into the `None` branch by design.
260 pub fn resolve_optional_function_symbol(&self, name: &str) -> Option<*mut c_void> {
261 // SAFETY: identical to `resolve_function_symbol`; the symbol
262 // borrow does not escape this scope.
263 match unsafe { self.library.get::<*mut c_void>(name.as_bytes()) } {
264 Ok(symbol) => Some(*symbol),
265 Err(_) => None,
266 }
267 }
268
269 /// Resolve `name` as a Lean nullary-constant global symbol
270 /// (data section). The returned pointer addresses the storage
271 /// holding the persistent `lean_object*` value; the caller reads
272 /// `*returned` to get the Lean object pointer itself.
273 ///
274 /// # Errors
275 ///
276 /// Returns [`LeanError::Host`] with stage [`HostStage::Link`] if
277 /// the symbol is not exported by this library.
278 pub(crate) fn resolve_global_symbol(&self, name: &str) -> LeanResult<*mut *mut lean_object> {
279 // SAFETY: `libloading::Library::get::<T>` returns the symbol's
280 // address typed as `T`. For data symbols the address is the
281 // location of the variable, so the parameterised type spells a
282 // pointer to the variable's value. The borrow lifetime ends
283 // when this function returns; we copy the address out.
284 let symbol: libloading::Symbol<'_, *mut *mut lean_object> = unsafe { self.library.get(name.as_bytes()) }
285 .map_err(|err| {
286 LeanError::symbol_lookup(format!(
287 "unknown global symbol '{}' in '{}': {err}",
288 name,
289 self.path.display()
290 ))
291 })?;
292 Ok(*symbol)
293 }
294
295 /// Resolve the module initializer by `dlsym`, trying both the modern
296 /// (Lean ≥ 4.27) and legacy (Lean ≤ 4.26) Lake symbol shapes. The
297 /// diagnostic on failure names both candidates so the operator can
298 /// see what was searched.
299 fn lookup_initializer(&self, name: &InitializerName) -> LeanResult<RawInitializer> {
300 // SAFETY: the type parameter spells the canonical Lake
301 // initializer signature (`(u8) -> *mut lean_object`), verified
302 // against the Lake-emitted C in `fixtures/lean/.lake/build/ir/`
303 // for every supported Lean version. `libloading::Library::get`
304 // returns a borrowed `Symbol<'_, T>`; copying the raw function
305 // pointer out of it via deref is the standard pattern when the
306 // caller does not need to retain the borrow.
307 let modern: Result<libloading::Symbol<'_, RawInitializer>, _> =
308 unsafe { self.library.get(name.symbol_bytes()) };
309 if let Ok(symbol) = modern {
310 return Ok(*symbol);
311 }
312 // SAFETY: same as above; the legacy symbol shape is the only
313 // exported form for Lean ≤ 4.26 dylibs.
314 let legacy: Result<libloading::Symbol<'_, RawInitializer>, _> =
315 unsafe { self.library.get(name.legacy_symbol_bytes()) };
316 match legacy {
317 Ok(symbol) => Ok(*symbol),
318 Err(err) => Err(LeanError::linking(format!(
319 "missing initializer symbol in '{}': tried '{}' and '{}': {err}",
320 self.path.display(),
321 name.symbol_str(),
322 name.legacy_symbol_str(),
323 ))),
324 }
325 }
326}
327
328impl std::fmt::Debug for LeanLibrary<'_> {
329 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330 f.debug_struct("LeanLibrary")
331 .field("path", &self.path)
332 .field("globals_count", &self.globals.len())
333 .finish()
334 }
335}
336
337/// Read `path` and collect the names of its data-section exports
338/// (Lean nullary-constant globals).
339///
340/// Mach-O export names carry a leading `_` that `libloading` strips at
341/// lookup; we strip here so the set keys match what
342/// [`LeanLibrary::resolve_global_symbol`] queries with. ELF and PE
343/// symbol names are stored without leading `_`, so no normalisation is
344/// needed on those platforms.
345///
346/// Symbols whose section can't be classified (undefined, absolute,
347/// common) are skipped: they cannot be the Lean-compiled persistent
348/// globals we care about.
349fn classify_globals(path: &Path) -> LeanResult<HashSet<String>> {
350 let bytes = std::fs::read(path)
351 .map_err(|err| LeanError::module_init(format!("failed to read Lean library '{}': {err}", path.display())))?;
352 let file = object::File::parse(&*bytes)
353 .map_err(|err| LeanError::module_init(format!("failed to parse object file '{}': {err}", path.display())))?;
354
355 let strip_underscore = matches!(file.format(), object::BinaryFormat::MachO | object::BinaryFormat::Wasm);
356
357 let mut globals = HashSet::new();
358 // `symbols()` covers both ELF `.symtab` (when present) and Mach-O
359 // `LC_SYMTAB` external defs; `dynamic_symbols()` on Mach-O omits
360 // data-section externals such as Lean nullary-constant globals,
361 // which is exactly what we need to classify.
362 for symbol in file.symbols() {
363 if !symbol.is_global() || symbol.is_undefined() {
364 continue;
365 }
366 let SymbolSection::Section(section_index) = symbol.section() else {
367 continue;
368 };
369 let Ok(section) = file.section_by_index(section_index) else {
370 continue;
371 };
372 if !is_data_section(section.kind()) {
373 continue;
374 }
375 let Ok(name) = symbol.name() else {
376 continue;
377 };
378 let normalised = if strip_underscore {
379 name.strip_prefix('_').unwrap_or(name)
380 } else {
381 name
382 };
383 if normalised.is_empty() {
384 continue;
385 }
386 globals.insert(normalised.to_owned());
387 }
388 Ok(globals)
389}
390
391/// Open `path` with the dynamic loader's *global* symbol-visibility
392/// flag set so any subsequently loaded dylib can resolve symbols
393/// defined here.
394///
395/// On Unix this means `RTLD_LAZY | RTLD_GLOBAL`. On Windows the
396/// platform loader publishes module symbols globally by default, so
397/// the standard [`libloading::Library::new`] path is sufficient.
398fn open_with_global_visibility(path: &Path) -> LeanResult<libloading::Library> {
399 #[cfg(unix)]
400 {
401 // SAFETY: identical contract to `Library::new` — runs the
402 // platform dynamic loader against `path`. The added
403 // `RTLD_GLOBAL` flag only affects symbol-table visibility for
404 // later loads; it does not change initializer-execution
405 // semantics (Lake-built Lean dylibs have no constructor-style
406 // init).
407 let unix_library = unsafe {
408 libloading::os::unix::Library::open(
409 Some(path),
410 libloading::os::unix::RTLD_LAZY | libloading::os::unix::RTLD_GLOBAL,
411 )
412 }
413 .map_err(|err| {
414 LeanError::module_init(format!(
415 "failed to open Lean library '{}' with RTLD_GLOBAL: {err}",
416 path.display()
417 ))
418 })?;
419 Ok(unix_library.into())
420 }
421 #[cfg(not(unix))]
422 {
423 // SAFETY: same as `Library::new`. See unix branch.
424 let library = unsafe { libloading::Library::new(path) }.map_err(|err| {
425 LeanError::module_init(format!("failed to open Lean library '{}': {err}", path.display()))
426 })?;
427 Ok(library)
428 }
429}
430
431/// Section kinds that hold runtime data (Lean nullary-constant
432/// `lean_object*` pointers live in `__data` on Mach-O, `.data` /
433/// `.rodata` / `.bss` on ELF).
434fn is_data_section(kind: SectionKind) -> bool {
435 matches!(
436 kind,
437 SectionKind::Data
438 | SectionKind::ReadOnlyData
439 | SectionKind::ReadOnlyDataWithRel
440 | SectionKind::UninitializedData
441 | SectionKind::Common
442 | SectionKind::Tls
443 | SectionKind::UninitializedTls
444 )
445}