Skip to main content

eryx_runtime/
linker.rs

1//! Late-linking support for native Python extensions.
2//!
3//! This module provides functionality to link native Python extensions (.so files)
4//! into the eryx runtime at sandbox creation time using the wit-component Linker.
5//!
6//! # Architecture
7//!
8//! The base libraries (libc, libpython, etc.) are embedded in the crate and combined
9//! with user-provided native extensions using shared-everything dynamic linking.
10//! Since true runtime dynamic linking isn't yet supported in the component model,
11//! we re-link the component when native extensions are added and cache the result.
12//!
13//! ```text
14//! ┌─────────────────────────────────────────────────────────────────────┐
15//! │                    Base Libraries (embedded)                         │
16//! │   libc.so, libpython3.14.so, liberyx_runtime.so, etc.               │
17//! └─────────────────────────────────────────────────────────────────────┘
18//!                               +
19//! ┌─────────────────────────────────────────────────────────────────────┐
20//! │                    Native Extensions (user-provided)                 │
21//! │   numpy/*.cpython-314-wasm32-wasi.so, etc.                          │
22//! └─────────────────────────────────────────────────────────────────────┘
23//!                               ↓
24//!                     wit_component::Linker
25//!                               ↓
26//! ┌─────────────────────────────────────────────────────────────────────┐
27//! │                    Linked Component                                  │
28//! │   Complete WASM component with native extensions available          │
29//! └─────────────────────────────────────────────────────────────────────┘
30//! ```
31
32use std::io::Cursor;
33
34use sha2::{Digest, Sha256};
35
/// One native Python extension module that should be linked into the component.
#[derive(Debug, Clone)]
pub struct NativeExtension {
    /// File name of the shared object
    /// (e.g., "_multiarray_umath.cpython-314-wasm32-wasi.so").
    pub name: String,
    /// Raw WASM contents of the `.so` file.
    pub bytes: Vec<u8>,
}

impl NativeExtension {
    /// Builds an extension from its file name and raw contents.
    ///
    /// `name` accepts anything convertible into a `String`; `bytes` is stored
    /// as given.
    #[must_use]
    pub fn new(name: impl Into<String>, bytes: Vec<u8>) -> Self {
        let name = name.into();
        Self { name, bytes }
    }
}
55
56/// Metadata about a Python wheel.
57#[derive(Debug, Clone)]
58#[non_exhaustive]
59pub struct WheelInfo {
60    /// Package name (e.g., "numpy")
61    pub name: String,
62    /// Package version (e.g., "1.26.0")
63    pub version: String,
64    /// Python files in the wheel (path, contents)
65    pub python_files: Vec<(String, Vec<u8>)>,
66    /// Native extensions (.so files) if any
67    pub native_extensions: Vec<NativeExtension>,
68}
69
70impl WheelInfo {
71    /// Returns true if this wheel contains native extensions.
72    #[must_use]
73    pub fn has_native_extensions(&self) -> bool {
74        !self.native_extensions.is_empty()
75    }
76}
77
78/// Parse a Python wheel (ZIP file) and extract its contents.
79///
80/// # Errors
81///
82/// Returns an error if the wheel cannot be parsed.
83pub fn parse_wheel(wheel_bytes: &[u8]) -> Result<WheelInfo, WheelParseError> {
84    use std::io::Read;
85
86    let reader = Cursor::new(wheel_bytes);
87    let mut archive =
88        zip::ZipArchive::new(reader).map_err(|e| WheelParseError::InvalidZip(e.to_string()))?;
89
90    let mut python_files = Vec::new();
91    let mut native_extensions = Vec::new();
92    let mut name = String::new();
93    let mut version = String::new();
94
95    for i in 0..archive.len() {
96        let mut file = archive
97            .by_index(i)
98            .map_err(|e| WheelParseError::InvalidZip(e.to_string()))?;
99
100        let file_name = file.name().to_string();
101
102        // Extract package info from METADATA
103        if file_name.ends_with(".dist-info/METADATA") {
104            let mut contents = String::new();
105            file.read_to_string(&mut contents)
106                .map_err(|e| WheelParseError::ReadError(e.to_string()))?;
107
108            for line in contents.lines() {
109                if let Some(n) = line.strip_prefix("Name: ") {
110                    name = n.to_string();
111                } else if let Some(v) = line.strip_prefix("Version: ") {
112                    version = v.to_string();
113                }
114            }
115        }
116        // Check for native extensions (.so files for WASI)
117        else if file_name.ends_with(".so") && file_name.contains("wasm32-wasi") {
118            let mut bytes = Vec::new();
119            file.read_to_end(&mut bytes)
120                .map_err(|e| WheelParseError::ReadError(e.to_string()))?;
121
122            // Extract just the filename from the path
123            let so_name = file_name
124                .rsplit('/')
125                .next()
126                .unwrap_or(&file_name)
127                .to_string();
128
129            native_extensions.push(NativeExtension {
130                name: so_name,
131                bytes,
132            });
133        }
134        // Collect Python files
135        else if file_name.ends_with(".py") || file_name.ends_with(".pyi") {
136            let mut bytes = Vec::new();
137            file.read_to_end(&mut bytes)
138                .map_err(|e| WheelParseError::ReadError(e.to_string()))?;
139
140            python_files.push((file_name, bytes));
141        }
142    }
143
144    Ok(WheelInfo {
145        name,
146        version,
147        python_files,
148        native_extensions,
149    })
150}
151
/// Failure modes of wheel parsing.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum WheelParseError {
    /// The input could not be opened as a ZIP archive (payload: underlying message).
    InvalidZip(String),
    /// An entry inside the archive could not be read (payload: underlying message).
    ReadError(String),
}

impl std::fmt::Display for WheelParseError {
    /// Renders the error as `<what failed>: <underlying message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (label, detail) = match self {
            Self::InvalidZip(detail) => ("invalid ZIP file", detail),
            Self::ReadError(detail) => ("failed to read file", detail),
        };
        write!(f, "{label}: {detail}")
    }
}

impl std::error::Error for WheelParseError {}
172
173/// Base libraries embedded in the crate (compressed with zstd).
174pub mod base_libraries {
175    /// libc.so - C standard library
176    pub const LIBC: &[u8] =
177        include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/libs/libc.so.zst"));
178
179    /// libc++.so - C++ standard library
180    pub const LIBCXX: &[u8] =
181        include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/libs/libc++.so.zst"));
182
183    /// libc++abi.so - C++ ABI library
184    pub const LIBCXXABI: &[u8] = include_bytes!(concat!(
185        env!("CARGO_MANIFEST_DIR"),
186        "/libs/libc++abi.so.zst"
187    ));
188
189    /// libpython3.14.so - Python interpreter
190    pub const LIBPYTHON: &[u8] = include_bytes!(concat!(
191        env!("CARGO_MANIFEST_DIR"),
192        "/libs/libpython3.14.so.zst"
193    ));
194
195    /// libwasi-emulated-mman.so - WASI memory management emulation
196    pub const LIBWASI_EMULATED_MMAN: &[u8] = include_bytes!(concat!(
197        env!("CARGO_MANIFEST_DIR"),
198        "/libs/libwasi-emulated-mman.so.zst"
199    ));
200
201    /// libwasi-emulated-process-clocks.so - WASI process clocks emulation
202    pub const LIBWASI_EMULATED_PROCESS_CLOCKS: &[u8] = include_bytes!(concat!(
203        env!("CARGO_MANIFEST_DIR"),
204        "/libs/libwasi-emulated-process-clocks.so.zst"
205    ));
206
207    /// libwasi-emulated-getpid.so - WASI getpid emulation
208    pub const LIBWASI_EMULATED_GETPID: &[u8] = include_bytes!(concat!(
209        env!("CARGO_MANIFEST_DIR"),
210        "/libs/libwasi-emulated-getpid.so.zst"
211    ));
212
213    /// libwasi-emulated-signal.so - WASI signal emulation
214    pub const LIBWASI_EMULATED_SIGNAL: &[u8] = include_bytes!(concat!(
215        env!("CARGO_MANIFEST_DIR"),
216        "/libs/libwasi-emulated-signal.so.zst"
217    ));
218
219    /// WASI adapter (preview1 to preview2)
220    pub const WASI_ADAPTER: &[u8] = include_bytes!(concat!(
221        env!("CARGO_MANIFEST_DIR"),
222        "/libs/wasi_snapshot_preview1.reactor.wasm.zst"
223    ));
224
225    /// liberyx_runtime.so - Our custom runtime (built during cargo build)
226    pub const LIBERYX_RUNTIME: &[u8] =
227        include_bytes!(concat!(env!("OUT_DIR"), "/liberyx_runtime.so.zst"));
228
229    /// liberyx_bindings.so - WIT bindings for our runtime (built during cargo build)
230    pub const LIBERYX_BINDINGS: &[u8] =
231        include_bytes!(concat!(env!("OUT_DIR"), "/liberyx_bindings.so.zst"));
232}
233
234/// Compute a cache key for a set of native extensions.
235///
236/// The key is a SHA-256 hash of all extension names and contents,
237/// sorted by name for determinism.
238#[must_use]
239pub fn compute_cache_key(extensions: &[NativeExtension]) -> [u8; 32] {
240    let mut hasher = Sha256::new();
241
242    // Sort extensions by name for deterministic hashing
243    let mut sorted: Vec<_> = extensions.iter().collect();
244    sorted.sort_by(|a, b| a.name.cmp(&b.name));
245
246    for ext in sorted {
247        hasher.update(ext.name.as_bytes());
248        hasher.update((ext.bytes.len() as u64).to_le_bytes());
249        hasher.update(&ext.bytes);
250    }
251
252    hasher.finalize().into()
253}
254
255/// Link native extensions with the base libraries to create a new component.
256///
257/// This uses our custom eryx-wasm-runtime instead of componentize-py's runtime.
258///
259/// # Arguments
260///
261/// * `extensions` - Native extensions to include (will be dl_openable)
262///
263/// # Returns
264///
265/// The linked component as WASM bytes.
266///
267/// # Errors
268///
269/// Returns an error if linking fails.
270pub fn link_with_extensions(extensions: &[NativeExtension]) -> Result<Vec<u8>, LinkError> {
271    use wit_component::Linker;
272
273    // Decompress base libraries
274    let libc = decompress_zstd(base_libraries::LIBC)?;
275    let libcxx = decompress_zstd(base_libraries::LIBCXX)?;
276    let libcxxabi = decompress_zstd(base_libraries::LIBCXXABI)?;
277    let libpython = decompress_zstd(base_libraries::LIBPYTHON)?;
278    let wasi_mman = decompress_zstd(base_libraries::LIBWASI_EMULATED_MMAN)?;
279    let wasi_clocks = decompress_zstd(base_libraries::LIBWASI_EMULATED_PROCESS_CLOCKS)?;
280    let wasi_getpid = decompress_zstd(base_libraries::LIBWASI_EMULATED_GETPID)?;
281    let wasi_signal = decompress_zstd(base_libraries::LIBWASI_EMULATED_SIGNAL)?;
282    let adapter = decompress_zstd(base_libraries::WASI_ADAPTER)?;
283    let runtime = decompress_zstd(base_libraries::LIBERYX_RUNTIME)?;
284    let bindings = decompress_zstd(base_libraries::LIBERYX_BINDINGS)?;
285
286    let mut linker = Linker::default().validate(true).use_built_in_libdl(true);
287
288    // Add base libraries (order matters for symbol resolution)
289    linker = linker
290        // WASI emulation libraries
291        .library("libwasi-emulated-process-clocks.so", &wasi_clocks, false)
292        .map_err(|e| {
293            LinkError::Library("libwasi-emulated-process-clocks.so".into(), e.to_string())
294        })?
295        .library("libwasi-emulated-signal.so", &wasi_signal, false)
296        .map_err(|e| LinkError::Library("libwasi-emulated-signal.so".into(), e.to_string()))?
297        .library("libwasi-emulated-mman.so", &wasi_mman, false)
298        .map_err(|e| LinkError::Library("libwasi-emulated-mman.so".into(), e.to_string()))?
299        .library("libwasi-emulated-getpid.so", &wasi_getpid, false)
300        .map_err(|e| LinkError::Library("libwasi-emulated-getpid.so".into(), e.to_string()))?
301        // C/C++ runtime
302        .library("libc.so", &libc, false)
303        .map_err(|e| LinkError::Library("libc.so".into(), e.to_string()))?
304        .library("libc++abi.so", &libcxxabi, false)
305        .map_err(|e| LinkError::Library("libc++abi.so".into(), e.to_string()))?
306        .library("libc++.so", &libcxx, false)
307        .map_err(|e| LinkError::Library("libc++.so".into(), e.to_string()))?
308        // Python
309        .library("libpython3.14.so", &libpython, false)
310        .map_err(|e| LinkError::Library("libpython3.14.so".into(), e.to_string()))?
311        // Our runtime and bindings
312        .library("liberyx_runtime.so", &runtime, false)
313        .map_err(|e| LinkError::Library("liberyx_runtime.so".into(), e.to_string()))?
314        .library("liberyx_bindings.so", &bindings, false)
315        .map_err(|e| LinkError::Library("liberyx_bindings.so".into(), e.to_string()))?;
316
317    // Add user's native extensions (dl_openable = true for dlopen/dlsym)
318    for ext in extensions {
319        linker = linker
320            .library(&ext.name, &ext.bytes, true)
321            .map_err(|e| LinkError::Extension(ext.name.clone(), e.to_string()))?;
322    }
323
324    // Add WASI adapter
325    linker = linker
326        .adapter("wasi_snapshot_preview1", &adapter)
327        .map_err(|e| LinkError::Adapter(e.to_string()))?;
328
329    linker
330        .encode()
331        .map_err(|e| LinkError::Encode(e.to_string()))
332}
333
334fn decompress_zstd(data: &[u8]) -> Result<Vec<u8>, LinkError> {
335    zstd::decode_all(Cursor::new(data)).map_err(|e| LinkError::Decompress(e.to_string()))
336}
337
/// Failure modes of the linking step.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum LinkError {
    /// A base library could not be added (library name, underlying message).
    Library(String, String),
    /// A native extension could not be added (extension name, underlying message).
    Extension(String, String),
    /// The WASI adapter could not be added (underlying message).
    Adapter(String),
    /// The final component could not be encoded (underlying message).
    Encode(String),
    /// A library could not be decompressed (underlying message).
    Decompress(String),
}

impl std::fmt::Display for LinkError {
    /// Renders the error as `<what failed>: <underlying message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LinkError::Library(lib, reason) => {
                write!(f, "failed to add base library {lib}: {reason}")
            }
            LinkError::Extension(ext, reason) => {
                write!(f, "failed to add extension {ext}: {reason}")
            }
            LinkError::Adapter(reason) => write!(f, "failed to add WASI adapter: {reason}"),
            LinkError::Encode(reason) => write!(f, "failed to encode component: {reason}"),
            LinkError::Decompress(reason) => {
                write!(f, "failed to decompress library: {reason}")
            }
        }
    }
}

impl std::error::Error for LinkError {}
367
#[cfg(test)]
mod tests {
    use super::*;

    /// The cache key must not depend on the order in which extensions are passed.
    #[test]
    fn test_cache_key_determinism() {
        let first = NativeExtension::new("a.so", vec![1, 2, 3]);
        let second = NativeExtension::new("b.so", vec![4, 5, 6]);

        let forward = [first.clone(), second.clone()];
        let reversed = [second, first];

        assert_eq!(compute_cache_key(&forward), compute_cache_key(&reversed));
    }

    /// The rendered error must include the underlying message.
    #[test]
    fn test_wheel_parse_error_display() {
        let rendered = WheelParseError::InvalidZip("test error".to_string()).to_string();
        assert!(rendered.contains("test error"));
    }
}