Skip to main content

eryx_runtime/
linker.rs

1//! Late-linking support for native Python extensions.
2//!
3//! This module provides functionality to link native Python extensions (.so files)
4//! into the eryx runtime at sandbox creation time using the wit-component Linker.
5//!
6//! # Architecture
7//!
8//! The base libraries (libc, libpython, etc.) are embedded in the crate and combined
9//! with user-provided native extensions using shared-everything dynamic linking.
10//! Since true runtime dynamic linking isn't yet supported in the component model,
11//! we re-link the component when native extensions are added and cache the result.
12//!
13//! ```text
14//! ┌─────────────────────────────────────────────────────────────────────┐
15//! │                    Base Libraries (embedded)                         │
16//! │   libc.so, libpython3.14.so, liberyx_runtime.so, etc.               │
17//! └─────────────────────────────────────────────────────────────────────┘
18//!                               +
19//! ┌─────────────────────────────────────────────────────────────────────┐
20//! │                    Native Extensions (user-provided)                 │
21//! │   numpy/*.cpython-314-wasm32-wasi.so, etc.                          │
22//! └─────────────────────────────────────────────────────────────────────┘
23//!                               ↓
24//!                     wit_component::Linker
25//!                               ↓
26//! ┌─────────────────────────────────────────────────────────────────────┐
27//! │                    Linked Component                                  │
28//! │   Complete WASM component with native extensions available          │
29//! └─────────────────────────────────────────────────────────────────────┘
30//! ```
31
32use std::io::Cursor;
33
34use sha2::{Digest, Sha256};
35
/// A native extension to be linked into the component.
///
/// Pairs the file name of a shared object with its raw bytes. Two values compare
/// equal only when both the name and the contents are identical.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NativeExtension {
    /// The name of the .so file (e.g., "_multiarray_umath.cpython-314-wasm32-wasi.so")
    pub name: String,
    /// The raw WASM bytes of the .so file
    pub bytes: Vec<u8>,
}

impl NativeExtension {
    /// Create a new native extension from a file name and its contents.
    ///
    /// `name` accepts anything convertible into a `String`; `bytes` is stored as
    /// given, without any validation.
    #[must_use]
    pub fn new(name: impl Into<String>, bytes: Vec<u8>) -> Self {
        Self {
            name: name.into(),
            bytes,
        }
    }
}
55
56/// Metadata about a Python wheel.
57#[derive(Debug, Clone)]
58pub struct WheelInfo {
59    /// Package name (e.g., "numpy")
60    pub name: String,
61    /// Package version (e.g., "1.26.0")
62    pub version: String,
63    /// Python files in the wheel (path, contents)
64    pub python_files: Vec<(String, Vec<u8>)>,
65    /// Native extensions (.so files) if any
66    pub native_extensions: Vec<NativeExtension>,
67}
68
69impl WheelInfo {
70    /// Returns true if this wheel contains native extensions.
71    #[must_use]
72    pub fn has_native_extensions(&self) -> bool {
73        !self.native_extensions.is_empty()
74    }
75}
76
77/// Parse a Python wheel (ZIP file) and extract its contents.
78///
79/// # Errors
80///
81/// Returns an error if the wheel cannot be parsed.
82pub fn parse_wheel(wheel_bytes: &[u8]) -> Result<WheelInfo, WheelParseError> {
83    use std::io::Read;
84
85    let reader = Cursor::new(wheel_bytes);
86    let mut archive =
87        zip::ZipArchive::new(reader).map_err(|e| WheelParseError::InvalidZip(e.to_string()))?;
88
89    let mut python_files = Vec::new();
90    let mut native_extensions = Vec::new();
91    let mut name = String::new();
92    let mut version = String::new();
93
94    for i in 0..archive.len() {
95        let mut file = archive
96            .by_index(i)
97            .map_err(|e| WheelParseError::InvalidZip(e.to_string()))?;
98
99        let file_name = file.name().to_string();
100
101        // Extract package info from METADATA
102        if file_name.ends_with(".dist-info/METADATA") {
103            let mut contents = String::new();
104            file.read_to_string(&mut contents)
105                .map_err(|e| WheelParseError::ReadError(e.to_string()))?;
106
107            for line in contents.lines() {
108                if let Some(n) = line.strip_prefix("Name: ") {
109                    name = n.to_string();
110                } else if let Some(v) = line.strip_prefix("Version: ") {
111                    version = v.to_string();
112                }
113            }
114        }
115        // Check for native extensions (.so files for WASI)
116        else if file_name.ends_with(".so") && file_name.contains("wasm32-wasi") {
117            let mut bytes = Vec::new();
118            file.read_to_end(&mut bytes)
119                .map_err(|e| WheelParseError::ReadError(e.to_string()))?;
120
121            // Extract just the filename from the path
122            let so_name = file_name
123                .rsplit('/')
124                .next()
125                .unwrap_or(&file_name)
126                .to_string();
127
128            native_extensions.push(NativeExtension {
129                name: so_name,
130                bytes,
131            });
132        }
133        // Collect Python files
134        else if file_name.ends_with(".py") || file_name.ends_with(".pyi") {
135            let mut bytes = Vec::new();
136            file.read_to_end(&mut bytes)
137                .map_err(|e| WheelParseError::ReadError(e.to_string()))?;
138
139            python_files.push((file_name, bytes));
140        }
141    }
142
143    Ok(WheelInfo {
144        name,
145        version,
146        python_files,
147        native_extensions,
148    })
149}
150
/// Failure modes of wheel parsing.
///
/// Each variant carries the underlying error rendered as text.
#[derive(Debug, Clone)]
pub enum WheelParseError {
    /// The wheel is not a valid ZIP file.
    InvalidZip(String),
    /// Failed to read a file from the wheel.
    ReadError(String),
}

impl std::fmt::Display for WheelParseError {
    /// Writes a short description of the failure followed by the underlying detail.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidZip(e) => f.write_fmt(format_args!("invalid ZIP file: {e}")),
            Self::ReadError(e) => f.write_fmt(format_args!("failed to read file: {e}")),
        }
    }
}

impl std::error::Error for WheelParseError {}
170
171/// Base libraries embedded in the crate (compressed with zstd).
172pub mod base_libraries {
173    /// libc.so - C standard library
174    pub const LIBC: &[u8] =
175        include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/libs/libc.so.zst"));
176
177    /// libc++.so - C++ standard library
178    pub const LIBCXX: &[u8] =
179        include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/libs/libc++.so.zst"));
180
181    /// libc++abi.so - C++ ABI library
182    pub const LIBCXXABI: &[u8] = include_bytes!(concat!(
183        env!("CARGO_MANIFEST_DIR"),
184        "/libs/libc++abi.so.zst"
185    ));
186
187    /// libpython3.14.so - Python interpreter
188    pub const LIBPYTHON: &[u8] = include_bytes!(concat!(
189        env!("CARGO_MANIFEST_DIR"),
190        "/libs/libpython3.14.so.zst"
191    ));
192
193    /// libwasi-emulated-mman.so - WASI memory management emulation
194    pub const LIBWASI_EMULATED_MMAN: &[u8] = include_bytes!(concat!(
195        env!("CARGO_MANIFEST_DIR"),
196        "/libs/libwasi-emulated-mman.so.zst"
197    ));
198
199    /// libwasi-emulated-process-clocks.so - WASI process clocks emulation
200    pub const LIBWASI_EMULATED_PROCESS_CLOCKS: &[u8] = include_bytes!(concat!(
201        env!("CARGO_MANIFEST_DIR"),
202        "/libs/libwasi-emulated-process-clocks.so.zst"
203    ));
204
205    /// libwasi-emulated-getpid.so - WASI getpid emulation
206    pub const LIBWASI_EMULATED_GETPID: &[u8] = include_bytes!(concat!(
207        env!("CARGO_MANIFEST_DIR"),
208        "/libs/libwasi-emulated-getpid.so.zst"
209    ));
210
211    /// libwasi-emulated-signal.so - WASI signal emulation
212    pub const LIBWASI_EMULATED_SIGNAL: &[u8] = include_bytes!(concat!(
213        env!("CARGO_MANIFEST_DIR"),
214        "/libs/libwasi-emulated-signal.so.zst"
215    ));
216
217    /// WASI adapter (preview1 to preview2)
218    pub const WASI_ADAPTER: &[u8] = include_bytes!(concat!(
219        env!("CARGO_MANIFEST_DIR"),
220        "/libs/wasi_snapshot_preview1.reactor.wasm.zst"
221    ));
222
223    /// liberyx_runtime.so - Our custom runtime (built during cargo build)
224    pub const LIBERYX_RUNTIME: &[u8] =
225        include_bytes!(concat!(env!("OUT_DIR"), "/liberyx_runtime.so.zst"));
226
227    /// liberyx_bindings.so - WIT bindings for our runtime (built during cargo build)
228    pub const LIBERYX_BINDINGS: &[u8] =
229        include_bytes!(concat!(env!("OUT_DIR"), "/liberyx_bindings.so.zst"));
230}
231
232/// Compute a cache key for a set of native extensions.
233///
234/// The key is a SHA-256 hash of all extension names and contents,
235/// sorted by name for determinism.
236#[must_use]
237pub fn compute_cache_key(extensions: &[NativeExtension]) -> [u8; 32] {
238    let mut hasher = Sha256::new();
239
240    // Sort extensions by name for deterministic hashing
241    let mut sorted: Vec<_> = extensions.iter().collect();
242    sorted.sort_by(|a, b| a.name.cmp(&b.name));
243
244    for ext in sorted {
245        hasher.update(ext.name.as_bytes());
246        hasher.update((ext.bytes.len() as u64).to_le_bytes());
247        hasher.update(&ext.bytes);
248    }
249
250    hasher.finalize().into()
251}
252
253/// Link native extensions with the base libraries to create a new component.
254///
255/// This uses our custom eryx-wasm-runtime instead of componentize-py's runtime.
256///
257/// # Arguments
258///
259/// * `extensions` - Native extensions to include (will be dl_openable)
260///
261/// # Returns
262///
263/// The linked component as WASM bytes.
264///
265/// # Errors
266///
267/// Returns an error if linking fails.
268pub fn link_with_extensions(extensions: &[NativeExtension]) -> Result<Vec<u8>, LinkError> {
269    use wit_component::Linker;
270
271    // Decompress base libraries
272    let libc = decompress_zstd(base_libraries::LIBC)?;
273    let libcxx = decompress_zstd(base_libraries::LIBCXX)?;
274    let libcxxabi = decompress_zstd(base_libraries::LIBCXXABI)?;
275    let libpython = decompress_zstd(base_libraries::LIBPYTHON)?;
276    let wasi_mman = decompress_zstd(base_libraries::LIBWASI_EMULATED_MMAN)?;
277    let wasi_clocks = decompress_zstd(base_libraries::LIBWASI_EMULATED_PROCESS_CLOCKS)?;
278    let wasi_getpid = decompress_zstd(base_libraries::LIBWASI_EMULATED_GETPID)?;
279    let wasi_signal = decompress_zstd(base_libraries::LIBWASI_EMULATED_SIGNAL)?;
280    let adapter = decompress_zstd(base_libraries::WASI_ADAPTER)?;
281    let runtime = decompress_zstd(base_libraries::LIBERYX_RUNTIME)?;
282    let bindings = decompress_zstd(base_libraries::LIBERYX_BINDINGS)?;
283
284    let mut linker = Linker::default().validate(true).use_built_in_libdl(true);
285
286    // Add base libraries (order matters for symbol resolution)
287    linker = linker
288        // WASI emulation libraries
289        .library("libwasi-emulated-process-clocks.so", &wasi_clocks, false)
290        .map_err(|e| {
291            LinkError::Library("libwasi-emulated-process-clocks.so".into(), e.to_string())
292        })?
293        .library("libwasi-emulated-signal.so", &wasi_signal, false)
294        .map_err(|e| LinkError::Library("libwasi-emulated-signal.so".into(), e.to_string()))?
295        .library("libwasi-emulated-mman.so", &wasi_mman, false)
296        .map_err(|e| LinkError::Library("libwasi-emulated-mman.so".into(), e.to_string()))?
297        .library("libwasi-emulated-getpid.so", &wasi_getpid, false)
298        .map_err(|e| LinkError::Library("libwasi-emulated-getpid.so".into(), e.to_string()))?
299        // C/C++ runtime
300        .library("libc.so", &libc, false)
301        .map_err(|e| LinkError::Library("libc.so".into(), e.to_string()))?
302        .library("libc++abi.so", &libcxxabi, false)
303        .map_err(|e| LinkError::Library("libc++abi.so".into(), e.to_string()))?
304        .library("libc++.so", &libcxx, false)
305        .map_err(|e| LinkError::Library("libc++.so".into(), e.to_string()))?
306        // Python
307        .library("libpython3.14.so", &libpython, false)
308        .map_err(|e| LinkError::Library("libpython3.14.so".into(), e.to_string()))?
309        // Our runtime and bindings
310        .library("liberyx_runtime.so", &runtime, false)
311        .map_err(|e| LinkError::Library("liberyx_runtime.so".into(), e.to_string()))?
312        .library("liberyx_bindings.so", &bindings, false)
313        .map_err(|e| LinkError::Library("liberyx_bindings.so".into(), e.to_string()))?;
314
315    // Add user's native extensions (dl_openable = true for dlopen/dlsym)
316    for ext in extensions {
317        linker = linker
318            .library(&ext.name, &ext.bytes, true)
319            .map_err(|e| LinkError::Extension(ext.name.clone(), e.to_string()))?;
320    }
321
322    // Add WASI adapter
323    linker = linker
324        .adapter("wasi_snapshot_preview1", &adapter)
325        .map_err(|e| LinkError::Adapter(e.to_string()))?;
326
327    linker
328        .encode()
329        .map_err(|e| LinkError::Encode(e.to_string()))
330}
331
332fn decompress_zstd(data: &[u8]) -> Result<Vec<u8>, LinkError> {
333    zstd::decode_all(Cursor::new(data)).map_err(|e| LinkError::Decompress(e.to_string()))
334}
335
/// Failure modes of the linking pipeline.
///
/// String payloads hold the underlying error rendered as text; the two-field
/// variants carry the library or extension name first.
#[derive(Debug, Clone)]
pub enum LinkError {
    /// Failed to add a base library.
    Library(String, String),
    /// Failed to add a native extension.
    Extension(String, String),
    /// Failed to add the WASI adapter.
    Adapter(String),
    /// Failed to encode the final component.
    Encode(String),
    /// Failed to decompress a library.
    Decompress(String),
}

impl std::fmt::Display for LinkError {
    /// Writes a short description of the failed step followed by the underlying detail.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Library(name, e) => {
                f.write_fmt(format_args!("failed to add base library {name}: {e}"))
            }
            Self::Extension(name, e) => {
                f.write_fmt(format_args!("failed to add extension {name}: {e}"))
            }
            Self::Adapter(e) => f.write_fmt(format_args!("failed to add WASI adapter: {e}")),
            Self::Encode(e) => f.write_fmt(format_args!("failed to encode component: {e}")),
            Self::Decompress(e) => {
                f.write_fmt(format_args!("failed to decompress library: {e}"))
            }
        }
    }
}

impl std::error::Error for LinkError {}
364
#[cfg(test)]
mod tests {
    use super::*;

    /// The cache key must not depend on the order in which extensions are supplied.
    #[test]
    fn test_cache_key_determinism() {
        let forward = [
            NativeExtension::new("a.so", vec![1, 2, 3]),
            NativeExtension::new("b.so", vec![4, 5, 6]),
        ];
        let mut backward = forward.clone();
        backward.reverse();

        assert_eq!(compute_cache_key(&forward), compute_cache_key(&backward));
    }

    /// The rendered error must include the underlying detail message.
    #[test]
    fn test_wheel_parse_error_display() {
        let rendered = WheelParseError::InvalidZip("test error".to_string()).to_string();
        assert!(rendered.contains("test error"));
    }
}