Skip to main content

shape_runtime/
package_bundle.rs

//! Package bundle format for distributable .shapec files
//!
//! A package bundle contains pre-compiled bytecode for all modules in a Shape
//! package, plus metadata for versioning and freshness checks.
//!
//! File format: `[8 bytes "SHAPEPKG"] [4 bytes format_version LE] [MessagePack payload]`
7
8use crate::doc_extract::DocItem;
9use crate::module_manifest::ModuleManifest;
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::path::Path;
13
/// Eight-byte magic prefix that opens every serialized bundle.
const MAGIC: &[u8; 8] = b"SHAPEPKG";
/// Format version stamped into the header by `PackageBundle::to_bytes`;
/// also the newest version `PackageBundle::from_bytes` accepts.
const FORMAT_VERSION: u32 = 3;
/// Oldest format version that is still loadable (v1 bundles lack blob_store/manifests).
const MIN_FORMAT_VERSION: u32 = 1;
18
/// Serde fallback for `BundleMetadata::bundle_kind` when the field is absent
/// from the encoded metadata.
fn default_bundle_kind() -> String {
    String::from("portable-bytecode")
}
22
23/// Metadata about a compiled package bundle.
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct BundleMetadata {
26    /// Package name from shape.toml [project].name
27    pub name: String,
28    /// Package version from shape.toml [project].version
29    pub version: String,
30    /// Shape compiler version that produced this bundle
31    pub compiler_version: String,
32    /// SHA-256 hash of all source files combined
33    pub source_hash: String,
34    /// Bundle compatibility kind.
35    /// `portable-bytecode` bundles are cross-platform and contain no host-native machine code.
36    #[serde(default = "default_bundle_kind")]
37    pub bundle_kind: String,
38    /// Host identifier of the build machine (for diagnostics only).
39    #[serde(default)]
40    pub build_host: String,
41    /// Whether declared native dependencies are host-portable (no host-specific path/vendoring required).
42    #[serde(default = "default_native_portable")]
43    pub native_portable: bool,
44    /// Entry module path, if any
45    pub entry_module: Option<String>,
46    /// Build timestamp (unix seconds from SystemTime)
47    pub built_at: u64,
48    /// README content (raw Markdown), read from README.md in project root.
49    #[serde(default)]
50    pub readme: Option<String>,
51}
52
/// Serde fallback for `BundleMetadata::native_portable`: metadata that omits
/// the field is treated as host-portable.
fn default_native_portable() -> bool {
    true
}
56
57/// A single compiled module within a bundle.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct BundledModule {
60    /// Module path using :: separator (e.g., "utils::helpers")
61    pub module_path: String,
62    /// MessagePack-serialized BytecodeProgram as raw bytes
63    pub bytecode_bytes: Vec<u8>,
64    /// Names of exported symbols
65    pub export_names: Vec<String>,
66    /// SHA-256 hash of the individual source file
67    pub source_hash: String,
68}
69
70/// A compiled package bundle containing all modules and metadata.
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct PackageBundle {
73    /// Bundle metadata
74    pub metadata: BundleMetadata,
75    /// Compiled modules
76    pub modules: Vec<BundledModule>,
77    /// Declared dependency versions (name -> version string)
78    pub dependencies: HashMap<String, String>,
79    /// Content-addressed blob store: hash -> raw blob bytes.
80    /// Blobs are deduplicated across modules so shared functions are stored once.
81    #[serde(default)]
82    pub blob_store: HashMap<[u8; 32], Vec<u8>>,
83    /// Module manifests for content-addressed resolution.
84    /// Each manifest maps export names to blob hashes in `blob_store`.
85    #[serde(default)]
86    pub manifests: Vec<ModuleManifest>,
87    /// Native dependency scopes for this package and all transitive dependencies.
88    /// Used by consumers of `.shapec` bundles to lock/validate native prerequisites.
89    #[serde(default)]
90    pub native_dependency_scopes: Vec<BundledNativeDependencyScope>,
91    /// Documentation items extracted from source code, keyed by module path.
92    #[serde(default)]
93    pub docs: HashMap<String, Vec<DocItem>>,
94}
95
96/// Native dependency scope embedded in a `.shapec` bundle.
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct BundledNativeDependencyScope {
99    /// Package name declaring the dependencies.
100    pub package_name: String,
101    /// Package version declaring the dependencies.
102    pub package_version: String,
103    /// Canonical package identity key (`name@version`).
104    pub package_key: String,
105    /// Native dependencies declared by this package.
106    pub dependencies: HashMap<String, crate::project::NativeDependencySpec>,
107}
108
109impl PackageBundle {
110    /// Serialize the bundle to bytes with magic header.
111    pub fn to_bytes(&self) -> Result<Vec<u8>, String> {
112        let payload =
113            rmp_serde::to_vec(self).map_err(|e| format!("Failed to serialize bundle: {}", e))?;
114
115        let mut buf = Vec::with_capacity(12 + payload.len());
116        buf.extend_from_slice(MAGIC);
117        buf.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
118        buf.extend_from_slice(&payload);
119        Ok(buf)
120    }
121
122    /// Deserialize a bundle from bytes, validating magic and version.
123    ///
124    /// Supports v1 (no blob_store/manifests), v2, and v3 (docs) bundles.
125    /// Missing fields are filled with defaults via `#[serde(default)]`.
126    pub fn from_bytes(data: &[u8]) -> Result<Self, String> {
127        if data.len() < 12 {
128            return Err("Bundle too small: missing header".to_string());
129        }
130
131        if &data[..8] != MAGIC {
132            return Err("Invalid bundle: bad magic bytes".to_string());
133        }
134
135        let version = u32::from_le_bytes(
136            data[8..12]
137                .try_into()
138                .map_err(|_| "Invalid version bytes".to_string())?,
139        );
140        if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
141            return Err(format!(
142                "Unsupported bundle format version: expected {}-{}, got {}",
143                MIN_FORMAT_VERSION, FORMAT_VERSION, version
144            ));
145        }
146
147        rmp_serde::from_slice(&data[12..])
148            .map_err(|e| format!("Failed to deserialize bundle: {}", e))
149    }
150
151    /// Write the bundle to a file.
152    pub fn write_to_file(&self, path: &Path) -> Result<(), String> {
153        let bytes = self.to_bytes()?;
154        std::fs::write(path, bytes)
155            .map_err(|e| format!("Failed to write bundle to '{}': {}", path.display(), e))
156    }
157
158    /// Read a bundle from a file.
159    pub fn read_from_file(path: &Path) -> Result<Self, String> {
160        let data = std::fs::read(path)
161            .map_err(|e| format!("Failed to read bundle from '{}': {}", path.display(), e))?;
162        Self::from_bytes(&data)
163    }
164}
165
166/// Verify SHA-256 checksum of raw bundle bytes.
167/// `expected` should be in format "sha256:hexdigest" or just the hex digest.
168pub fn verify_bundle_checksum(bundle_bytes: &[u8], expected: &str) -> bool {
169    use sha2::{Digest, Sha256};
170    let mut hasher = Sha256::new();
171    hasher.update(bundle_bytes);
172    let digest = hex::encode(hasher.finalize());
173    let expected_hex = expected.strip_prefix("sha256:").unwrap_or(expected);
174    digest == expected_hex
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline metadata for fixtures; tests override individual fields with
    /// struct-update syntax.
    fn sample_metadata(name: &str) -> BundleMetadata {
        BundleMetadata {
            name: name.to_string(),
            version: "0.1.0".to_string(),
            compiler_version: "0.5.0".to_string(),
            source_hash: "abc123".to_string(),
            bundle_kind: default_bundle_kind(),
            build_host: "x86_64-linux".to_string(),
            native_portable: true,
            entry_module: None,
            built_at: 0,
            readme: None,
        }
    }

    /// Bundle carrying `metadata` with every collection empty.
    fn empty_bundle(metadata: BundleMetadata) -> PackageBundle {
        PackageBundle {
            metadata,
            modules: vec![],
            dependencies: HashMap::new(),
            blob_store: HashMap::new(),
            manifests: vec![],
            native_dependency_scopes: vec![],
            docs: HashMap::new(),
        }
    }

    /// Zero-filled 20-byte buffer that starts with `magic` and `version`.
    fn header_bytes(magic: &[u8; 8], version: u32) -> Vec<u8> {
        let mut data = vec![0u8; 20];
        data[..8].copy_from_slice(magic);
        data[8..12].copy_from_slice(&version.to_le_bytes());
        data
    }

    fn sample_bundle() -> PackageBundle {
        PackageBundle {
            modules: vec![
                BundledModule {
                    module_path: "main".to_string(),
                    bytecode_bytes: vec![1, 2, 3, 4],
                    export_names: vec!["run".to_string()],
                    source_hash: "def456".to_string(),
                },
                BundledModule {
                    module_path: "utils::helpers".to_string(),
                    bytecode_bytes: vec![5, 6, 7],
                    export_names: vec!["helper".to_string(), "format".to_string()],
                    source_hash: "ghi789".to_string(),
                },
            ],
            dependencies: HashMap::from([("my-lib".to_string(), "1.0.0".to_string())]),
            ..empty_bundle(BundleMetadata {
                entry_module: Some("main".to_string()),
                built_at: 1700000000,
                ..sample_metadata("test-pkg")
            })
        }
    }

    #[test]
    fn test_roundtrip_serialize_deserialize() {
        let bundle = sample_bundle();
        let bytes = bundle.to_bytes().expect("serialization should succeed");
        let restored = PackageBundle::from_bytes(&bytes).expect("deserialization should succeed");

        assert_eq!(restored.metadata.name, "test-pkg");
        assert_eq!(restored.metadata.version, "0.1.0");
        assert_eq!(restored.modules.len(), 2);
        assert_eq!(restored.modules[0].module_path, "main");
        assert_eq!(restored.modules[0].bytecode_bytes, vec![1, 2, 3, 4]);
        assert_eq!(restored.modules[1].module_path, "utils::helpers");
        assert_eq!(restored.dependencies.get("my-lib").unwrap(), "1.0.0");
        assert!(restored.blob_store.is_empty());
        assert!(restored.manifests.is_empty());
    }

    #[test]
    fn test_magic_bytes_validation() {
        let bad_data = header_bytes(b"BADMAGIC", 0);
        let result = PackageBundle::from_bytes(&bad_data);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("bad magic bytes"));
    }

    #[test]
    fn test_version_validation() {
        let data = header_bytes(MAGIC, 99);
        let result = PackageBundle::from_bytes(&data);
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .contains("Unsupported bundle format version")
        );
    }

    #[test]
    fn test_version_below_minimum_rejected() {
        // Only meaningful while the minimum is above zero.
        if let Some(too_old) = MIN_FORMAT_VERSION.checked_sub(1) {
            let data = header_bytes(MAGIC, too_old);
            let err = PackageBundle::from_bytes(&data).expect_err("version below minimum");
            assert!(err.contains("Unsupported bundle format version"));
        }
    }

    #[test]
    fn test_too_small_data() {
        let result = PackageBundle::from_bytes(&[1, 2, 3]);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("too small"));
    }

    #[test]
    fn test_header_without_payload() {
        // A valid 12-byte header followed by nothing must fail in the decoder,
        // not in the header checks.
        let mut data = header_bytes(MAGIC, FORMAT_VERSION);
        data.truncate(12);
        let err = PackageBundle::from_bytes(&data).expect_err("empty payload");
        assert!(err.contains("Failed to deserialize bundle"));
    }

    #[test]
    fn test_empty_bundle() {
        let bundle = empty_bundle(BundleMetadata {
            version: "0.0.1".to_string(),
            source_hash: "empty".to_string(),
            ..sample_metadata("empty")
        });

        let bytes = bundle.to_bytes().expect("should serialize");
        let restored = PackageBundle::from_bytes(&bytes).expect("should deserialize");
        assert_eq!(restored.metadata.name, "empty");
        assert!(restored.modules.is_empty());
        assert!(restored.dependencies.is_empty());
    }

    #[test]
    fn test_file_roundtrip() {
        let tmp = tempfile::tempdir().expect("temp dir");
        let path = tmp.path().join("test.shapec");

        let bundle = sample_bundle();
        bundle.write_to_file(&path).expect("write should succeed");
        let restored = PackageBundle::read_from_file(&path).expect("read should succeed");

        assert_eq!(restored.metadata.name, "test-pkg");
        assert_eq!(restored.modules.len(), 2);
    }

    #[test]
    fn test_bundle_with_blob_store_and_manifests() {
        let blob_hash = [0xABu8; 32];
        let blob_data = vec![10u8, 20, 30, 40];

        let mut manifest = ModuleManifest::new("mymod".into(), "1.0.0".into());
        manifest.add_export("greet".into(), blob_hash);
        manifest.finalize();

        let bundle = PackageBundle {
            blob_store: HashMap::from([(blob_hash, blob_data.clone())]),
            manifests: vec![manifest],
            ..empty_bundle(BundleMetadata {
                version: "2.0.0".to_string(),
                compiler_version: "0.6.0".to_string(),
                source_hash: "ca_hash".to_string(),
                built_at: 1700000001,
                ..sample_metadata("ca-pkg")
            })
        };

        let bytes = bundle.to_bytes().expect("serialization should succeed");
        let restored = PackageBundle::from_bytes(&bytes).expect("deserialization should succeed");

        assert_eq!(restored.metadata.name, "ca-pkg");
        assert_eq!(restored.manifests.len(), 1);
        assert_eq!(restored.manifests[0].name, "mymod");
        assert!(restored.manifests[0].verify_integrity());
        assert_eq!(restored.blob_store.get(&blob_hash), Some(&blob_data));
        assert!(restored.modules.is_empty());
    }

    // --- verify_bundle_checksum tests ---

    fn sha256_hex(data: &[u8]) -> String {
        use sha2::{Digest, Sha256};
        let mut hasher = Sha256::new();
        hasher.update(data);
        hex::encode(hasher.finalize())
    }

    #[test]
    fn test_verify_checksum_correct() {
        let data = b"hello world";
        let hash = sha256_hex(data);
        assert!(verify_bundle_checksum(data, &hash));
    }

    #[test]
    fn test_verify_checksum_wrong() {
        let data = b"hello world";
        assert!(!verify_bundle_checksum(
            data,
            "0000000000000000000000000000000000000000000000000000000000000000"
        ));
    }

    #[test]
    fn test_verify_checksum_with_sha256_prefix() {
        let data = b"test data";
        let hash = sha256_hex(data);
        let prefixed = format!("sha256:{}", hash);
        assert!(verify_bundle_checksum(data, &prefixed));
    }

    #[test]
    fn test_verify_checksum_without_prefix() {
        let data = b"test data";
        let hash = sha256_hex(data);
        assert!(verify_bundle_checksum(data, &hash));
    }

    #[test]
    fn test_verify_checksum_empty_data() {
        let data = b"";
        let hash = sha256_hex(data);
        assert!(verify_bundle_checksum(data, &hash));
    }

    #[test]
    fn test_verify_checksum_case_sensitive() {
        let data = b"case test";
        let hash = sha256_hex(data).to_uppercase();
        // hex::encode produces lowercase; uppercase should fail
        assert!(!verify_bundle_checksum(data, &hash));
    }

    #[test]
    fn test_bundle_blob_deduplication() {
        let shared_hash = [0x01u8; 32];
        let shared_blob = vec![99u8, 88, 77];

        let mut m1 = ModuleManifest::new("mod_a".into(), "1.0.0".into());
        m1.add_export("fn_a".into(), shared_hash);
        m1.finalize();

        let mut m2 = ModuleManifest::new("mod_b".into(), "1.0.0".into());
        m2.add_export("fn_b".into(), shared_hash);
        m2.finalize();

        let bundle = PackageBundle {
            blob_store: HashMap::from([(shared_hash, shared_blob.clone())]),
            manifests: vec![m1, m2],
            ..empty_bundle(BundleMetadata {
                version: "1.0.0".to_string(),
                compiler_version: "0.6.0".to_string(),
                source_hash: "dedup".to_string(),
                ..sample_metadata("dedup-pkg")
            })
        };

        let bytes = bundle.to_bytes().expect("serialize");
        let restored = PackageBundle::from_bytes(&bytes).expect("deserialize");

        // Both manifests reference the same hash, but blob_store has it once.
        assert_eq!(restored.blob_store.len(), 1);
        assert_eq!(restored.blob_store.get(&shared_hash), Some(&shared_blob));
        assert_eq!(restored.manifests.len(), 2);
    }
}