Skip to main content

shape_runtime/
blob_wire_format.rs

1//! Cross-language wire format for FunctionBlob serialization.
2//!
3//! This module provides a stable, versioned binary format for FunctionBlobs
4//! that can be implemented in any language. The format is designed for:
5//! - Deterministic serialization (same blob = same bytes)
6//! - Forward compatibility (new fields are appendable)
7//! - Language independence (no Rust-specific constructs)
8//!
9//! ## Wire Format Layout
10//!
11//! ```text
12//! +-------------------+
13//! | BlobHeader (50 B) |
14//! +-------------------+
15//! | Section Table     |  (section_count * 18 bytes)
16//! +-------------------+
17//! | Section 0 data    |
18//! +-------------------+
19//! | Section 1 data    |
20//! +-------------------+
21//! | ...               |
22//! +-------------------+
23//! ```
24//!
25//! The encoder accepts pre-serialized section payloads via [`EncodableBlob`],
26//! keeping this module free of `shape-vm` dependencies (which would create a
27//! circular crate dependency). Higher-level code (in `shape-vm`) provides a
28//! `From<&FunctionBlob>` conversion into `EncodableBlob`.
29
30use serde::{Deserialize, Serialize};
31use sha2::{Digest, Sha256};
32use std::collections::HashMap;
33
/// Version number stamped into every blob header; decoding and validation
/// reject any other value.
pub const WIRE_FORMAT_VERSION: u32 = 1;

/// Four-byte signature that opens every Shape blob: ASCII "SHBL".
pub const BLOB_MAGIC: [u8; 4] = *b"SHBL";

/// Fixed header length in bytes, spelled out field by field:
/// magic (4) + version (4) + content hash (32) + total_size (8) + section_count (2) = 50.
const HEADER_SIZE: usize = 4 + 4 + 32 + 8 + 2;

/// Length in bytes of one section-table entry:
/// type tag (2) + offset (8) + length (8) = 18.
const SECTION_ENTRY_SIZE: usize = 2 + 8 + 8;
45
46// ---------------------------------------------------------------------------
47// Error type
48// ---------------------------------------------------------------------------
49
/// Errors that can occur during wire format encoding/decoding.
///
/// Every variant holds only owned or `'static` data, which is what lets the
/// type derive `Clone`. Underlying msgpack errors are therefore stored as
/// their rendered message rather than as the original error value.
#[derive(Debug, Clone, thiserror::Error)]
pub enum WireFormatError {
    /// The first four bytes of the input are not [`BLOB_MAGIC`].
    #[error("invalid magic bytes: expected SHBL")]
    InvalidMagic,
    /// The header's version field differs from [`WIRE_FORMAT_VERSION`];
    /// the payload is the version that was found.
    #[error("unsupported wire format version {0} (expected {WIRE_FORMAT_VERSION})")]
    UnsupportedVersion(u32),
    /// The hash stored in the header does not match the SHA-256 of the bytes
    /// that follow the header.
    #[error("content hash mismatch")]
    HashMismatch,
    /// The input ended before a structure that must be present.
    /// `needed` is the minimum length required, `got` the actual length.
    #[error("data too short: need {needed} bytes, got {got}")]
    TooShort { needed: usize, got: usize },
    /// A section-table entry describes a byte range that reaches past the
    /// `total_size` declared in the header.
    #[error("section offset {offset} + length {length} exceeds total size {total}")]
    SectionOutOfBounds {
        offset: u64,
        length: u64,
        total: u64,
    },
    /// A section-table entry carries a type tag that is not a known
    /// [`SectionType`]; the payload is the raw tag.
    #[error("unknown section type 0x{0:04x}")]
    UnknownSectionType(u16),
    /// A section the decoder requires is absent from the section table;
    /// the payload is the section's human-readable label.
    #[error("missing required section: {0}")]
    MissingSection(&'static str),
    /// A section payload could not be decoded. Also used when the
    /// dependency section's length is not a multiple of 32 bytes.
    #[error("msgpack decode error: {0}")]
    MsgpackDecode(String),
    /// A value could not be serialized to msgpack while preparing a blob.
    #[error("msgpack encode error: {0}")]
    MsgpackEncode(String),
    /// A section type tag occurred more than once; the payload is the raw tag.
    #[error("duplicate section type 0x{0:04x}")]
    DuplicateSection(u16),
}
78
79// ---------------------------------------------------------------------------
80// Section types
81// ---------------------------------------------------------------------------
82
/// Section types in the wire format.
///
/// The explicit `u16` discriminants are the tags stored in the section
/// table (written little-endian by the encoder), so they are part of the
/// on-disk format and must not be renumbered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[repr(u16)]
pub enum SectionType {
    /// Msgpack-encoded [`BlobMetadata`].
    Metadata = 0x0001,
    /// Msgpack-encoded instructions; passed through as raw bytes by this module.
    Instructions = 0x0002,
    /// Msgpack-encoded constants; passed through as raw bytes by this module.
    Constants = 0x0003,
    /// Msgpack-encoded string pool.
    Strings = 0x0004,
    /// Flat concatenation of 32-byte dependency hashes (not msgpack).
    Dependencies = 0x0005,
    /// Msgpack-encoded type schema names.
    TypeSchemas = 0x0006,
    /// Msgpack-encoded source map entries.
    SourceMap = 0x0007,
    /// Msgpack-encoded permission names.
    Permissions = 0x0008,
}
96
97impl SectionType {
98    fn from_u16(v: u16) -> Result<Self, WireFormatError> {
99        match v {
100            0x0001 => Ok(Self::Metadata),
101            0x0002 => Ok(Self::Instructions),
102            0x0003 => Ok(Self::Constants),
103            0x0004 => Ok(Self::Strings),
104            0x0005 => Ok(Self::Dependencies),
105            0x0006 => Ok(Self::TypeSchemas),
106            0x0007 => Ok(Self::SourceMap),
107            0x0008 => Ok(Self::Permissions),
108            other => Err(WireFormatError::UnknownSectionType(other)),
109        }
110    }
111}
112
113// ---------------------------------------------------------------------------
114// Header / section table (on-disk structs)
115// ---------------------------------------------------------------------------
116
/// Wire format header for a serialized blob.
///
/// Describes the 50-byte header that opens every blob. The encoder and
/// decoder in this file write and read these fields by hand, in this order,
/// as little-endian integers; they do not serialize this struct through
/// serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlobHeader {
    /// File signature; must equal [`BLOB_MAGIC`].
    pub magic: [u8; 4],
    /// Format version; must equal [`WIRE_FORMAT_VERSION`].
    pub version: u32,
    /// SHA-256 of every byte after the header (section table + section data).
    pub content_hash: [u8; 32],
    /// Length of the whole encoded blob in bytes, header included.
    pub total_size: u64,
    /// Number of entries in the section table that follows the header.
    pub section_count: u16,
}
126
/// A section table entry in the wire format.
///
/// On the wire each entry occupies 18 bytes: the type tag as a `u16`,
/// then `offset` and `length` as `u64`, all little-endian.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlobSectionEntry {
    /// Which section this entry describes.
    pub section_type: SectionType,
    /// Position of the section's first byte, measured from the start of the
    /// blob (the encoder places the first section right after the table).
    pub offset: u64,
    /// Length of the section payload in bytes.
    pub length: u64,
}
134
135// ---------------------------------------------------------------------------
136// Section payloads (msgpack-serialized)
137// ---------------------------------------------------------------------------
138
/// Metadata section content.
///
/// Stored msgpack-encoded in the `Metadata` section. This module only
/// serializes and deserializes the struct; it never interprets the fields,
/// so their exact meaning is defined by the producer.
///
/// NOTE(review): `rmp_serde`'s default encoding appears to write structs
/// positionally, which would make the field order below part of the wire
/// contract — confirm before reordering or inserting fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlobMetadata {
    // Presumably the function's name — TODO confirm against the producer.
    pub name: String,
    pub arity: u16,
    pub locals_count: u16,
    pub is_async: bool,
    pub is_closure: bool,
    pub captures_count: u16,
    // The tests in this file give `param_names`, `ref_params` and
    // `ref_mutates` one entry per parameter; whether that is a required
    // invariant is not enforced here.
    pub param_names: Vec<String>,
    pub ref_params: Vec<bool>,
    pub ref_mutates: Vec<bool>,
    pub mutable_captures: Vec<bool>,
}
153
/// Type mapping specification for cross-language interop.
///
/// Stored in a [`TypeMappingRegistry`], which indexes mappings by
/// `schema_hash` and by `shape_type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypeMapping {
    /// Shape type name; the registry's by-name lookup key.
    pub shape_type: String,
    /// 32-byte schema hash; the registry's primary key. How the hash is
    /// derived is not defined in this module.
    pub schema_hash: [u8; 32],
    /// Field descriptions for the type.
    pub fields: Vec<TypeFieldMapping>,
}
161
/// A field within a type mapping.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypeFieldMapping {
    /// Field name.
    pub name: String,
    /// Language-independent description of the field's type.
    pub field_type: WireType,
    // NOTE(review): presumably the field's byte offset within the value
    // (the tests use 0 and 8 for two consecutive Float64 fields) — confirm.
    pub offset: u32,
}
169
/// Language-independent type descriptors for cross-language interop.
///
/// Composite variants box their element types, so descriptors can nest to
/// any depth (for example a map from strings to optional arrays).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum WireType {
    Int64,
    Float64,
    Bool,
    String,
    Bytes,
    /// Sequence whose elements all have the boxed type.
    Array(Box<WireType>),
    /// Map described by its key type (first) and value type (second).
    Map(Box<WireType>, Box<WireType>),
    /// A value of the boxed type that may be absent.
    Optional(Box<WireType>),
    /// Reference to another type by schema hash.
    Ref([u8; 32]),
}
184
185// ---------------------------------------------------------------------------
186// EncodableBlob - VM-independent input for the encoder
187// ---------------------------------------------------------------------------
188
/// A blob prepared for wire-format encoding.
///
/// This struct holds pre-serialized section payloads so that the encoder does
/// not need to depend on `shape-vm` types.  Higher-level code (typically in
/// `shape-vm`) converts a `FunctionBlob` into an `EncodableBlob`.
///
/// Each field becomes the payload of one section and is copied into the
/// output unchanged. Every field is msgpack except `dependencies_bytes`,
/// which is a raw byte concatenation.
#[derive(Debug, Clone)]
pub struct EncodableBlob {
    /// Msgpack-encoded [`BlobMetadata`].
    pub metadata_bytes: Vec<u8>,
    /// Msgpack-encoded instructions (opaque to this module).
    pub instructions_bytes: Vec<u8>,
    /// Msgpack-encoded constants (opaque to this module).
    pub constants_bytes: Vec<u8>,
    /// Msgpack-encoded string pool.
    pub strings_bytes: Vec<u8>,
    /// Flat concatenation of 32-byte dependency hashes.
    /// The decoder rejects a length that is not a multiple of 32.
    pub dependencies_bytes: Vec<u8>,
    /// Msgpack-encoded type schema names.
    pub type_schemas_bytes: Vec<u8>,
    /// Msgpack-encoded source map entries.
    pub source_map_bytes: Vec<u8>,
    /// Msgpack-encoded permission names.
    pub permissions_bytes: Vec<u8>,
}
213
214impl EncodableBlob {
215    /// Build an `EncodableBlob` from individual fields.
216    ///
217    /// This is the convenience constructor when you already have the decoded
218    /// data available. Each field is msgpack-serialized here.
219    pub fn from_parts(
220        metadata: &BlobMetadata,
221        instructions: &impl Serialize,
222        constants: &impl Serialize,
223        strings: &[String],
224        dependency_hashes: &[[u8; 32]],
225        type_schemas: &[String],
226        source_map: &[(usize, u32, u32)],
227        permission_names: &[&str],
228    ) -> Result<Self, WireFormatError> {
229        fn mp<T: Serialize + ?Sized>(v: &T) -> Result<Vec<u8>, WireFormatError> {
230            rmp_serde::encode::to_vec(v).map_err(|e| WireFormatError::MsgpackEncode(e.to_string()))
231        }
232
233        let mut deps_buf = Vec::with_capacity(dependency_hashes.len() * 32);
234        for h in dependency_hashes {
235            deps_buf.extend_from_slice(h);
236        }
237
238        Ok(Self {
239            metadata_bytes: mp(metadata)?,
240            instructions_bytes: mp(instructions)?,
241            constants_bytes: mp(constants)?,
242            strings_bytes: mp(strings)?,
243            dependencies_bytes: deps_buf,
244            type_schemas_bytes: mp(type_schemas)?,
245            source_map_bytes: mp(source_map)?,
246            permissions_bytes: mp(&permission_names)?,
247        })
248    }
249}
250
251// ---------------------------------------------------------------------------
252// Decoded blob (in-memory representation after decode)
253// ---------------------------------------------------------------------------
254
/// A fully decoded blob from wire format.
///
/// Produced by [`BlobDecoder::decode_from_bytes`]. Instructions and
/// constants stay as raw msgpack bytes because their types live outside
/// this module; every other section is decoded into owned values.
#[derive(Debug, Clone)]
pub struct DecodedBlob {
    /// Hash copied verbatim from the header. Decoding does not verify it;
    /// use [`validate_blob`] for that.
    pub content_hash: [u8; 32],
    /// Decoded contents of the metadata section.
    pub metadata: BlobMetadata,
    /// Raw msgpack bytes for the instructions section.
    pub instructions_bytes: Vec<u8>,
    /// Raw msgpack bytes for the constants section.
    pub constants_bytes: Vec<u8>,
    /// Decoded string pool.
    pub strings: Vec<String>,
    /// Dependency hashes (each 32 bytes).
    pub dependencies: Vec<[u8; 32]>,
    /// Type schema names referenced by this blob.
    pub type_schemas: Vec<String>,
    /// Source map entries: (local_offset, file_id, line).
    pub source_map: Vec<(usize, u32, u32)>,
    /// Permission names required by this function.
    pub permissions: Vec<String>,
}
275
276// ---------------------------------------------------------------------------
277// Encoder
278// ---------------------------------------------------------------------------
279
280/// Encodes an [`EncodableBlob`] into the cross-language wire format.
281pub struct BlobEncoder<'a> {
282    blob: &'a EncodableBlob,
283}
284
285impl<'a> BlobEncoder<'a> {
286    pub fn new(blob: &'a EncodableBlob) -> Self {
287        Self { blob }
288    }
289
290    /// Serialize the blob into wire format bytes.
291    pub fn encode_to_bytes(&self) -> Result<Vec<u8>, WireFormatError> {
292        let sections: Vec<(SectionType, &[u8])> = vec![
293            (SectionType::Metadata, &self.blob.metadata_bytes),
294            (SectionType::Instructions, &self.blob.instructions_bytes),
295            (SectionType::Constants, &self.blob.constants_bytes),
296            (SectionType::Strings, &self.blob.strings_bytes),
297            (SectionType::Dependencies, &self.blob.dependencies_bytes),
298            (SectionType::TypeSchemas, &self.blob.type_schemas_bytes),
299            (SectionType::SourceMap, &self.blob.source_map_bytes),
300            (SectionType::Permissions, &self.blob.permissions_bytes),
301        ];
302
303        let section_count = sections.len() as u16;
304        let section_table_size = sections.len() * SECTION_ENTRY_SIZE;
305        let data_start = HEADER_SIZE + section_table_size;
306
307        // Compute section offsets.
308        let mut section_entries = Vec::with_capacity(sections.len());
309        let mut current_offset = data_start as u64;
310        for (st, payload) in &sections {
311            section_entries.push(BlobSectionEntry {
312                section_type: *st,
313                offset: current_offset,
314                length: payload.len() as u64,
315            });
316            current_offset += payload.len() as u64;
317        }
318        let total_size = current_offset;
319
320        // Assemble the final buffer.
321        let mut buf = Vec::with_capacity(total_size as usize);
322
323        // -- header (50 bytes) --
324        buf.extend_from_slice(&BLOB_MAGIC);
325        buf.extend_from_slice(&WIRE_FORMAT_VERSION.to_le_bytes());
326        // Placeholder for content hash (filled after computing over body).
327        let hash_pos = buf.len();
328        buf.extend_from_slice(&[0u8; 32]);
329        buf.extend_from_slice(&total_size.to_le_bytes());
330        buf.extend_from_slice(&section_count.to_le_bytes());
331
332        // -- section table --
333        for entry in &section_entries {
334            buf.extend_from_slice(&(entry.section_type as u16).to_le_bytes());
335            buf.extend_from_slice(&entry.offset.to_le_bytes());
336            buf.extend_from_slice(&entry.length.to_le_bytes());
337        }
338
339        // -- section data --
340        for (_, payload) in &sections {
341            buf.extend_from_slice(payload);
342        }
343
344        // Compute content hash over everything after the header
345        // (section table + section data).
346        let digest = Sha256::digest(&buf[HEADER_SIZE..]);
347        buf[hash_pos..hash_pos + 32].copy_from_slice(&digest);
348
349        Ok(buf)
350    }
351}
352
353// ---------------------------------------------------------------------------
354// Decoder
355// ---------------------------------------------------------------------------
356
357/// Decodes wire-format bytes into a [`DecodedBlob`].
358pub struct BlobDecoder;
359
360impl BlobDecoder {
361    /// Decode a wire-format byte slice into a [`DecodedBlob`].
362    ///
363    /// This validates the magic bytes and version but does **not** verify the
364    /// content hash. Call [`validate_blob`] for full verification.
365    pub fn decode_from_bytes(data: &[u8]) -> Result<DecodedBlob, WireFormatError> {
366        if data.len() < HEADER_SIZE {
367            return Err(WireFormatError::TooShort {
368                needed: HEADER_SIZE,
369                got: data.len(),
370            });
371        }
372
373        // Parse header.
374        let magic: [u8; 4] = data[0..4].try_into().unwrap();
375        if magic != BLOB_MAGIC {
376            return Err(WireFormatError::InvalidMagic);
377        }
378
379        let version = u32::from_le_bytes(data[4..8].try_into().unwrap());
380        if version != WIRE_FORMAT_VERSION {
381            return Err(WireFormatError::UnsupportedVersion(version));
382        }
383
384        let content_hash: [u8; 32] = data[8..40].try_into().unwrap();
385        let total_size = u64::from_le_bytes(data[40..48].try_into().unwrap());
386        let section_count = u16::from_le_bytes(data[48..50].try_into().unwrap());
387
388        let needed = HEADER_SIZE + (section_count as usize) * SECTION_ENTRY_SIZE;
389        if data.len() < needed {
390            return Err(WireFormatError::TooShort {
391                needed,
392                got: data.len(),
393            });
394        }
395
396        // Parse section table.
397        let mut sections = Vec::with_capacity(section_count as usize);
398        let mut offset = HEADER_SIZE;
399        for _ in 0..section_count {
400            let st = u16::from_le_bytes(data[offset..offset + 2].try_into().unwrap());
401            let sec_offset = u64::from_le_bytes(data[offset + 2..offset + 10].try_into().unwrap());
402            let sec_length = u64::from_le_bytes(data[offset + 10..offset + 18].try_into().unwrap());
403
404            let section_type = SectionType::from_u16(st)?;
405
406            if sec_offset + sec_length > total_size {
407                return Err(WireFormatError::SectionOutOfBounds {
408                    offset: sec_offset,
409                    length: sec_length,
410                    total: total_size,
411                });
412            }
413
414            sections.push((section_type, sec_offset as usize, sec_length as usize));
415            offset += SECTION_ENTRY_SIZE;
416        }
417
418        // Helper to extract a section's bytes.
419        let get_section = |st: SectionType| -> Option<&[u8]> {
420            sections
421                .iter()
422                .find(|(t, _, _)| *t == st)
423                .map(|(_, o, l)| &data[*o..*o + *l])
424        };
425
426        let require_section =
427            |st: SectionType, label: &'static str| -> Result<&[u8], WireFormatError> {
428                get_section(st).ok_or(WireFormatError::MissingSection(label))
429            };
430
431        // Decode each required section.
432        let metadata: BlobMetadata = {
433            let bytes = require_section(SectionType::Metadata, "Metadata")?;
434            rmp_serde::decode::from_slice(bytes)
435                .map_err(|e| WireFormatError::MsgpackDecode(e.to_string()))?
436        };
437
438        let instructions_bytes =
439            require_section(SectionType::Instructions, "Instructions")?.to_vec();
440
441        let constants_bytes = require_section(SectionType::Constants, "Constants")?.to_vec();
442
443        let strings: Vec<String> = {
444            let bytes = require_section(SectionType::Strings, "Strings")?;
445            rmp_serde::decode::from_slice(bytes)
446                .map_err(|e| WireFormatError::MsgpackDecode(e.to_string()))?
447        };
448
449        let dependencies: Vec<[u8; 32]> = {
450            let bytes = require_section(SectionType::Dependencies, "Dependencies")?;
451            if bytes.len() % 32 != 0 {
452                return Err(WireFormatError::MsgpackDecode(
453                    "dependency section length not a multiple of 32".into(),
454                ));
455            }
456            bytes
457                .chunks_exact(32)
458                .map(|chunk| {
459                    let mut arr = [0u8; 32];
460                    arr.copy_from_slice(chunk);
461                    arr
462                })
463                .collect()
464        };
465
466        let type_schemas: Vec<String> = {
467            let bytes = require_section(SectionType::TypeSchemas, "TypeSchemas")?;
468            rmp_serde::decode::from_slice(bytes)
469                .map_err(|e| WireFormatError::MsgpackDecode(e.to_string()))?
470        };
471
472        let source_map: Vec<(usize, u32, u32)> = {
473            let bytes = require_section(SectionType::SourceMap, "SourceMap")?;
474            rmp_serde::decode::from_slice(bytes)
475                .map_err(|e| WireFormatError::MsgpackDecode(e.to_string()))?
476        };
477
478        let permissions: Vec<String> = {
479            let bytes = require_section(SectionType::Permissions, "Permissions")?;
480            rmp_serde::decode::from_slice(bytes)
481                .map_err(|e| WireFormatError::MsgpackDecode(e.to_string()))?
482        };
483
484        Ok(DecodedBlob {
485            content_hash,
486            metadata,
487            instructions_bytes,
488            constants_bytes,
489            strings,
490            dependencies,
491            type_schemas,
492            source_map,
493            permissions,
494        })
495    }
496}
497
498// ---------------------------------------------------------------------------
499// Validation
500// ---------------------------------------------------------------------------
501
502/// Verify magic, version, and content hash of a wire-format blob.
503///
504/// Returns `Ok(true)` if valid, or an error describing the first problem found.
505pub fn validate_blob(data: &[u8]) -> Result<bool, WireFormatError> {
506    if data.len() < HEADER_SIZE {
507        return Err(WireFormatError::TooShort {
508            needed: HEADER_SIZE,
509            got: data.len(),
510        });
511    }
512
513    // Magic.
514    let magic: [u8; 4] = data[0..4].try_into().unwrap();
515    if magic != BLOB_MAGIC {
516        return Err(WireFormatError::InvalidMagic);
517    }
518
519    // Version.
520    let version = u32::from_le_bytes(data[4..8].try_into().unwrap());
521    if version != WIRE_FORMAT_VERSION {
522        return Err(WireFormatError::UnsupportedVersion(version));
523    }
524
525    // Content hash.
526    let stored_hash: [u8; 32] = data[8..40].try_into().unwrap();
527    let computed = Sha256::digest(&data[HEADER_SIZE..]);
528    let mut computed_arr = [0u8; 32];
529    computed_arr.copy_from_slice(&computed);
530
531    if stored_hash != computed_arr {
532        return Err(WireFormatError::HashMismatch);
533    }
534
535    Ok(true)
536}
537
538// ---------------------------------------------------------------------------
539// TypeMappingRegistry
540// ---------------------------------------------------------------------------
541
542/// Registry for cross-language type mappings, keyed by schema hash.
543#[derive(Debug, Clone, Default)]
544pub struct TypeMappingRegistry {
545    by_hash: HashMap<[u8; 32], TypeMapping>,
546    by_name: HashMap<String, [u8; 32]>,
547}
548
549impl TypeMappingRegistry {
550    pub fn new() -> Self {
551        Self::default()
552    }
553
554    /// Register a type mapping. Overwrites any existing mapping for the same hash.
555    pub fn register(&mut self, mapping: TypeMapping) {
556        let hash = mapping.schema_hash;
557        self.by_name.insert(mapping.shape_type.clone(), hash);
558        self.by_hash.insert(hash, mapping);
559    }
560
561    /// Look up a type mapping by its schema hash.
562    pub fn get_by_hash(&self, hash: &[u8; 32]) -> Option<&TypeMapping> {
563        self.by_hash.get(hash)
564    }
565
566    /// Look up a type mapping by its Shape type name.
567    pub fn get_by_name(&self, name: &str) -> Option<&TypeMapping> {
568        self.by_name.get(name).and_then(|h| self.by_hash.get(h))
569    }
570
571    /// Number of registered type mappings.
572    pub fn len(&self) -> usize {
573        self.by_hash.len()
574    }
575
576    /// Whether the registry is empty.
577    pub fn is_empty(&self) -> bool {
578        self.by_hash.is_empty()
579    }
580
581    /// Iterate over all registered type mappings.
582    pub fn iter(&self) -> impl Iterator<Item = &TypeMapping> {
583        self.by_hash.values()
584    }
585}
586
587// ---------------------------------------------------------------------------
588// Tests
589// ---------------------------------------------------------------------------
590
#[cfg(test)]
mod tests {
    use super::*;

    /// Metadata for a plain two-parameter function named `test_fn`.
    fn make_test_metadata() -> BlobMetadata {
        BlobMetadata {
            name: String::from("test_fn"),
            arity: 2,
            locals_count: 3,
            is_async: false,
            is_closure: false,
            captures_count: 0,
            param_names: vec![String::from("a"), String::from("b")],
            ref_params: vec![false; 2],
            ref_mutates: vec![false; 2],
            mutable_captures: Vec::new(),
        }
    }

    /// Metadata for a function with no parameters and no captures.
    fn nullary_metadata(name: &str, locals_count: u16, is_async: bool) -> BlobMetadata {
        BlobMetadata {
            name: name.to_string(),
            arity: 0,
            locals_count,
            is_async,
            is_closure: false,
            captures_count: 0,
            param_names: Vec::new(),
            ref_params: Vec::new(),
            ref_mutates: Vec::new(),
            mutable_captures: Vec::new(),
        }
    }

    /// A small but fully populated blob used by most tests.
    fn make_test_encodable() -> EncodableBlob {
        // Instructions and constants are opaque to the wire format, so any
        // serializable placeholder will do.
        let instructions: Vec<u8> = vec![1, 2, 3, 4];
        let constants: Vec<i64> = vec![42, 100];
        let strings = vec![String::from("a"), String::from("b")];
        let type_schemas = vec![String::from("MyType")];
        let source_map: Vec<(usize, u32, u32)> = vec![(0, 0, 1), (2, 0, 2)];

        EncodableBlob::from_parts(
            &make_test_metadata(),
            &instructions,
            &constants,
            &strings,
            &[],
            &type_schemas,
            &source_map,
            &[],
        )
        .unwrap()
    }

    /// A blob whose only non-empty inputs are the metadata, dependency
    /// hashes and permission names supplied.
    fn sparse_encodable(meta: &BlobMetadata, deps: &[[u8; 32]], perms: &[&str]) -> EncodableBlob {
        EncodableBlob::from_parts(
            meta,
            &Vec::<u8>::new(),
            &Vec::<i64>::new(),
            &[],
            deps,
            &[],
            &[],
            perms,
        )
        .unwrap()
    }

    /// Encode `blob`, panicking on failure.
    fn wire(blob: &EncodableBlob) -> Vec<u8> {
        BlobEncoder::new(blob).encode_to_bytes().unwrap()
    }

    #[test]
    fn roundtrip_encode_decode() {
        let bytes = wire(&make_test_encodable());

        assert!(validate_blob(&bytes).unwrap());

        let blob = BlobDecoder::decode_from_bytes(&bytes).unwrap();
        let meta = &blob.metadata;

        assert_eq!(meta.name, "test_fn");
        assert_eq!(meta.arity, 2);
        assert_eq!(meta.locals_count, 3);
        assert!(!meta.is_async);
        assert!(!meta.is_closure);
        assert_eq!(meta.captures_count, 0);
        assert_eq!(meta.param_names, vec!["a".to_string(), "b".to_string()]);
        assert_eq!(blob.strings, vec!["a".to_string(), "b".to_string()]);
        assert!(blob.dependencies.is_empty());
        assert_eq!(blob.type_schemas, vec!["MyType".to_string()]);
        assert_eq!(blob.source_map, vec![(0, 0, 1), (2, 0, 2)]);
        assert!(blob.permissions.is_empty());
    }

    #[test]
    fn validates_magic() {
        let mut bytes = wire(&make_test_encodable());

        // Break the first magic byte.
        bytes[0] = 0xFF;

        let outcome = validate_blob(&bytes);
        assert!(matches!(outcome, Err(WireFormatError::InvalidMagic)));
    }

    #[test]
    fn validates_version() {
        let mut bytes = wire(&make_test_encodable());

        // Overwrite the version field with 99.
        bytes[4..8].copy_from_slice(&99u32.to_le_bytes());

        let outcome = validate_blob(&bytes);
        assert!(matches!(
            outcome,
            Err(WireFormatError::UnsupportedVersion(99))
        ));
    }

    #[test]
    fn detects_hash_mismatch() {
        let mut bytes = wire(&make_test_encodable());

        // Flip every bit of the final payload byte.
        if let Some(tail) = bytes.last_mut() {
            *tail ^= 0xFF;
        }

        let outcome = validate_blob(&bytes);
        assert!(matches!(outcome, Err(WireFormatError::HashMismatch)));
    }

    #[test]
    fn too_short_data() {
        let stub = [0u8; 10];

        assert!(matches!(
            validate_blob(&stub),
            Err(WireFormatError::TooShort { .. })
        ));
        assert!(matches!(
            BlobDecoder::decode_from_bytes(&stub),
            Err(WireFormatError::TooShort { .. })
        ));
    }

    #[test]
    fn content_hash_is_nonzero_and_validates() {
        let bytes = wire(&make_test_encodable());
        let blob = BlobDecoder::decode_from_bytes(&bytes).unwrap();

        assert_ne!(blob.content_hash, [0u8; 32]);
        assert!(validate_blob(&bytes).unwrap());
    }

    #[test]
    fn encode_decode_with_dependencies() {
        let meta = nullary_metadata("caller", 1, true);
        let dep_hash = [0xAAu8; 32];

        let bytes = wire(&sparse_encodable(&meta, &[dep_hash], &[]));
        assert!(validate_blob(&bytes).unwrap());

        let blob = BlobDecoder::decode_from_bytes(&bytes).unwrap();
        assert_eq!(blob.dependencies.len(), 1);
        assert_eq!(blob.dependencies[0], [0xAA; 32]);
        assert!(blob.metadata.is_async);
    }

    #[test]
    fn type_mapping_registry_operations() {
        let mut registry = TypeMappingRegistry::new();
        assert!(registry.is_empty());

        let hash = [42u8; 32];
        let float_field = |name: &str, offset: u32| TypeFieldMapping {
            name: name.to_string(),
            field_type: WireType::Float64,
            offset,
        };

        registry.register(TypeMapping {
            shape_type: "Point".to_string(),
            schema_hash: hash,
            fields: vec![float_field("x", 0), float_field("y", 8)],
        });
        assert_eq!(registry.len(), 1);
        assert!(!registry.is_empty());

        let via_hash = registry.get_by_hash(&hash).unwrap();
        assert_eq!(via_hash.shape_type, "Point");
        assert_eq!(via_hash.fields.len(), 2);

        let via_name = registry.get_by_name("Point").unwrap();
        assert_eq!(via_name.schema_hash, hash);

        assert!(registry.get_by_name("NonExistent").is_none());
        assert!(registry.get_by_hash(&[0u8; 32]).is_none());
    }

    #[test]
    fn type_mapping_registry_overwrite() {
        let mut registry = TypeMappingRegistry::new();
        let hash = [1u8; 32];

        let first = TypeMapping {
            shape_type: "Foo".into(),
            schema_hash: hash,
            fields: vec![],
        };
        registry.register(first);
        assert_eq!(registry.len(), 1);

        // Register again under the same hash, this time with one field.
        let second = TypeMapping {
            shape_type: "Foo".into(),
            schema_hash: hash,
            fields: vec![TypeFieldMapping {
                name: "x".into(),
                field_type: WireType::Int64,
                offset: 0,
            }],
        };
        registry.register(second);
        assert_eq!(registry.len(), 1);
        assert_eq!(registry.get_by_hash(&hash).unwrap().fields.len(), 1);
    }

    #[test]
    fn wire_type_complex_nesting() {
        // Map<String, Optional<Array<Ref>>>, built from the inside out.
        let reference = WireType::Ref([0xAB; 32]);
        let array = WireType::Array(Box::new(reference));
        let optional = WireType::Optional(Box::new(array));
        let complex = WireType::Map(Box::new(WireType::String), Box::new(optional));

        let first_pass = rmp_serde::encode::to_vec(&complex).unwrap();
        let decoded: WireType = rmp_serde::decode::from_slice(&first_pass).unwrap();

        // The round-trip is checked by re-encoding and comparing bytes.
        let second_pass = rmp_serde::encode::to_vec(&decoded).unwrap();
        assert_eq!(first_pass, second_pass);
    }

    #[test]
    fn decode_rejects_truncated_section_table() {
        let bytes = wire(&make_test_encodable());

        // Keep the header plus five bytes: not even one full table entry.
        let truncated = &bytes[..HEADER_SIZE + 5];

        assert!(matches!(
            BlobDecoder::decode_from_bytes(truncated),
            Err(WireFormatError::TooShort { .. })
        ));
    }

    #[test]
    fn encode_with_permissions() {
        let meta = nullary_metadata("needs_io", 0, false);
        let perms: Vec<&str> = vec!["io.read", "io.write"];

        let bytes = wire(&sparse_encodable(&meta, &[], &perms));
        assert!(validate_blob(&bytes).unwrap());

        let blob = BlobDecoder::decode_from_bytes(&bytes).unwrap();
        assert_eq!(blob.permissions, vec!["io.read", "io.write"]);
    }
}