Skip to main content

mkit_core/
serialize.rs

1//! Canonical byte (de)serialization for [`Object`].
2//!
3//! Spec: `docs/SPEC-OBJECTS.md`. The byte layout produced here is the
4//! v1 on-disk format; the golden-vector tests in `tests/golden.rs` pin
5//! it byte-for-byte.
6//!
7//! Every deserializer:
8//! * Validates the 6-byte v1 prologue first.
9//! * Enforces per-type bounds (entry counts, identity len, etc.).
10//! * Rejects non-empty trailing bytes via [`MkitError::TrailingData`].
11
12use crate::hash::{HASH_LEN, Hash};
13use crate::object::{
14    Blob, ChunkedBlob, Commit, Delta, EntryMode, IDENTITY_MAX_LEN, Identity, IdentityKind, MAGIC,
15    MkitError, Object, ObjectType, Remix, RemixSource, SCHEMA_VERSION, TAG_NAME_MAX_LEN, Tag, Tree,
16    TreeEntry,
17};
18
/// Byte length of the object prologue: one object-type byte, the 4-byte
/// `MAGIC`, and one `SCHEMA_VERSION` byte.
const PROLOGUE_LEN: usize = 6;

/// Decode-side cap on tree entry count; writers (and the git
/// importer) must refuse anything larger or the store gains an
/// undecodable signed object.
pub const MAX_TREE_ENTRIES: u32 = 1_000_000;
/// Decode-side cap on the parent count of a commit or remix.
const MAX_PARENTS: u32 = 1_000;
/// Decode-side cap on the number of sources carried by a remix.
const MAX_REMIX_SOURCES: u32 = 10_000;
/// Decode-side cap on the number of chunk hashes in a chunked blob.
const MAX_CHUNKS: u32 = 1_000_000;
28
29// ---------------------------------------------------------------------
30// Public API
31// ---------------------------------------------------------------------
32
33/// Serialize an [`Object`] to its canonical byte form. Allocates fresh
34/// each call; the result is fully owned.
35///
36/// Returns [`MkitError::OversizePayload`] if any length-prefixed field
37/// exceeds the wire-format `u32` cap, and [`MkitError::InvalidIdentity`]
38/// if the object carries a structurally invalid [`Identity`].
39pub fn serialize(obj: &Object) -> Result<Vec<u8>, MkitError> {
40    let mut buf = Vec::with_capacity(PROLOGUE_LEN + estimated_body_len(obj));
41    write_prologue(&mut buf, obj.object_type());
42    match obj {
43        Object::Blob(b) => write_blob(&mut buf, b)?,
44        Object::Tree(t) => write_tree(&mut buf, t)?,
45        Object::Commit(c) => write_commit(&mut buf, c)?,
46        Object::Remix(r) => write_remix(&mut buf, r)?,
47        Object::ChunkedBlob(cb) => write_chunked_blob(&mut buf, cb)?,
48        Object::Delta(d) => write_delta(&mut buf, d)?,
49        Object::Tag(t) => write_tag(&mut buf, t)?,
50    }
51    Ok(buf)
52}
53
54/// The exact byte prefix of `serialize(Object::Blob(..))` for a payload
55/// of `len` bytes: 6-byte object prologue plus the `u32` LE data
56/// length. Lets ingest write a chunk as `prologue ‖ payload` straight
57/// from the source buffer — no `Blob` allocation, no serialize copy.
58/// Equivalence with [`serialize`] is pinned by proptest and,
59/// transitively, the golden blob vectors.
60///
61/// # Errors
62///
63/// [`MkitError::OversizePayload`] if `len` exceeds the wire-format
64/// `u32` cap.
65pub fn blob_prologue(len: usize) -> Result<[u8; PROLOGUE_LEN + 4], MkitError> {
66    let len_le = checked_u32("blob.data", len)?.to_le_bytes();
67    let mut out = [0u8; PROLOGUE_LEN + 4];
68    out[0] = ObjectType::Blob as u8;
69    out[1..5].copy_from_slice(&MAGIC);
70    out[5] = SCHEMA_VERSION;
71    out[6..10].copy_from_slice(&len_le);
72    Ok(out)
73}
74
75/// Deserialize bytes into an owned [`Object`]. Validates the prologue
76/// and every per-type bound; rejects trailing data.
77pub fn deserialize(data: &[u8]) -> Result<Object, MkitError> {
78    if data.len() < PROLOGUE_LEN {
79        return Err(MkitError::EmptyData);
80    }
81    let tag = ObjectType::from_u8(data[0])?;
82    if data[1..5] != MAGIC {
83        return Err(MkitError::InvalidMagic);
84    }
85    if data[5] != SCHEMA_VERSION {
86        return Err(MkitError::UnsupportedObjectVersion);
87    }
88    let mut r = Reader::new(&data[PROLOGUE_LEN..]);
89    let obj = match tag {
90        ObjectType::Blob => Object::Blob(read_blob(&mut r)?),
91        ObjectType::Tree => Object::Tree(read_tree(&mut r)?),
92        ObjectType::Commit => Object::Commit(read_commit(&mut r)?),
93        ObjectType::Remix => Object::Remix(read_remix(&mut r)?),
94        ObjectType::ChunkedBlob => Object::ChunkedBlob(read_chunked_blob(&mut r)?),
95        ObjectType::Delta => Object::Delta(read_delta(&mut r)?),
96        ObjectType::Tag => Object::Tag(read_tag(&mut r)?),
97    };
98    if r.remaining() != 0 {
99        return Err(MkitError::TrailingData);
100    }
101    Ok(obj)
102}
103
104// ---------------------------------------------------------------------
105// Writers
106// ---------------------------------------------------------------------
107
108fn write_prologue(buf: &mut Vec<u8>, t: ObjectType) {
109    buf.push(t as u8);
110    buf.extend_from_slice(&MAGIC);
111    buf.push(SCHEMA_VERSION);
112}
113
/// Append `v` to `buf` as two little-endian bytes.
fn write_u16_le(buf: &mut Vec<u8>, v: u16) {
    buf.extend(v.to_le_bytes());
}
117
/// Append `v` to `buf` as four little-endian bytes.
fn write_u32_le(buf: &mut Vec<u8>, v: u32) {
    buf.extend(v.to_le_bytes());
}
121
/// Append `v` to `buf` as eight little-endian bytes.
fn write_u64_le(buf: &mut Vec<u8>, v: u64) {
    buf.extend(v.to_le_bytes());
}
125
126fn checked_u32(field: &'static str, len: usize) -> Result<u32, MkitError> {
127    u32::try_from(len).map_err(|_| MkitError::OversizePayload { field, len })
128}
129
130fn write_lp_bytes(buf: &mut Vec<u8>, field: &'static str, data: &[u8]) -> Result<(), MkitError> {
131    write_u32_le(buf, checked_u32(field, data.len())?);
132    buf.extend_from_slice(data);
133    Ok(())
134}
135
136fn write_identity(buf: &mut Vec<u8>, id: &Identity) -> Result<(), MkitError> {
137    if !id.is_valid() {
138        return Err(MkitError::InvalidIdentity);
139    }
140    buf.push(id.kind as u8);
141    // `is_valid` already enforces 1..=IDENTITY_MAX_LEN, so the cast is
142    // safe — but keep the guard so the encoder can never silently lose
143    // bytes if `is_valid` is ever loosened.
144    let len = u16::try_from(id.bytes.len()).map_err(|_| MkitError::InvalidIdentity)?;
145    write_u16_le(buf, len);
146    buf.extend_from_slice(&id.bytes);
147    Ok(())
148}
149
150fn write_blob(buf: &mut Vec<u8>, b: &Blob) -> Result<(), MkitError> {
151    write_lp_bytes(buf, "blob.data", &b.data)
152}
153
154fn write_tree(buf: &mut Vec<u8>, t: &Tree) -> Result<(), MkitError> {
155    write_u32_le(buf, checked_u32("tree.entries", t.entries.len())?);
156    for e in &t.entries {
157        write_lp_bytes(buf, "tree.entry.name", &e.name)?;
158        buf.push(e.mode as u8);
159        buf.extend_from_slice(&e.object_hash);
160    }
161    Ok(())
162}
163
164fn write_commit(buf: &mut Vec<u8>, c: &Commit) -> Result<(), MkitError> {
165    buf.extend_from_slice(&c.tree_hash);
166    write_u32_le(buf, checked_u32("commit.parents", c.parents.len())?);
167    for p in &c.parents {
168        buf.extend_from_slice(p);
169    }
170    write_identity(buf, &c.author)?;
171    write_lp_bytes(buf, "commit.message", &c.message)?;
172    write_u64_le(buf, c.timestamp);
173    buf.extend_from_slice(&c.signer);
174    buf.extend_from_slice(&c.message_hash);
175    buf.extend_from_slice(&c.content_digest);
176    buf.extend_from_slice(&c.signature);
177    Ok(())
178}
179
180fn write_remix(buf: &mut Vec<u8>, r: &Remix) -> Result<(), MkitError> {
181    buf.extend_from_slice(&r.tree_hash);
182    write_u32_le(buf, checked_u32("remix.parents", r.parents.len())?);
183    for p in &r.parents {
184        buf.extend_from_slice(p);
185    }
186    write_u32_le(buf, checked_u32("remix.sources", r.sources.len())?);
187    for s in &r.sources {
188        buf.extend_from_slice(&s.upstream_id);
189        buf.extend_from_slice(&s.commit_hash);
190    }
191    write_identity(buf, &r.author)?;
192    write_lp_bytes(buf, "remix.message", &r.message)?;
193    write_u64_le(buf, r.timestamp);
194    buf.extend_from_slice(&r.signer);
195    buf.extend_from_slice(&r.signature);
196    Ok(())
197}
198
199/// Reject pack-only / non-storable target types. A tag MUST point at a
200/// type that can live in the object store (`Delta` is pack-only).
201fn check_tag_target_type(t: ObjectType) -> Result<(), MkitError> {
202    if matches!(t, ObjectType::Delta) {
203        return Err(MkitError::TagTargetTypeInvalid(t as u8));
204    }
205    Ok(())
206}
207
208fn write_tag(buf: &mut Vec<u8>, t: &Tag) -> Result<(), MkitError> {
209    if !t.name_is_valid() {
210        return Err(MkitError::TagNameInvalid);
211    }
212    check_tag_target_type(t.target_type)?;
213    buf.extend_from_slice(&t.target);
214    buf.push(t.target_type as u8);
215    write_lp_bytes(buf, "tag.name", &t.name)?;
216    write_identity(buf, &t.tagger)?;
217    write_lp_bytes(buf, "tag.message", &t.message)?;
218    write_u64_le(buf, t.timestamp);
219    buf.extend_from_slice(&t.signer);
220    buf.extend_from_slice(&t.signature);
221    Ok(())
222}
223
224fn write_chunked_blob(buf: &mut Vec<u8>, cb: &ChunkedBlob) -> Result<(), MkitError> {
225    write_u64_le(buf, cb.total_size);
226    write_u32_le(buf, cb.chunk_size);
227    write_u32_le(buf, checked_u32("chunked_blob.chunks", cb.chunks.len())?);
228    for c in &cb.chunks {
229        buf.extend_from_slice(c);
230    }
231    Ok(())
232}
233
234fn write_delta(buf: &mut Vec<u8>, d: &Delta) -> Result<(), MkitError> {
235    buf.extend_from_slice(&d.base_hash);
236    write_u32_le(buf, d.result_size);
237    write_lp_bytes(buf, "delta.instructions", &d.instructions)
238}
239
240fn estimated_body_len(obj: &Object) -> usize {
241    match obj {
242        Object::Blob(b) => 4 + b.data.len(),
243        Object::Tree(t) => {
244            4 + t
245                .entries
246                .iter()
247                .map(|e| 4 + e.name.len() + 1 + 32)
248                .sum::<usize>()
249        }
250        Object::Commit(c) => {
251            32 + 4
252                + c.parents.len() * 32
253                + 1
254                + 2
255                + c.author.bytes.len()
256                + 4
257                + c.message.len()
258                + 8
259                + 32
260                + 32
261                + 32
262                + 64
263        }
264        Object::Remix(r) => {
265            32 + 4
266                + r.parents.len() * 32
267                + 4
268                + r.sources.len() * 64
269                + 1
270                + 2
271                + r.author.bytes.len()
272                + 4
273                + r.message.len()
274                + 8
275                + 32
276                + 64
277        }
278        Object::ChunkedBlob(cb) => 8 + 4 + 4 + cb.chunks.len() * 32,
279        Object::Delta(d) => 32 + 4 + 4 + d.instructions.len(),
280        Object::Tag(t) => {
281            32 + 1
282                + 4
283                + t.name.len()
284                + 1
285                + 2
286                + t.tagger.bytes.len()
287                + 4
288                + t.message.len()
289                + 8
290                + 32
291                + 64
292        }
293    }
294}
295
296// ---------------------------------------------------------------------
297// Reader
298// ---------------------------------------------------------------------
299
300struct Reader<'a> {
301    data: &'a [u8],
302    pos: usize,
303}
304
305impl<'a> Reader<'a> {
306    fn new(data: &'a [u8]) -> Self {
307        Self { data, pos: 0 }
308    }
309
310    fn remaining(&self) -> usize {
311        self.data.len() - self.pos
312    }
313
314    fn need(&self, n: usize) -> Result<(), MkitError> {
315        if self.remaining() < n {
316            Err(MkitError::UnexpectedEof)
317        } else {
318            Ok(())
319        }
320    }
321
322    fn read_u8(&mut self) -> Result<u8, MkitError> {
323        self.need(1)?;
324        let v = self.data[self.pos];
325        self.pos += 1;
326        Ok(v)
327    }
328
329    fn read_u16(&mut self) -> Result<u16, MkitError> {
330        self.need(2)?;
331        let mut a = [0u8; 2];
332        a.copy_from_slice(&self.data[self.pos..self.pos + 2]);
333        self.pos += 2;
334        Ok(u16::from_le_bytes(a))
335    }
336
337    fn read_u32(&mut self) -> Result<u32, MkitError> {
338        self.need(4)?;
339        let mut a = [0u8; 4];
340        a.copy_from_slice(&self.data[self.pos..self.pos + 4]);
341        self.pos += 4;
342        Ok(u32::from_le_bytes(a))
343    }
344
345    fn read_u64(&mut self) -> Result<u64, MkitError> {
346        self.need(8)?;
347        let mut a = [0u8; 8];
348        a.copy_from_slice(&self.data[self.pos..self.pos + 8]);
349        self.pos += 8;
350        Ok(u64::from_le_bytes(a))
351    }
352
353    fn read_hash(&mut self) -> Result<Hash, MkitError> {
354        self.need(HASH_LEN)?;
355        let mut h = [0u8; HASH_LEN];
356        h.copy_from_slice(&self.data[self.pos..self.pos + HASH_LEN]);
357        self.pos += HASH_LEN;
358        Ok(h)
359    }
360
361    fn read_fixed<const N: usize>(&mut self) -> Result<[u8; N], MkitError> {
362        self.need(N)?;
363        let mut out = [0u8; N];
364        out.copy_from_slice(&self.data[self.pos..self.pos + N]);
365        self.pos += N;
366        Ok(out)
367    }
368
369    fn read_lp_bytes(&mut self) -> Result<Vec<u8>, MkitError> {
370        let len = self.read_u32()? as usize;
371        self.need(len)?;
372        let v = self.data[self.pos..self.pos + len].to_vec();
373        self.pos += len;
374        Ok(v)
375    }
376
377    fn read_identity(&mut self) -> Result<Identity, MkitError> {
378        let kind = IdentityKind::from_u8(self.read_u8()?)?;
379        let len = self.read_u16()?;
380        if len == 0 {
381            return Err(MkitError::InvalidIdentity);
382        }
383        if len > IDENTITY_MAX_LEN {
384            return Err(MkitError::IdentityTooLarge);
385        }
386        match kind {
387            IdentityKind::Ed25519 if len != 32 => return Err(MkitError::InvalidIdentity),
388            _ => {}
389        }
390        let len = len as usize;
391        self.need(len)?;
392        let bytes = self.data[self.pos..self.pos + len].to_vec();
393        self.pos += len;
394        let id = Identity { kind, bytes };
395        // Enforce the full structural invariant at the read boundary so a
396        // malformed object from disk/remote can't deserialize with an
397        // invalid payload (e.g. a binary `DidKey` that isn't a printable
398        // multibase string). `is_valid` is the single source of truth and
399        // the serialize side already gates on it (#223).
400        if !id.is_valid() {
401            return Err(MkitError::InvalidIdentity);
402        }
403        Ok(id)
404    }
405}
406
407// ---------------------------------------------------------------------
408// Readers
409// ---------------------------------------------------------------------
410
411fn read_blob(r: &mut Reader<'_>) -> Result<Blob, MkitError> {
412    Ok(Blob {
413        data: r.read_lp_bytes()?,
414    })
415}
416
417fn read_tree(r: &mut Reader<'_>) -> Result<Tree, MkitError> {
418    let count = r.read_u32()?;
419    if count > MAX_TREE_ENTRIES {
420        return Err(MkitError::TooManyEntries);
421    }
422    // Cheap upper bound: each entry is at least name_len(4) + mode(1) +
423    // hash(32) = 37 bytes plus a 1-byte name. Reject impossible counts
424    // before we allocate the entry vec.
425    if (count as usize).saturating_mul(4 + 1 + 1 + HASH_LEN) > r.remaining() {
426        return Err(MkitError::UnexpectedEof);
427    }
428    let mut entries = Vec::with_capacity(count as usize);
429    let mut prev: Option<Vec<u8>> = None;
430    for _ in 0..count {
431        let name = r.read_lp_bytes()?;
432        if !TreeEntry::validate_name(&name) {
433            return Err(MkitError::InvalidEntryName);
434        }
435        if let Some(p) = &prev
436            && p.as_slice() >= name.as_slice()
437        {
438            return Err(MkitError::InvalidEntryOrder);
439        }
440        let mode = EntryMode::from_u8(r.read_u8()?)?;
441        let object_hash = r.read_hash()?;
442        prev = Some(name.clone());
443        entries.push(TreeEntry {
444            name,
445            mode,
446            object_hash,
447        });
448    }
449    Ok(Tree { entries })
450}
451
452fn read_commit(r: &mut Reader<'_>) -> Result<Commit, MkitError> {
453    let tree_hash = r.read_hash()?;
454    let parent_count = r.read_u32()?;
455    if parent_count > MAX_PARENTS {
456        return Err(MkitError::TooManyParents);
457    }
458    // Cheap upper bound: each parent is HASH_LEN bytes on the wire. If
459    // the remaining buffer can't even hold the parent hashes, the
460    // header is lying and we must not pre-allocate for it.
461    if (parent_count as usize).saturating_mul(HASH_LEN) > r.remaining() {
462        return Err(MkitError::UnexpectedEof);
463    }
464    let mut parents = Vec::with_capacity(parent_count as usize);
465    for _ in 0..parent_count {
466        parents.push(r.read_hash()?);
467    }
468    let author = r.read_identity()?;
469    let message = r.read_lp_bytes()?;
470    let timestamp = r.read_u64()?;
471    let signer = r.read_fixed::<32>()?;
472    let message_hash = r.read_hash()?;
473    let content_digest = r.read_hash()?;
474    let signature = r.read_fixed::<64>()?;
475    Ok(Commit {
476        tree_hash,
477        parents,
478        author,
479        signer,
480        message,
481        timestamp,
482        message_hash,
483        content_digest,
484        signature,
485    })
486}
487
488fn read_remix(r: &mut Reader<'_>) -> Result<Remix, MkitError> {
489    let tree_hash = r.read_hash()?;
490    let parent_count = r.read_u32()?;
491    if parent_count > MAX_PARENTS {
492        return Err(MkitError::TooManyParents);
493    }
494    // Cheap upper bound: each parent is HASH_LEN bytes on the wire.
495    if (parent_count as usize).saturating_mul(HASH_LEN) > r.remaining() {
496        return Err(MkitError::UnexpectedEof);
497    }
498    let mut parents = Vec::with_capacity(parent_count as usize);
499    for _ in 0..parent_count {
500        parents.push(r.read_hash()?);
501    }
502    let source_count = r.read_u32()?;
503    if source_count > MAX_REMIX_SOURCES {
504        return Err(MkitError::TooManySources);
505    }
506    // Each source is two hashes (upstream_id + commit_hash) = 2 *
507    // HASH_LEN bytes. Reject impossible counts before allocating.
508    if (source_count as usize).saturating_mul(2 * HASH_LEN) > r.remaining() {
509        return Err(MkitError::UnexpectedEof);
510    }
511    let mut sources = Vec::with_capacity(source_count as usize);
512    for _ in 0..source_count {
513        let upstream_id = r.read_hash()?;
514        let commit_hash = r.read_hash()?;
515        sources.push(RemixSource {
516            upstream_id,
517            commit_hash,
518        });
519    }
520    let author = r.read_identity()?;
521    let message = r.read_lp_bytes()?;
522    let timestamp = r.read_u64()?;
523    let signer = r.read_fixed::<32>()?;
524    let signature = r.read_fixed::<64>()?;
525    // Sort check: strict ascending by (upstream_id, commit_hash).
526    if sources.len() > 1 {
527        for w in sources.windows(2) {
528            let a = &w[0];
529            let b = &w[1];
530            let bad = match a.upstream_id.cmp(&b.upstream_id) {
531                core::cmp::Ordering::Greater => true,
532                core::cmp::Ordering::Equal => a.commit_hash >= b.commit_hash,
533                core::cmp::Ordering::Less => false,
534            };
535            if bad {
536                return Err(MkitError::InvalidSourceOrder);
537            }
538        }
539    }
540    Ok(Remix {
541        tree_hash,
542        parents,
543        sources,
544        author,
545        signer,
546        message,
547        timestamp,
548        signature,
549    })
550}
551
552fn read_tag(r: &mut Reader<'_>) -> Result<Tag, MkitError> {
553    let target = r.read_hash()?;
554    let target_type = ObjectType::from_u8(r.read_u8()?)?;
555    check_tag_target_type(target_type)?;
556    // `name` is length-prefixed; bound it by TAG_NAME_MAX_LEN before we
557    // copy so a bogus header can't force a large allocation.
558    let name_len = r.read_u32()? as usize;
559    if name_len == 0 || name_len > TAG_NAME_MAX_LEN as usize {
560        return Err(MkitError::TagNameInvalid);
561    }
562    r.need(name_len)?;
563    let name = r.data[r.pos..r.pos + name_len].to_vec();
564    r.pos += name_len;
565    if name.iter().any(|&b| matches!(b, 0 | b'/' | b'\\')) {
566        return Err(MkitError::TagNameInvalid);
567    }
568    let tagger = r.read_identity()?;
569    let message = r.read_lp_bytes()?;
570    let timestamp = r.read_u64()?;
571    let signer = r.read_fixed::<32>()?;
572    let signature = r.read_fixed::<64>()?;
573    Ok(Tag {
574        target,
575        target_type,
576        name,
577        tagger,
578        signer,
579        message,
580        timestamp,
581        signature,
582    })
583}
584
585fn read_chunked_blob(r: &mut Reader<'_>) -> Result<ChunkedBlob, MkitError> {
586    let total_size = r.read_u64()?;
587    let chunk_size = r.read_u32()?;
588    let chunk_count = r.read_u32()?;
589    if chunk_count > MAX_CHUNKS {
590        return Err(MkitError::TooManyChunks);
591    }
592    if (chunk_count as usize).saturating_mul(HASH_LEN) > r.remaining() {
593        return Err(MkitError::UnexpectedEof);
594    }
595    let mut chunks = Vec::with_capacity(chunk_count as usize);
596    for _ in 0..chunk_count {
597        chunks.push(r.read_hash()?);
598    }
599    Ok(ChunkedBlob {
600        total_size,
601        chunk_size,
602        chunks,
603    })
604}
605
606fn read_delta(r: &mut Reader<'_>) -> Result<Delta, MkitError> {
607    let base_hash = r.read_hash()?;
608    let result_size = r.read_u32()?;
609    let instructions = r.read_lp_bytes()?;
610    Ok(Delta {
611        base_hash,
612        result_size,
613        instructions,
614    })
615}
616
617// ---------------------------------------------------------------------
618// Tests
619// ---------------------------------------------------------------------
620
621#[cfg(test)]
622mod tests {
623    use super::*;
624    use crate::hash::{ZERO, hash};
625    use proptest::prelude::*;
626
627    fn ed25519_id() -> Identity {
628        Identity::ed25519([0xAA; 32])
629    }
630
631    proptest! {
632        /// `blob_prologue(len) ‖ payload` must be byte-identical to
633        /// `serialize(Object::Blob(payload))` — the zero-copy chunk
634        /// write path depends on this equivalence, which transitively
635        /// pins it to the golden blob vectors.
636        #[test]
637        fn blob_prologue_plus_payload_equals_serialize_blob(
638            payload in proptest::collection::vec(any::<u8>(), 0..2048)
639        ) {
640            let via_serialize = serialize(&Object::Blob(Blob {
641                data: payload.clone(),
642            })).unwrap();
643            let header = blob_prologue(payload.len()).unwrap();
644            let mut via_parts = header.to_vec();
645            via_parts.extend_from_slice(&payload);
646            prop_assert_eq!(via_parts, via_serialize);
647        }
648    }
649
650    #[test]
651    fn blob_prologue_rejects_oversize_len() {
652        assert!(blob_prologue(u32::MAX as usize + 1).is_err());
653        assert!(blob_prologue(0).is_ok());
654    }
655
656    #[test]
657    fn blob_roundtrip() {
658        let obj = Object::Blob(Blob {
659            data: b"hello world".to_vec(),
660        });
661        let bytes = serialize(&obj).expect("valid blob serialises");
662        // Prologue
663        assert_eq!(bytes[0], 0x01);
664        assert_eq!(&bytes[1..5], b"MKT1");
665        assert_eq!(bytes[5], 0x01);
666        let parsed = deserialize(&bytes).unwrap();
667        assert_eq!(obj, parsed);
668    }
669
670    #[test]
671    fn empty_blob_size_is_10() {
672        let obj = Object::Blob(Blob { data: vec![] });
673        let bytes = serialize(&obj).unwrap();
674        assert_eq!(bytes.len(), 10);
675        assert_eq!(deserialize(&bytes).unwrap(), obj);
676    }
677
678    #[test]
679    fn empty_tree_roundtrip() {
680        let obj = Object::Tree(Tree { entries: vec![] });
681        let bytes = serialize(&obj).unwrap();
682        assert_eq!(deserialize(&bytes).unwrap(), obj);
683    }
684
685    #[test]
686    fn tree_with_three_entries_roundtrip() {
687        let obj = Object::Tree(Tree {
688            entries: vec![
689                TreeEntry {
690                    name: b"alpha".to_vec(),
691                    mode: EntryMode::Blob,
692                    object_hash: hash(b"a"),
693                },
694                TreeEntry {
695                    name: b"beta".to_vec(),
696                    mode: EntryMode::Tree,
697                    object_hash: hash(b"b"),
698                },
699                TreeEntry {
700                    name: b"gamma".to_vec(),
701                    mode: EntryMode::Executable,
702                    object_hash: hash(b"g"),
703                },
704            ],
705        });
706        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
707    }
708
709    #[test]
710    fn commit_with_one_parent_roundtrip() {
711        let obj = Object::Commit(Commit::new_unannotated(
712            hash(b"tree"),
713            vec![hash(b"parent")],
714            ed25519_id(),
715            [0xAA; 32],
716            b"initial".to_vec(),
717            1_711_300_000,
718            [0xBB; 64],
719        ));
720        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
721    }
722
723    #[test]
724    fn root_commit_roundtrip() {
725        let obj = Object::Commit(Commit::new_unannotated(
726            hash(b"tree"),
727            vec![],
728            ed25519_id(),
729            [0x11; 32],
730            b"genesis".to_vec(),
731            1_000_000,
732            [0x22; 64],
733        ));
734        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
735    }
736
737    #[test]
738    fn commit_with_opaque_identity_roundtrip() {
739        let mid = vec![42u8, 0, 0, 0, 0, 0, 0, 0];
740        let obj = Object::Commit(Commit::new_unannotated(
741            hash(b"tree"),
742            vec![],
743            Identity::opaque(mid.clone()),
744            [0xAA; 32],
745            b"opaque author".to_vec(),
746            1_700_000_000,
747            [0xBB; 64],
748        ));
749        let parsed = deserialize(&serialize(&obj).unwrap()).unwrap();
750        if let Object::Commit(c) = &parsed {
751            assert_eq!(c.author.kind, IdentityKind::Opaque);
752            assert_eq!(c.author.bytes, mid);
753        } else {
754            panic!("not a commit");
755        }
756        assert_eq!(parsed, obj);
757    }
758
759    #[test]
760    fn remix_with_one_source_roundtrip() {
761        let obj = Object::Remix(Remix {
762            tree_hash: hash(b"tree"),
763            parents: vec![],
764            sources: vec![RemixSource {
765                upstream_id: hash(b"project-a"),
766                commit_hash: hash(b"commit-x"),
767            }],
768            author: ed25519_id(),
769            signer: [0xCC; 32],
770            message: b"remixed".to_vec(),
771            timestamp: 1_711_300_100,
772            signature: [0xDD; 64],
773        });
774        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
775    }
776
777    #[test]
778    fn chunked_blob_roundtrip() {
779        let obj = Object::ChunkedBlob(ChunkedBlob {
780            total_size: 3 * 65536,
781            chunk_size: 65536,
782            chunks: vec![hash(b"c1"), hash(b"c2"), hash(b"c3")],
783        });
784        let bytes = serialize(&obj).unwrap();
785        assert_eq!(bytes[0], 0x05);
786        assert_eq!(deserialize(&bytes).unwrap(), obj);
787    }
788
789    #[test]
790    fn chunked_blob_cdc_marker_roundtrips() {
791        let obj = Object::ChunkedBlob(ChunkedBlob {
792            total_size: 100_000,
793            chunk_size: 0,
794            chunks: vec![hash(b"x"), hash(b"y")],
795        });
796        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
797    }
798
799    fn sample_tag() -> Tag {
800        Tag {
801            target: hash(b"target-commit"),
802            target_type: ObjectType::Commit,
803            name: b"v1.0.0".to_vec(),
804            tagger: ed25519_id(),
805            signer: [0xAA; 32],
806            message: b"release 1.0.0".to_vec(),
807            timestamp: 1_711_300_000,
808            signature: [0xCC; 64],
809        }
810    }
811
812    #[test]
813    fn tag_roundtrip() {
814        let obj = Object::Tag(sample_tag());
815        let bytes = serialize(&obj).unwrap();
816        assert_eq!(bytes[0], 0x07, "tag object_type tag");
817        assert_eq!(&bytes[1..5], b"MKT1");
818        assert_eq!(bytes[5], 0x01);
819        assert_eq!(deserialize(&bytes).unwrap(), obj);
820    }
821
822    #[test]
823    fn tag_empty_message_roundtrip() {
824        let mut t = sample_tag();
825        t.message = vec![];
826        let obj = Object::Tag(t);
827        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
828    }
829
830    #[test]
831    fn tag_rejects_empty_name() {
832        let mut t = sample_tag();
833        t.name = vec![];
834        assert_eq!(serialize(&Object::Tag(t)), Err(MkitError::TagNameInvalid));
835    }
836
837    #[test]
838    fn tag_rejects_delta_target_type() {
839        let mut t = sample_tag();
840        t.target_type = ObjectType::Delta;
841        assert_eq!(
842            serialize(&Object::Tag(t)),
843            Err(MkitError::TagTargetTypeInvalid(ObjectType::Delta as u8))
844        );
845    }
846
847    #[test]
848    fn tag_decode_rejects_forbidden_name_byte() {
849        // Hand-craft a tag whose name embeds a `/`. The writer would
850        // reject it, so build the wire bytes directly.
851        let mut buf = vec![0x07, b'M', b'K', b'T', b'1', 0x01];
852        buf.extend_from_slice(&[0u8; 32]); // target
853        buf.push(ObjectType::Commit as u8); // target_type
854        buf.extend_from_slice(&3u32.to_le_bytes()); // name_len
855        buf.extend_from_slice(b"a/b");
856        assert_eq!(deserialize(&buf), Err(MkitError::TagNameInvalid));
857    }
858
859    // ---- Negative tests ----
860
861    #[test]
862    fn deserialize_empty_input() {
863        assert_eq!(deserialize(&[]), Err(MkitError::EmptyData));
864    }
865
866    #[test]
867    fn rejects_invalid_object_type() {
868        let bad = [0xFF, b'M', b'K', b'T', b'1', 0x01];
869        assert_eq!(deserialize(&bad), Err(MkitError::InvalidObjectType(0xFF)));
870    }
871
872    #[test]
873    fn rejects_bad_magic() {
874        let bad = [0x01, b'X', b'Y', b'Z', b'W', 0x01, 0, 0, 0, 0];
875        assert_eq!(deserialize(&bad), Err(MkitError::InvalidMagic));
876    }
877
878    #[test]
879    fn rejects_unsupported_schema_version() {
880        let bad = [0x01, b'M', b'K', b'T', b'1', 0x02, 0, 0, 0, 0];
881        assert_eq!(deserialize(&bad), Err(MkitError::UnsupportedObjectVersion));
882    }
883
884    #[test]
885    fn rejects_truncated_blob() {
886        // length=100 but only 2 bytes follow
887        let bad = [
888            0x01, b'M', b'K', b'T', b'1', 0x01, 0x64, 0x00, 0x00, 0x00, 0xAA, 0xBB,
889        ];
890        assert_eq!(deserialize(&bad), Err(MkitError::UnexpectedEof));
891    }
892
893    #[test]
894    fn rejects_unsorted_tree_entries() {
895        // Build an unsorted tree by hand — can't go through serialize()
896        // because writers don't validate ordering today.
897        let mut buf = vec![0x02, b'M', b'K', b'T', b'1', 0x01];
898        buf.extend_from_slice(&2u32.to_le_bytes());
899        // entry "z.txt"
900        buf.extend_from_slice(&5u32.to_le_bytes());
901        buf.extend_from_slice(b"z.txt");
902        buf.push(EntryMode::Blob as u8);
903        buf.extend_from_slice(&[0u8; 32]);
904        // entry "a.txt"
905        buf.extend_from_slice(&5u32.to_le_bytes());
906        buf.extend_from_slice(b"a.txt");
907        buf.push(EntryMode::Blob as u8);
908        buf.extend_from_slice(&[0u8; 32]);
909        assert_eq!(deserialize(&buf), Err(MkitError::InvalidEntryOrder));
910    }
911
912    #[test]
913    fn rejects_trailing_bytes() {
914        let obj = Object::Blob(Blob {
915            data: b"hello".to_vec(),
916        });
917        let mut bytes = serialize(&obj).unwrap();
918        bytes.push(0xFF);
919        assert_eq!(deserialize(&bytes), Err(MkitError::TrailingData));
920    }
921
922    #[test]
923    fn rejects_zero_length_identity() {
924        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
925        buf.extend_from_slice(&[0u8; 32]); // tree_hash
926        buf.extend_from_slice(&0u32.to_le_bytes()); // parent_count
927        buf.push(IdentityKind::Opaque as u8);
928        buf.extend_from_slice(&0u16.to_le_bytes()); // len = 0
929        assert_eq!(deserialize(&buf), Err(MkitError::InvalidIdentity));
930    }
931
932    #[test]
933    fn rejects_unknown_identity_kind() {
934        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
935        buf.extend_from_slice(&[0u8; 32]);
936        buf.extend_from_slice(&0u32.to_le_bytes());
937        buf.push(0xEE); // unknown kind
938        buf.extend_from_slice(&4u16.to_le_bytes());
939        buf.extend_from_slice(b"xxxx");
940        assert_eq!(deserialize(&buf), Err(MkitError::UnknownIdentityKind(0xEE)));
941    }
942
943    #[test]
944    fn rejects_ed25519_with_wrong_length() {
945        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
946        buf.extend_from_slice(&[0u8; 32]);
947        buf.extend_from_slice(&0u32.to_le_bytes());
948        buf.push(IdentityKind::Ed25519 as u8);
949        buf.extend_from_slice(&8u16.to_le_bytes());
950        buf.extend_from_slice(b"12345678");
951        assert_eq!(deserialize(&buf), Err(MkitError::InvalidIdentity));
952    }
953
954    #[test]
955    fn rejects_oversize_identity() {
956        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
957        buf.extend_from_slice(&[0u8; 32]);
958        buf.extend_from_slice(&0u32.to_le_bytes());
959        buf.push(IdentityKind::Opaque as u8);
960        buf.extend_from_slice(&(IDENTITY_MAX_LEN + 1).to_le_bytes());
961        buf.extend(core::iter::repeat_n(0u8, IDENTITY_MAX_LEN as usize + 1));
962        assert_eq!(deserialize(&buf), Err(MkitError::IdentityTooLarge));
963    }
964
965    #[test]
966    fn rejects_too_many_tree_entries() {
967        let mut buf = vec![0x02, b'M', b'K', b'T', b'1', 0x01];
968        buf.extend_from_slice(&(MAX_TREE_ENTRIES + 1).to_le_bytes());
969        assert_eq!(deserialize(&buf), Err(MkitError::TooManyEntries));
970    }
971
972    #[test]
973    fn rejects_truncated_chunk_list() {
974        let mut buf = vec![0x05, b'M', b'K', b'T', b'1', 0x01];
975        buf.extend_from_slice(&1024u64.to_le_bytes());
976        buf.extend_from_slice(&0u32.to_le_bytes());
977        buf.extend_from_slice(&2u32.to_le_bytes()); // chunk_count = 2
978        buf.extend_from_slice(&[0xAA; 32]); // only one chunk
979        assert_eq!(deserialize(&buf), Err(MkitError::UnexpectedEof));
980    }
981
982    #[test]
983    fn deterministic_serialization() {
984        let obj = Object::Blob(Blob {
985            data: b"deterministic".to_vec(),
986        });
987        let a = serialize(&obj).unwrap();
988        let b = serialize(&obj).unwrap();
989        assert_eq!(a, b);
990        assert_eq!(hash(&a), hash(&b));
991        // Ensure hash() and ZERO are linked correctly — silly sanity.
992        assert_ne!(a, vec![0u8; a.len()]);
993        let _ = ZERO;
994    }
995
996    // ---- Fallible-serialize tests (review follow-up #22) ----
997
998    #[test]
999    fn serialize_rejects_invalid_identity_in_commit() {
1000        // Empty payload is structurally invalid for every kind.
1001        let bad_id = Identity {
1002            kind: IdentityKind::Opaque,
1003            bytes: Vec::new(),
1004        };
1005        let obj = Object::Commit(Commit::new_unannotated(
1006            hash(b"tree"),
1007            vec![],
1008            bad_id,
1009            [0; 32],
1010            b"x".to_vec(),
1011            0,
1012            [0; 64],
1013        ));
1014        assert_eq!(serialize(&obj), Err(MkitError::InvalidIdentity));
1015    }
1016
1017    #[test]
1018    fn read_identity_rejects_non_multibase_didkey() {
1019        // Wire format: [u8 kind][u16 LE len][payload]. A DidKey payload must
1020        // be a printable-ASCII multibase string, so a malformed object with a
1021        // binary/whitespace DidKey payload must be rejected at the read
1022        // boundary, not silently deserialized (#223).
1023        let id_bytes = |payload: &[u8]| {
1024            let mut b = vec![0x02u8]; // IdentityKind::DidKey
1025            let len = u16::try_from(payload.len()).expect("test payload fits u16");
1026            b.extend_from_slice(&len.to_le_bytes());
1027            b.extend_from_slice(payload);
1028            b
1029        };
1030        // NUL, high byte, and whitespace payloads all reject.
1031        for bad in [b"z\x00ab".as_slice(), b"z\xff", b"z6Mk has space"] {
1032            let buf = id_bytes(bad);
1033            assert_eq!(
1034                Reader::new(&buf).read_identity(),
1035                Err(MkitError::InvalidIdentity),
1036                "should reject DidKey payload {bad:?} at the read boundary"
1037            );
1038        }
1039        // A real did:key multibase payload round-trips.
1040        let good = id_bytes(b"z6MkExample");
1041        let id = Reader::new(&good).read_identity().unwrap();
1042        assert_eq!(id.kind, IdentityKind::DidKey);
1043        assert_eq!(id.bytes, b"z6MkExample");
1044    }
1045
1046    #[test]
1047    fn serialize_rejects_invalid_identity_in_remix() {
1048        // Ed25519 with non-32-byte payload.
1049        let bad_id = Identity {
1050            kind: IdentityKind::Ed25519,
1051            bytes: vec![0u8; 16],
1052        };
1053        let obj = Object::Remix(Remix {
1054            tree_hash: ZERO,
1055            parents: vec![],
1056            sources: vec![],
1057            author: bad_id,
1058            signer: [0; 32],
1059            message: b"x".to_vec(),
1060            timestamp: 0,
1061            signature: [0; 64],
1062        });
1063        assert_eq!(serialize(&obj), Err(MkitError::InvalidIdentity));
1064    }
1065
1066    /// `read_commit` claims `parent_count = MAX_PARENTS` (`1_000`) but
1067    /// the remaining buffer is too small to ever hold that many
1068    /// 32-byte parent hashes. The pre-allocation guard must reject the
1069    /// header before the parent vec is sized from attacker input.
1070    #[test]
1071    fn rejects_truncated_commit_parents() {
1072        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
1073        buf.extend_from_slice(&[0u8; 32]); // tree_hash
1074        // Within MAX_PARENTS (1_000) so the existing TooManyParents
1075        // guard doesn't fire — we want to confirm the capacity-vs-
1076        // remaining check rejects too. 1_000 parents = 32_000 bytes,
1077        // but only a single 32-byte hash follows.
1078        buf.extend_from_slice(&1_000u32.to_le_bytes()); // parent_count
1079        buf.extend_from_slice(&[0xAA; 32]); // only one parent worth
1080        assert_eq!(deserialize(&buf), Err(MkitError::UnexpectedEof));
1081    }
1082
1083    /// `read_remix` claims `source_count = MAX_REMIX_SOURCES` (`10_000`)
1084    /// but the remaining buffer cannot accommodate even one source
1085    /// (which is 64 bytes — two hashes). Reject without allocating.
1086    #[test]
1087    fn rejects_truncated_remix_sources() {
1088        let mut buf = vec![0x04, b'M', b'K', b'T', b'1', 0x01];
1089        buf.extend_from_slice(&[0u8; 32]); // tree_hash
1090        buf.extend_from_slice(&0u32.to_le_bytes()); // parent_count
1091        // 10_000 sources × 64 bytes = 640_000 bytes required, but the
1092        // buffer is empty after this point.
1093        buf.extend_from_slice(&10_000u32.to_le_bytes()); // source_count
1094        assert_eq!(deserialize(&buf), Err(MkitError::UnexpectedEof));
1095    }
1096
1097    #[cfg(target_pointer_width = "64")]
1098    #[test]
1099    fn checked_u32_rejects_oversize() {
1100        // Direct unit test on the bounds helper — we cannot allocate a
1101        // Vec with > u32::MAX entries in a unit test, so exercise the
1102        // guard surface itself. This pins the field-name string so
1103        // downstream consumers can grep on it. 32-bit targets cannot
1104        // even construct `n`, so the test is gated on pointer width.
1105        let n: usize = u32::MAX as usize + 1;
1106        let err = checked_u32("blob.data", n).unwrap_err();
1107        assert_eq!(
1108            err,
1109            MkitError::OversizePayload {
1110                field: "blob.data",
1111                len: n,
1112            }
1113        );
1114    }
1115
1116    // -- Property tests -------------------------------------------------
1117    //
1118    // Round-trip invariants exercised against arbitrary inputs via
1119    // `proptest`. The example tests above cover specific vectors and
1120    // the goldens pin wire bytes; the properties below catch the
1121    // boundary cases the examples miss (empty payloads, max-length
1122    // strings, non-ASCII bytes, etc.).
1123    proptest::proptest! {
1124        /// Any blob round-trips byte-for-byte through serialize/deserialize.
1125        #[test]
1126        fn proptest_blob_roundtrip(data in proptest::collection::vec(proptest::num::u8::ANY, 0..4096)) {
1127            let obj = Object::Blob(Blob { data });
1128            let bytes = serialize(&obj).expect("blob serialises");
1129            let parsed = deserialize(&bytes).expect("blob deserialises");
1130            proptest::prop_assert_eq!(obj, parsed);
1131        }
1132
1133        /// Any commit (single parent, fixed identity) round-trips
1134        /// byte-for-byte. Covers arbitrary tree hashes, arbitrary parent
1135        /// hashes, arbitrary message bytes including non-UTF-8 sequences
1136        /// (commit messages are bytes per SPEC-OBJECTS §5). Signer + sig
1137        /// arrays are constructed from a u8 seed (proptest only ships
1138        /// `uniform32` natively; 64-byte signatures get a tiled seed).
1139        #[test]
1140        fn proptest_commit_roundtrip(
1141            tree in proptest::array::uniform32(proptest::num::u8::ANY),
1142            parent in proptest::array::uniform32(proptest::num::u8::ANY),
1143            signer in proptest::array::uniform32(proptest::num::u8::ANY),
1144            msg in proptest::collection::vec(proptest::num::u8::ANY, 0..2048),
1145            sig_seed in proptest::num::u8::ANY,
1146            ts in 0u64..u64::from(u32::MAX),
1147        ) {
1148            let mut sig = [0u8; 64];
1149            sig.fill(sig_seed);
1150            let commit = Commit::new_unannotated(
1151                tree,
1152                vec![parent],
1153                ed25519_id(),
1154                signer,
1155                msg,
1156                ts,
1157                sig,
1158            );
1159            let obj = Object::Commit(commit);
1160            let bytes = serialize(&obj).expect("commit serialises");
1161            let parsed = deserialize(&bytes).expect("commit deserialises");
1162            proptest::prop_assert_eq!(obj, parsed);
1163        }
1164    }
1165}