Skip to main content

mkit_core/
serialize.rs

1//! Canonical byte (de)serialization for [`Object`].
2//!
3//! Spec: `docs/SPEC-OBJECTS.md`. The byte layout produced here is the
4//! v1 on-disk format; the golden-vector tests in `tests/golden.rs` pin
5//! it byte-for-byte.
6//!
7//! Every deserializer:
8//! * Validates the 6-byte v1 prologue first.
9//! * Enforces per-type bounds (entry counts, identity len, etc.).
10//! * Rejects non-empty trailing bytes via [`MkitError::TrailingData`].
11
12use crate::hash::{HASH_LEN, Hash};
13use crate::object::{
14    Blob, ChunkedBlob, Commit, Delta, EntryMode, IDENTITY_MAX_LEN, Identity, IdentityKind, MAGIC,
15    MkitError, Object, ObjectType, Remix, RemixSource, SCHEMA_VERSION, TAG_NAME_MAX_LEN, Tag, Tree,
16    TreeEntry,
17};
18
/// Length of the fixed prologue every object starts with:
/// object-type tag (1 byte) + magic (4 bytes) + schema version (1 byte).
const PROLOGUE_LEN: usize = 1 + 4 + 1;

/// Largest entry count `read_tree` accepts before failing with `TooManyEntries`.
const MAX_TREE_ENTRIES: u32 = 1_000_000;
/// Largest parent count `read_commit` / `read_remix` accept before failing
/// with `TooManyParents`.
const MAX_PARENTS: u32 = 1_000;
/// Largest source count `read_remix` accepts before failing with `TooManySources`.
const MAX_REMIX_SOURCES: u32 = 10_000;
/// Largest chunk count `read_chunked_blob` accepts before failing with
/// `TooManyChunks`.
const MAX_CHUNKS: u32 = 1_000_000;
25
26// ---------------------------------------------------------------------
27// Public API
28// ---------------------------------------------------------------------
29
30/// Serialize an [`Object`] to its canonical byte form. Allocates fresh
31/// each call; the result is fully owned.
32///
33/// Returns [`MkitError::OversizePayload`] if any length-prefixed field
34/// exceeds the wire-format `u32` cap, and [`MkitError::InvalidIdentity`]
35/// if the object carries a structurally invalid [`Identity`].
36pub fn serialize(obj: &Object) -> Result<Vec<u8>, MkitError> {
37    let mut buf = Vec::with_capacity(PROLOGUE_LEN + estimated_body_len(obj));
38    write_prologue(&mut buf, obj.object_type());
39    match obj {
40        Object::Blob(b) => write_blob(&mut buf, b)?,
41        Object::Tree(t) => write_tree(&mut buf, t)?,
42        Object::Commit(c) => write_commit(&mut buf, c)?,
43        Object::Remix(r) => write_remix(&mut buf, r)?,
44        Object::ChunkedBlob(cb) => write_chunked_blob(&mut buf, cb)?,
45        Object::Delta(d) => write_delta(&mut buf, d)?,
46        Object::Tag(t) => write_tag(&mut buf, t)?,
47    }
48    Ok(buf)
49}
50
51/// Deserialize bytes into an owned [`Object`]. Validates the prologue
52/// and every per-type bound; rejects trailing data.
53pub fn deserialize(data: &[u8]) -> Result<Object, MkitError> {
54    if data.len() < PROLOGUE_LEN {
55        return Err(MkitError::EmptyData);
56    }
57    let tag = ObjectType::from_u8(data[0])?;
58    if data[1..5] != MAGIC {
59        return Err(MkitError::InvalidMagic);
60    }
61    if data[5] != SCHEMA_VERSION {
62        return Err(MkitError::UnsupportedObjectVersion);
63    }
64    let mut r = Reader::new(&data[PROLOGUE_LEN..]);
65    let obj = match tag {
66        ObjectType::Blob => Object::Blob(read_blob(&mut r)?),
67        ObjectType::Tree => Object::Tree(read_tree(&mut r)?),
68        ObjectType::Commit => Object::Commit(read_commit(&mut r)?),
69        ObjectType::Remix => Object::Remix(read_remix(&mut r)?),
70        ObjectType::ChunkedBlob => Object::ChunkedBlob(read_chunked_blob(&mut r)?),
71        ObjectType::Delta => Object::Delta(read_delta(&mut r)?),
72        ObjectType::Tag => Object::Tag(read_tag(&mut r)?),
73    };
74    if r.remaining() != 0 {
75        return Err(MkitError::TrailingData);
76    }
77    Ok(obj)
78}
79
80// ---------------------------------------------------------------------
81// Writers
82// ---------------------------------------------------------------------
83
84fn write_prologue(buf: &mut Vec<u8>, t: ObjectType) {
85    buf.push(t as u8);
86    buf.extend_from_slice(&MAGIC);
87    buf.push(SCHEMA_VERSION);
88}
89
/// Append `v` to `buf` as two little-endian bytes.
fn write_u16_le(buf: &mut Vec<u8>, v: u16) {
    buf.extend(v.to_le_bytes());
}
93
/// Append `v` to `buf` as four little-endian bytes.
fn write_u32_le(buf: &mut Vec<u8>, v: u32) {
    buf.extend(v.to_le_bytes());
}
97
/// Append `v` to `buf` as eight little-endian bytes.
fn write_u64_le(buf: &mut Vec<u8>, v: u64) {
    buf.extend(v.to_le_bytes());
}
101
102fn checked_u32(field: &'static str, len: usize) -> Result<u32, MkitError> {
103    u32::try_from(len).map_err(|_| MkitError::OversizePayload { field, len })
104}
105
106fn write_lp_bytes(buf: &mut Vec<u8>, field: &'static str, data: &[u8]) -> Result<(), MkitError> {
107    write_u32_le(buf, checked_u32(field, data.len())?);
108    buf.extend_from_slice(data);
109    Ok(())
110}
111
112fn write_identity(buf: &mut Vec<u8>, id: &Identity) -> Result<(), MkitError> {
113    if !id.is_valid() {
114        return Err(MkitError::InvalidIdentity);
115    }
116    buf.push(id.kind as u8);
117    // `is_valid` already enforces 1..=IDENTITY_MAX_LEN, so the cast is
118    // safe — but keep the guard so the encoder can never silently lose
119    // bytes if `is_valid` is ever loosened.
120    let len = u16::try_from(id.bytes.len()).map_err(|_| MkitError::InvalidIdentity)?;
121    write_u16_le(buf, len);
122    buf.extend_from_slice(&id.bytes);
123    Ok(())
124}
125
126fn write_blob(buf: &mut Vec<u8>, b: &Blob) -> Result<(), MkitError> {
127    write_lp_bytes(buf, "blob.data", &b.data)
128}
129
130fn write_tree(buf: &mut Vec<u8>, t: &Tree) -> Result<(), MkitError> {
131    write_u32_le(buf, checked_u32("tree.entries", t.entries.len())?);
132    for e in &t.entries {
133        write_lp_bytes(buf, "tree.entry.name", &e.name)?;
134        buf.push(e.mode as u8);
135        buf.extend_from_slice(&e.object_hash);
136    }
137    Ok(())
138}
139
140fn write_commit(buf: &mut Vec<u8>, c: &Commit) -> Result<(), MkitError> {
141    buf.extend_from_slice(&c.tree_hash);
142    write_u32_le(buf, checked_u32("commit.parents", c.parents.len())?);
143    for p in &c.parents {
144        buf.extend_from_slice(p);
145    }
146    write_identity(buf, &c.author)?;
147    write_lp_bytes(buf, "commit.message", &c.message)?;
148    write_u64_le(buf, c.timestamp);
149    buf.extend_from_slice(&c.signer);
150    buf.extend_from_slice(&c.message_hash);
151    buf.extend_from_slice(&c.content_digest);
152    buf.extend_from_slice(&c.signature);
153    Ok(())
154}
155
156fn write_remix(buf: &mut Vec<u8>, r: &Remix) -> Result<(), MkitError> {
157    buf.extend_from_slice(&r.tree_hash);
158    write_u32_le(buf, checked_u32("remix.parents", r.parents.len())?);
159    for p in &r.parents {
160        buf.extend_from_slice(p);
161    }
162    write_u32_le(buf, checked_u32("remix.sources", r.sources.len())?);
163    for s in &r.sources {
164        buf.extend_from_slice(&s.upstream_id);
165        buf.extend_from_slice(&s.commit_hash);
166    }
167    write_identity(buf, &r.author)?;
168    write_lp_bytes(buf, "remix.message", &r.message)?;
169    write_u64_le(buf, r.timestamp);
170    buf.extend_from_slice(&r.signer);
171    buf.extend_from_slice(&r.signature);
172    Ok(())
173}
174
175/// Reject pack-only / non-storable target types. A tag MUST point at a
176/// type that can live in the object store (`Delta` is pack-only).
177fn check_tag_target_type(t: ObjectType) -> Result<(), MkitError> {
178    if matches!(t, ObjectType::Delta) {
179        return Err(MkitError::TagTargetTypeInvalid(t as u8));
180    }
181    Ok(())
182}
183
184fn write_tag(buf: &mut Vec<u8>, t: &Tag) -> Result<(), MkitError> {
185    if !t.name_is_valid() {
186        return Err(MkitError::TagNameInvalid);
187    }
188    check_tag_target_type(t.target_type)?;
189    buf.extend_from_slice(&t.target);
190    buf.push(t.target_type as u8);
191    write_lp_bytes(buf, "tag.name", &t.name)?;
192    write_identity(buf, &t.tagger)?;
193    write_lp_bytes(buf, "tag.message", &t.message)?;
194    write_u64_le(buf, t.timestamp);
195    buf.extend_from_slice(&t.signer);
196    buf.extend_from_slice(&t.signature);
197    Ok(())
198}
199
200fn write_chunked_blob(buf: &mut Vec<u8>, cb: &ChunkedBlob) -> Result<(), MkitError> {
201    write_u64_le(buf, cb.total_size);
202    write_u32_le(buf, cb.chunk_size);
203    write_u32_le(buf, checked_u32("chunked_blob.chunks", cb.chunks.len())?);
204    for c in &cb.chunks {
205        buf.extend_from_slice(c);
206    }
207    Ok(())
208}
209
210fn write_delta(buf: &mut Vec<u8>, d: &Delta) -> Result<(), MkitError> {
211    buf.extend_from_slice(&d.base_hash);
212    write_u32_le(buf, d.result_size);
213    write_lp_bytes(buf, "delta.instructions", &d.instructions)
214}
215
216fn estimated_body_len(obj: &Object) -> usize {
217    match obj {
218        Object::Blob(b) => 4 + b.data.len(),
219        Object::Tree(t) => {
220            4 + t
221                .entries
222                .iter()
223                .map(|e| 4 + e.name.len() + 1 + 32)
224                .sum::<usize>()
225        }
226        Object::Commit(c) => {
227            32 + 4
228                + c.parents.len() * 32
229                + 1
230                + 2
231                + c.author.bytes.len()
232                + 4
233                + c.message.len()
234                + 8
235                + 32
236                + 32
237                + 32
238                + 64
239        }
240        Object::Remix(r) => {
241            32 + 4
242                + r.parents.len() * 32
243                + 4
244                + r.sources.len() * 64
245                + 1
246                + 2
247                + r.author.bytes.len()
248                + 4
249                + r.message.len()
250                + 8
251                + 32
252                + 64
253        }
254        Object::ChunkedBlob(cb) => 8 + 4 + 4 + cb.chunks.len() * 32,
255        Object::Delta(d) => 32 + 4 + 4 + d.instructions.len(),
256        Object::Tag(t) => {
257            32 + 1
258                + 4
259                + t.name.len()
260                + 1
261                + 2
262                + t.tagger.bytes.len()
263                + 4
264                + t.message.len()
265                + 8
266                + 32
267                + 64
268        }
269    }
270}
271
272// ---------------------------------------------------------------------
273// Reader
274// ---------------------------------------------------------------------
275
276struct Reader<'a> {
277    data: &'a [u8],
278    pos: usize,
279}
280
281impl<'a> Reader<'a> {
282    fn new(data: &'a [u8]) -> Self {
283        Self { data, pos: 0 }
284    }
285
286    fn remaining(&self) -> usize {
287        self.data.len() - self.pos
288    }
289
290    fn need(&self, n: usize) -> Result<(), MkitError> {
291        if self.remaining() < n {
292            Err(MkitError::UnexpectedEof)
293        } else {
294            Ok(())
295        }
296    }
297
298    fn read_u8(&mut self) -> Result<u8, MkitError> {
299        self.need(1)?;
300        let v = self.data[self.pos];
301        self.pos += 1;
302        Ok(v)
303    }
304
305    fn read_u16(&mut self) -> Result<u16, MkitError> {
306        self.need(2)?;
307        let mut a = [0u8; 2];
308        a.copy_from_slice(&self.data[self.pos..self.pos + 2]);
309        self.pos += 2;
310        Ok(u16::from_le_bytes(a))
311    }
312
313    fn read_u32(&mut self) -> Result<u32, MkitError> {
314        self.need(4)?;
315        let mut a = [0u8; 4];
316        a.copy_from_slice(&self.data[self.pos..self.pos + 4]);
317        self.pos += 4;
318        Ok(u32::from_le_bytes(a))
319    }
320
321    fn read_u64(&mut self) -> Result<u64, MkitError> {
322        self.need(8)?;
323        let mut a = [0u8; 8];
324        a.copy_from_slice(&self.data[self.pos..self.pos + 8]);
325        self.pos += 8;
326        Ok(u64::from_le_bytes(a))
327    }
328
329    fn read_hash(&mut self) -> Result<Hash, MkitError> {
330        self.need(HASH_LEN)?;
331        let mut h = [0u8; HASH_LEN];
332        h.copy_from_slice(&self.data[self.pos..self.pos + HASH_LEN]);
333        self.pos += HASH_LEN;
334        Ok(h)
335    }
336
337    fn read_fixed<const N: usize>(&mut self) -> Result<[u8; N], MkitError> {
338        self.need(N)?;
339        let mut out = [0u8; N];
340        out.copy_from_slice(&self.data[self.pos..self.pos + N]);
341        self.pos += N;
342        Ok(out)
343    }
344
345    fn read_lp_bytes(&mut self) -> Result<Vec<u8>, MkitError> {
346        let len = self.read_u32()? as usize;
347        self.need(len)?;
348        let v = self.data[self.pos..self.pos + len].to_vec();
349        self.pos += len;
350        Ok(v)
351    }
352
353    fn read_identity(&mut self) -> Result<Identity, MkitError> {
354        let kind = IdentityKind::from_u8(self.read_u8()?)?;
355        let len = self.read_u16()?;
356        if len == 0 {
357            return Err(MkitError::InvalidIdentity);
358        }
359        if len > IDENTITY_MAX_LEN {
360            return Err(MkitError::IdentityTooLarge);
361        }
362        match kind {
363            IdentityKind::Ed25519 if len != 32 => return Err(MkitError::InvalidIdentity),
364            _ => {}
365        }
366        let len = len as usize;
367        self.need(len)?;
368        let bytes = self.data[self.pos..self.pos + len].to_vec();
369        self.pos += len;
370        let id = Identity { kind, bytes };
371        // Enforce the full structural invariant at the read boundary so a
372        // malformed object from disk/remote can't deserialize with an
373        // invalid payload (e.g. a binary `DidKey` that isn't a printable
374        // multibase string). `is_valid` is the single source of truth and
375        // the serialize side already gates on it (#223).
376        if !id.is_valid() {
377            return Err(MkitError::InvalidIdentity);
378        }
379        Ok(id)
380    }
381}
382
383// ---------------------------------------------------------------------
384// Readers
385// ---------------------------------------------------------------------
386
387fn read_blob(r: &mut Reader<'_>) -> Result<Blob, MkitError> {
388    Ok(Blob {
389        data: r.read_lp_bytes()?,
390    })
391}
392
393fn read_tree(r: &mut Reader<'_>) -> Result<Tree, MkitError> {
394    let count = r.read_u32()?;
395    if count > MAX_TREE_ENTRIES {
396        return Err(MkitError::TooManyEntries);
397    }
398    // Cheap upper bound: each entry is at least name_len(4) + mode(1) +
399    // hash(32) = 37 bytes plus a 1-byte name. Reject impossible counts
400    // before we allocate the entry vec.
401    if (count as usize).saturating_mul(4 + 1 + 1 + HASH_LEN) > r.remaining() {
402        return Err(MkitError::UnexpectedEof);
403    }
404    let mut entries = Vec::with_capacity(count as usize);
405    let mut prev: Option<Vec<u8>> = None;
406    for _ in 0..count {
407        let name = r.read_lp_bytes()?;
408        if !TreeEntry::validate_name(&name) {
409            return Err(MkitError::InvalidEntryName);
410        }
411        if let Some(p) = &prev
412            && p.as_slice() >= name.as_slice()
413        {
414            return Err(MkitError::InvalidEntryOrder);
415        }
416        let mode = EntryMode::from_u8(r.read_u8()?)?;
417        let object_hash = r.read_hash()?;
418        prev = Some(name.clone());
419        entries.push(TreeEntry {
420            name,
421            mode,
422            object_hash,
423        });
424    }
425    Ok(Tree { entries })
426}
427
428fn read_commit(r: &mut Reader<'_>) -> Result<Commit, MkitError> {
429    let tree_hash = r.read_hash()?;
430    let parent_count = r.read_u32()?;
431    if parent_count > MAX_PARENTS {
432        return Err(MkitError::TooManyParents);
433    }
434    // Cheap upper bound: each parent is HASH_LEN bytes on the wire. If
435    // the remaining buffer can't even hold the parent hashes, the
436    // header is lying and we must not pre-allocate for it.
437    if (parent_count as usize).saturating_mul(HASH_LEN) > r.remaining() {
438        return Err(MkitError::UnexpectedEof);
439    }
440    let mut parents = Vec::with_capacity(parent_count as usize);
441    for _ in 0..parent_count {
442        parents.push(r.read_hash()?);
443    }
444    let author = r.read_identity()?;
445    let message = r.read_lp_bytes()?;
446    let timestamp = r.read_u64()?;
447    let signer = r.read_fixed::<32>()?;
448    let message_hash = r.read_hash()?;
449    let content_digest = r.read_hash()?;
450    let signature = r.read_fixed::<64>()?;
451    Ok(Commit {
452        tree_hash,
453        parents,
454        author,
455        signer,
456        message,
457        timestamp,
458        message_hash,
459        content_digest,
460        signature,
461    })
462}
463
464fn read_remix(r: &mut Reader<'_>) -> Result<Remix, MkitError> {
465    let tree_hash = r.read_hash()?;
466    let parent_count = r.read_u32()?;
467    if parent_count > MAX_PARENTS {
468        return Err(MkitError::TooManyParents);
469    }
470    // Cheap upper bound: each parent is HASH_LEN bytes on the wire.
471    if (parent_count as usize).saturating_mul(HASH_LEN) > r.remaining() {
472        return Err(MkitError::UnexpectedEof);
473    }
474    let mut parents = Vec::with_capacity(parent_count as usize);
475    for _ in 0..parent_count {
476        parents.push(r.read_hash()?);
477    }
478    let source_count = r.read_u32()?;
479    if source_count > MAX_REMIX_SOURCES {
480        return Err(MkitError::TooManySources);
481    }
482    // Each source is two hashes (upstream_id + commit_hash) = 2 *
483    // HASH_LEN bytes. Reject impossible counts before allocating.
484    if (source_count as usize).saturating_mul(2 * HASH_LEN) > r.remaining() {
485        return Err(MkitError::UnexpectedEof);
486    }
487    let mut sources = Vec::with_capacity(source_count as usize);
488    for _ in 0..source_count {
489        let upstream_id = r.read_hash()?;
490        let commit_hash = r.read_hash()?;
491        sources.push(RemixSource {
492            upstream_id,
493            commit_hash,
494        });
495    }
496    let author = r.read_identity()?;
497    let message = r.read_lp_bytes()?;
498    let timestamp = r.read_u64()?;
499    let signer = r.read_fixed::<32>()?;
500    let signature = r.read_fixed::<64>()?;
501    // Sort check: strict ascending by (upstream_id, commit_hash).
502    if sources.len() > 1 {
503        for w in sources.windows(2) {
504            let a = &w[0];
505            let b = &w[1];
506            let bad = match a.upstream_id.cmp(&b.upstream_id) {
507                core::cmp::Ordering::Greater => true,
508                core::cmp::Ordering::Equal => a.commit_hash >= b.commit_hash,
509                core::cmp::Ordering::Less => false,
510            };
511            if bad {
512                return Err(MkitError::InvalidSourceOrder);
513            }
514        }
515    }
516    Ok(Remix {
517        tree_hash,
518        parents,
519        sources,
520        author,
521        signer,
522        message,
523        timestamp,
524        signature,
525    })
526}
527
528fn read_tag(r: &mut Reader<'_>) -> Result<Tag, MkitError> {
529    let target = r.read_hash()?;
530    let target_type = ObjectType::from_u8(r.read_u8()?)?;
531    check_tag_target_type(target_type)?;
532    // `name` is length-prefixed; bound it by TAG_NAME_MAX_LEN before we
533    // copy so a bogus header can't force a large allocation.
534    let name_len = r.read_u32()? as usize;
535    if name_len == 0 || name_len > TAG_NAME_MAX_LEN as usize {
536        return Err(MkitError::TagNameInvalid);
537    }
538    r.need(name_len)?;
539    let name = r.data[r.pos..r.pos + name_len].to_vec();
540    r.pos += name_len;
541    if name.iter().any(|&b| matches!(b, 0 | b'/' | b'\\')) {
542        return Err(MkitError::TagNameInvalid);
543    }
544    let tagger = r.read_identity()?;
545    let message = r.read_lp_bytes()?;
546    let timestamp = r.read_u64()?;
547    let signer = r.read_fixed::<32>()?;
548    let signature = r.read_fixed::<64>()?;
549    Ok(Tag {
550        target,
551        target_type,
552        name,
553        tagger,
554        signer,
555        message,
556        timestamp,
557        signature,
558    })
559}
560
561fn read_chunked_blob(r: &mut Reader<'_>) -> Result<ChunkedBlob, MkitError> {
562    let total_size = r.read_u64()?;
563    let chunk_size = r.read_u32()?;
564    let chunk_count = r.read_u32()?;
565    if chunk_count > MAX_CHUNKS {
566        return Err(MkitError::TooManyChunks);
567    }
568    if (chunk_count as usize).saturating_mul(HASH_LEN) > r.remaining() {
569        return Err(MkitError::UnexpectedEof);
570    }
571    let mut chunks = Vec::with_capacity(chunk_count as usize);
572    for _ in 0..chunk_count {
573        chunks.push(r.read_hash()?);
574    }
575    Ok(ChunkedBlob {
576        total_size,
577        chunk_size,
578        chunks,
579    })
580}
581
582fn read_delta(r: &mut Reader<'_>) -> Result<Delta, MkitError> {
583    let base_hash = r.read_hash()?;
584    let result_size = r.read_u32()?;
585    let instructions = r.read_lp_bytes()?;
586    Ok(Delta {
587        base_hash,
588        result_size,
589        instructions,
590    })
591}
592
593// ---------------------------------------------------------------------
594// Tests
595// ---------------------------------------------------------------------
596
597#[cfg(test)]
598mod tests {
599    use super::*;
600    use crate::hash::{ZERO, hash};
601
602    fn ed25519_id() -> Identity {
603        Identity::ed25519([0xAA; 32])
604    }
605
606    #[test]
607    fn blob_roundtrip() {
608        let obj = Object::Blob(Blob {
609            data: b"hello world".to_vec(),
610        });
611        let bytes = serialize(&obj).expect("valid blob serialises");
612        // Prologue
613        assert_eq!(bytes[0], 0x01);
614        assert_eq!(&bytes[1..5], b"MKT1");
615        assert_eq!(bytes[5], 0x01);
616        let parsed = deserialize(&bytes).unwrap();
617        assert_eq!(obj, parsed);
618    }
619
620    #[test]
621    fn empty_blob_size_is_10() {
622        let obj = Object::Blob(Blob { data: vec![] });
623        let bytes = serialize(&obj).unwrap();
624        assert_eq!(bytes.len(), 10);
625        assert_eq!(deserialize(&bytes).unwrap(), obj);
626    }
627
628    #[test]
629    fn empty_tree_roundtrip() {
630        let obj = Object::Tree(Tree { entries: vec![] });
631        let bytes = serialize(&obj).unwrap();
632        assert_eq!(deserialize(&bytes).unwrap(), obj);
633    }
634
635    #[test]
636    fn tree_with_three_entries_roundtrip() {
637        let obj = Object::Tree(Tree {
638            entries: vec![
639                TreeEntry {
640                    name: b"alpha".to_vec(),
641                    mode: EntryMode::Blob,
642                    object_hash: hash(b"a"),
643                },
644                TreeEntry {
645                    name: b"beta".to_vec(),
646                    mode: EntryMode::Tree,
647                    object_hash: hash(b"b"),
648                },
649                TreeEntry {
650                    name: b"gamma".to_vec(),
651                    mode: EntryMode::Executable,
652                    object_hash: hash(b"g"),
653                },
654            ],
655        });
656        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
657    }
658
659    #[test]
660    fn commit_with_one_parent_roundtrip() {
661        let obj = Object::Commit(Commit::new_unannotated(
662            hash(b"tree"),
663            vec![hash(b"parent")],
664            ed25519_id(),
665            [0xAA; 32],
666            b"initial".to_vec(),
667            1_711_300_000,
668            [0xBB; 64],
669        ));
670        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
671    }
672
673    #[test]
674    fn root_commit_roundtrip() {
675        let obj = Object::Commit(Commit::new_unannotated(
676            hash(b"tree"),
677            vec![],
678            ed25519_id(),
679            [0x11; 32],
680            b"genesis".to_vec(),
681            1_000_000,
682            [0x22; 64],
683        ));
684        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
685    }
686
687    #[test]
688    fn commit_with_opaque_identity_roundtrip() {
689        let mid = vec![42u8, 0, 0, 0, 0, 0, 0, 0];
690        let obj = Object::Commit(Commit::new_unannotated(
691            hash(b"tree"),
692            vec![],
693            Identity::opaque(mid.clone()),
694            [0xAA; 32],
695            b"opaque author".to_vec(),
696            1_700_000_000,
697            [0xBB; 64],
698        ));
699        let parsed = deserialize(&serialize(&obj).unwrap()).unwrap();
700        if let Object::Commit(c) = &parsed {
701            assert_eq!(c.author.kind, IdentityKind::Opaque);
702            assert_eq!(c.author.bytes, mid);
703        } else {
704            panic!("not a commit");
705        }
706        assert_eq!(parsed, obj);
707    }
708
709    #[test]
710    fn remix_with_one_source_roundtrip() {
711        let obj = Object::Remix(Remix {
712            tree_hash: hash(b"tree"),
713            parents: vec![],
714            sources: vec![RemixSource {
715                upstream_id: hash(b"project-a"),
716                commit_hash: hash(b"commit-x"),
717            }],
718            author: ed25519_id(),
719            signer: [0xCC; 32],
720            message: b"remixed".to_vec(),
721            timestamp: 1_711_300_100,
722            signature: [0xDD; 64],
723        });
724        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
725    }
726
727    #[test]
728    fn chunked_blob_roundtrip() {
729        let obj = Object::ChunkedBlob(ChunkedBlob {
730            total_size: 3 * 65536,
731            chunk_size: 65536,
732            chunks: vec![hash(b"c1"), hash(b"c2"), hash(b"c3")],
733        });
734        let bytes = serialize(&obj).unwrap();
735        assert_eq!(bytes[0], 0x05);
736        assert_eq!(deserialize(&bytes).unwrap(), obj);
737    }
738
739    #[test]
740    fn chunked_blob_cdc_marker_roundtrips() {
741        let obj = Object::ChunkedBlob(ChunkedBlob {
742            total_size: 100_000,
743            chunk_size: 0,
744            chunks: vec![hash(b"x"), hash(b"y")],
745        });
746        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
747    }
748
749    fn sample_tag() -> Tag {
750        Tag {
751            target: hash(b"target-commit"),
752            target_type: ObjectType::Commit,
753            name: b"v1.0.0".to_vec(),
754            tagger: ed25519_id(),
755            signer: [0xAA; 32],
756            message: b"release 1.0.0".to_vec(),
757            timestamp: 1_711_300_000,
758            signature: [0xCC; 64],
759        }
760    }
761
762    #[test]
763    fn tag_roundtrip() {
764        let obj = Object::Tag(sample_tag());
765        let bytes = serialize(&obj).unwrap();
766        assert_eq!(bytes[0], 0x07, "tag object_type tag");
767        assert_eq!(&bytes[1..5], b"MKT1");
768        assert_eq!(bytes[5], 0x01);
769        assert_eq!(deserialize(&bytes).unwrap(), obj);
770    }
771
772    #[test]
773    fn tag_empty_message_roundtrip() {
774        let mut t = sample_tag();
775        t.message = vec![];
776        let obj = Object::Tag(t);
777        assert_eq!(deserialize(&serialize(&obj).unwrap()).unwrap(), obj);
778    }
779
780    #[test]
781    fn tag_rejects_empty_name() {
782        let mut t = sample_tag();
783        t.name = vec![];
784        assert_eq!(serialize(&Object::Tag(t)), Err(MkitError::TagNameInvalid));
785    }
786
787    #[test]
788    fn tag_rejects_delta_target_type() {
789        let mut t = sample_tag();
790        t.target_type = ObjectType::Delta;
791        assert_eq!(
792            serialize(&Object::Tag(t)),
793            Err(MkitError::TagTargetTypeInvalid(ObjectType::Delta as u8))
794        );
795    }
796
797    #[test]
798    fn tag_decode_rejects_forbidden_name_byte() {
799        // Hand-craft a tag whose name embeds a `/`. The writer would
800        // reject it, so build the wire bytes directly.
801        let mut buf = vec![0x07, b'M', b'K', b'T', b'1', 0x01];
802        buf.extend_from_slice(&[0u8; 32]); // target
803        buf.push(ObjectType::Commit as u8); // target_type
804        buf.extend_from_slice(&3u32.to_le_bytes()); // name_len
805        buf.extend_from_slice(b"a/b");
806        assert_eq!(deserialize(&buf), Err(MkitError::TagNameInvalid));
807    }
808
809    // ---- Negative tests ----
810
811    #[test]
812    fn deserialize_empty_input() {
813        assert_eq!(deserialize(&[]), Err(MkitError::EmptyData));
814    }
815
816    #[test]
817    fn rejects_invalid_object_type() {
818        let bad = [0xFF, b'M', b'K', b'T', b'1', 0x01];
819        assert_eq!(deserialize(&bad), Err(MkitError::InvalidObjectType(0xFF)));
820    }
821
822    #[test]
823    fn rejects_bad_magic() {
824        let bad = [0x01, b'X', b'Y', b'Z', b'W', 0x01, 0, 0, 0, 0];
825        assert_eq!(deserialize(&bad), Err(MkitError::InvalidMagic));
826    }
827
828    #[test]
829    fn rejects_unsupported_schema_version() {
830        let bad = [0x01, b'M', b'K', b'T', b'1', 0x02, 0, 0, 0, 0];
831        assert_eq!(deserialize(&bad), Err(MkitError::UnsupportedObjectVersion));
832    }
833
834    #[test]
835    fn rejects_truncated_blob() {
836        // length=100 but only 2 bytes follow
837        let bad = [
838            0x01, b'M', b'K', b'T', b'1', 0x01, 0x64, 0x00, 0x00, 0x00, 0xAA, 0xBB,
839        ];
840        assert_eq!(deserialize(&bad), Err(MkitError::UnexpectedEof));
841    }
842
843    #[test]
844    fn rejects_unsorted_tree_entries() {
845        // Build an unsorted tree by hand — can't go through serialize()
846        // because writers don't validate ordering today.
847        let mut buf = vec![0x02, b'M', b'K', b'T', b'1', 0x01];
848        buf.extend_from_slice(&2u32.to_le_bytes());
849        // entry "z.txt"
850        buf.extend_from_slice(&5u32.to_le_bytes());
851        buf.extend_from_slice(b"z.txt");
852        buf.push(EntryMode::Blob as u8);
853        buf.extend_from_slice(&[0u8; 32]);
854        // entry "a.txt"
855        buf.extend_from_slice(&5u32.to_le_bytes());
856        buf.extend_from_slice(b"a.txt");
857        buf.push(EntryMode::Blob as u8);
858        buf.extend_from_slice(&[0u8; 32]);
859        assert_eq!(deserialize(&buf), Err(MkitError::InvalidEntryOrder));
860    }
861
862    #[test]
863    fn rejects_trailing_bytes() {
864        let obj = Object::Blob(Blob {
865            data: b"hello".to_vec(),
866        });
867        let mut bytes = serialize(&obj).unwrap();
868        bytes.push(0xFF);
869        assert_eq!(deserialize(&bytes), Err(MkitError::TrailingData));
870    }
871
872    #[test]
873    fn rejects_zero_length_identity() {
874        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
875        buf.extend_from_slice(&[0u8; 32]); // tree_hash
876        buf.extend_from_slice(&0u32.to_le_bytes()); // parent_count
877        buf.push(IdentityKind::Opaque as u8);
878        buf.extend_from_slice(&0u16.to_le_bytes()); // len = 0
879        assert_eq!(deserialize(&buf), Err(MkitError::InvalidIdentity));
880    }
881
882    #[test]
883    fn rejects_unknown_identity_kind() {
884        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
885        buf.extend_from_slice(&[0u8; 32]);
886        buf.extend_from_slice(&0u32.to_le_bytes());
887        buf.push(0xEE); // unknown kind
888        buf.extend_from_slice(&4u16.to_le_bytes());
889        buf.extend_from_slice(b"xxxx");
890        assert_eq!(deserialize(&buf), Err(MkitError::UnknownIdentityKind(0xEE)));
891    }
892
893    #[test]
894    fn rejects_ed25519_with_wrong_length() {
895        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
896        buf.extend_from_slice(&[0u8; 32]);
897        buf.extend_from_slice(&0u32.to_le_bytes());
898        buf.push(IdentityKind::Ed25519 as u8);
899        buf.extend_from_slice(&8u16.to_le_bytes());
900        buf.extend_from_slice(b"12345678");
901        assert_eq!(deserialize(&buf), Err(MkitError::InvalidIdentity));
902    }
903
904    #[test]
905    fn rejects_oversize_identity() {
906        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
907        buf.extend_from_slice(&[0u8; 32]);
908        buf.extend_from_slice(&0u32.to_le_bytes());
909        buf.push(IdentityKind::Opaque as u8);
910        buf.extend_from_slice(&(IDENTITY_MAX_LEN + 1).to_le_bytes());
911        buf.extend(core::iter::repeat_n(0u8, IDENTITY_MAX_LEN as usize + 1));
912        assert_eq!(deserialize(&buf), Err(MkitError::IdentityTooLarge));
913    }
914
915    #[test]
916    fn rejects_too_many_tree_entries() {
917        let mut buf = vec![0x02, b'M', b'K', b'T', b'1', 0x01];
918        buf.extend_from_slice(&(MAX_TREE_ENTRIES + 1).to_le_bytes());
919        assert_eq!(deserialize(&buf), Err(MkitError::TooManyEntries));
920    }
921
922    #[test]
923    fn rejects_truncated_chunk_list() {
924        let mut buf = vec![0x05, b'M', b'K', b'T', b'1', 0x01];
925        buf.extend_from_slice(&1024u64.to_le_bytes());
926        buf.extend_from_slice(&0u32.to_le_bytes());
927        buf.extend_from_slice(&2u32.to_le_bytes()); // chunk_count = 2
928        buf.extend_from_slice(&[0xAA; 32]); // only one chunk
929        assert_eq!(deserialize(&buf), Err(MkitError::UnexpectedEof));
930    }
931
932    #[test]
933    fn deterministic_serialization() {
934        let obj = Object::Blob(Blob {
935            data: b"deterministic".to_vec(),
936        });
937        let a = serialize(&obj).unwrap();
938        let b = serialize(&obj).unwrap();
939        assert_eq!(a, b);
940        assert_eq!(hash(&a), hash(&b));
941        // Ensure hash() and ZERO are linked correctly — silly sanity.
942        assert_ne!(a, vec![0u8; a.len()]);
943        let _ = ZERO;
944    }
945
946    // ---- Fallible-serialize tests (review follow-up #22) ----
947
948    #[test]
949    fn serialize_rejects_invalid_identity_in_commit() {
950        // Empty payload is structurally invalid for every kind.
951        let bad_id = Identity {
952            kind: IdentityKind::Opaque,
953            bytes: Vec::new(),
954        };
955        let obj = Object::Commit(Commit::new_unannotated(
956            hash(b"tree"),
957            vec![],
958            bad_id,
959            [0; 32],
960            b"x".to_vec(),
961            0,
962            [0; 64],
963        ));
964        assert_eq!(serialize(&obj), Err(MkitError::InvalidIdentity));
965    }
966
967    #[test]
968    fn read_identity_rejects_non_multibase_didkey() {
969        // Wire format: [u8 kind][u16 LE len][payload]. A DidKey payload must
970        // be a printable-ASCII multibase string, so a malformed object with a
971        // binary/whitespace DidKey payload must be rejected at the read
972        // boundary, not silently deserialized (#223).
973        let id_bytes = |payload: &[u8]| {
974            let mut b = vec![0x02u8]; // IdentityKind::DidKey
975            let len = u16::try_from(payload.len()).expect("test payload fits u16");
976            b.extend_from_slice(&len.to_le_bytes());
977            b.extend_from_slice(payload);
978            b
979        };
980        // NUL, high byte, and whitespace payloads all reject.
981        for bad in [b"z\x00ab".as_slice(), b"z\xff", b"z6Mk has space"] {
982            let buf = id_bytes(bad);
983            assert_eq!(
984                Reader::new(&buf).read_identity(),
985                Err(MkitError::InvalidIdentity),
986                "should reject DidKey payload {bad:?} at the read boundary"
987            );
988        }
989        // A real did:key multibase payload round-trips.
990        let good = id_bytes(b"z6MkExample");
991        let id = Reader::new(&good).read_identity().unwrap();
992        assert_eq!(id.kind, IdentityKind::DidKey);
993        assert_eq!(id.bytes, b"z6MkExample");
994    }
995
996    #[test]
997    fn serialize_rejects_invalid_identity_in_remix() {
998        // Ed25519 with non-32-byte payload.
999        let bad_id = Identity {
1000            kind: IdentityKind::Ed25519,
1001            bytes: vec![0u8; 16],
1002        };
1003        let obj = Object::Remix(Remix {
1004            tree_hash: ZERO,
1005            parents: vec![],
1006            sources: vec![],
1007            author: bad_id,
1008            signer: [0; 32],
1009            message: b"x".to_vec(),
1010            timestamp: 0,
1011            signature: [0; 64],
1012        });
1013        assert_eq!(serialize(&obj), Err(MkitError::InvalidIdentity));
1014    }
1015
1016    /// `read_commit` claims `parent_count = MAX_PARENTS` (`1_000`) but
1017    /// the remaining buffer is too small to ever hold that many
1018    /// 32-byte parent hashes. The pre-allocation guard must reject the
1019    /// header before the parent vec is sized from attacker input.
1020    #[test]
1021    fn rejects_truncated_commit_parents() {
1022        let mut buf = vec![0x03, b'M', b'K', b'T', b'1', 0x01];
1023        buf.extend_from_slice(&[0u8; 32]); // tree_hash
1024        // Within MAX_PARENTS (1_000) so the existing TooManyParents
1025        // guard doesn't fire — we want to confirm the capacity-vs-
1026        // remaining check rejects too. 1_000 parents = 32_000 bytes,
1027        // but only a single 32-byte hash follows.
1028        buf.extend_from_slice(&1_000u32.to_le_bytes()); // parent_count
1029        buf.extend_from_slice(&[0xAA; 32]); // only one parent worth
1030        assert_eq!(deserialize(&buf), Err(MkitError::UnexpectedEof));
1031    }
1032
1033    /// `read_remix` claims `source_count = MAX_REMIX_SOURCES` (`10_000`)
1034    /// but the remaining buffer cannot accommodate even one source
1035    /// (which is 64 bytes — two hashes). Reject without allocating.
1036    #[test]
1037    fn rejects_truncated_remix_sources() {
1038        let mut buf = vec![0x04, b'M', b'K', b'T', b'1', 0x01];
1039        buf.extend_from_slice(&[0u8; 32]); // tree_hash
1040        buf.extend_from_slice(&0u32.to_le_bytes()); // parent_count
1041        // 10_000 sources × 64 bytes = 640_000 bytes required, but the
1042        // buffer is empty after this point.
1043        buf.extend_from_slice(&10_000u32.to_le_bytes()); // source_count
1044        assert_eq!(deserialize(&buf), Err(MkitError::UnexpectedEof));
1045    }
1046
1047    #[cfg(target_pointer_width = "64")]
1048    #[test]
1049    fn checked_u32_rejects_oversize() {
1050        // Direct unit test on the bounds helper — we cannot allocate a
1051        // Vec with > u32::MAX entries in a unit test, so exercise the
1052        // guard surface itself. This pins the field-name string so
1053        // downstream consumers can grep on it. 32-bit targets cannot
1054        // even construct `n`, so the test is gated on pointer width.
1055        let n: usize = u32::MAX as usize + 1;
1056        let err = checked_u32("blob.data", n).unwrap_err();
1057        assert_eq!(
1058            err,
1059            MkitError::OversizePayload {
1060                field: "blob.data",
1061                len: n,
1062            }
1063        );
1064    }
1065
1066    // -- Property tests -------------------------------------------------
1067    //
1068    // Round-trip invariants exercised against arbitrary inputs via
1069    // `proptest`. The example tests above cover specific vectors and
1070    // the goldens pin wire bytes; the properties below catch the
1071    // boundary cases the examples miss (empty payloads, max-length
1072    // strings, non-ASCII bytes, etc.).
1073    proptest::proptest! {
1074        /// Any blob round-trips byte-for-byte through serialize/deserialize.
1075        #[test]
1076        fn proptest_blob_roundtrip(data in proptest::collection::vec(proptest::num::u8::ANY, 0..4096)) {
1077            let obj = Object::Blob(Blob { data });
1078            let bytes = serialize(&obj).expect("blob serialises");
1079            let parsed = deserialize(&bytes).expect("blob deserialises");
1080            proptest::prop_assert_eq!(obj, parsed);
1081        }
1082
1083        /// Any commit (single parent, fixed identity) round-trips
1084        /// byte-for-byte. Covers arbitrary tree hashes, arbitrary parent
1085        /// hashes, arbitrary message bytes including non-UTF-8 sequences
1086        /// (commit messages are bytes per SPEC-OBJECTS §5). Signer + sig
1087        /// arrays are constructed from a u8 seed (proptest only ships
1088        /// `uniform32` natively; 64-byte signatures get a tiled seed).
1089        #[test]
1090        fn proptest_commit_roundtrip(
1091            tree in proptest::array::uniform32(proptest::num::u8::ANY),
1092            parent in proptest::array::uniform32(proptest::num::u8::ANY),
1093            signer in proptest::array::uniform32(proptest::num::u8::ANY),
1094            msg in proptest::collection::vec(proptest::num::u8::ANY, 0..2048),
1095            sig_seed in proptest::num::u8::ANY,
1096            ts in 0u64..u64::from(u32::MAX),
1097        ) {
1098            let mut sig = [0u8; 64];
1099            sig.fill(sig_seed);
1100            let commit = Commit::new_unannotated(
1101                tree,
1102                vec![parent],
1103                ed25519_id(),
1104                signer,
1105                msg,
1106                ts,
1107                sig,
1108            );
1109            let obj = Object::Commit(commit);
1110            let bytes = serialize(&obj).expect("commit serialises");
1111            let parsed = deserialize(&bytes).expect("commit deserialises");
1112            proptest::prop_assert_eq!(obj, parsed);
1113        }
1114    }
1115}