Skip to main content

mkit_core/
object.rs

1//! mkit object types.
2//!
3//! Spec reference: `docs/SPEC-OBJECTS.md` §1–§9. Briefly:
4//!
5//! * Every stored object begins with the 6-byte v1 prologue
6//!   `[u8 object_type][4B "MKT1"][u8 0x01]`.
7//! * Hashes are 32-byte BLAKE3.
8//! * Integers are little-endian. Timestamps are `u64` (widened from
9//!   `u32` in the mkit-era).
10//! * Tree entry names are 1..=255 bytes, forbid `\0 / \\` and the
11//!   names `.` / `..`, and MUST be lex-sorted with no duplicates.
12//! * Identity is a tagged union `[u8 kind][u16 LE len][payload]`;
13//!   `len` is 1..=[`IDENTITY_MAX_LEN`], ed25519 MUST have `len == 32`.
14
15use crate::hash::{Hash, ZERO};
16use core::fmt;
17
/// Magic bytes `MKT1`, stored at offset 1 of every v1 object (directly
/// after the object-type byte).
pub const MAGIC: [u8; 4] = [b'M', b'K', b'T', b'1'];
/// Schema version byte of the v1 prologue. `0x01` is the current and
/// only defined value.
pub const SCHEMA_VERSION: u8 = 1;
/// Largest permitted [`Identity`] payload, in bytes (4 KiB). Longer
/// payloads are rejected at decode time as `IdentityTooLarge`.
pub const IDENTITY_MAX_LEN: u16 = 4 * 1024;
25
/// One-byte object type tag, stored at offset 0 of the v1 prologue.
///
/// The explicit discriminants are the byte values of the tag.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(u8)]
pub enum ObjectType {
    /// Raw bytes; see [`Blob`].
    Blob = 0x01,
    /// List of named entries; see [`Tree`].
    Tree = 0x02,
    /// See [`Commit`].
    Commit = 0x03,
    /// See [`Remix`].
    Remix = 0x04,
    /// Chunked-blob manifest; see [`ChunkedBlob`].
    ChunkedBlob = 0x05,
    /// Pack-only delta; see [`Delta`].
    Delta = 0x06,
    /// Annotated / signed tag, introduced in v1 (issue #230). See
    /// `SPEC-OBJECTS.md` §6a and [`Tag`].
    Tag = 0x07,
}
40
41impl ObjectType {
42    /// Spec-defined short name, usable in logs / CLI output.
43    #[must_use]
44    pub fn name(self) -> &'static str {
45        match self {
46            Self::Blob => "blob",
47            Self::Tree => "tree",
48            Self::Commit => "commit",
49            Self::Remix => "remix",
50            Self::ChunkedBlob => "chunked_blob",
51            Self::Delta => "delta",
52            Self::Tag => "tag",
53        }
54    }
55
56    /// Decode the single-byte tag. Rejects reserved/future values.
57    pub(crate) fn from_u8(b: u8) -> Result<Self, MkitError> {
58        Ok(match b {
59            0x01 => Self::Blob,
60            0x02 => Self::Tree,
61            0x03 => Self::Commit,
62            0x04 => Self::Remix,
63            0x05 => Self::ChunkedBlob,
64            0x06 => Self::Delta,
65            0x07 => Self::Tag,
66            other => return Err(MkitError::InvalidObjectType(other)),
67        })
68    }
69}
70
/// One-byte mode of a tree entry. The explicit discriminants are the
/// byte values of the mode.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(u8)]
pub enum EntryMode {
    Blob = 0x01,
    Tree = 0x02,
    Symlink = 0x03,
    /// Regular file whose POSIX executable bit is set (0o755). Added in
    /// v1 — the mkit-era silently dropped this bit at commit time.
    Executable = 0x04,
}
82
83impl EntryMode {
84    pub(crate) fn from_u8(b: u8) -> Result<Self, MkitError> {
85        Ok(match b {
86            0x01 => Self::Blob,
87            0x02 => Self::Tree,
88            0x03 => Self::Symlink,
89            0x04 => Self::Executable,
90            other => return Err(MkitError::InvalidEntryMode(other)),
91        })
92    }
93}
94
/// Kind tag of the tagged-union author identity. See `SPEC-OBJECTS.md`
/// §9. The explicit discriminants are the byte values of the tag.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(u8)]
pub enum IdentityKind {
    /// Raw Ed25519 public key, 32 bytes.
    Ed25519 = 0x01,
    /// Multibase-encoded `did:key:` key material with the `did:key:`
    /// scheme prefix stripped — i.e. the payload is the multibase string
    /// itself, typically base58btc starting with `'z'`. It is validated
    /// as non-empty printable ASCII so that binary garbage cannot
    /// masquerade as a DID (see [`Identity::is_valid`]).
    DidKey = 0x02,
    /// Arbitrary bytes whose meaning is defined by the producer.
    Opaque = 0x03,
}
110
111impl IdentityKind {
112    pub(crate) fn from_u8(b: u8) -> Result<Self, MkitError> {
113        Ok(match b {
114            0x01 => Self::Ed25519,
115            0x02 => Self::DidKey,
116            0x03 => Self::Opaque,
117            other => return Err(MkitError::UnknownIdentityKind(other)),
118        })
119    }
120}
121
122/// Tagged-union identity. Owned bytes, cheap to clone — payload is at
123/// most [`IDENTITY_MAX_LEN`] = 4 KiB.
124#[derive(Debug, Clone, PartialEq, Eq, Hash)]
125pub struct Identity {
126    pub kind: IdentityKind,
127    pub bytes: Vec<u8>,
128}
129
130impl Identity {
131    /// Convenience constructor: Ed25519 from a fixed 32-byte pubkey.
132    #[must_use]
133    pub fn ed25519(pubkey: [u8; 32]) -> Self {
134        Self {
135            kind: IdentityKind::Ed25519,
136            bytes: pubkey.to_vec(),
137        }
138    }
139
140    /// Convenience constructor: opaque producer-defined bytes.
141    #[must_use]
142    pub fn opaque(bytes: impl Into<Vec<u8>>) -> Self {
143        Self {
144            kind: IdentityKind::Opaque,
145            bytes: bytes.into(),
146        }
147    }
148
149    /// Structural validity check: payload len in `1..=IDENTITY_MAX_LEN`;
150    /// Ed25519 is exactly 32 bytes; a `DidKey` payload must be a multibase
151    /// string, i.e. all printable ASCII (no NUL/control/whitespace/high
152    /// bytes) — so a binary blob can't be smuggled in under the DID kind.
153    /// `Opaque` is producer-defined and accepts any non-empty bytes.
154    #[must_use]
155    pub fn is_valid(&self) -> bool {
156        if self.bytes.is_empty() || self.bytes.len() > IDENTITY_MAX_LEN as usize {
157            return false;
158        }
159        match self.kind {
160            IdentityKind::Ed25519 => self.bytes.len() == 32,
161            // A multibase string is always printable ASCII; this rejects
162            // garbage without committing to one multibase alphabet (the
163            // payload may be base58btc `z…`, base64 `m…`, etc.).
164            IdentityKind::DidKey => self.bytes.iter().all(u8::is_ascii_graphic),
165            IdentityKind::Opaque => true,
166        }
167    }
168}
169
170/// A single entry in a [`Tree`] object.
171#[derive(Debug, Clone, PartialEq, Eq)]
172pub struct TreeEntry {
173    /// Entry name. 1..=255 bytes, no `\0 / \\`, not `.` / `..`.
174    pub name: Vec<u8>,
175    pub mode: EntryMode,
176    pub object_hash: Hash,
177}
178
179impl TreeEntry {
180    /// Validate an entry name per §4.1.
181    ///
182    /// In addition to the base spec (no `\0 / \\`, not `.` / `..`, 1..=255
183    /// bytes), this rejects names that alias repo metadata or exploit
184    /// platform quirks:
185    ///
186    /// - `.mkit` / `.git` case-insensitively (Git CVE-2021-21300 family).
187    /// - Trailing `.` or space, which Windows strips, causing aliasing.
188    /// - Reserved Windows device names (`CON`, `PRN`, `AUX`, `NUL`,
189    ///   `COM1`-`COM9`, `LPT1`-`LPT9`), with or without an extension,
190    ///   case-insensitively.
191    ///
192    /// ASCII case-folding is sufficient because all other byte-level
193    /// rules above are ASCII-only; names with non-ASCII bytes bypass
194    /// these extra checks but remain constrained by the base rules.
195    #[must_use]
196    pub fn validate_name(name: &[u8]) -> bool {
197        if name.is_empty() || name.len() > 255 {
198            return false;
199        }
200        if name == b"." || name == b".." {
201            return false;
202        }
203        if name.iter().any(|&b| matches!(b, 0 | b'/' | b'\\')) {
204            return false;
205        }
206        // Trailing `.` or space — Windows strips these, causing aliasing
207        // with another entry of the same bare name.
208        if matches!(name.last(), Some(b'.' | b' ')) {
209            return false;
210        }
211        // Case-insensitive `.mkit` / `.git`.
212        if name.eq_ignore_ascii_case(b".mkit") || name.eq_ignore_ascii_case(b".git") {
213            return false;
214        }
215        // Reserved Windows device names — the stem (before the first `.`)
216        // is compared case-insensitively.
217        let stem = match name.iter().position(|&b| b == b'.') {
218            Some(i) => &name[..i],
219            None => name,
220        };
221        if is_windows_reserved_stem(stem) {
222            return false;
223        }
224        true
225    }
226}
227
/// Reports whether `stem` is a reserved Windows device name, comparing
/// ASCII case-insensitively.
///
/// The caller is expected to have cut the name at its first `.`, so any
/// extension is already gone. Recognised stems are `CON`, `PRN`, `AUX`,
/// `NUL`, and `COM` / `LPT` followed by a single digit `1`-`9` (digit
/// `0` is not reserved).
fn is_windows_reserved_stem(stem: &[u8]) -> bool {
    const PLAIN: [&[u8]; 4] = [b"CON", b"PRN", b"AUX", b"NUL"];
    const NUMBERED: [&[u8]; 2] = [b"COM", b"LPT"];

    match stem {
        // Exactly three bytes: one of the un-numbered devices.
        [_, _, _] => PLAIN.iter().any(|&device| stem.eq_ignore_ascii_case(device)),
        // Exactly four bytes ending in 1-9: a numbered port.
        [port @ .., b'1'..=b'9'] if port.len() == 3 => {
            NUMBERED.iter().any(|&device| port.eq_ignore_ascii_case(device))
        }
        _ => false,
    }
}
249
250/// Remix source provenance. `upstream_id` is opaque 32-byte caller-
251/// chosen content (e.g. `BLAKE3(repo_url)`); core never interprets it.
252#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
253pub struct RemixSource {
254    pub upstream_id: Hash,
255    pub commit_hash: Hash,
256}
257
/// Blob object: raw bytes that are never interpreted. The storage layer
/// caps a blob at 1 GiB.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Blob {
    /// The blob's content.
    pub data: Vec<u8>,
}
263
264/// Tree: lex-sorted list of entries.
265#[derive(Debug, Clone, PartialEq, Eq)]
266pub struct Tree {
267    pub entries: Vec<TreeEntry>,
268}
269
270impl Tree {
271    /// Returns `true` when entries are strictly ascending by byte-wise
272    /// name order (no duplicates).
273    #[must_use]
274    pub fn is_sorted(&self) -> bool {
275        self.entries
276            .windows(2)
277            .all(|w| w[0].name.as_slice() < w[1].name.as_slice())
278    }
279}
280
281/// Commit object. See `SPEC-OBJECTS.md` §5.
282#[derive(Debug, Clone, PartialEq, Eq)]
283pub struct Commit {
284    pub tree_hash: Hash,
285    pub parents: Vec<Hash>,
286    pub author: Identity,
287    pub signer: [u8; 32],
288    pub message: Vec<u8>,
289    pub timestamp: u64,
290    /// Optional off-chain annotation. Zero = absent. NOT part of the
291    /// signing bytes — see SPEC-SIGNING §3.
292    pub message_hash: Hash,
293    /// Optional off-chain annotation. Zero = absent. NOT part of the
294    /// signing bytes.
295    pub content_digest: Hash,
296    pub signature: [u8; 64],
297}
298
299impl Commit {
300    /// Commit with both annotation slots zeroed out.
301    #[must_use]
302    pub fn new_unannotated(
303        tree_hash: Hash,
304        parents: Vec<Hash>,
305        author: Identity,
306        signer: [u8; 32],
307        message: Vec<u8>,
308        timestamp: u64,
309        signature: [u8; 64],
310    ) -> Self {
311        Self {
312            tree_hash,
313            parents,
314            author,
315            signer,
316            message,
317            timestamp,
318            message_hash: ZERO,
319            content_digest: ZERO,
320            signature,
321        }
322    }
323}
324
325/// Remix object. See `SPEC-OBJECTS.md` §6.
326#[derive(Debug, Clone, PartialEq, Eq)]
327pub struct Remix {
328    pub tree_hash: Hash,
329    pub parents: Vec<Hash>,
330    pub sources: Vec<RemixSource>,
331    pub author: Identity,
332    pub signer: [u8; 32],
333    pub message: Vec<u8>,
334    pub timestamp: u64,
335    pub signature: [u8; 64],
336}
337
338impl Remix {
339    /// Returns `true` when sources are sorted by `(upstream_id, commit_hash)`
340    /// with no duplicate `(upstream_id, commit_hash)` pairs.
341    #[must_use]
342    pub fn sources_sorted(&self) -> bool {
343        self.sources.windows(2).all(|w| {
344            let a = &w[0];
345            let b = &w[1];
346            match a.upstream_id.cmp(&b.upstream_id) {
347                core::cmp::Ordering::Less => true,
348                core::cmp::Ordering::Greater => false,
349                core::cmp::Ordering::Equal => a.commit_hash < b.commit_hash,
350            }
351        })
352    }
353}
354
355/// Annotated / signed tag object. See `SPEC-OBJECTS.md` §6a and
356/// `SPEC-SIGNING.md` §4a.
357///
358/// A tag binds a human-readable `name` to a `target` object (commit /
359/// remix / tree / blob), records the `tagger` identity, a free-form
360/// `message`, and a `timestamp`, and carries an Ed25519 `signature`
361/// over the canonical signing bytes (see [`crate::sign::tag_signing_bytes`]).
362///
363/// The `target_type` byte records what kind of object `target` names
364/// so a verifier need not fetch the target to display the tag. It is a
365/// [`ObjectType`] tag and MUST be one of the storable types (not
366/// `Delta`, which is pack-only).
367///
368/// `name` is 1..=[`TAG_NAME_MAX_LEN`] bytes. It is the short ref name
369/// (e.g. `v1.0.0`), not a full `refs/tags/...` path.
370#[derive(Debug, Clone, PartialEq, Eq)]
371pub struct Tag {
372    pub target: Hash,
373    pub target_type: ObjectType,
374    pub name: Vec<u8>,
375    pub tagger: Identity,
376    pub signer: [u8; 32],
377    pub message: Vec<u8>,
378    pub timestamp: u64,
379    pub signature: [u8; 64],
380}
381
/// Largest permitted [`Tag`] `name`, in bytes. A name whose length is
/// outside `1..=TAG_NAME_MAX_LEN` is rejected at decode time as
/// [`MkitError::TagNameInvalid`].
pub const TAG_NAME_MAX_LEN: u16 = 4 * 1024;
385
386impl Tag {
387    /// Structural validity of the `name`: non-empty, within the length
388    /// bound, and free of the same forbidden bytes a ref name forbids
389    /// (`\0`, `/`, `\\`). The full ref-name grammar is enforced by
390    /// `refs::validate_ref_name` at write time; this is the
391    /// object-layer floor that the serializer guards.
392    #[must_use]
393    pub fn name_is_valid(&self) -> bool {
394        if self.name.is_empty() || self.name.len() > TAG_NAME_MAX_LEN as usize {
395            return false;
396        }
397        !self.name.iter().any(|&b| matches!(b, 0 | b'/' | b'\\'))
398    }
399}
400
401/// Chunked-blob manifest. See `SPEC-OBJECTS.md` §7.
402#[derive(Debug, Clone, PartialEq, Eq)]
403pub struct ChunkedBlob {
404    pub total_size: u64,
405    /// `0` = content-defined chunking (`FastCDC`), otherwise fixed-size.
406    pub chunk_size: u32,
407    pub chunks: Vec<Hash>,
408}
409
410/// Delta object (pack-only). See `SPEC-OBJECTS.md` §8.
411#[derive(Debug, Clone, PartialEq, Eq)]
412pub struct Delta {
413    pub base_hash: Hash,
414    pub result_size: u32,
415    pub instructions: Vec<u8>,
416}
417
418/// Unified object union.
419#[derive(Debug, Clone, PartialEq, Eq)]
420pub enum Object {
421    Blob(Blob),
422    Tree(Tree),
423    Commit(Commit),
424    Remix(Remix),
425    ChunkedBlob(ChunkedBlob),
426    Delta(Delta),
427    Tag(Tag),
428}
429
430impl Object {
431    /// Return this object's type tag.
432    #[must_use]
433    pub fn object_type(&self) -> ObjectType {
434        match self {
435            Self::Blob(_) => ObjectType::Blob,
436            Self::Tree(_) => ObjectType::Tree,
437            Self::Commit(_) => ObjectType::Commit,
438            Self::Remix(_) => ObjectType::Remix,
439            Self::ChunkedBlob(_) => ObjectType::ChunkedBlob,
440            Self::Delta(_) => ObjectType::Delta,
441            Self::Tag(_) => ObjectType::Tag,
442        }
443    }
444}
445
446/// All decode / validation errors raised by the serialize module, plus
447/// a small number of construction-time errors.
448#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
449pub enum MkitError {
450    #[error("input is shorter than the 6-byte v1 prologue")]
451    EmptyData,
452    #[error("object_type byte {0:#04x} is not in 0x01..=0x07")]
453    InvalidObjectType(u8),
454    #[error("magic at offset 1 is not \"MKT1\"")]
455    InvalidMagic,
456    #[error("schema_version byte is not 0x01")]
457    UnsupportedObjectVersion,
458    #[error("input ended before a complete field could be read")]
459    UnexpectedEof,
460    #[error("non-empty trailing bytes after a complete object")]
461    TrailingData,
462    #[error("tree.entry_count > 1_000_000")]
463    TooManyEntries,
464    #[error("tree entry name is empty, too long, or contains a forbidden byte")]
465    InvalidEntryName,
466    #[error("tree entry mode byte {0:#04x} is not one of 0x01..=0x04")]
467    InvalidEntryMode(u8),
468    #[error("tree entries are not lexicographically sorted / contain duplicates")]
469    InvalidEntryOrder,
470    #[error("parent_count > 1_000")]
471    TooManyParents,
472    #[error("remix.source_count > 10_000")]
473    TooManySources,
474    #[error("tag name is empty, too long, or contains a forbidden byte (\\0 / \\)")]
475    TagNameInvalid,
476    #[error("tag target_type byte {0:#04x} is not a storable object type")]
477    TagTargetTypeInvalid(u8),
478    #[error("remix sources are not sorted by (upstream_id, commit_hash)")]
479    InvalidSourceOrder,
480    #[error("chunked_blob.chunk_count > 1_000_000")]
481    TooManyChunks,
482    #[error("identity kind byte {0:#04x} is not 0x01..=0x03")]
483    UnknownIdentityKind(u8),
484    #[error("identity has zero-length payload, or is Ed25519 with len != 32")]
485    InvalidIdentity,
486    #[error("identity payload len > {}", IDENTITY_MAX_LEN)]
487    IdentityTooLarge,
488    /// A length-prefixed field exceeded the wire-format `u32` cap. Only
489    /// raised by serialise; deserialise can never observe a value larger
490    /// than `u32::MAX` because it reads the prefix first.
491    #[error("oversized payload in field `{field}`: {len} bytes > u32::MAX")]
492    OversizePayload { field: &'static str, len: usize },
493    // ---- sign / key-management errors (Phase 6) ----
494    /// Underlying secure-randomness source could not produce bytes.
495    #[error("rng failed to produce key material")]
496    RngFailure,
497    /// Signature verification failed (bad signature, wrong key, tampered
498    /// input, or wrong domain). The Ed25519 layer never tells us *why*.
499    #[error("signature verification failed")]
500    SignatureInvalid,
501    /// Public-key bytes do not decode to a valid Edwards point.
502    #[error("public key is not a valid Ed25519 point")]
503    InvalidPublicKey,
504    /// Key file on disk has a permission bit set that allows non-owner
505    /// access (POSIX `mode & 0o077 != 0`). Refuses to load.
506    #[error("key file mode {actual:#o} is broader than 0600")]
507    InsecureKeyPermissions { actual: u32 },
508    /// Key file is owned by a different uid than the calling process.
509    /// Could mean a planted file from a tar extraction or a malicious
510    /// bind mount. Refuse with the observed uid for diagnostics.
511    #[error("key file owner uid {actual} does not match process euid {euid}")]
512    InsecureKeyOwner { actual: u32, euid: u32 },
513    /// Parent directory of the key file is group/world-accessible.
514    /// `.mkit/keys/` MUST be 0700 to keep `inotify`-style swap attacks
515    /// out of reach.
516    #[error("key directory mode {actual:#o} is broader than 0700")]
517    InsecureKeyDir { actual: u32 },
518    /// Key path resolves through a symlink. We refuse symlinks at the
519    /// open(2) layer (`O_NOFOLLOW`) — this variant fires when the
520    /// kernel returns ELOOP. An attacker who can pre-create the path
521    /// as a symlink could otherwise redirect us to a key they control.
522    #[error("key path {0} is a symlink — refused")]
523    KeyPathIsSymlink(String),
524    /// Key file length is not exactly 32 bytes.
525    #[error("key file size {actual} is not 32 bytes (raw Ed25519 seed)")]
526    InvalidKeyLength { actual: usize },
527    /// Wrapped I/O error from key load/save. Boxed to keep `MkitError`
528    /// variant size small.
529    #[error("key file I/O error: {0}")]
530    KeyIo(String),
531    /// Delta encode input exceeds the v1 wire-format `u32` length cap
532    /// (base or result > 4 GiB - 1). SPEC-PACKFILE holds individual
533    /// payloads under this bound, so this is a caller-programming
534    /// error, not a normal runtime condition — but saturating instead
535    /// of erroring silently produced a stream `decode()` would reject
536    /// with a misleading "length mismatch".
537    #[error("delta length {len} exceeds u32::MAX for field `{field}`")]
538    DeltaLengthOverflow { field: &'static str, len: usize },
539}
540
541impl fmt::Display for Object {
542    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
543        write!(f, "Object::{}", self.object_type().name())
544    }
545}
546
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `TreeEntry::validate_name` accepts every listed name.
    fn assert_names_accepted(names: &[&[u8]]) {
        for name in names {
            assert!(TreeEntry::validate_name(name));
        }
    }

    /// Asserts that `TreeEntry::validate_name` rejects every listed name.
    fn assert_names_rejected(names: &[&[u8]]) {
        for name in names {
            assert!(!TreeEntry::validate_name(name));
        }
    }

    /// Builds a tree of blob-mode, zero-hash entries with the given
    /// names, in the given order.
    fn tree_with(names: &[&[u8]]) -> Tree {
        let entries = names
            .iter()
            .map(|name| TreeEntry {
                name: name.to_vec(),
                mode: EntryMode::Blob,
                object_hash: ZERO,
            })
            .collect();
        Tree { entries }
    }

    #[test]
    fn object_type_names() {
        let expected = [
            (ObjectType::Blob, "blob"),
            (ObjectType::Tree, "tree"),
            (ObjectType::Commit, "commit"),
            (ObjectType::Remix, "remix"),
            (ObjectType::ChunkedBlob, "chunked_blob"),
            (ObjectType::Delta, "delta"),
            (ObjectType::Tag, "tag"),
        ];
        for (object_type, name) in expected {
            assert_eq!(object_type.name(), name);
        }
    }

    #[test]
    fn object_type_from_u8_accepts_valid_range() {
        for b in 0x01u8..=0x07 {
            let decoded = ObjectType::from_u8(b);
            assert!(decoded.is_ok(), "byte {b:#04x} should decode");
        }
    }

    #[test]
    fn object_type_from_u8_rejects_zero_and_high() {
        // Each rejected byte must be echoed back inside the error.
        for bad in [0x00u8, 0xFF, 0x08] {
            assert_eq!(
                ObjectType::from_u8(bad),
                Err(MkitError::InvalidObjectType(bad))
            );
        }
    }

    #[test]
    fn tag_name_validity() {
        let name_ok = |name: &[u8]| {
            Tag {
                target: ZERO,
                target_type: ObjectType::Commit,
                name: name.to_vec(),
                tagger: Identity::ed25519([0xaa; 32]),
                signer: [0; 32],
                message: Vec::new(),
                timestamp: 0,
                signature: [0; 64],
            }
            .name_is_valid()
        };
        assert!(name_ok(b"v1.0.0"));
        for bad in [b"".as_slice(), b"a/b", b"a\\b", b"a\0b"] {
            assert!(!name_ok(bad));
        }
        let too_long = vec![b'a'; TAG_NAME_MAX_LEN as usize + 1];
        assert!(!name_ok(&too_long));
    }

    #[test]
    fn tree_entry_name_rejects_empty() {
        assert_names_rejected(&[b"".as_slice()]);
    }

    #[test]
    fn tree_entry_name_rejects_separators_and_null() {
        assert_names_rejected(&[b"foo/bar".as_slice(), b"foo\\bar", b"fo\0o"]);
    }

    #[test]
    fn tree_entry_name_rejects_dot_and_dotdot() {
        assert_names_rejected(&[b".".as_slice(), b".."]);
    }

    #[test]
    fn tree_entry_name_accepts_common() {
        assert_names_accepted(&[b"file.txt".as_slice(), b"a", b"foo-bar_baz.rs"]);
    }

    #[test]
    fn tree_entry_name_rejects_over_255() {
        let long = [b'a'; 256];
        assert!(!TreeEntry::validate_name(&long));
    }

    #[test]
    fn tree_entry_name_rejects_dot_mkit_and_dot_git_case_insensitive() {
        // Exact case first, then mixed/upper case — the latter must also
        // be rejected because of case-insensitive filesystems.
        assert_names_rejected(&[
            b".mkit".as_slice(),
            b".git",
            b".MKIT",
            b".Mkit",
            b".GIT",
            b".Git",
        ]);
        // Names that merely start with `.m` / `.g` are unrelated and fine.
        assert_names_accepted(&[b".mkitignore".as_slice(), b".gitignore"]);
    }

    #[test]
    fn tree_entry_name_rejects_trailing_dot_or_space() {
        // Windows strips trailing `.` and ` `, which would alias another
        // entry with the same bare name.
        assert_names_rejected(&[b"foo.".as_slice(), b"foo ", b"foo...", b"foo   "]);
        // Only the tail matters — interior dots and spaces are OK.
        assert_names_accepted(&[b"foo.bar".as_slice(), b"foo bar"]);
    }

    #[test]
    fn tree_entry_name_rejects_windows_reserved_device_names() {
        let reserved = [
            b"CON".as_slice(),
            b"PRN",
            b"AUX",
            b"NUL",
            b"COM1",
            b"COM9",
            b"LPT1",
            b"LPT9",
            // case-insensitive
            b"con",
            b"Nul",
            b"lpt3",
            // with extension
            b"CON.txt",
            b"nul.log",
            b"COM1.dat",
        ];
        for n in reserved {
            assert!(
                !TreeEntry::validate_name(n),
                "expected Windows reserved name rejected: {:?}",
                std::str::from_utf8(n).unwrap_or("?")
            );
        }
        // Non-reserved lookalikes must still be accepted.
        assert_names_accepted(&[
            b"COM0".as_slice(),
            b"LPT0",
            b"COM10",
            b"CONSOLE",
            b"NULL",
        ]);
    }

    #[test]
    fn identity_rejects_empty_payload_all_kinds() {
        let kinds = [
            IdentityKind::Ed25519,
            IdentityKind::DidKey,
            IdentityKind::Opaque,
        ];
        for kind in kinds {
            let empty = Identity {
                kind,
                bytes: Vec::new(),
            };
            assert!(!empty.is_valid());
        }
    }

    #[test]
    fn identity_rejects_oversize() {
        let oversize = Identity {
            kind: IdentityKind::Opaque,
            bytes: vec![0xaa; IDENTITY_MAX_LEN as usize + 1],
        };
        assert!(!oversize.is_valid());
    }

    #[test]
    fn identity_requires_32_bytes_for_ed25519() {
        let short_key = Identity {
            kind: IdentityKind::Ed25519,
            bytes: vec![0xaa; 16],
        };
        assert!(!short_key.is_valid());
        assert!(Identity::ed25519([0xaa; 32]).is_valid());
    }

    #[test]
    fn didkey_requires_printable_ascii_multibase() {
        let didkey_ok = |payload: &[u8]| {
            Identity {
                kind: IdentityKind::DidKey,
                bytes: payload.to_vec(),
            }
            .is_valid()
        };
        // A real did:key multibase payload (base58btc, scheme stripped).
        assert!(didkey_ok(
            b"z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK"
        ));
        // Other multibase prefixes are graphic ASCII too — accepted.
        assert!(didkey_ok(b"mEiB1234"));
        // Binary garbage masquerading as a DID is rejected.
        assert!(!didkey_ok(b"z\0\x01\x02"));
        assert!(!didkey_ok(&[0xde, 0xad, 0xbe, 0xef]));
        // Whitespace / control chars are not valid multibase.
        assert!(!didkey_ok(b"z6Mk has space"));
        assert!(!didkey_ok(b"z6Mk\n"));
    }

    #[test]
    fn tree_is_sorted_checks() {
        let sorted = tree_with(&[b"alpha".as_slice(), b"beta", b"gamma"]);
        assert!(sorted.is_sorted());

        let unsorted = tree_with(&[b"beta".as_slice(), b"alpha"]);
        assert!(!unsorted.is_sorted());

        let dup = tree_with(&[b"alpha".as_slice(), b"alpha"]);
        assert!(!dup.is_sorted());
    }

    #[test]
    fn remix_sources_sorted_checks() {
        let src = |upstream: u8, commit: u8| RemixSource {
            upstream_id: [upstream; 32],
            commit_hash: [commit; 32],
        };
        let sorted = |sources: Vec<RemixSource>| {
            Remix {
                tree_hash: ZERO,
                parents: Vec::new(),
                sources,
                author: Identity::ed25519([0xaa; 32]),
                signer: [0; 32],
                message: Vec::new(),
                timestamp: 0,
                signature: [0; 64],
            }
            .sources_sorted()
        };
        assert!(sorted(vec![src(1, 1), src(1, 2), src(2, 1)]));
        assert!(!sorted(vec![src(2, 1), src(1, 1)]));
        assert!(!sorted(vec![src(1, 1), src(1, 1)]));
    }
}