Skip to main content

sley_object/
lib.rs

1//! git-object — Git's object model: commits, trees, tags, and the raw encoded
2//! object framing they share.
3//!
4//! This crate carries the in-memory representations of Git's four object types
5//! ([`Commit`], [`Tree`], [`Tag`], and the blob payload carried inside
6//! [`EncodedObject`]) together with their parse/serialize routines and the
7//! [`parse_framed_object`] helper that decodes the `"<type> <len>\0<body>"`
8//! loose-object frame.
9//!
10//! [`Commit`] and [`Tag`] are parsed, canonical representations of the headers
11//! this crate understands. They are convenient for structured edits, but they
12//! are not byte-lossless round-trippers for signed objects, custom headers, or
13//! other raw object body details. Use [`EncodedObject`] whenever exact object
14//! bytes, object ids, or framed-object bytes must be preserved.
15
16use sley_core::{GitError, ObjectFormat, ObjectId, Result, Signature};
17use std::str::FromStr;
18
19pub use sley_core::BString;
20
/// The four object types Git stores.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ObjectType {
    Blob,
    Tree,
    Commit,
    Tag,
}

impl ObjectType {
    /// The lowercase type name (`"blob"`, `"tree"`, `"commit"`, `"tag"`).
    ///
    /// This is the spelling used in the `"<type> <len>\0<body>"` object frame.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Tag => "tag",
            Self::Commit => "commit",
            Self::Tree => "tree",
            Self::Blob => "blob",
        }
    }
}
39
40impl FromStr for ObjectType {
41    type Err = GitError;
42
43    fn from_str(value: &str) -> Result<Self> {
44        match value {
45            "blob" => Ok(Self::Blob),
46            "tree" => Ok(Self::Tree),
47            "commit" => Ok(Self::Commit),
48            "tag" => Ok(Self::Tag),
49            other => Err(GitError::InvalidObject(format!(
50                "unknown object type {other}"
51            ))),
52        }
53    }
54}
55
56#[derive(Debug, Clone, PartialEq, Eq)]
57pub struct EncodedObject {
58    pub object_type: ObjectType,
59    pub body: Vec<u8>,
60}
61
62impl EncodedObject {
63    /// Create a raw encoded object body.
64    ///
65    /// This is the byte-exact API for preserving Git object contents. For
66    /// commit and tag objects that may contain signatures, continuation
67    /// headers, custom headers, or otherwise unknown data, keep the original
68    /// body here instead of parsing through [`Commit`] or [`Tag`].
69    pub fn new(object_type: ObjectType, body: impl Into<Vec<u8>>) -> Self {
70        Self {
71            object_type,
72            body: body.into(),
73        }
74    }
75
76    /// Return the exact loose-object frame bytes: `"<type> <len>\0<body>"`.
77    pub fn framed_bytes(&self) -> Vec<u8> {
78        let mut out = Vec::with_capacity(self.body.len() + 32);
79        out.extend_from_slice(self.object_type.as_str().as_bytes());
80        out.push(b' ');
81        out.extend_from_slice(self.body.len().to_string().as_bytes());
82        out.push(0);
83        out.extend_from_slice(&self.body);
84        out
85    }
86
87    /// Compute the object id from the raw body bytes.
88    pub fn object_id(&self, format: ObjectFormat) -> Result<ObjectId> {
89        sley_core::object_id_for_bytes(format, self.object_type.as_str(), &self.body)
90    }
91}
92
/// An owned, parsed tree object: the list of entries in one tree level.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tree {
    /// Entries in stored order. [`Tree::write`] serializes them in exactly
    /// this order and does not sort them.
    pub entries: Vec<TreeEntry>,
}
97
/// An owned entry of a tree object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeEntry {
    /// Raw entry mode; [`Tree::write`] emits it in octal.
    pub mode: u32,
    /// Entry name bytes.
    pub name: BString,
    /// Id of the object this entry refers to.
    pub oid: ObjectId,
}
104
/// A borrowed parse-view of a single entry in a raw tree object.
///
/// The `name` slice points into the original tree body. The object id is a
/// fixed-size value parsed from the raw bytes, so iterating does not allocate
/// entry names or build an intermediate entry list.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeEntryRef<'a> {
    /// Raw entry mode, parsed from its octal text.
    pub mode: u32,
    /// Entry name bytes, borrowed from the tree body.
    pub name: &'a [u8],
    /// Id of the object this entry refers to.
    pub oid: ObjectId,
}
116
117/// Fallibly iterates raw tree-object bytes without allocating entry names.
118#[derive(Debug, Clone)]
119pub struct TreeEntries<'a> {
120    format: ObjectFormat,
121    bytes: &'a [u8],
122    offset: usize,
123}
124
125impl<'a> TreeEntries<'a> {
126    pub const fn new(format: ObjectFormat, bytes: &'a [u8]) -> Self {
127        Self {
128            format,
129            bytes,
130            offset: 0,
131        }
132    }
133}
134
135impl<'a> Iterator for TreeEntries<'a> {
136    type Item = Result<TreeEntryRef<'a>>;
137
138    fn next(&mut self) -> Option<Self::Item> {
139        if self.offset >= self.bytes.len() {
140            return None;
141        }
142        match parse_tree_entry_ref(self.format, self.bytes, self.offset) {
143            Ok((entry, next_offset)) => {
144                self.offset = next_offset;
145                Some(Ok(entry))
146            }
147            Err(err) => {
148                self.offset = self.bytes.len();
149                Some(Err(err))
150            }
151        }
152    }
153}
154
155impl<'a> From<TreeEntryRef<'a>> for TreeEntry {
156    fn from(entry: TreeEntryRef<'a>) -> Self {
157        Self {
158            mode: entry.mode,
159            name: entry.name.into(),
160            oid: entry.oid,
161        }
162    }
163}
164
165impl Tree {
166    pub fn parse(format: ObjectFormat, bytes: &[u8]) -> Result<Self> {
167        let entries = TreeEntries::new(format, bytes)
168            .map(|entry| entry.map(TreeEntry::from))
169            .collect::<Result<Vec<_>>>()?;
170        Ok(Self { entries })
171    }
172
173    pub fn write(&self) -> Vec<u8> {
174        let mut out = Vec::new();
175        for entry in &self.entries {
176            out.extend_from_slice(format!("{:o}", entry.mode).as_bytes());
177            out.push(b' ');
178            out.extend_from_slice(entry.name.as_bytes());
179            out.push(0);
180            out.extend_from_slice(entry.oid.as_bytes());
181        }
182        out
183    }
184}
185
186fn parse_tree_entry_ref<'a>(
187    format: ObjectFormat,
188    bytes: &'a [u8],
189    offset: usize,
190) -> Result<(TreeEntryRef<'a>, usize)> {
191    let mode_end = bytes[offset..]
192        .iter()
193        .position(|byte| *byte == b' ')
194        .map(|relative| offset + relative)
195        .ok_or_else(|| GitError::InvalidFormat("unterminated tree mode".into()))?;
196    let mode_text = std::str::from_utf8(&bytes[offset..mode_end])
197        .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
198    let mode = u32::from_str_radix(mode_text, 8)
199        .map_err(|_| GitError::InvalidFormat("invalid tree mode".into()))?;
200
201    let name_start = mode_end + 1;
202    let name_end = bytes[name_start..]
203        .iter()
204        .position(|byte| *byte == 0)
205        .map(|relative| name_start + relative)
206        .ok_or_else(|| GitError::InvalidFormat("unterminated tree path".into()))?;
207    if name_end == name_start {
208        return Err(GitError::InvalidFormat("empty tree path".into()));
209    }
210
211    let oid_start = name_end + 1;
212    let oid_end = oid_start
213        .checked_add(format.raw_len())
214        .ok_or_else(|| GitError::InvalidFormat("tree oid overflow".into()))?;
215    if oid_end > bytes.len() {
216        return Err(GitError::InvalidFormat("truncated tree object id".into()));
217    }
218
219    Ok((
220        TreeEntryRef {
221            mode,
222            name: &bytes[name_start..name_end],
223            oid: ObjectId::from_raw(format, &bytes[oid_start..oid_end])?,
224        },
225        oid_end,
226    ))
227}
228
229pub fn tree_entry_object_type(mode: u32) -> ObjectType {
230    match mode {
231        0o040000 => ObjectType::Tree,
232        _ => ObjectType::Blob,
233    }
234}
235
236/// The five entry kinds Git allows inside a tree, each mapping to a fixed mode.
237///
238/// This is a *closed* domain used when *writing* trees; for reading arbitrary
239/// trees, keep the raw [`TreeEntry::mode`] and classify with
240/// [`EntryKind::from_mode`] (which returns `None` for non-canonical modes so
241/// they round-trip rather than being silently coerced).
242#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
243pub enum EntryKind {
244    /// A subtree (`040000`).
245    Tree,
246    /// A non-executable regular file (`100644`).
247    Blob,
248    /// An executable regular file (`100755`).
249    BlobExecutable,
250    /// A symbolic link (`120000`); the blob bytes are the link target and must
251    /// never be dereferenced.
252    Symlink,
253    /// A gitlink / submodule commit pointer (`160000`).
254    Commit,
255}
256
257impl EntryKind {
258    /// The octal tree-entry mode for this kind.
259    pub const fn mode(self) -> u32 {
260        match self {
261            Self::Tree => 0o040000,
262            Self::Blob => 0o100644,
263            Self::BlobExecutable => 0o100755,
264            Self::Symlink => 0o120000,
265            Self::Commit => 0o160000,
266        }
267    }
268
269    /// Classify a raw tree-entry mode, returning `None` for anything that is
270    /// not one of Git's canonical five.
271    pub const fn from_mode(mode: u32) -> Option<Self> {
272        match mode {
273            0o040000 => Some(Self::Tree),
274            0o100644 => Some(Self::Blob),
275            0o100755 => Some(Self::BlobExecutable),
276            0o120000 => Some(Self::Symlink),
277            0o160000 => Some(Self::Commit),
278            _ => None,
279        }
280    }
281
282    /// The object type an entry of this kind points at (a gitlink points at a
283    /// commit that lives in another repository).
284    pub const fn object_type(self) -> ObjectType {
285        match self {
286            Self::Tree => ObjectType::Tree,
287            Self::Commit => ObjectType::Commit,
288            _ => ObjectType::Blob,
289        }
290    }
291}
292
293impl From<EntryKind> for u32 {
294    fn from(kind: EntryKind) -> Self {
295        kind.mode()
296    }
297}
298
299impl TreeEntry {
300    /// Classify this entry's mode, if it is one of Git's canonical kinds.
301    pub fn kind(&self) -> Option<EntryKind> {
302        EntryKind::from_mode(self.mode)
303    }
304
305    pub fn is_tree(&self) -> bool {
306        self.mode == EntryKind::Tree.mode()
307    }
308
309    pub fn is_symlink(&self) -> bool {
310        self.mode == EntryKind::Symlink.mode()
311    }
312
313    pub fn is_gitlink(&self) -> bool {
314        self.mode == EntryKind::Commit.mode()
315    }
316
317    pub fn is_executable(&self) -> bool {
318        self.mode == EntryKind::BlobExecutable.mode()
319    }
320}
321
322impl TreeEntryRef<'_> {
323    /// Classify this entry's mode, if it is one of Git's canonical kinds.
324    pub fn kind(&self) -> Option<EntryKind> {
325        EntryKind::from_mode(self.mode)
326    }
327
328    pub fn is_tree(&self) -> bool {
329        self.mode == EntryKind::Tree.mode()
330    }
331
332    pub fn is_symlink(&self) -> bool {
333        self.mode == EntryKind::Symlink.mode()
334    }
335
336    pub fn is_gitlink(&self) -> bool {
337        self.mode == EntryKind::Commit.mode()
338    }
339
340    pub fn is_executable(&self) -> bool {
341        self.mode == EntryKind::BlobExecutable.mode()
342    }
343
344    pub fn to_owned(&self) -> TreeEntry {
345        TreeEntry {
346            mode: self.mode,
347            name: self.name.into(),
348            oid: self.oid,
349        }
350    }
351}
352
/// Order two tree entries the way Git canonically sorts them: by name bytes,
/// except that a subtree sorts as though its name ended in `/`. Writing a tree
/// whose entries are in any other order produces a different (wrong) OID.
///
/// Two entries with byte-identical names compare as equal whatever their
/// modes are.
pub fn tree_entry_cmp(
    left_name: &[u8],
    left_mode: u32,
    right_name: &[u8],
    right_mode: u32,
) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    let shared = left_name.len().min(right_name.len());
    let (left_head, left_tail) = left_name.split_at(shared);
    let (right_head, right_tail) = right_name.split_at(shared);

    left_head.cmp(right_head).then_with(|| {
        // The common prefix matches, so at least one name has ended. Compare
        // the first byte past the prefix, substituting the implicit terminator
        // for a name that has run out.
        match (left_tail.first(), right_tail.first()) {
            (None, None) => Ordering::Equal,
            (None, Some(right_next)) => tree_name_terminator(left_mode).cmp(right_next),
            (Some(left_next), None) => left_next.cmp(&tree_name_terminator(right_mode)),
            (Some(_), Some(_)) => Ordering::Equal,
        }
    })
}

/// The byte a name is treated as ending with when sorting: `/` for the
/// subtree mode (`040000`), NUL for every other mode.
fn tree_name_terminator(mode: u32) -> u8 {
    match mode {
        0o040000 => b'/',
        _ => 0,
    }
}
381
382/// Builds a single tree level: deduplicates entries by name and emits them in
383/// Git's canonical order so the written object is byte-identical to Git's.
384///
385/// Start from [`TreeBuilder::new`] (empty) or [`TreeBuilder::from_tree`] (edit
386/// an existing level), [`upsert`](TreeBuilder::upsert) entries, then
387/// [`build`](TreeBuilder::build) / [`write`](TreeBuilder::write).
388#[derive(Debug, Clone, Default)]
389pub struct TreeBuilder {
390    entries: Vec<TreeEntry>,
391}
392
393impl TreeBuilder {
394    pub fn new() -> Self {
395        Self {
396            entries: Vec::new(),
397        }
398    }
399
400    /// Seed the builder with an existing tree level's entries.
401    pub fn from_tree(tree: Tree) -> Self {
402        Self {
403            entries: tree.entries,
404        }
405    }
406
407    /// Insert or replace the entry named `name` with one of Git's canonical
408    /// kinds.
409    pub fn upsert(&mut self, name: impl Into<BString>, kind: EntryKind, oid: ObjectId) {
410        self.upsert_raw(name, kind.mode(), oid);
411    }
412
413    /// Insert or replace using a raw mode (for round-tripping non-canonical
414    /// modes); prefer [`upsert`](TreeBuilder::upsert) for normal entries.
415    pub fn upsert_raw(&mut self, name: impl Into<BString>, mode: u32, oid: ObjectId) {
416        let name = name.into();
417        if let Some(entry) = self
418            .entries
419            .iter_mut()
420            .find(|entry| entry.name == name.as_bytes())
421        {
422            entry.mode = mode;
423            entry.oid = oid;
424        } else {
425            self.entries.push(TreeEntry { mode, name, oid });
426        }
427    }
428
429    /// Remove the entry named `name`, returning whether one was present.
430    pub fn remove(&mut self, name: &[u8]) -> bool {
431        if let Some(position) = self.entries.iter().position(|entry| entry.name == name) {
432            self.entries.swap_remove(position);
433            true
434        } else {
435            false
436        }
437    }
438
439    pub fn is_empty(&self) -> bool {
440        self.entries.is_empty()
441    }
442
443    pub fn len(&self) -> usize {
444        self.entries.len()
445    }
446
447    /// Collect into a [`Tree`] with entries in Git's canonical order.
448    pub fn build(self) -> Tree {
449        let mut entries = self.entries;
450        entries.sort_by(|left, right| {
451            tree_entry_cmp(
452                left.name.as_bytes(),
453                left.mode,
454                right.name.as_bytes(),
455                right.mode,
456            )
457        });
458        Tree { entries }
459    }
460
461    /// The canonical serialized tree body.
462    pub fn write(self) -> Vec<u8> {
463        self.build().write()
464    }
465
466    /// The OID this tree will have once written.
467    pub fn object_id(self, format: ObjectFormat) -> Result<ObjectId> {
468        EncodedObject::new(ObjectType::Tree, self.write()).object_id(format)
469    }
470}
471
/// A parsed, canonical representation of the commit headers this crate
/// understands.
///
/// `Commit` preserves `tree`, `parent`, `author`, `committer`, `encoding`, and
/// message bytes. It intentionally does not retain unknown headers,
/// continuation blocks such as `gpgsig`, mergetags, or their original ordering.
/// Use [`EncodedObject`] when commit object bytes or object ids must be
/// preserved exactly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Commit {
    /// Id from the `tree` header.
    pub tree: ObjectId,
    /// Ids from the `parent` headers, in header order.
    pub parents: Vec<ObjectId>,
    /// Raw bytes following `author ` on the author header line.
    pub author: Vec<u8>,
    /// Raw bytes following `committer ` on the committer header line.
    pub committer: Vec<u8>,
    /// Raw bytes following `encoding `, when that header is present.
    pub encoding: Option<Vec<u8>>,
    /// Everything after the first blank line that separates headers from the
    /// message.
    pub message: Vec<u8>,
}
489
/// A borrowed parse-view of a raw commit object.
///
/// The identity, encoding, and message slices point into the original commit
/// body. Object ids are parsed into fixed-size values while preserving the same
/// validation behavior as [`Commit::parse`]. Like [`Commit`], this is a parsed
/// canonical view of known fields rather than a byte-lossless view of every raw
/// header.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitRef<'a> {
    /// Id from the `tree` header.
    pub tree: ObjectId,
    /// Ids from the `parent` headers, in header order.
    pub parents: Vec<ObjectId>,
    /// Raw bytes following `author ` on the author header line.
    pub author: &'a [u8],
    /// Raw bytes following `committer ` on the committer header line.
    pub committer: &'a [u8],
    /// Raw bytes following `encoding `, when that header is present.
    pub encoding: Option<&'a [u8]>,
    /// Everything after the first blank line that separates headers from the
    /// message.
    pub message: &'a [u8],
}
506
507impl Commit {
508    /// Parse a commit into the canonical typed representation.
509    ///
510    /// Unknown headers and continuation records are accepted but not retained.
511    /// Use [`EncodedObject`] for byte-exact commit preservation.
512    pub fn parse(format: ObjectFormat, bytes: &[u8]) -> Result<Self> {
513        Ok(Self::parse_ref(format, bytes)?.into())
514    }
515
516    pub fn parse_ref<'a>(format: ObjectFormat, bytes: &'a [u8]) -> Result<CommitRef<'a>> {
517        CommitRef::parse(format, bytes)
518    }
519
520    /// Serialize the canonical typed commit representation.
521    ///
522    /// The output contains only the fields represented by [`Commit`]; it is not
523    /// intended to reproduce raw input bytes that contained unknown headers,
524    /// signatures, or mergetags.
525    pub fn write(&self) -> Vec<u8> {
526        let mut out = Vec::new();
527        out.extend_from_slice(format!("tree {}\n", self.tree).as_bytes());
528        for parent in &self.parents {
529            out.extend_from_slice(format!("parent {parent}\n").as_bytes());
530        }
531        out.extend_from_slice(b"author ");
532        out.extend_from_slice(&self.author);
533        out.push(b'\n');
534        out.extend_from_slice(b"committer ");
535        out.extend_from_slice(&self.committer);
536        if let Some(encoding) = &self.encoding {
537            out.extend_from_slice(b"\nencoding ");
538            out.extend_from_slice(encoding);
539        }
540        out.extend_from_slice(b"\n\n");
541        out.extend_from_slice(&self.message);
542        out
543    }
544
545    /// Parse the raw [`author`](Commit::author) line into a typed
546    /// [`Signature`] parse-view, or `None` if the stored bytes are not a
547    /// well-formed git identity.
548    ///
549    /// This is a read-only lens: it does not touch the raw `author` bytes, which
550    /// remain the source of truth for [`Commit::write`]. The returned signature
551    /// re-serializes byte-identically to `author` (see
552    /// [`Signature::to_ident_bytes`]).
553    pub fn author_signature(&self) -> Option<Signature> {
554        Signature::from_ident_line(&self.author)
555    }
556
557    /// Parse the raw [`committer`](Commit::committer) line into a typed
558    /// [`Signature`] parse-view, or `None` if the stored bytes are not a
559    /// well-formed git identity. Read-only over the raw bytes, exactly like
560    /// [`Commit::author_signature`].
561    pub fn committer_signature(&self) -> Option<Signature> {
562        Signature::from_ident_line(&self.committer)
563    }
564}
565
566impl<'a> CommitRef<'a> {
567    pub fn parse(format: ObjectFormat, bytes: &'a [u8]) -> Result<Self> {
568        let split = bytes
569            .windows(2)
570            .position(|window| window == b"\n\n")
571            .ok_or_else(|| GitError::InvalidObject("commit missing message separator".into()))?;
572        let mut tree = None;
573        let mut parents = Vec::new();
574        let mut author = None;
575        let mut committer = None;
576        let mut encoding = None;
577        for line in bytes[..split].split(|byte| *byte == b'\n') {
578            if let Some(value) = line.strip_prefix(b"tree ") {
579                tree = Some(ObjectId::from_hex(format, ascii_header_value(value)?)?);
580            } else if let Some(value) = line.strip_prefix(b"parent ") {
581                parents.push(ObjectId::from_hex(format, ascii_header_value(value)?)?);
582            } else if let Some(value) = line.strip_prefix(b"author ") {
583                author = Some(value);
584            } else if let Some(value) = line.strip_prefix(b"committer ") {
585                committer = Some(value);
586            } else if let Some(value) = line.strip_prefix(b"encoding ") {
587                encoding = Some(value);
588            }
589        }
590        Ok(Self {
591            tree: tree.ok_or_else(|| GitError::InvalidObject("commit missing tree".into()))?,
592            parents,
593            author: author
594                .ok_or_else(|| GitError::InvalidObject("commit missing author".into()))?,
595            committer: committer
596                .ok_or_else(|| GitError::InvalidObject("commit missing committer".into()))?,
597            encoding,
598            message: &bytes[split + 2..],
599        })
600    }
601
602    pub fn to_owned(&self) -> Commit {
603        Commit {
604            tree: self.tree,
605            parents: self.parents.clone(),
606            author: self.author.to_vec(),
607            committer: self.committer.to_vec(),
608            encoding: self.encoding.map(<[u8]>::to_vec),
609            message: self.message.to_vec(),
610        }
611    }
612
613    /// Parse the raw [`author`](Commit::author) line into a typed
614    /// [`Signature`] parse-view, or `None` if the stored bytes are not a
615    /// well-formed git identity.
616    ///
617    /// This is a read-only lens: it does not touch the raw `author` bytes, which
618    /// remain the source of truth for [`Commit::write`]. The returned signature
619    /// re-serializes byte-identically to `author` (see
620    /// [`Signature::to_ident_bytes`]).
621    pub fn author_signature(&self) -> Option<Signature> {
622        Signature::from_ident_line(self.author)
623    }
624
625    /// Parse the raw [`committer`](Commit::committer) line into a typed
626    /// [`Signature`] parse-view, or `None` if the stored bytes are not a
627    /// well-formed git identity. Read-only over the raw bytes, exactly like
628    /// [`Commit::author_signature`].
629    pub fn committer_signature(&self) -> Option<Signature> {
630        Signature::from_ident_line(self.committer)
631    }
632}
633
634impl<'a> From<CommitRef<'a>> for Commit {
635    fn from(commit: CommitRef<'a>) -> Self {
636        Self {
637            tree: commit.tree,
638            parents: commit.parents,
639            author: commit.author.to_vec(),
640            committer: commit.committer.to_vec(),
641            encoding: commit.encoding.map(<[u8]>::to_vec),
642            message: commit.message.to_vec(),
643        }
644    }
645}
646
/// A parsed, canonical representation of the annotated tag headers this crate
/// understands.
///
/// `Tag` preserves `object`, `type`, `tag`, optional `tagger`, and message
/// bytes. Parsed tags also retain their original body so parse/write can
/// preserve annotated tag object ids exactly.
#[derive(Debug, Clone, Eq)]
pub struct Tag {
    /// Id from the `object` header.
    pub object: ObjectId,
    /// Type named by the `type` header.
    pub object_type: ObjectType,
    /// Raw bytes following `tag ` on the tag-name header line.
    pub name: Vec<u8>,
    /// Raw bytes following `tagger `, when that header is present.
    pub tagger: Option<Vec<u8>>,
    /// Everything after the first blank line, or empty when there is none.
    pub message: Vec<u8>,
    /// The complete original body captured when the tag was parsed. While this
    /// is `Some`, [`Tag::write`] returns these bytes verbatim instead of
    /// serializing the fields above. It is not considered by `==`.
    pub raw_body: Option<Vec<u8>>,
}
662
/// A borrowed parse-view of a raw annotated tag object.
///
/// The tag name, tagger identity, and message slices point into the original
/// tag body. The object id and object type are parsed into owned values while
/// preserving the same validation behavior as [`Tag::parse`]. Like [`Tag`],
/// this is a parsed canonical view of known fields rather than a byte-lossless
/// view of every raw header.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagRef<'a> {
    /// Id from the `object` header.
    pub object: ObjectId,
    /// Type named by the `type` header.
    pub object_type: ObjectType,
    /// Raw bytes following `tag ` on the tag-name header line.
    pub name: &'a [u8],
    /// Raw bytes following `tagger `, when that header is present.
    pub tagger: Option<&'a [u8]>,
    /// Everything after the first blank line, or empty when there is none.
    pub message: &'a [u8],
    /// The complete body this view was parsed from; [`TagRef::parse`] always
    /// sets it to `Some`.
    pub raw_body: Option<&'a [u8]>,
}
679
680impl PartialEq for Tag {
681    fn eq(&self, other: &Self) -> bool {
682        self.object == other.object
683            && self.object_type == other.object_type
684            && self.name == other.name
685            && self.tagger == other.tagger
686            && self.message == other.message
687    }
688}
689
690impl Tag {
691    /// Parse an annotated tag into the canonical typed representation.
692    ///
693    /// Unknown headers and continuation records are accepted but not retained.
694    /// Use [`EncodedObject`] for byte-exact tag preservation.
695    pub fn parse(format: ObjectFormat, bytes: &[u8]) -> Result<Self> {
696        Ok(Self::parse_ref(format, bytes)?.into())
697    }
698
699    pub fn parse_ref<'a>(format: ObjectFormat, bytes: &'a [u8]) -> Result<TagRef<'a>> {
700        TagRef::parse(format, bytes)
701    }
702
703    /// Serialize the canonical typed tag representation.
704    ///
705    /// The output contains only the fields represented by [`Tag`]; it is not
706    /// intended to reproduce raw input bytes that contained unknown headers or
707    /// signatures.
708    pub fn write(&self) -> Vec<u8> {
709        if let Some(raw) = &self.raw_body {
710            return raw.clone();
711        }
712        let mut out = Vec::new();
713        out.extend_from_slice(format!("object {}\n", self.object).as_bytes());
714        out.extend_from_slice(format!("type {}\n", self.object_type.as_str()).as_bytes());
715        out.extend_from_slice(b"tag ");
716        out.extend_from_slice(&self.name);
717        out.push(b'\n');
718        if let Some(tagger) = &self.tagger {
719            out.extend_from_slice(b"tagger ");
720            out.extend_from_slice(tagger);
721            out.push(b'\n');
722        }
723        out.push(b'\n');
724        out.extend_from_slice(&self.message);
725        out
726    }
727
728    /// Parse the raw [`tagger`](Tag::tagger) line into a typed [`Signature`]
729    /// parse-view.
730    ///
731    /// Returns `None` when the tag has no tagger header *or* when the stored
732    /// bytes are not a well-formed git identity — callers that need to tell
733    /// those apart should inspect [`Tag::tagger`] directly. This is a read-only
734    /// lens over the raw bytes, which stay the source of truth for
735    /// [`Tag::write`]; the returned signature re-serializes byte-identically to
736    /// the stored `tagger` line.
737    pub fn tagger_signature(&self) -> Option<Signature> {
738        Signature::from_ident_line(self.tagger.as_deref()?)
739    }
740}
741
742impl<'a> TagRef<'a> {
743    pub fn parse(format: ObjectFormat, bytes: &'a [u8]) -> Result<Self> {
744        let split = bytes.windows(2).position(|window| window == b"\n\n");
745        let (headers, message) = match split {
746            Some(split) => (&bytes[..split], &bytes[split + 2..]),
747            None => (bytes, &bytes[bytes.len()..]),
748        };
749        let mut object = None;
750        let mut object_type = None;
751        let mut name = None;
752        let mut tagger = None;
753        for line in headers.split(|byte| *byte == b'\n') {
754            if let Some(value) = line.strip_prefix(b"object ") {
755                object = Some(ObjectId::from_hex(format, ascii_header_value(value)?)?);
756            } else if let Some(value) = line.strip_prefix(b"type ") {
757                object_type = Some(ascii_header_value(value)?.parse()?);
758            } else if let Some(value) = line.strip_prefix(b"tag ") {
759                name = Some(value);
760            } else if let Some(value) = line.strip_prefix(b"tagger ") {
761                tagger = Some(value);
762            }
763        }
764        Ok(Self {
765            object: object.ok_or_else(|| GitError::InvalidObject("tag missing object".into()))?,
766            object_type: object_type
767                .ok_or_else(|| GitError::InvalidObject("tag missing type".into()))?,
768            name: name.ok_or_else(|| GitError::InvalidObject("tag missing name".into()))?,
769            tagger,
770            message,
771            raw_body: Some(bytes),
772        })
773    }
774
775    pub fn to_owned(&self) -> Tag {
776        Tag {
777            object: self.object,
778            object_type: self.object_type,
779            name: self.name.to_vec(),
780            tagger: self.tagger.map(<[u8]>::to_vec),
781            message: self.message.to_vec(),
782            raw_body: self.raw_body.map(<[u8]>::to_vec),
783        }
784    }
785
786    /// Parse the raw [`tagger`](Tag::tagger) line into a typed [`Signature`]
787    /// parse-view.
788    ///
789    /// Returns `None` when the tag has no tagger header *or* when the stored
790    /// bytes are not a well-formed git identity — callers that need to tell
791    /// those apart should inspect [`Tag::tagger`] directly. This is a read-only
792    /// lens over the raw bytes, which stay the source of truth for
793    /// [`Tag::write`]; the returned signature re-serializes byte-identically to
794    /// the stored `tagger` line.
795    pub fn tagger_signature(&self) -> Option<Signature> {
796        Signature::from_ident_line(self.tagger?)
797    }
798}
799
800impl<'a> From<TagRef<'a>> for Tag {
801    fn from(tag: TagRef<'a>) -> Self {
802        Self {
803            object: tag.object,
804            object_type: tag.object_type,
805            name: tag.name.to_vec(),
806            tagger: tag.tagger.map(<[u8]>::to_vec),
807            message: tag.message.to_vec(),
808            raw_body: tag.raw_body.map(<[u8]>::to_vec),
809        }
810    }
811}
812
813fn ascii_header_value(value: &[u8]) -> Result<&str> {
814    std::str::from_utf8(value).map_err(|err| GitError::InvalidObject(err.to_string()))
815}
816
817pub fn parse_framed_object(bytes: &[u8]) -> Result<EncodedObject> {
818    let nul = bytes
819        .iter()
820        .position(|byte| *byte == 0)
821        .ok_or_else(|| GitError::InvalidObject("missing object header terminator".into()))?;
822    let header = std::str::from_utf8(&bytes[..nul])
823        .map_err(|err| GitError::InvalidObject(err.to_string()))?;
824    let (kind, size) = header
825        .split_once(' ')
826        .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
827    let size: usize = size
828        .parse()
829        .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
830    let body = &bytes[nul + 1..];
831    if body.len() != size {
832        return Err(GitError::InvalidObject(format!(
833            "object declared {size} bytes, found {}",
834            body.len()
835        )));
836    }
837    Ok(EncodedObject::new(kind.parse()?, body.to_vec()))
838}
839
840#[cfg(test)]
841mod tests {
842    use super::*;
843
844    #[test]
845    fn tree_builder_sorts_canonically_and_dedups() {
846        let format = ObjectFormat::Sha1;
847        let blob = ObjectId::empty_blob(format);
848        let subtree = ObjectId::empty_tree(format);
849        // Validate the infallible well-known constants while we're here.
850        assert_eq!(subtree.to_hex(), "4b825dc642cb6eb9a060e54bf8d69288fbee4904");
851        assert_eq!(blob.to_hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
852
853        let mut builder = TreeBuilder::new();
854        // Inserted out of order. The directory-suffix rule means "foo.txt"
855        // (blob) sorts before the "foo" subtree, because '.' (0x2e) < '/' (0x2f)
856        // — a plain byte sort of the names would (wrongly) put "foo" first.
857        builder.upsert("foo", EntryKind::Tree, subtree);
858        builder.upsert("a.txt", EntryKind::Blob, blob.clone());
859        builder.upsert("foo.txt", EntryKind::Blob, blob.clone());
860        // Last upsert for a name wins.
861        builder.upsert("a.txt", EntryKind::BlobExecutable, blob);
862
863        let tree = builder.build();
864        let names: Vec<&[u8]> = tree.entries.iter().map(|e| e.name.as_bytes()).collect();
865        assert_eq!(names, vec![&b"a.txt"[..], &b"foo.txt"[..], &b"foo"[..]]);
866        assert_eq!(tree.entries[0].mode, EntryKind::BlobExecutable.mode());
867        assert!(tree.entries[2].is_tree());
868    }
869
870    #[test]
871    fn entry_kind_round_trips_modes() {
872        for kind in [
873            EntryKind::Tree,
874            EntryKind::Blob,
875            EntryKind::BlobExecutable,
876            EntryKind::Symlink,
877            EntryKind::Commit,
878        ] {
879            assert_eq!(EntryKind::from_mode(kind.mode()), Some(kind));
880        }
881        assert_eq!(EntryKind::from_mode(0o100600), None);
882    }
883
884    #[test]
885    fn framed_object_round_trips() {
886        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
887        assert_eq!(
888            parse_framed_object(&object.framed_bytes()).expect("test operation should succeed"),
889            object
890        );
891    }
892
893    #[test]
894    fn encoded_raw_commit_with_multiline_gpgsig_preserves_bytes_and_id() {
895        let format = ObjectFormat::Sha1;
896        let tree = ObjectId::empty_tree(format);
897        let body = format!(
898            concat!(
899                "tree {tree}\n",
900                "author Signer <signer@example.invalid> 1700000000 +0000\n",
901                "committer Signer <signer@example.invalid> 1700000000 +0000\n",
902                "gpgsig -----BEGIN PGP SIGNATURE-----\n",
903                " \n",
904                " iQEzBAABCgAdFiEErawcommitbytescontract\n",
905                " =abcd\n",
906                " -----END PGP SIGNATURE-----\n",
907                "\n",
908                "signed commit\n",
909            ),
910            tree = tree,
911        )
912        .into_bytes();
913
914        assert_encoded_preserves_framed_bytes_and_id(ObjectType::Commit, body, format);
915    }
916
917    #[test]
918    fn encoded_raw_commit_with_mergetag_and_custom_headers_preserves_bytes_and_id() {
919        let format = ObjectFormat::Sha1;
920        let tree = ObjectId::empty_tree(format);
921        let parent = ObjectId::empty_blob(format);
922        let body = format!(
923            concat!(
924                "tree {tree}\n",
925                "parent {parent}\n",
926                "author Merger <merger@example.invalid> 1700000000 +0000\n",
927                "committer Merger <merger@example.invalid> 1700000001 +0000\n",
928                "x-review-id 42\n",
929                "mergetag object {parent}\n",
930                " type commit\n",
931                " tag imported-v1\n",
932                " tagger Tagger <tagger@example.invalid> 1699999999 +0000\n",
933                " \n",
934                " imported tag body\n",
935                " gpgsig -----BEGIN PGP SIGNATURE-----\n",
936                " nested-signature-line\n",
937                " -----END PGP SIGNATURE-----\n",
938                "x-sley-extra raw bytes stay here\n",
939                "\n",
940                "merge commit\n",
941            ),
942            tree = tree,
943            parent = parent,
944        )
945        .into_bytes();
946
947        assert_encoded_preserves_framed_bytes_and_id(ObjectType::Commit, body, format);
948    }
949
950    #[test]
951    fn encoded_raw_annotated_tag_with_signature_and_custom_headers_preserves_bytes_and_id() {
952        let format = ObjectFormat::Sha1;
953        let object = ObjectId::empty_blob(format);
954        let body = format!(
955            concat!(
956                "object {object}\n",
957                "type blob\n",
958                "tag signed-v1\n",
959                "tagger Tagger <tagger@example.invalid> 1700000000 -0000\n",
960                "x-release-channel stable\n",
961                "gpgsig -----BEGIN PGP SIGNATURE-----\n",
962                " tag-signature-line-1\n",
963                " tag-signature-line-2\n",
964                " -----END PGP SIGNATURE-----\n",
965                "\n",
966                "release notes\n",
967            ),
968            object = object,
969        )
970        .into_bytes();
971
972        assert_encoded_preserves_framed_bytes_and_id(ObjectType::Tag, body, format);
973    }
974
975    #[test]
976    fn tree_round_trips_entries() {
977        let blob = ObjectId::from_hex(
978            ObjectFormat::Sha1,
979            "ce013625030ba8dba906f756967f9e9ca394464a",
980        )
981        .expect("test operation should succeed");
982        let tree = Tree {
983            entries: vec![TreeEntry {
984                mode: 0o100644,
985                name: BString::from(b"hello.txt"),
986                oid: blob,
987            }],
988        };
989        assert_eq!(
990            Tree::parse(ObjectFormat::Sha1, &tree.write()).expect("test operation should succeed"),
991            tree
992        );
993    }
994
995    #[test]
996    fn tree_entries_iterates_without_name_allocations() {
997        let format = ObjectFormat::Sha1;
998        let blob = ObjectId::from_hex(format, "ce013625030ba8dba906f756967f9e9ca394464a")
999            .expect("test operation should succeed");
1000        let subtree = ObjectId::empty_tree(format);
1001        let mut bytes = Vec::new();
1002
1003        let first_name_start = b"100644 ".len();
1004        write_tree_entry(&mut bytes, EntryKind::Blob.mode(), b"hello.txt", &blob);
1005        let second_name_start = bytes.len() + b"40000 ".len();
1006        write_tree_entry(&mut bytes, EntryKind::Tree.mode(), b"src", &subtree);
1007
1008        let mut entries = TreeEntries::new(format, &bytes);
1009        let first = entries
1010            .next()
1011            .expect("first entry")
1012            .expect("test operation should succeed");
1013        assert_eq!(first.mode, EntryKind::Blob.mode());
1014        assert_eq!(first.name, b"hello.txt");
1015        assert_eq!(first.oid, blob);
1016        assert_eq!(first.kind(), Some(EntryKind::Blob));
1017        assert!(std::ptr::eq(
1018            first.name.as_ptr(),
1019            bytes[first_name_start..].as_ptr()
1020        ));
1021
1022        let second = entries
1023            .next()
1024            .expect("second entry")
1025            .expect("test operation should succeed");
1026        assert_eq!(second.mode, EntryKind::Tree.mode());
1027        assert_eq!(second.name, b"src");
1028        assert_eq!(second.oid, subtree);
1029        assert!(second.is_tree());
1030        assert!(std::ptr::eq(
1031            second.name.as_ptr(),
1032            bytes[second_name_start..].as_ptr()
1033        ));
1034        assert!(entries.next().is_none());
1035
1036        let owned = Tree::parse(format, &bytes).expect("test operation should succeed");
1037        assert_eq!(owned.entries, vec![first.to_owned(), second.to_owned()]);
1038    }
1039
1040    #[test]
1041    fn tree_entries_reports_invalid_mode_path_and_truncated_oid() {
1042        let format = ObjectFormat::Sha1;
1043        let oid = ObjectId::empty_blob(format);
1044
1045        let mut invalid_mode = b"10088 bad\0".to_vec();
1046        invalid_mode.extend_from_slice(oid.as_bytes());
1047        assert_invalid_tree_entry(
1048            TreeEntries::new(format, &invalid_mode)
1049                .next()
1050                .expect("invalid mode result"),
1051            "invalid tree mode",
1052        );
1053
1054        let mut empty_path = b"100644 \0".to_vec();
1055        empty_path.extend_from_slice(oid.as_bytes());
1056        assert_invalid_tree_entry(
1057            TreeEntries::new(format, &empty_path)
1058                .next()
1059                .expect("empty path result"),
1060            "empty tree path",
1061        );
1062
1063        let mut truncated_oid = b"100644 bad\0".to_vec();
1064        truncated_oid.extend_from_slice(&oid.as_bytes()[..format.raw_len() - 1]);
1065        assert_invalid_tree_entry(
1066            TreeEntries::new(format, &truncated_oid)
1067                .next()
1068                .expect("truncated oid result"),
1069            "truncated tree object id",
1070        );
1071    }
1072
1073    #[test]
1074    fn tree_entry_ref_kind_helpers_match_entry_kinds() {
1075        let oid = ObjectId::null(ObjectFormat::Sha1);
1076
1077        let tree = TreeEntryRef {
1078            mode: EntryKind::Tree.mode(),
1079            name: b"dir",
1080            oid,
1081        };
1082        assert_eq!(tree.kind(), Some(EntryKind::Tree));
1083        assert!(tree.is_tree());
1084        assert!(!tree.is_symlink());
1085        assert!(!tree.is_gitlink());
1086        assert!(!tree.is_executable());
1087
1088        let symlink = TreeEntryRef {
1089            mode: EntryKind::Symlink.mode(),
1090            name: b"link",
1091            oid,
1092        };
1093        assert_eq!(symlink.kind(), Some(EntryKind::Symlink));
1094        assert!(symlink.is_symlink());
1095        assert!(!symlink.is_tree());
1096        assert!(!symlink.is_gitlink());
1097        assert!(!symlink.is_executable());
1098
1099        let executable = TreeEntryRef {
1100            mode: EntryKind::BlobExecutable.mode(),
1101            name: b"run",
1102            oid,
1103        };
1104        assert_eq!(executable.kind(), Some(EntryKind::BlobExecutable));
1105        assert!(executable.is_executable());
1106        assert!(!executable.is_tree());
1107        assert!(!executable.is_symlink());
1108        assert!(!executable.is_gitlink());
1109
1110        let gitlink = TreeEntryRef {
1111            mode: EntryKind::Commit.mode(),
1112            name: b"submodule",
1113            oid,
1114        };
1115        assert_eq!(gitlink.kind(), Some(EntryKind::Commit));
1116        assert!(gitlink.is_gitlink());
1117        assert!(!gitlink.is_tree());
1118        assert!(!gitlink.is_symlink());
1119        assert!(!gitlink.is_executable());
1120    }
1121
1122    #[test]
1123    fn commit_round_trips_headers_and_message() {
1124        let tree = ObjectId::from_hex(
1125            ObjectFormat::Sha1,
1126            "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
1127        )
1128        .expect("test operation should succeed");
1129        let commit = Commit {
1130            tree,
1131            parents: Vec::new(),
1132            author: b"A U Thor <a@example.invalid> 0 +0000".to_vec(),
1133            committer: b"C O Mitter <c@example.invalid> 0 +0000".to_vec(),
1134            encoding: Some(b"ISO-8859-1".to_vec()),
1135            message: b"subject\n\nbody\n".to_vec(),
1136        };
1137        assert_eq!(
1138            Commit::parse(ObjectFormat::Sha1, &commit.write())
1139                .expect("test operation should succeed"),
1140            commit
1141        );
1142    }
1143
1144    #[test]
1145    fn commit_ref_borrows_headers_and_message() {
1146        let format = ObjectFormat::Sha1;
1147        let tree_hex = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
1148        let parent_hex = "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15";
1149        let body = format!(
1150            "tree {tree_hex}\n\
1151             parent {parent_hex}\n\
1152             author A U Thor <a@example.invalid> 0 +0000\n\
1153             committer C O Mitter <c@example.invalid> 1 -0000\n\
1154             encoding UTF-8\n\
1155             \n\
1156             subject\n\nbody\n"
1157        )
1158        .into_bytes();
1159
1160        let commit = CommitRef::parse(format, &body).expect("test operation should succeed");
1161        assert_eq!(
1162            commit.tree,
1163            ObjectId::from_hex(format, tree_hex).expect("test operation should succeed")
1164        );
1165        assert_eq!(
1166            commit.parents,
1167            vec![ObjectId::from_hex(format, parent_hex).expect("test operation should succeed")]
1168        );
1169        assert_borrows_from(
1170            &body,
1171            commit.author,
1172            b"A U Thor <a@example.invalid> 0 +0000",
1173        );
1174        assert_borrows_from(
1175            &body,
1176            commit.committer,
1177            b"C O Mitter <c@example.invalid> 1 -0000",
1178        );
1179        assert_borrows_from(
1180            &body,
1181            commit.encoding.expect("test operation should succeed"),
1182            b"UTF-8",
1183        );
1184        assert_borrows_from(&body, commit.message, b"subject\n\nbody\n");
1185
1186        assert_eq!(
1187            Commit::parse_ref(format, &body).expect("test operation should succeed"),
1188            commit
1189        );
1190        assert_eq!(
1191            commit.to_owned(),
1192            Commit::parse(format, &body).expect("test operation should succeed")
1193        );
1194    }
1195
1196    #[test]
1197    fn commit_ref_accepts_non_utf8_headers_and_message() {
1198        let format = ObjectFormat::Sha1;
1199        let tree = ObjectId::empty_tree(format);
1200        let mut body = Vec::new();
1201        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
1202        body.extend_from_slice(b"author J\xF6rg <j@example.invalid> 0 +0000\n");
1203        body.extend_from_slice(b"committer M\xFCller <m@example.invalid> 1 +0000\n");
1204        body.extend_from_slice(b"encoding ISO-8859-1\n\n");
1205        body.extend_from_slice(b"caf\xE9\n");
1206
1207        let commit = CommitRef::parse(format, &body).expect("non-utf8 commit parses");
1208        assert_eq!(commit.tree, tree);
1209        assert_borrows_from(&body, commit.author, b"J\xF6rg <j@example.invalid> 0 +0000");
1210        assert_borrows_from(
1211            &body,
1212            commit.committer,
1213            b"M\xFCller <m@example.invalid> 1 +0000",
1214        );
1215        assert_borrows_from(&body, commit.encoding.expect("encoding"), b"ISO-8859-1");
1216        assert_borrows_from(&body, commit.message, b"caf\xE9\n");
1217        assert_eq!(commit.to_owned().write(), body);
1218    }
1219
1220    #[test]
1221    fn commit_ref_rejects_missing_or_malformed_required_headers() {
1222        let format = ObjectFormat::Sha1;
1223        let valid_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
1224        let valid_idents =
1225            b"author A U Thor <a@example.invalid> 0 +0000\ncommitter C O Mitter <c@example.invalid> 0 +0000\n\nmessage\n";
1226        let mut missing_tree = Vec::new();
1227        missing_tree.extend_from_slice(valid_idents);
1228        assert_invalid_object(
1229            CommitRef::parse(format, &missing_tree),
1230            "commit missing tree",
1231        );
1232
1233        let malformed_tree = b"tree not-an-object-id\nauthor A U Thor <a@example.invalid> 0 +0000\ncommitter C O Mitter <c@example.invalid> 0 +0000\n\nmessage\n";
1234        assert!(matches!(
1235            CommitRef::parse(format, malformed_tree),
1236            Err(GitError::InvalidObjectId(_))
1237        ));
1238
1239        let missing_committer =
1240            format!("tree {valid_tree}\nauthor A U Thor <a@example.invalid> 0 +0000\n\nmessage\n")
1241                .into_bytes();
1242        assert_invalid_object(
1243            CommitRef::parse(format, &missing_committer),
1244            "commit missing committer",
1245        );
1246    }
1247
1248    #[test]
1249    fn tag_round_trips_headers_and_message() {
1250        let object = ObjectId::from_hex(
1251            ObjectFormat::Sha1,
1252            "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15",
1253        )
1254        .expect("test operation should succeed");
1255        let tag = Tag {
1256            object,
1257            object_type: ObjectType::Commit,
1258            name: b"v1.0".to_vec(),
1259            tagger: Some(b"Example User <example@example.invalid> 0 +0000".to_vec()),
1260            message: b"release\n".to_vec(),
1261            raw_body: None,
1262        };
1263        assert_eq!(
1264            Tag::parse(ObjectFormat::Sha1, &tag.write()).expect("test operation should succeed"),
1265            tag
1266        );
1267    }
1268
1269    #[test]
1270    fn tag_ref_accepts_non_utf8_tagger_and_message() {
1271        let format = ObjectFormat::Sha1;
1272        let object = ObjectId::empty_blob(format);
1273        let mut body = Vec::new();
1274        body.extend_from_slice(format!("object {object}\n").as_bytes());
1275        body.extend_from_slice(b"type blob\n");
1276        body.extend_from_slice(b"tag v1.0\n");
1277        body.extend_from_slice(b"tagger J\xF6rg <j@example.invalid> 0 +0000\n\n");
1278        body.extend_from_slice(b"caf\xE9\n");
1279
1280        let tag = TagRef::parse(format, &body).expect("non-utf8 tag parses");
1281        assert_eq!(tag.object, object);
1282        assert_eq!(tag.object_type, ObjectType::Blob);
1283        assert_borrows_from(&body, tag.name, b"v1.0");
1284        assert_borrows_from(
1285            &body,
1286            tag.tagger.expect("tagger"),
1287            b"J\xF6rg <j@example.invalid> 0 +0000",
1288        );
1289        assert_borrows_from(&body, tag.message, b"caf\xE9\n");
1290        assert_eq!(tag.to_owned().write(), body);
1291    }
1292
1293    #[test]
1294    fn typed_commit_canonicalizes_but_tag_write_preserves_raw_body() {
1295        let format = ObjectFormat::Sha1;
1296        let tree = ObjectId::empty_tree(format);
1297        let raw_commit = format!(
1298            concat!(
1299                "tree {tree}\n",
1300                "author A U Thor <a@example.invalid> 0 +0000\n",
1301                "x-hidden keep only in raw encoded object\n",
1302                "committer C O Mitter <c@example.invalid> 0 +0000\n",
1303                "gpgsig -----BEGIN PGP SIGNATURE-----\n",
1304                " typed-parser-accepts-this\n",
1305                " -----END PGP SIGNATURE-----\n",
1306                "\n",
1307                "subject\n",
1308            ),
1309            tree = tree,
1310        )
1311        .into_bytes();
1312
1313        let commit = Commit::parse(format, &raw_commit).expect("test operation should succeed");
1314        assert_eq!(commit.tree, tree);
1315        assert_eq!(commit.author, b"A U Thor <a@example.invalid> 0 +0000");
1316        assert_eq!(commit.committer, b"C O Mitter <c@example.invalid> 0 +0000");
1317        assert_eq!(commit.message, b"subject\n");
1318
1319        let written_commit = commit.write();
1320        assert_ne!(written_commit, raw_commit);
1321        assert_bytes_not_contains(&written_commit, b"x-hidden");
1322        assert_bytes_not_contains(&written_commit, b"gpgsig");
1323
1324        let object = ObjectId::empty_blob(format);
1325        let raw_tag = format!(
1326            concat!(
1327                "object {object}\n",
1328                "type blob\n",
1329                "tag v1.0\n",
1330                "x-hidden keep only in raw encoded object\n",
1331                "tagger Example User <example@example.invalid> 0 +0000\n",
1332                "gpgsig -----BEGIN PGP SIGNATURE-----\n",
1333                " typed-parser-accepts-this-too\n",
1334                " -----END PGP SIGNATURE-----\n",
1335                "\n",
1336                "release\n",
1337            ),
1338            object = object,
1339        )
1340        .into_bytes();
1341
1342        let tag = Tag::parse(format, &raw_tag).expect("test operation should succeed");
1343        assert_eq!(tag.object, object);
1344        assert_eq!(tag.object_type, ObjectType::Blob);
1345        assert_eq!(tag.name, b"v1.0");
1346        assert_eq!(
1347            tag.tagger.as_deref(),
1348            Some(&b"Example User <example@example.invalid> 0 +0000"[..])
1349        );
1350        assert_eq!(tag.message, b"release\n");
1351
1352        let written_tag = tag.write();
1353        assert_eq!(written_tag, raw_tag);
1354        let original_oid = EncodedObject::new(ObjectType::Tag, raw_tag).object_id(format);
1355        let written_oid = EncodedObject::new(ObjectType::Tag, written_tag).object_id(format);
1356        assert_eq!(
1357            original_oid.expect("original tag oid"),
1358            written_oid.expect("written tag oid")
1359        );
1360    }
1361
1362    #[test]
1363    fn tag_parse_write_preserves_uppercase_object_and_header_only_body() {
1364        let format = ObjectFormat::Sha1;
1365        let object = ObjectId::empty_blob(format);
1366        let mut raw_tag = Vec::new();
1367        raw_tag.extend_from_slice(
1368            format!("object {}\n", object.to_string().to_uppercase()).as_bytes(),
1369        );
1370        raw_tag.extend_from_slice(b"type blob\n");
1371        raw_tag.extend_from_slice(b"tag v1.0\n");
1372        raw_tag.extend_from_slice(b"tagger Example <example@example.invalid> 0 +0000\n");
1373
1374        let tag = Tag::parse(format, &raw_tag).expect("header-only tag parses");
1375        assert_eq!(tag.object, object);
1376        assert_eq!(tag.message, b"");
1377        assert_eq!(tag.write(), raw_tag);
1378    }
1379
1380    #[test]
1381    fn tag_ref_borrows_name_tagger_and_message() {
1382        let format = ObjectFormat::Sha1;
1383        let object_hex = "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15";
1384        let body = format!(
1385            "object {object_hex}\n\
1386             type commit\n\
1387             tag v1.0-borrowed\n\
1388             tagger Example User <example@example.invalid> 0 +0000\n\
1389             \n\
1390             release notes\n"
1391        )
1392        .into_bytes();
1393
1394        let tag = TagRef::parse(format, &body).expect("test operation should succeed");
1395        assert_eq!(
1396            tag.object,
1397            ObjectId::from_hex(format, object_hex).expect("test operation should succeed")
1398        );
1399        assert_eq!(tag.object_type, ObjectType::Commit);
1400        assert_borrows_from(&body, tag.name, b"v1.0-borrowed");
1401        assert_borrows_from(
1402            &body,
1403            tag.tagger.expect("test operation should succeed"),
1404            b"Example User <example@example.invalid> 0 +0000",
1405        );
1406        assert_borrows_from(&body, tag.message, b"release notes\n");
1407
1408        assert_eq!(
1409            Tag::parse_ref(format, &body).expect("test operation should succeed"),
1410            tag
1411        );
1412        assert_eq!(
1413            tag.to_owned(),
1414            Tag::parse(format, &body).expect("test operation should succeed")
1415        );
1416    }
1417
1418    #[test]
1419    fn tag_ref_rejects_missing_or_malformed_required_headers() {
1420        let format = ObjectFormat::Sha1;
1421        let object_hex = "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15";
1422
1423        let missing_name = format!("object {object_hex}\ntype commit\n\nmessage\n").into_bytes();
1424        assert_invalid_object(TagRef::parse(format, &missing_name), "tag missing name");
1425
1426        let malformed_object = b"object not-an-object-id\ntype commit\ntag v1.0\n\nmessage\n";
1427        assert!(matches!(
1428            TagRef::parse(format, malformed_object),
1429            Err(GitError::InvalidObjectId(_))
1430        ));
1431
1432        let malformed_type =
1433            format!("object {object_hex}\ntype mystery\ntag v1.0\n\nmessage\n").into_bytes();
1434        assert_invalid_object(
1435            TagRef::parse(format, &malformed_type),
1436            "unknown object type mystery",
1437        );
1438    }
1439
1440    #[test]
1441    fn commit_signature_accessors_parse_raw_idents_without_changing_storage() {
1442        let tree = ObjectId::from_hex(
1443            ObjectFormat::Sha1,
1444            "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
1445        )
1446        .expect("test operation should succeed");
1447        let author_raw = b"A U Thor <a@example.invalid> 1700000000 +0530".to_vec();
1448        let committer_raw = b"C O Mitter <c@example.invalid> 1700000001 -0000".to_vec();
1449        let commit = Commit {
1450            tree,
1451            parents: Vec::new(),
1452            author: author_raw.clone(),
1453            committer: committer_raw.clone(),
1454            encoding: None,
1455            message: b"subject\n".to_vec(),
1456        };
1457
1458        let author = commit.author_signature().expect("author parses");
1459        assert_eq!(author.name.as_bytes(), b"A U Thor");
1460        assert_eq!(author.email.as_bytes(), b"a@example.invalid");
1461        assert_eq!(author.time.seconds, 1_700_000_000);
1462        assert_eq!(author.time.timezone_offset_minutes, 330);
1463        assert!(!author.time.negative_utc);
1464        // The parse-view re-serializes to exactly the stored bytes.
1465        assert_eq!(author.to_ident_bytes(), author_raw);
1466
1467        let committer = commit.committer_signature().expect("committer parses");
1468        assert_eq!(committer.time.seconds, 1_700_000_001);
1469        // The committer used the -0000 sentinel; it must be preserved.
1470        assert!(committer.time.negative_utc);
1471        assert_eq!(committer.to_ident_bytes(), committer_raw);
1472
1473        // The accessors did not mutate the raw fields, and write() still emits
1474        // them verbatim.
1475        assert_eq!(commit.author, author_raw);
1476        assert_eq!(commit.committer, committer_raw);
1477        let written = commit.write();
1478        assert_eq!(
1479            Commit::parse(ObjectFormat::Sha1, &written).expect("test operation should succeed"),
1480            commit
1481        );
1482    }
1483
1484    #[test]
1485    fn commit_signature_accessor_is_none_for_malformed_ident() {
1486        let tree = ObjectId::from_hex(
1487            ObjectFormat::Sha1,
1488            "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
1489        )
1490        .expect("test operation should succeed");
1491        let commit = Commit {
1492            tree,
1493            parents: Vec::new(),
1494            author: b"garbage without an email or time".to_vec(),
1495            committer: b"C O Mitter <c@example.invalid> 0 +0000".to_vec(),
1496            encoding: None,
1497            message: b"x\n".to_vec(),
1498        };
1499        assert!(commit.author_signature().is_none());
1500        assert!(commit.committer_signature().is_some());
1501    }
1502
1503    #[test]
1504    fn tag_signature_accessor_parses_tagger_and_handles_absence() {
1505        let object = ObjectId::from_hex(
1506            ObjectFormat::Sha1,
1507            "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15",
1508        )
1509        .expect("test operation should succeed");
1510        let tagger_raw = b"Example User <example@example.invalid> 1700000000 -0000".to_vec();
1511        let tag = Tag {
1512            object: object.clone(),
1513            object_type: ObjectType::Commit,
1514            name: b"v1.0".to_vec(),
1515            tagger: Some(tagger_raw.clone()),
1516            message: b"release\n".to_vec(),
1517            raw_body: None,
1518        };
1519        let tagger = tag.tagger_signature().expect("tagger parses");
1520        assert_eq!(tagger.name.as_bytes(), b"Example User");
1521        assert!(tagger.time.negative_utc);
1522        assert_eq!(tagger.to_ident_bytes(), tagger_raw);
1523        // Raw field and serialization unaffected.
1524        assert_eq!(tag.tagger.as_deref(), Some(tagger_raw.as_slice()));
1525
1526        // A tag with no tagger header yields None.
1527        let lightweight = Tag {
1528            object,
1529            object_type: ObjectType::Commit,
1530            name: b"v1.0".to_vec(),
1531            tagger: None,
1532            message: b"x\n".to_vec(),
1533            raw_body: None,
1534        };
1535        assert!(lightweight.tagger_signature().is_none());
1536    }
1537
1538    fn write_tree_entry(body: &mut Vec<u8>, mode: u32, name: &[u8], oid: &ObjectId) {
1539        body.extend_from_slice(format!("{:o}", mode).as_bytes());
1540        body.push(b' ');
1541        body.extend_from_slice(name);
1542        body.push(0);
1543        body.extend_from_slice(oid.as_bytes());
1544    }
1545
1546    fn assert_invalid_tree_entry(result: Result<TreeEntryRef<'_>>, expected: &str) {
1547        match result {
1548            Err(GitError::InvalidFormat(message)) => assert_eq!(message, expected),
1549            other => panic!("expected invalid format {expected:?}, got {other:?}"),
1550        }
1551    }
1552
1553    fn assert_invalid_object<T: std::fmt::Debug>(result: Result<T>, expected: &str) {
1554        match result {
1555            Err(GitError::InvalidObject(message)) => assert_eq!(message, expected),
1556            other => panic!("expected invalid object {expected:?}, got {other:?}"),
1557        }
1558    }
1559
1560    fn assert_encoded_preserves_framed_bytes_and_id(
1561        object_type: ObjectType,
1562        body: Vec<u8>,
1563        format: ObjectFormat,
1564    ) {
1565        let object = EncodedObject::new(object_type, body.clone());
1566        let expected_id = object
1567            .object_id(format)
1568            .expect("test operation should succeed");
1569        let framed = object.framed_bytes();
1570
1571        let parsed = parse_framed_object(&framed).expect("test operation should succeed");
1572        assert_eq!(parsed.object_type, object_type);
1573        assert_eq!(parsed.body, body);
1574        assert_eq!(
1575            parsed
1576                .object_id(format)
1577                .expect("test operation should succeed"),
1578            expected_id
1579        );
1580        assert_eq!(parsed.framed_bytes(), framed);
1581    }
1582
1583    fn assert_bytes_not_contains(haystack: &[u8], needle: &[u8]) {
1584        assert!(
1585            !haystack
1586                .windows(needle.len())
1587                .any(|window| window == needle),
1588            "expected bytes not to contain {:?}",
1589            String::from_utf8_lossy(needle)
1590        );
1591    }
1592
1593    fn assert_borrows_from(body: &[u8], slice: &[u8], expected: &[u8]) {
1594        assert_eq!(slice, expected);
1595        let offset = body
1596            .windows(expected.len())
1597            .position(|window| window == expected)
1598            .expect("expected slice appears in body");
1599        assert!(std::ptr::eq(slice.as_ptr(), body[offset..].as_ptr()));
1600    }
1601}