Skip to main content

sley_object/
lib.rs

1//! git-object — Git's object model: commits, trees, tags, and the raw encoded
2//! object framing they share.
3//!
4//! This crate carries the in-memory representations of Git's four object types
5//! ([`Commit`], [`Tree`], [`Tag`], and the blob payload carried inside
6//! [`EncodedObject`]) together with their parse/serialize routines and the
7//! [`parse_framed_object`] helper that decodes the `"<type> <len>\0<body>"`
8//! loose-object frame.
9//!
10//! [`Commit`] and [`Tag`] are parsed, canonical representations of the headers
11//! this crate understands. They are convenient for structured edits, but they
12//! are not byte-lossless round-trippers for signed objects, custom headers, or
13//! other raw object body details. Use [`EncodedObject`] whenever exact object
14//! bytes, object ids, or framed-object bytes must be preserved.
15
16use sley_core::{GitError, ObjectFormat, ObjectId, Result, Signature};
17use std::str::FromStr;
18
19pub use sley_core::BString;
20
/// The four kinds of object this crate models.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ObjectType {
    /// Serialized under the type name `blob`.
    Blob,
    /// Serialized under the type name `tree`.
    Tree,
    /// Serialized under the type name `commit`.
    Commit,
    /// Serialized under the type name `tag`.
    Tag,
}

impl ObjectType {
    /// The lowercase type name for this kind, as written into object headers
    /// (`"blob"`, `"tree"`, `"commit"`, or `"tag"`).
    pub const fn as_str(self) -> &'static str {
        match self {
            ObjectType::Tag => "tag",
            ObjectType::Commit => "commit",
            ObjectType::Tree => "tree",
            ObjectType::Blob => "blob",
        }
    }
}
39
40impl FromStr for ObjectType {
41    type Err = GitError;
42
43    fn from_str(value: &str) -> Result<Self> {
44        match value {
45            "blob" => Ok(Self::Blob),
46            "tree" => Ok(Self::Tree),
47            "commit" => Ok(Self::Commit),
48            "tag" => Ok(Self::Tag),
49            other => Err(GitError::InvalidObject(format!(
50                "unknown object type {other}"
51            ))),
52        }
53    }
54}
55
56#[derive(Debug, Clone, PartialEq, Eq)]
57pub struct EncodedObject {
58    pub object_type: ObjectType,
59    pub body: Vec<u8>,
60}
61
62impl EncodedObject {
63    /// Create a raw encoded object body.
64    ///
65    /// This is the byte-exact API for preserving Git object contents. For
66    /// commit and tag objects that may contain signatures, continuation
67    /// headers, custom headers, or otherwise unknown data, keep the original
68    /// body here instead of parsing through [`Commit`] or [`Tag`].
69    pub fn new(object_type: ObjectType, body: impl Into<Vec<u8>>) -> Self {
70        Self {
71            object_type,
72            body: body.into(),
73        }
74    }
75
76    /// Return the exact loose-object frame bytes: `"<type> <len>\0<body>"`.
77    pub fn framed_bytes(&self) -> Vec<u8> {
78        let mut out = Vec::with_capacity(self.body.len() + 32);
79        out.extend_from_slice(self.object_type.as_str().as_bytes());
80        out.push(b' ');
81        out.extend_from_slice(self.body.len().to_string().as_bytes());
82        out.push(0);
83        out.extend_from_slice(&self.body);
84        out
85    }
86
87    /// Compute the object id from the raw body bytes.
88    pub fn object_id(&self, format: ObjectFormat) -> Result<ObjectId> {
89        sley_core::object_id_for_bytes(format, self.object_type.as_str(), &self.body)
90    }
91}
92
93#[derive(Debug, Clone, PartialEq, Eq)]
94pub struct Tree {
95    pub entries: Vec<TreeEntry>,
96}
97
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct TreeEntry {
100    pub mode: u32,
101    pub name: BString,
102    pub oid: ObjectId,
103}
104
105/// A borrowed parse-view of a single entry in a raw tree object.
106///
107/// The `name` slice points into the original tree body. The object id is a
108/// fixed-size value parsed from the raw bytes, so iterating does not allocate
109/// entry names or build an intermediate entry list.
110#[derive(Debug, Clone, PartialEq, Eq)]
111pub struct TreeEntryRef<'a> {
112    pub mode: u32,
113    pub name: &'a [u8],
114    pub oid: ObjectId,
115}
116
117/// Fallibly iterates raw tree-object bytes without allocating entry names.
118#[derive(Debug, Clone)]
119pub struct TreeEntries<'a> {
120    format: ObjectFormat,
121    bytes: &'a [u8],
122    offset: usize,
123}
124
125impl<'a> TreeEntries<'a> {
126    pub const fn new(format: ObjectFormat, bytes: &'a [u8]) -> Self {
127        Self {
128            format,
129            bytes,
130            offset: 0,
131        }
132    }
133}
134
135impl<'a> Iterator for TreeEntries<'a> {
136    type Item = Result<TreeEntryRef<'a>>;
137
138    fn next(&mut self) -> Option<Self::Item> {
139        if self.offset >= self.bytes.len() {
140            return None;
141        }
142        match parse_tree_entry_ref(self.format, self.bytes, self.offset) {
143            Ok((entry, next_offset)) => {
144                self.offset = next_offset;
145                Some(Ok(entry))
146            }
147            Err(err) => {
148                self.offset = self.bytes.len();
149                Some(Err(err))
150            }
151        }
152    }
153}
154
155impl<'a> From<TreeEntryRef<'a>> for TreeEntry {
156    fn from(entry: TreeEntryRef<'a>) -> Self {
157        Self {
158            mode: entry.mode,
159            name: entry.name.into(),
160            oid: entry.oid,
161        }
162    }
163}
164
165impl Tree {
166    pub fn parse(format: ObjectFormat, bytes: &[u8]) -> Result<Self> {
167        let entries = TreeEntries::new(format, bytes)
168            .map(|entry| entry.map(TreeEntry::from))
169            .collect::<Result<Vec<_>>>()?;
170        Ok(Self { entries })
171    }
172
173    pub fn write(&self) -> Vec<u8> {
174        let mut out = Vec::new();
175        for entry in &self.entries {
176            out.extend_from_slice(format!("{:o}", entry.mode).as_bytes());
177            out.push(b' ');
178            out.extend_from_slice(entry.name.as_bytes());
179            out.push(0);
180            out.extend_from_slice(entry.oid.as_bytes());
181        }
182        out
183    }
184}
185
186fn parse_tree_entry_ref<'a>(
187    format: ObjectFormat,
188    bytes: &'a [u8],
189    offset: usize,
190) -> Result<(TreeEntryRef<'a>, usize)> {
191    let mode_end = bytes[offset..]
192        .iter()
193        .position(|byte| *byte == b' ')
194        .map(|relative| offset + relative)
195        .ok_or_else(|| GitError::InvalidFormat("unterminated tree mode".into()))?;
196    let mode_text = std::str::from_utf8(&bytes[offset..mode_end])
197        .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
198    let mode = u32::from_str_radix(mode_text, 8)
199        .map_err(|_| GitError::InvalidFormat("invalid tree mode".into()))?;
200
201    let name_start = mode_end + 1;
202    let name_end = bytes[name_start..]
203        .iter()
204        .position(|byte| *byte == 0)
205        .map(|relative| name_start + relative)
206        .ok_or_else(|| GitError::InvalidFormat("unterminated tree path".into()))?;
207    if name_end == name_start {
208        return Err(GitError::InvalidFormat("empty tree path".into()));
209    }
210
211    let oid_start = name_end + 1;
212    let oid_end = oid_start
213        .checked_add(format.raw_len())
214        .ok_or_else(|| GitError::InvalidFormat("tree oid overflow".into()))?;
215    if oid_end > bytes.len() {
216        return Err(GitError::InvalidFormat("truncated tree object id".into()));
217    }
218
219    Ok((
220        TreeEntryRef {
221            mode,
222            name: &bytes[name_start..name_end],
223            oid: ObjectId::from_raw(format, &bytes[oid_start..oid_end])?,
224        },
225        oid_end,
226    ))
227}
228
229pub fn tree_entry_object_type(mode: u32) -> ObjectType {
230    match mode {
231        0o040000 => ObjectType::Tree,
232        0o160000 => ObjectType::Commit,
233        _ => ObjectType::Blob,
234    }
235}
236
/// The five canonical kinds of tree entry, each tied to one fixed mode.
///
/// The set is deliberately *closed* and is meant for *writing* trees. When
/// reading arbitrary trees, keep the raw [`TreeEntry::mode`] and classify it
/// with [`EntryKind::from_mode`], which yields `None` for non-canonical modes
/// so they round-trip instead of being silently coerced.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntryKind {
    /// Mode `040000`: a subtree.
    Tree,
    /// Mode `100644`: a regular, non-executable file.
    Blob,
    /// Mode `100755`: a regular, executable file.
    BlobExecutable,
    /// Mode `120000`: a symbolic link. The blob bytes are the link target and
    /// must never be dereferenced.
    Symlink,
    /// Mode `160000`: a gitlink, i.e. a submodule commit pointer.
    Commit,
}
257
258impl EntryKind {
259    /// The octal tree-entry mode for this kind.
260    pub const fn mode(self) -> u32 {
261        match self {
262            Self::Tree => 0o040000,
263            Self::Blob => 0o100644,
264            Self::BlobExecutable => 0o100755,
265            Self::Symlink => 0o120000,
266            Self::Commit => 0o160000,
267        }
268    }
269
270    /// Classify a raw tree-entry mode, returning `None` for anything that is
271    /// not one of Git's canonical five.
272    pub const fn from_mode(mode: u32) -> Option<Self> {
273        match mode {
274            0o040000 => Some(Self::Tree),
275            0o100644 => Some(Self::Blob),
276            0o100755 => Some(Self::BlobExecutable),
277            0o120000 => Some(Self::Symlink),
278            0o160000 => Some(Self::Commit),
279            _ => None,
280        }
281    }
282
283    /// The object type an entry of this kind points at (a gitlink points at a
284    /// commit that lives in another repository).
285    pub const fn object_type(self) -> ObjectType {
286        match self {
287            Self::Tree => ObjectType::Tree,
288            Self::Commit => ObjectType::Commit,
289            _ => ObjectType::Blob,
290        }
291    }
292}
293
294impl From<EntryKind> for u32 {
295    fn from(kind: EntryKind) -> Self {
296        kind.mode()
297    }
298}
299
300impl TreeEntry {
301    /// Classify this entry's mode, if it is one of Git's canonical kinds.
302    pub fn kind(&self) -> Option<EntryKind> {
303        EntryKind::from_mode(self.mode)
304    }
305
306    pub fn is_tree(&self) -> bool {
307        self.mode == EntryKind::Tree.mode()
308    }
309
310    pub fn is_symlink(&self) -> bool {
311        self.mode == EntryKind::Symlink.mode()
312    }
313
314    pub fn is_gitlink(&self) -> bool {
315        self.mode == EntryKind::Commit.mode()
316    }
317
318    pub fn is_executable(&self) -> bool {
319        self.mode == EntryKind::BlobExecutable.mode()
320    }
321}
322
323impl TreeEntryRef<'_> {
324    /// Classify this entry's mode, if it is one of Git's canonical kinds.
325    pub fn kind(&self) -> Option<EntryKind> {
326        EntryKind::from_mode(self.mode)
327    }
328
329    pub fn is_tree(&self) -> bool {
330        self.mode == EntryKind::Tree.mode()
331    }
332
333    pub fn is_symlink(&self) -> bool {
334        self.mode == EntryKind::Symlink.mode()
335    }
336
337    pub fn is_gitlink(&self) -> bool {
338        self.mode == EntryKind::Commit.mode()
339    }
340
341    pub fn is_executable(&self) -> bool {
342        self.mode == EntryKind::BlobExecutable.mode()
343    }
344
345    pub fn to_owned(&self) -> TreeEntry {
346        TreeEntry {
347            mode: self.mode,
348            name: self.name.into(),
349            oid: self.oid,
350        }
351    }
352}
353
/// Compare two tree entries in the order used when writing a tree: by name
/// bytes, with a subtree treated as if its name were followed by `/`.
/// Serializing entries in any other order yields a different (wrong) OID.
///
/// When one name is a strict prefix of the other, the shorter name's implicit
/// terminator (`/` for a subtree, NUL otherwise) is compared against the next
/// byte of the longer name. Names that are byte-for-byte identical compare as
/// `Equal` regardless of their modes.
///
/// NOTE(review): Git's own `base_name_compare` also compares the terminators
/// of identical names, ordering a blob before a same-named tree. Confirm
/// whether any caller needs that distinction.
pub fn tree_entry_cmp(
    left_name: &[u8],
    left_mode: u32,
    right_name: &[u8],
    right_mode: u32,
) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    let common = left_name.len().min(right_name.len());
    let (left_head, left_tail) = left_name.split_at(common);
    let (right_head, right_tail) = right_name.split_at(common);

    let prefix_order = left_head.cmp(right_head);
    if prefix_order.is_ne() {
        return prefix_order;
    }

    match (left_tail.first(), right_tail.first()) {
        (None, Some(next)) => tree_name_terminator(left_mode).cmp(next),
        (Some(next), None) => next.cmp(&tree_name_terminator(right_mode)),
        // Identical names. Both tails being non-empty cannot happen because
        // `common` is the length of the shorter name.
        (None, None) | (Some(_), Some(_)) => Ordering::Equal,
    }
}

/// The byte that implicitly follows an entry name for ordering purposes: `/`
/// for the subtree mode, NUL for every other mode.
fn tree_name_terminator(mode: u32) -> u8 {
    match mode {
        0o040000 => b'/',
        _ => 0,
    }
}
382
383/// Builds a single tree level: deduplicates entries by name and emits them in
384/// Git's canonical order so the written object is byte-identical to Git's.
385///
386/// Start from [`TreeBuilder::new`] (empty) or [`TreeBuilder::from_tree`] (edit
387/// an existing level), [`upsert`](TreeBuilder::upsert) entries, then
388/// [`build`](TreeBuilder::build) / [`write`](TreeBuilder::write).
389#[derive(Debug, Clone, Default)]
390pub struct TreeBuilder {
391    entries: Vec<TreeEntry>,
392}
393
394impl TreeBuilder {
395    pub fn new() -> Self {
396        Self {
397            entries: Vec::new(),
398        }
399    }
400
401    /// Seed the builder with an existing tree level's entries.
402    pub fn from_tree(tree: Tree) -> Self {
403        Self {
404            entries: tree.entries,
405        }
406    }
407
408    /// Insert or replace the entry named `name` with one of Git's canonical
409    /// kinds.
410    pub fn upsert(&mut self, name: impl Into<BString>, kind: EntryKind, oid: ObjectId) {
411        self.upsert_raw(name, kind.mode(), oid);
412    }
413
414    /// Insert or replace using a raw mode (for round-tripping non-canonical
415    /// modes); prefer [`upsert`](TreeBuilder::upsert) for normal entries.
416    pub fn upsert_raw(&mut self, name: impl Into<BString>, mode: u32, oid: ObjectId) {
417        let name = name.into();
418        if let Some(entry) = self
419            .entries
420            .iter_mut()
421            .find(|entry| entry.name == name.as_bytes())
422        {
423            entry.mode = mode;
424            entry.oid = oid;
425        } else {
426            self.entries.push(TreeEntry { mode, name, oid });
427        }
428    }
429
430    /// Remove the entry named `name`, returning whether one was present.
431    pub fn remove(&mut self, name: &[u8]) -> bool {
432        if let Some(position) = self.entries.iter().position(|entry| entry.name == name) {
433            self.entries.swap_remove(position);
434            true
435        } else {
436            false
437        }
438    }
439
440    pub fn is_empty(&self) -> bool {
441        self.entries.is_empty()
442    }
443
444    pub fn len(&self) -> usize {
445        self.entries.len()
446    }
447
448    /// Collect into a [`Tree`] with entries in Git's canonical order.
449    pub fn build(self) -> Tree {
450        let mut entries = self.entries;
451        entries.sort_by(|left, right| {
452            tree_entry_cmp(
453                left.name.as_bytes(),
454                left.mode,
455                right.name.as_bytes(),
456                right.mode,
457            )
458        });
459        Tree { entries }
460    }
461
462    /// The canonical serialized tree body.
463    pub fn write(self) -> Vec<u8> {
464        self.build().write()
465    }
466
467    /// The OID this tree will have once written.
468    pub fn object_id(self, format: ObjectFormat) -> Result<ObjectId> {
469        EncodedObject::new(ObjectType::Tree, self.write()).object_id(format)
470    }
471}
472
473/// A parsed, canonical representation of the commit headers this crate
474/// understands.
475///
476/// `Commit` preserves `tree`, `parent`, `author`, `committer`, `encoding`, and
477/// message bytes. It intentionally does not retain unknown headers,
478/// continuation blocks such as `gpgsig`, mergetags, or their original ordering.
479/// Use [`EncodedObject`] when commit object bytes or object ids must be
480/// preserved exactly.
481#[derive(Debug, Clone, PartialEq, Eq)]
482pub struct Commit {
483    pub tree: ObjectId,
484    pub parents: Vec<ObjectId>,
485    pub author: Vec<u8>,
486    pub committer: Vec<u8>,
487    pub encoding: Option<Vec<u8>>,
488    pub message: Vec<u8>,
489}
490
491/// A borrowed parse-view of a raw commit object.
492///
493/// The identity, encoding, and message slices point into the original commit
494/// body. Object ids are parsed into fixed-size values while preserving the same
495/// validation behavior as [`Commit::parse`]. Like [`Commit`], this is a parsed
496/// canonical view of known fields rather than a byte-lossless view of every raw
497/// header.
498#[derive(Debug, Clone, PartialEq, Eq)]
499pub struct CommitRef<'a> {
500    pub tree: ObjectId,
501    pub parents: Vec<ObjectId>,
502    pub author: &'a [u8],
503    pub committer: &'a [u8],
504    pub encoding: Option<&'a [u8]>,
505    pub message: &'a [u8],
506}
507
508impl Commit {
509    /// Parse a commit into the canonical typed representation.
510    ///
511    /// Unknown headers and continuation records are accepted but not retained.
512    /// Use [`EncodedObject`] for byte-exact commit preservation.
513    pub fn parse(format: ObjectFormat, bytes: &[u8]) -> Result<Self> {
514        Ok(Self::parse_ref(format, bytes)?.into())
515    }
516
517    pub fn parse_ref<'a>(format: ObjectFormat, bytes: &'a [u8]) -> Result<CommitRef<'a>> {
518        CommitRef::parse(format, bytes)
519    }
520
521    /// Serialize the canonical typed commit representation.
522    ///
523    /// The output contains only the fields represented by [`Commit`]; it is not
524    /// intended to reproduce raw input bytes that contained unknown headers,
525    /// signatures, or mergetags.
526    pub fn write(&self) -> Vec<u8> {
527        let mut out = Vec::new();
528        out.extend_from_slice(format!("tree {}\n", self.tree).as_bytes());
529        for parent in &self.parents {
530            out.extend_from_slice(format!("parent {parent}\n").as_bytes());
531        }
532        out.extend_from_slice(b"author ");
533        out.extend_from_slice(&self.author);
534        out.push(b'\n');
535        out.extend_from_slice(b"committer ");
536        out.extend_from_slice(&self.committer);
537        if let Some(encoding) = &self.encoding {
538            out.extend_from_slice(b"\nencoding ");
539            out.extend_from_slice(encoding);
540        }
541        out.extend_from_slice(b"\n\n");
542        out.extend_from_slice(&self.message);
543        out
544    }
545
546    /// Parse the raw [`author`](Commit::author) line into a typed
547    /// [`Signature`] parse-view, or `None` if the stored bytes are not a
548    /// well-formed git identity.
549    ///
550    /// This is a read-only lens: it does not touch the raw `author` bytes, which
551    /// remain the source of truth for [`Commit::write`]. The returned signature
552    /// re-serializes byte-identically to `author` (see
553    /// [`Signature::to_ident_bytes`]).
554    pub fn author_signature(&self) -> Option<Signature> {
555        Signature::from_ident_line(&self.author)
556    }
557
558    /// Parse the raw [`committer`](Commit::committer) line into a typed
559    /// [`Signature`] parse-view, or `None` if the stored bytes are not a
560    /// well-formed git identity. Read-only over the raw bytes, exactly like
561    /// [`Commit::author_signature`].
562    pub fn committer_signature(&self) -> Option<Signature> {
563        Signature::from_ident_line(&self.committer)
564    }
565}
566
567impl<'a> CommitRef<'a> {
568    pub fn parse(format: ObjectFormat, bytes: &'a [u8]) -> Result<Self> {
569        let split = bytes
570            .windows(2)
571            .position(|window| window == b"\n\n")
572            .ok_or_else(|| GitError::InvalidObject("commit missing message separator".into()))?;
573        let mut tree = None;
574        let mut parents = Vec::new();
575        let mut author = None;
576        let mut committer = None;
577        let mut encoding = None;
578        for line in bytes[..split].split(|byte| *byte == b'\n') {
579            if let Some(value) = line.strip_prefix(b"tree ") {
580                tree = Some(ObjectId::from_hex(format, ascii_header_value(value)?)?);
581            } else if let Some(value) = line.strip_prefix(b"parent ") {
582                parents.push(ObjectId::from_hex(format, ascii_header_value(value)?)?);
583            } else if let Some(value) = line.strip_prefix(b"author ") {
584                author = Some(value);
585            } else if let Some(value) = line.strip_prefix(b"committer ") {
586                committer = Some(value);
587            } else if let Some(value) = line.strip_prefix(b"encoding ") {
588                encoding = Some(value);
589            }
590        }
591        Ok(Self {
592            tree: tree.ok_or_else(|| GitError::InvalidObject("commit missing tree".into()))?,
593            parents,
594            author: author
595                .ok_or_else(|| GitError::InvalidObject("commit missing author".into()))?,
596            committer: committer
597                .ok_or_else(|| GitError::InvalidObject("commit missing committer".into()))?,
598            encoding,
599            message: &bytes[split + 2..],
600        })
601    }
602
603    pub fn to_owned(&self) -> Commit {
604        Commit {
605            tree: self.tree,
606            parents: self.parents.clone(),
607            author: self.author.to_vec(),
608            committer: self.committer.to_vec(),
609            encoding: self.encoding.map(<[u8]>::to_vec),
610            message: self.message.to_vec(),
611        }
612    }
613
614    /// Parse the raw [`author`](Commit::author) line into a typed
615    /// [`Signature`] parse-view, or `None` if the stored bytes are not a
616    /// well-formed git identity.
617    ///
618    /// This is a read-only lens: it does not touch the raw `author` bytes, which
619    /// remain the source of truth for [`Commit::write`]. The returned signature
620    /// re-serializes byte-identically to `author` (see
621    /// [`Signature::to_ident_bytes`]).
622    pub fn author_signature(&self) -> Option<Signature> {
623        Signature::from_ident_line(self.author)
624    }
625
626    /// Parse the raw [`committer`](Commit::committer) line into a typed
627    /// [`Signature`] parse-view, or `None` if the stored bytes are not a
628    /// well-formed git identity. Read-only over the raw bytes, exactly like
629    /// [`Commit::author_signature`].
630    pub fn committer_signature(&self) -> Option<Signature> {
631        Signature::from_ident_line(self.committer)
632    }
633}
634
635impl<'a> From<CommitRef<'a>> for Commit {
636    fn from(commit: CommitRef<'a>) -> Self {
637        Self {
638            tree: commit.tree,
639            parents: commit.parents,
640            author: commit.author.to_vec(),
641            committer: commit.committer.to_vec(),
642            encoding: commit.encoding.map(<[u8]>::to_vec),
643            message: commit.message.to_vec(),
644        }
645    }
646}
647
648/// A parsed, canonical representation of the annotated tag headers this crate
649/// understands.
650///
651/// `Tag` preserves `object`, `type`, `tag`, optional `tagger`, and message
652/// bytes. Parsed tags also retain their original body so parse/write can
653/// preserve annotated tag object ids exactly.
654#[derive(Debug, Clone, Eq)]
655pub struct Tag {
656    pub object: ObjectId,
657    pub object_type: ObjectType,
658    pub name: Vec<u8>,
659    pub tagger: Option<Vec<u8>>,
660    pub message: Vec<u8>,
661    pub raw_body: Option<Vec<u8>>,
662}
663
664/// A borrowed parse-view of a raw annotated tag object.
665///
666/// The tag name, tagger identity, and message slices point into the original
667/// tag body. The object id and object type are parsed into owned values while
668/// preserving the same validation behavior as [`Tag::parse`]. Like [`Tag`],
669/// this is a parsed canonical view of known fields rather than a byte-lossless
670/// view of every raw header.
671#[derive(Debug, Clone, PartialEq, Eq)]
672pub struct TagRef<'a> {
673    pub object: ObjectId,
674    pub object_type: ObjectType,
675    pub name: &'a [u8],
676    pub tagger: Option<&'a [u8]>,
677    pub message: &'a [u8],
678    pub raw_body: Option<&'a [u8]>,
679}
680
681impl PartialEq for Tag {
682    fn eq(&self, other: &Self) -> bool {
683        self.object == other.object
684            && self.object_type == other.object_type
685            && self.name == other.name
686            && self.tagger == other.tagger
687            && self.message == other.message
688    }
689}
690
691impl Tag {
692    /// Parse an annotated tag into the canonical typed representation.
693    ///
694    /// Unknown headers and continuation records are accepted but not retained.
695    /// Use [`EncodedObject`] for byte-exact tag preservation.
696    pub fn parse(format: ObjectFormat, bytes: &[u8]) -> Result<Self> {
697        Ok(Self::parse_ref(format, bytes)?.into())
698    }
699
700    pub fn parse_ref<'a>(format: ObjectFormat, bytes: &'a [u8]) -> Result<TagRef<'a>> {
701        TagRef::parse(format, bytes)
702    }
703
704    /// Serialize the canonical typed tag representation.
705    ///
706    /// The output contains only the fields represented by [`Tag`]; it is not
707    /// intended to reproduce raw input bytes that contained unknown headers or
708    /// signatures.
709    pub fn write(&self) -> Vec<u8> {
710        if let Some(raw) = &self.raw_body {
711            return raw.clone();
712        }
713        let mut out = Vec::new();
714        out.extend_from_slice(format!("object {}\n", self.object).as_bytes());
715        out.extend_from_slice(format!("type {}\n", self.object_type.as_str()).as_bytes());
716        out.extend_from_slice(b"tag ");
717        out.extend_from_slice(&self.name);
718        out.push(b'\n');
719        if let Some(tagger) = &self.tagger {
720            out.extend_from_slice(b"tagger ");
721            out.extend_from_slice(tagger);
722            out.push(b'\n');
723        }
724        out.push(b'\n');
725        out.extend_from_slice(&self.message);
726        out
727    }
728
729    /// Parse the raw [`tagger`](Tag::tagger) line into a typed [`Signature`]
730    /// parse-view.
731    ///
732    /// Returns `None` when the tag has no tagger header *or* when the stored
733    /// bytes are not a well-formed git identity — callers that need to tell
734    /// those apart should inspect [`Tag::tagger`] directly. This is a read-only
735    /// lens over the raw bytes, which stay the source of truth for
736    /// [`Tag::write`]; the returned signature re-serializes byte-identically to
737    /// the stored `tagger` line.
738    pub fn tagger_signature(&self) -> Option<Signature> {
739        Signature::from_ident_line(self.tagger.as_deref()?)
740    }
741}
742
743impl<'a> TagRef<'a> {
744    pub fn parse(format: ObjectFormat, bytes: &'a [u8]) -> Result<Self> {
745        let split = bytes.windows(2).position(|window| window == b"\n\n");
746        let (headers, message) = match split {
747            Some(split) => (&bytes[..split], &bytes[split + 2..]),
748            None => (bytes, &bytes[bytes.len()..]),
749        };
750        let mut object = None;
751        let mut object_type = None;
752        let mut name = None;
753        let mut tagger = None;
754        for line in headers.split(|byte| *byte == b'\n') {
755            if let Some(value) = line.strip_prefix(b"object ") {
756                object = Some(ObjectId::from_hex(format, ascii_header_value(value)?)?);
757            } else if let Some(value) = line.strip_prefix(b"type ") {
758                object_type = Some(ascii_header_value(value)?.parse()?);
759            } else if let Some(value) = line.strip_prefix(b"tag ") {
760                name = Some(value);
761            } else if let Some(value) = line.strip_prefix(b"tagger ") {
762                tagger = Some(value);
763            }
764        }
765        Ok(Self {
766            object: object.ok_or_else(|| GitError::InvalidObject("tag missing object".into()))?,
767            object_type: object_type
768                .ok_or_else(|| GitError::InvalidObject("tag missing type".into()))?,
769            name: name.ok_or_else(|| GitError::InvalidObject("tag missing name".into()))?,
770            tagger,
771            message,
772            raw_body: Some(bytes),
773        })
774    }
775
776    pub fn to_owned(&self) -> Tag {
777        Tag {
778            object: self.object,
779            object_type: self.object_type,
780            name: self.name.to_vec(),
781            tagger: self.tagger.map(<[u8]>::to_vec),
782            message: self.message.to_vec(),
783            raw_body: self.raw_body.map(<[u8]>::to_vec),
784        }
785    }
786
787    /// Parse the raw [`tagger`](Tag::tagger) line into a typed [`Signature`]
788    /// parse-view.
789    ///
790    /// Returns `None` when the tag has no tagger header *or* when the stored
791    /// bytes are not a well-formed git identity — callers that need to tell
792    /// those apart should inspect [`Tag::tagger`] directly. This is a read-only
793    /// lens over the raw bytes, which stay the source of truth for
794    /// [`Tag::write`]; the returned signature re-serializes byte-identically to
795    /// the stored `tagger` line.
796    pub fn tagger_signature(&self) -> Option<Signature> {
797        Signature::from_ident_line(self.tagger?)
798    }
799}
800
801impl<'a> From<TagRef<'a>> for Tag {
802    fn from(tag: TagRef<'a>) -> Self {
803        Self {
804            object: tag.object,
805            object_type: tag.object_type,
806            name: tag.name.to_vec(),
807            tagger: tag.tagger.map(<[u8]>::to_vec),
808            message: tag.message.to_vec(),
809            raw_body: tag.raw_body.map(<[u8]>::to_vec),
810        }
811    }
812}
813
814fn ascii_header_value(value: &[u8]) -> Result<&str> {
815    std::str::from_utf8(value).map_err(|err| GitError::InvalidObject(err.to_string()))
816}
817
818pub fn parse_framed_object(bytes: &[u8]) -> Result<EncodedObject> {
819    let nul = bytes
820        .iter()
821        .position(|byte| *byte == 0)
822        .ok_or_else(|| GitError::InvalidObject("missing object header terminator".into()))?;
823    let header = std::str::from_utf8(&bytes[..nul])
824        .map_err(|err| GitError::InvalidObject(err.to_string()))?;
825    let (kind, size) = header
826        .split_once(' ')
827        .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
828    let size: usize = size
829        .parse()
830        .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
831    let body = &bytes[nul + 1..];
832    if body.len() != size {
833        return Err(GitError::InvalidObject(format!(
834            "object declared {size} bytes, found {}",
835            body.len()
836        )));
837    }
838    Ok(EncodedObject::new(kind.parse()?, body.to_vec()))
839}
840
841#[cfg(test)]
842mod tests {
843    use super::*;
844
845    #[test]
846    fn tree_builder_sorts_canonically_and_dedups() {
847        let format = ObjectFormat::Sha1;
848        let blob = ObjectId::empty_blob(format);
849        let subtree = ObjectId::empty_tree(format);
850        // Validate the infallible well-known constants while we're here.
851        assert_eq!(subtree.to_hex(), "4b825dc642cb6eb9a060e54bf8d69288fbee4904");
852        assert_eq!(blob.to_hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
853
854        let mut builder = TreeBuilder::new();
855        // Inserted out of order. The directory-suffix rule means "foo.txt"
856        // (blob) sorts before the "foo" subtree, because '.' (0x2e) < '/' (0x2f)
857        // — a plain byte sort of the names would (wrongly) put "foo" first.
858        builder.upsert("foo", EntryKind::Tree, subtree);
859        builder.upsert("a.txt", EntryKind::Blob, blob.clone());
860        builder.upsert("foo.txt", EntryKind::Blob, blob.clone());
861        // Last upsert for a name wins.
862        builder.upsert("a.txt", EntryKind::BlobExecutable, blob);
863
864        let tree = builder.build();
865        let names: Vec<&[u8]> = tree.entries.iter().map(|e| e.name.as_bytes()).collect();
866        assert_eq!(names, vec![&b"a.txt"[..], &b"foo.txt"[..], &b"foo"[..]]);
867        assert_eq!(tree.entries[0].mode, EntryKind::BlobExecutable.mode());
868        assert!(tree.entries[2].is_tree());
869    }
870
871    #[test]
872    fn entry_kind_round_trips_modes() {
873        for kind in [
874            EntryKind::Tree,
875            EntryKind::Blob,
876            EntryKind::BlobExecutable,
877            EntryKind::Symlink,
878            EntryKind::Commit,
879        ] {
880            assert_eq!(EntryKind::from_mode(kind.mode()), Some(kind));
881        }
882        assert_eq!(EntryKind::from_mode(0o100600), None);
883    }
884
885    #[test]
886    fn framed_object_round_trips() {
887        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
888        assert_eq!(
889            parse_framed_object(&object.framed_bytes()).expect("test operation should succeed"),
890            object
891        );
892    }
893
894    #[test]
895    fn encoded_raw_commit_with_multiline_gpgsig_preserves_bytes_and_id() {
896        let format = ObjectFormat::Sha1;
897        let tree = ObjectId::empty_tree(format);
898        let body = format!(
899            concat!(
900                "tree {tree}\n",
901                "author Signer <signer@example.invalid> 1700000000 +0000\n",
902                "committer Signer <signer@example.invalid> 1700000000 +0000\n",
903                "gpgsig -----BEGIN PGP SIGNATURE-----\n",
904                " \n",
905                " iQEzBAABCgAdFiEErawcommitbytescontract\n",
906                " =abcd\n",
907                " -----END PGP SIGNATURE-----\n",
908                "\n",
909                "signed commit\n",
910            ),
911            tree = tree,
912        )
913        .into_bytes();
914
915        assert_encoded_preserves_framed_bytes_and_id(ObjectType::Commit, body, format);
916    }
917
918    #[test]
919    fn encoded_raw_commit_with_mergetag_and_custom_headers_preserves_bytes_and_id() {
920        let format = ObjectFormat::Sha1;
921        let tree = ObjectId::empty_tree(format);
922        let parent = ObjectId::empty_blob(format);
923        let body = format!(
924            concat!(
925                "tree {tree}\n",
926                "parent {parent}\n",
927                "author Merger <merger@example.invalid> 1700000000 +0000\n",
928                "committer Merger <merger@example.invalid> 1700000001 +0000\n",
929                "x-review-id 42\n",
930                "mergetag object {parent}\n",
931                " type commit\n",
932                " tag imported-v1\n",
933                " tagger Tagger <tagger@example.invalid> 1699999999 +0000\n",
934                " \n",
935                " imported tag body\n",
936                " gpgsig -----BEGIN PGP SIGNATURE-----\n",
937                " nested-signature-line\n",
938                " -----END PGP SIGNATURE-----\n",
939                "x-sley-extra raw bytes stay here\n",
940                "\n",
941                "merge commit\n",
942            ),
943            tree = tree,
944            parent = parent,
945        )
946        .into_bytes();
947
948        assert_encoded_preserves_framed_bytes_and_id(ObjectType::Commit, body, format);
949    }
950
951    #[test]
952    fn encoded_raw_annotated_tag_with_signature_and_custom_headers_preserves_bytes_and_id() {
953        let format = ObjectFormat::Sha1;
954        let object = ObjectId::empty_blob(format);
955        let body = format!(
956            concat!(
957                "object {object}\n",
958                "type blob\n",
959                "tag signed-v1\n",
960                "tagger Tagger <tagger@example.invalid> 1700000000 -0000\n",
961                "x-release-channel stable\n",
962                "gpgsig -----BEGIN PGP SIGNATURE-----\n",
963                " tag-signature-line-1\n",
964                " tag-signature-line-2\n",
965                " -----END PGP SIGNATURE-----\n",
966                "\n",
967                "release notes\n",
968            ),
969            object = object,
970        )
971        .into_bytes();
972
973        assert_encoded_preserves_framed_bytes_and_id(ObjectType::Tag, body, format);
974    }
975
976    #[test]
977    fn tree_round_trips_entries() {
978        let blob = ObjectId::from_hex(
979            ObjectFormat::Sha1,
980            "ce013625030ba8dba906f756967f9e9ca394464a",
981        )
982        .expect("test operation should succeed");
983        let tree = Tree {
984            entries: vec![TreeEntry {
985                mode: 0o100644,
986                name: BString::from(b"hello.txt"),
987                oid: blob,
988            }],
989        };
990        assert_eq!(
991            Tree::parse(ObjectFormat::Sha1, &tree.write()).expect("test operation should succeed"),
992            tree
993        );
994    }
995
996    #[test]
997    fn tree_entries_iterates_without_name_allocations() {
998        let format = ObjectFormat::Sha1;
999        let blob = ObjectId::from_hex(format, "ce013625030ba8dba906f756967f9e9ca394464a")
1000            .expect("test operation should succeed");
1001        let subtree = ObjectId::empty_tree(format);
1002        let mut bytes = Vec::new();
1003
1004        let first_name_start = b"100644 ".len();
1005        write_tree_entry(&mut bytes, EntryKind::Blob.mode(), b"hello.txt", &blob);
1006        let second_name_start = bytes.len() + b"40000 ".len();
1007        write_tree_entry(&mut bytes, EntryKind::Tree.mode(), b"src", &subtree);
1008
1009        let mut entries = TreeEntries::new(format, &bytes);
1010        let first = entries
1011            .next()
1012            .expect("first entry")
1013            .expect("test operation should succeed");
1014        assert_eq!(first.mode, EntryKind::Blob.mode());
1015        assert_eq!(first.name, b"hello.txt");
1016        assert_eq!(first.oid, blob);
1017        assert_eq!(first.kind(), Some(EntryKind::Blob));
1018        assert!(std::ptr::eq(
1019            first.name.as_ptr(),
1020            bytes[first_name_start..].as_ptr()
1021        ));
1022
1023        let second = entries
1024            .next()
1025            .expect("second entry")
1026            .expect("test operation should succeed");
1027        assert_eq!(second.mode, EntryKind::Tree.mode());
1028        assert_eq!(second.name, b"src");
1029        assert_eq!(second.oid, subtree);
1030        assert!(second.is_tree());
1031        assert!(std::ptr::eq(
1032            second.name.as_ptr(),
1033            bytes[second_name_start..].as_ptr()
1034        ));
1035        assert!(entries.next().is_none());
1036
1037        let owned = Tree::parse(format, &bytes).expect("test operation should succeed");
1038        assert_eq!(owned.entries, vec![first.to_owned(), second.to_owned()]);
1039    }
1040
1041    #[test]
1042    fn tree_entries_reports_invalid_mode_path_and_truncated_oid() {
1043        let format = ObjectFormat::Sha1;
1044        let oid = ObjectId::empty_blob(format);
1045
1046        let mut invalid_mode = b"10088 bad\0".to_vec();
1047        invalid_mode.extend_from_slice(oid.as_bytes());
1048        assert_invalid_tree_entry(
1049            TreeEntries::new(format, &invalid_mode)
1050                .next()
1051                .expect("invalid mode result"),
1052            "invalid tree mode",
1053        );
1054
1055        let mut empty_path = b"100644 \0".to_vec();
1056        empty_path.extend_from_slice(oid.as_bytes());
1057        assert_invalid_tree_entry(
1058            TreeEntries::new(format, &empty_path)
1059                .next()
1060                .expect("empty path result"),
1061            "empty tree path",
1062        );
1063
1064        let mut truncated_oid = b"100644 bad\0".to_vec();
1065        truncated_oid.extend_from_slice(&oid.as_bytes()[..format.raw_len() - 1]);
1066        assert_invalid_tree_entry(
1067            TreeEntries::new(format, &truncated_oid)
1068                .next()
1069                .expect("truncated oid result"),
1070            "truncated tree object id",
1071        );
1072    }
1073
1074    #[test]
1075    fn tree_entry_ref_kind_helpers_match_entry_kinds() {
1076        let oid = ObjectId::null(ObjectFormat::Sha1);
1077
1078        let tree = TreeEntryRef {
1079            mode: EntryKind::Tree.mode(),
1080            name: b"dir",
1081            oid,
1082        };
1083        assert_eq!(tree.kind(), Some(EntryKind::Tree));
1084        assert!(tree.is_tree());
1085        assert!(!tree.is_symlink());
1086        assert!(!tree.is_gitlink());
1087        assert!(!tree.is_executable());
1088
1089        let symlink = TreeEntryRef {
1090            mode: EntryKind::Symlink.mode(),
1091            name: b"link",
1092            oid,
1093        };
1094        assert_eq!(symlink.kind(), Some(EntryKind::Symlink));
1095        assert!(symlink.is_symlink());
1096        assert!(!symlink.is_tree());
1097        assert!(!symlink.is_gitlink());
1098        assert!(!symlink.is_executable());
1099
1100        let executable = TreeEntryRef {
1101            mode: EntryKind::BlobExecutable.mode(),
1102            name: b"run",
1103            oid,
1104        };
1105        assert_eq!(executable.kind(), Some(EntryKind::BlobExecutable));
1106        assert!(executable.is_executable());
1107        assert!(!executable.is_tree());
1108        assert!(!executable.is_symlink());
1109        assert!(!executable.is_gitlink());
1110
1111        let gitlink = TreeEntryRef {
1112            mode: EntryKind::Commit.mode(),
1113            name: b"submodule",
1114            oid,
1115        };
1116        assert_eq!(gitlink.kind(), Some(EntryKind::Commit));
1117        assert!(gitlink.is_gitlink());
1118        assert!(!gitlink.is_tree());
1119        assert!(!gitlink.is_symlink());
1120        assert!(!gitlink.is_executable());
1121    }
1122
1123    #[test]
1124    fn commit_round_trips_headers_and_message() {
1125        let tree = ObjectId::from_hex(
1126            ObjectFormat::Sha1,
1127            "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
1128        )
1129        .expect("test operation should succeed");
1130        let commit = Commit {
1131            tree,
1132            parents: Vec::new(),
1133            author: b"A U Thor <a@example.invalid> 0 +0000".to_vec(),
1134            committer: b"C O Mitter <c@example.invalid> 0 +0000".to_vec(),
1135            encoding: Some(b"ISO-8859-1".to_vec()),
1136            message: b"subject\n\nbody\n".to_vec(),
1137        };
1138        assert_eq!(
1139            Commit::parse(ObjectFormat::Sha1, &commit.write())
1140                .expect("test operation should succeed"),
1141            commit
1142        );
1143    }
1144
1145    #[test]
1146    fn commit_ref_borrows_headers_and_message() {
1147        let format = ObjectFormat::Sha1;
1148        let tree_hex = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
1149        let parent_hex = "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15";
1150        let body = format!(
1151            "tree {tree_hex}\n\
1152             parent {parent_hex}\n\
1153             author A U Thor <a@example.invalid> 0 +0000\n\
1154             committer C O Mitter <c@example.invalid> 1 -0000\n\
1155             encoding UTF-8\n\
1156             \n\
1157             subject\n\nbody\n"
1158        )
1159        .into_bytes();
1160
1161        let commit = CommitRef::parse(format, &body).expect("test operation should succeed");
1162        assert_eq!(
1163            commit.tree,
1164            ObjectId::from_hex(format, tree_hex).expect("test operation should succeed")
1165        );
1166        assert_eq!(
1167            commit.parents,
1168            vec![ObjectId::from_hex(format, parent_hex).expect("test operation should succeed")]
1169        );
1170        assert_borrows_from(
1171            &body,
1172            commit.author,
1173            b"A U Thor <a@example.invalid> 0 +0000",
1174        );
1175        assert_borrows_from(
1176            &body,
1177            commit.committer,
1178            b"C O Mitter <c@example.invalid> 1 -0000",
1179        );
1180        assert_borrows_from(
1181            &body,
1182            commit.encoding.expect("test operation should succeed"),
1183            b"UTF-8",
1184        );
1185        assert_borrows_from(&body, commit.message, b"subject\n\nbody\n");
1186
1187        assert_eq!(
1188            Commit::parse_ref(format, &body).expect("test operation should succeed"),
1189            commit
1190        );
1191        assert_eq!(
1192            commit.to_owned(),
1193            Commit::parse(format, &body).expect("test operation should succeed")
1194        );
1195    }
1196
1197    #[test]
1198    fn commit_ref_accepts_non_utf8_headers_and_message() {
1199        let format = ObjectFormat::Sha1;
1200        let tree = ObjectId::empty_tree(format);
1201        let mut body = Vec::new();
1202        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
1203        body.extend_from_slice(b"author J\xF6rg <j@example.invalid> 0 +0000\n");
1204        body.extend_from_slice(b"committer M\xFCller <m@example.invalid> 1 +0000\n");
1205        body.extend_from_slice(b"encoding ISO-8859-1\n\n");
1206        body.extend_from_slice(b"caf\xE9\n");
1207
1208        let commit = CommitRef::parse(format, &body).expect("non-utf8 commit parses");
1209        assert_eq!(commit.tree, tree);
1210        assert_borrows_from(&body, commit.author, b"J\xF6rg <j@example.invalid> 0 +0000");
1211        assert_borrows_from(
1212            &body,
1213            commit.committer,
1214            b"M\xFCller <m@example.invalid> 1 +0000",
1215        );
1216        assert_borrows_from(&body, commit.encoding.expect("encoding"), b"ISO-8859-1");
1217        assert_borrows_from(&body, commit.message, b"caf\xE9\n");
1218        assert_eq!(commit.to_owned().write(), body);
1219    }
1220
1221    #[test]
1222    fn commit_ref_rejects_missing_or_malformed_required_headers() {
1223        let format = ObjectFormat::Sha1;
1224        let valid_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
1225        let valid_idents =
1226            b"author A U Thor <a@example.invalid> 0 +0000\ncommitter C O Mitter <c@example.invalid> 0 +0000\n\nmessage\n";
1227        let mut missing_tree = Vec::new();
1228        missing_tree.extend_from_slice(valid_idents);
1229        assert_invalid_object(
1230            CommitRef::parse(format, &missing_tree),
1231            "commit missing tree",
1232        );
1233
1234        let malformed_tree = b"tree not-an-object-id\nauthor A U Thor <a@example.invalid> 0 +0000\ncommitter C O Mitter <c@example.invalid> 0 +0000\n\nmessage\n";
1235        assert!(matches!(
1236            CommitRef::parse(format, malformed_tree),
1237            Err(GitError::InvalidObjectId(_))
1238        ));
1239
1240        let missing_committer =
1241            format!("tree {valid_tree}\nauthor A U Thor <a@example.invalid> 0 +0000\n\nmessage\n")
1242                .into_bytes();
1243        assert_invalid_object(
1244            CommitRef::parse(format, &missing_committer),
1245            "commit missing committer",
1246        );
1247    }
1248
1249    #[test]
1250    fn tag_round_trips_headers_and_message() {
1251        let object = ObjectId::from_hex(
1252            ObjectFormat::Sha1,
1253            "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15",
1254        )
1255        .expect("test operation should succeed");
1256        let tag = Tag {
1257            object,
1258            object_type: ObjectType::Commit,
1259            name: b"v1.0".to_vec(),
1260            tagger: Some(b"Example User <example@example.invalid> 0 +0000".to_vec()),
1261            message: b"release\n".to_vec(),
1262            raw_body: None,
1263        };
1264        assert_eq!(
1265            Tag::parse(ObjectFormat::Sha1, &tag.write()).expect("test operation should succeed"),
1266            tag
1267        );
1268    }
1269
1270    #[test]
1271    fn tag_ref_accepts_non_utf8_tagger_and_message() {
1272        let format = ObjectFormat::Sha1;
1273        let object = ObjectId::empty_blob(format);
1274        let mut body = Vec::new();
1275        body.extend_from_slice(format!("object {object}\n").as_bytes());
1276        body.extend_from_slice(b"type blob\n");
1277        body.extend_from_slice(b"tag v1.0\n");
1278        body.extend_from_slice(b"tagger J\xF6rg <j@example.invalid> 0 +0000\n\n");
1279        body.extend_from_slice(b"caf\xE9\n");
1280
1281        let tag = TagRef::parse(format, &body).expect("non-utf8 tag parses");
1282        assert_eq!(tag.object, object);
1283        assert_eq!(tag.object_type, ObjectType::Blob);
1284        assert_borrows_from(&body, tag.name, b"v1.0");
1285        assert_borrows_from(
1286            &body,
1287            tag.tagger.expect("tagger"),
1288            b"J\xF6rg <j@example.invalid> 0 +0000",
1289        );
1290        assert_borrows_from(&body, tag.message, b"caf\xE9\n");
1291        assert_eq!(tag.to_owned().write(), body);
1292    }
1293
1294    #[test]
1295    fn typed_commit_canonicalizes_but_tag_write_preserves_raw_body() {
1296        let format = ObjectFormat::Sha1;
1297        let tree = ObjectId::empty_tree(format);
1298        let raw_commit = format!(
1299            concat!(
1300                "tree {tree}\n",
1301                "author A U Thor <a@example.invalid> 0 +0000\n",
1302                "x-hidden keep only in raw encoded object\n",
1303                "committer C O Mitter <c@example.invalid> 0 +0000\n",
1304                "gpgsig -----BEGIN PGP SIGNATURE-----\n",
1305                " typed-parser-accepts-this\n",
1306                " -----END PGP SIGNATURE-----\n",
1307                "\n",
1308                "subject\n",
1309            ),
1310            tree = tree,
1311        )
1312        .into_bytes();
1313
1314        let commit = Commit::parse(format, &raw_commit).expect("test operation should succeed");
1315        assert_eq!(commit.tree, tree);
1316        assert_eq!(commit.author, b"A U Thor <a@example.invalid> 0 +0000");
1317        assert_eq!(commit.committer, b"C O Mitter <c@example.invalid> 0 +0000");
1318        assert_eq!(commit.message, b"subject\n");
1319
1320        let written_commit = commit.write();
1321        assert_ne!(written_commit, raw_commit);
1322        assert_bytes_not_contains(&written_commit, b"x-hidden");
1323        assert_bytes_not_contains(&written_commit, b"gpgsig");
1324
1325        let object = ObjectId::empty_blob(format);
1326        let raw_tag = format!(
1327            concat!(
1328                "object {object}\n",
1329                "type blob\n",
1330                "tag v1.0\n",
1331                "x-hidden keep only in raw encoded object\n",
1332                "tagger Example User <example@example.invalid> 0 +0000\n",
1333                "gpgsig -----BEGIN PGP SIGNATURE-----\n",
1334                " typed-parser-accepts-this-too\n",
1335                " -----END PGP SIGNATURE-----\n",
1336                "\n",
1337                "release\n",
1338            ),
1339            object = object,
1340        )
1341        .into_bytes();
1342
1343        let tag = Tag::parse(format, &raw_tag).expect("test operation should succeed");
1344        assert_eq!(tag.object, object);
1345        assert_eq!(tag.object_type, ObjectType::Blob);
1346        assert_eq!(tag.name, b"v1.0");
1347        assert_eq!(
1348            tag.tagger.as_deref(),
1349            Some(&b"Example User <example@example.invalid> 0 +0000"[..])
1350        );
1351        assert_eq!(tag.message, b"release\n");
1352
1353        let written_tag = tag.write();
1354        assert_eq!(written_tag, raw_tag);
1355        let original_oid = EncodedObject::new(ObjectType::Tag, raw_tag).object_id(format);
1356        let written_oid = EncodedObject::new(ObjectType::Tag, written_tag).object_id(format);
1357        assert_eq!(
1358            original_oid.expect("original tag oid"),
1359            written_oid.expect("written tag oid")
1360        );
1361    }
1362
1363    #[test]
1364    fn tag_parse_write_preserves_uppercase_object_and_header_only_body() {
1365        let format = ObjectFormat::Sha1;
1366        let object = ObjectId::empty_blob(format);
1367        let mut raw_tag = Vec::new();
1368        raw_tag.extend_from_slice(
1369            format!("object {}\n", object.to_string().to_uppercase()).as_bytes(),
1370        );
1371        raw_tag.extend_from_slice(b"type blob\n");
1372        raw_tag.extend_from_slice(b"tag v1.0\n");
1373        raw_tag.extend_from_slice(b"tagger Example <example@example.invalid> 0 +0000\n");
1374
1375        let tag = Tag::parse(format, &raw_tag).expect("header-only tag parses");
1376        assert_eq!(tag.object, object);
1377        assert_eq!(tag.message, b"");
1378        assert_eq!(tag.write(), raw_tag);
1379    }
1380
1381    #[test]
1382    fn tag_ref_borrows_name_tagger_and_message() {
1383        let format = ObjectFormat::Sha1;
1384        let object_hex = "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15";
1385        let body = format!(
1386            "object {object_hex}\n\
1387             type commit\n\
1388             tag v1.0-borrowed\n\
1389             tagger Example User <example@example.invalid> 0 +0000\n\
1390             \n\
1391             release notes\n"
1392        )
1393        .into_bytes();
1394
1395        let tag = TagRef::parse(format, &body).expect("test operation should succeed");
1396        assert_eq!(
1397            tag.object,
1398            ObjectId::from_hex(format, object_hex).expect("test operation should succeed")
1399        );
1400        assert_eq!(tag.object_type, ObjectType::Commit);
1401        assert_borrows_from(&body, tag.name, b"v1.0-borrowed");
1402        assert_borrows_from(
1403            &body,
1404            tag.tagger.expect("test operation should succeed"),
1405            b"Example User <example@example.invalid> 0 +0000",
1406        );
1407        assert_borrows_from(&body, tag.message, b"release notes\n");
1408
1409        assert_eq!(
1410            Tag::parse_ref(format, &body).expect("test operation should succeed"),
1411            tag
1412        );
1413        assert_eq!(
1414            tag.to_owned(),
1415            Tag::parse(format, &body).expect("test operation should succeed")
1416        );
1417    }
1418
1419    #[test]
1420    fn tag_ref_rejects_missing_or_malformed_required_headers() {
1421        let format = ObjectFormat::Sha1;
1422        let object_hex = "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15";
1423
1424        let missing_name = format!("object {object_hex}\ntype commit\n\nmessage\n").into_bytes();
1425        assert_invalid_object(TagRef::parse(format, &missing_name), "tag missing name");
1426
1427        let malformed_object = b"object not-an-object-id\ntype commit\ntag v1.0\n\nmessage\n";
1428        assert!(matches!(
1429            TagRef::parse(format, malformed_object),
1430            Err(GitError::InvalidObjectId(_))
1431        ));
1432
1433        let malformed_type =
1434            format!("object {object_hex}\ntype mystery\ntag v1.0\n\nmessage\n").into_bytes();
1435        assert_invalid_object(
1436            TagRef::parse(format, &malformed_type),
1437            "unknown object type mystery",
1438        );
1439    }
1440
1441    #[test]
1442    fn commit_signature_accessors_parse_raw_idents_without_changing_storage() {
1443        let tree = ObjectId::from_hex(
1444            ObjectFormat::Sha1,
1445            "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
1446        )
1447        .expect("test operation should succeed");
1448        let author_raw = b"A U Thor <a@example.invalid> 1700000000 +0530".to_vec();
1449        let committer_raw = b"C O Mitter <c@example.invalid> 1700000001 -0000".to_vec();
1450        let commit = Commit {
1451            tree,
1452            parents: Vec::new(),
1453            author: author_raw.clone(),
1454            committer: committer_raw.clone(),
1455            encoding: None,
1456            message: b"subject\n".to_vec(),
1457        };
1458
1459        let author = commit.author_signature().expect("author parses");
1460        assert_eq!(author.name.as_bytes(), b"A U Thor");
1461        assert_eq!(author.email.as_bytes(), b"a@example.invalid");
1462        assert_eq!(author.time.seconds, 1_700_000_000);
1463        assert_eq!(author.time.timezone_offset_minutes, 330);
1464        assert!(!author.time.negative_utc);
1465        // The parse-view re-serializes to exactly the stored bytes.
1466        assert_eq!(author.to_ident_bytes(), author_raw);
1467
1468        let committer = commit.committer_signature().expect("committer parses");
1469        assert_eq!(committer.time.seconds, 1_700_000_001);
1470        // The committer used the -0000 sentinel; it must be preserved.
1471        assert!(committer.time.negative_utc);
1472        assert_eq!(committer.to_ident_bytes(), committer_raw);
1473
1474        // The accessors did not mutate the raw fields, and write() still emits
1475        // them verbatim.
1476        assert_eq!(commit.author, author_raw);
1477        assert_eq!(commit.committer, committer_raw);
1478        let written = commit.write();
1479        assert_eq!(
1480            Commit::parse(ObjectFormat::Sha1, &written).expect("test operation should succeed"),
1481            commit
1482        );
1483    }
1484
1485    #[test]
1486    fn commit_signature_accessor_is_none_for_malformed_ident() {
1487        let tree = ObjectId::from_hex(
1488            ObjectFormat::Sha1,
1489            "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
1490        )
1491        .expect("test operation should succeed");
1492        let commit = Commit {
1493            tree,
1494            parents: Vec::new(),
1495            author: b"garbage without an email or time".to_vec(),
1496            committer: b"C O Mitter <c@example.invalid> 0 +0000".to_vec(),
1497            encoding: None,
1498            message: b"x\n".to_vec(),
1499        };
1500        assert!(commit.author_signature().is_none());
1501        assert!(commit.committer_signature().is_some());
1502    }
1503
1504    #[test]
1505    fn tag_signature_accessor_parses_tagger_and_handles_absence() {
1506        let object = ObjectId::from_hex(
1507            ObjectFormat::Sha1,
1508            "e7556fb3ba7b8f5b1f4772180772a4d6a7323e15",
1509        )
1510        .expect("test operation should succeed");
1511        let tagger_raw = b"Example User <example@example.invalid> 1700000000 -0000".to_vec();
1512        let tag = Tag {
1513            object: object.clone(),
1514            object_type: ObjectType::Commit,
1515            name: b"v1.0".to_vec(),
1516            tagger: Some(tagger_raw.clone()),
1517            message: b"release\n".to_vec(),
1518            raw_body: None,
1519        };
1520        let tagger = tag.tagger_signature().expect("tagger parses");
1521        assert_eq!(tagger.name.as_bytes(), b"Example User");
1522        assert!(tagger.time.negative_utc);
1523        assert_eq!(tagger.to_ident_bytes(), tagger_raw);
1524        // Raw field and serialization unaffected.
1525        assert_eq!(tag.tagger.as_deref(), Some(tagger_raw.as_slice()));
1526
1527        // A tag with no tagger header yields None.
1528        let lightweight = Tag {
1529            object,
1530            object_type: ObjectType::Commit,
1531            name: b"v1.0".to_vec(),
1532            tagger: None,
1533            message: b"x\n".to_vec(),
1534            raw_body: None,
1535        };
1536        assert!(lightweight.tagger_signature().is_none());
1537    }
1538
1539    fn write_tree_entry(body: &mut Vec<u8>, mode: u32, name: &[u8], oid: &ObjectId) {
1540        body.extend_from_slice(format!("{:o}", mode).as_bytes());
1541        body.push(b' ');
1542        body.extend_from_slice(name);
1543        body.push(0);
1544        body.extend_from_slice(oid.as_bytes());
1545    }
1546
1547    fn assert_invalid_tree_entry(result: Result<TreeEntryRef<'_>>, expected: &str) {
1548        match result {
1549            Err(GitError::InvalidFormat(message)) => assert_eq!(message, expected),
1550            other => panic!("expected invalid format {expected:?}, got {other:?}"),
1551        }
1552    }
1553
1554    fn assert_invalid_object<T: std::fmt::Debug>(result: Result<T>, expected: &str) {
1555        match result {
1556            Err(GitError::InvalidObject(message)) => assert_eq!(message, expected),
1557            other => panic!("expected invalid object {expected:?}, got {other:?}"),
1558        }
1559    }
1560
1561    fn assert_encoded_preserves_framed_bytes_and_id(
1562        object_type: ObjectType,
1563        body: Vec<u8>,
1564        format: ObjectFormat,
1565    ) {
1566        let object = EncodedObject::new(object_type, body.clone());
1567        let expected_id = object
1568            .object_id(format)
1569            .expect("test operation should succeed");
1570        let framed = object.framed_bytes();
1571
1572        let parsed = parse_framed_object(&framed).expect("test operation should succeed");
1573        assert_eq!(parsed.object_type, object_type);
1574        assert_eq!(parsed.body, body);
1575        assert_eq!(
1576            parsed
1577                .object_id(format)
1578                .expect("test operation should succeed"),
1579            expected_id
1580        );
1581        assert_eq!(parsed.framed_bytes(), framed);
1582    }
1583
1584    fn assert_bytes_not_contains(haystack: &[u8], needle: &[u8]) {
1585        assert!(
1586            !haystack
1587                .windows(needle.len())
1588                .any(|window| window == needle),
1589            "expected bytes not to contain {:?}",
1590            String::from_utf8_lossy(needle)
1591        );
1592    }
1593
1594    fn assert_borrows_from(body: &[u8], slice: &[u8], expected: &[u8]) {
1595        assert_eq!(slice, expected);
1596        let offset = body
1597            .windows(expected.len())
1598            .position(|window| window == expected)
1599            .expect("expected slice appears in body");
1600        assert!(std::ptr::eq(slice.as_ptr(), body[offset..].as_ptr()));
1601    }
1602}