1use std::fmt;
22use std::str::FromStr;
23
24use crate::commit_encoding;
25use crate::error::{Error, Result};
26
/// Hash function used to name objects.
///
/// `Sha1` is the `Default` variant. The derived ordering follows declaration
/// order, so `Sha1 < Sha256`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum HashAlgo {
    /// SHA-1, producing 20-byte digests.
    #[default]
    Sha1,
    /// SHA-256, producing 32-byte digests.
    Sha256,
}

impl HashAlgo {
    /// Every supported algorithm, in declaration order.
    const ALL: [Self; 2] = [Self::Sha1, Self::Sha256];

    /// Digest size in raw bytes.
    #[must_use]
    pub const fn len(self) -> usize {
        match self {
            Self::Sha256 => 32,
            Self::Sha1 => 20,
        }
    }

    /// Digest size in hexadecimal characters (two per byte).
    #[must_use]
    pub const fn hex_len(self) -> usize {
        2 * self.len()
    }

    /// Lowercase textual name: `"sha1"` or `"sha256"`.
    #[must_use]
    pub const fn name(self) -> &'static str {
        match self {
            Self::Sha256 => "sha256",
            Self::Sha1 => "sha1",
        }
    }

    /// Numeric version tag of the algorithm: 1 for SHA-1, 2 for SHA-256.
    #[must_use]
    pub const fn oid_version(self) -> u8 {
        match self {
            Self::Sha256 => 2,
            Self::Sha1 => 1,
        }
    }

    /// Looks an algorithm up by its [`name`](Self::name).
    ///
    /// Surrounding whitespace is ignored; the comparison is case-sensitive.
    /// Returns `None` for anything else.
    #[must_use]
    pub fn from_name(name: &str) -> Option<Self> {
        let wanted = name.trim();
        Self::ALL.iter().copied().find(|algo| algo.name() == wanted)
    }

    /// Looks an algorithm up by its raw digest length in bytes.
    ///
    /// Returns `None` when `len` is neither 20 nor 32.
    #[must_use]
    pub const fn from_len(len: usize) -> Option<Self> {
        if len == Self::Sha1.len() {
            Some(Self::Sha1)
        } else if len == Self::Sha256.len() {
            Some(Self::Sha256)
        } else {
            None
        }
    }
}
96
97const MAX_OID_LEN: usize = 32;
99
100#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
106pub struct ObjectId {
107 bytes: [u8; MAX_OID_LEN],
108 len: u8,
109}
110
111impl ObjectId {
112 #[must_use]
118 pub const fn zero() -> Self {
119 Self {
120 bytes: [0u8; MAX_OID_LEN],
121 len: 20,
122 }
123 }
124
125 #[must_use]
127 pub const fn null(algo: HashAlgo) -> Self {
128 Self {
129 bytes: [0u8; MAX_OID_LEN],
130 len: algo.len() as u8,
131 }
132 }
133
134 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
141 if HashAlgo::from_len(bytes.len()).is_none() {
142 return Err(Error::InvalidObjectId(hex::encode(bytes)));
143 }
144 let mut buf = [0u8; MAX_OID_LEN];
145 buf[..bytes.len()].copy_from_slice(bytes);
146 Ok(Self {
147 bytes: buf,
148 len: bytes.len() as u8,
149 })
150 }
151
152 #[must_use]
154 pub fn as_bytes(&self) -> &[u8] {
155 &self.bytes[..self.len as usize]
156 }
157
158 #[must_use]
160 pub fn algo(&self) -> HashAlgo {
161 HashAlgo::from_len(self.len as usize).unwrap_or(HashAlgo::Sha1)
162 }
163
164 #[must_use]
166 pub fn is_zero(&self) -> bool {
167 self.as_bytes().iter().all(|&b| b == 0)
168 }
169
170 #[must_use]
172 pub fn to_hex(&self) -> String {
173 hex::encode(self.as_bytes())
174 }
175
176 #[must_use]
180 pub fn loose_prefix(&self) -> String {
181 hex::encode(&self.bytes[..1])
182 }
183
184 pub fn from_hex(s: &str) -> Result<Self> {
191 s.parse()
192 }
193
194 #[must_use]
197 pub fn loose_suffix(&self) -> String {
198 hex::encode(&self.bytes[1..self.len as usize])
199 }
200
201 #[must_use]
204 pub fn is_full_hex(s: &str) -> bool {
205 (s.len() == HashAlgo::Sha1.hex_len() || s.len() == HashAlgo::Sha256.hex_len())
206 && s.bytes().all(|b| b.is_ascii_hexdigit())
207 }
208
209 #[must_use]
211 pub const fn is_hex_len(len: usize) -> bool {
212 len == HashAlgo::Sha1.hex_len() || len == HashAlgo::Sha256.hex_len()
213 }
214
215 #[must_use]
218 pub const fn is_loose_suffix_len(len: usize) -> bool {
219 len == HashAlgo::Sha1.hex_len() - 2 || len == HashAlgo::Sha256.hex_len() - 2
220 }
221}
222
223impl fmt::Display for ObjectId {
224 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
225 f.write_str(&self.to_hex())
226 }
227}
228
229impl fmt::Debug for ObjectId {
230 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
231 write!(f, "ObjectId({})", self.to_hex())
232 }
233}
234
235impl FromStr for ObjectId {
236 type Err = Error;
237
238 fn from_str(s: &str) -> Result<Self> {
239 if s.len() != HashAlgo::Sha1.hex_len() && s.len() != HashAlgo::Sha256.hex_len() {
240 return Err(Error::InvalidObjectId(s.to_owned()));
241 }
242 let bytes = hex::decode(s).map_err(|_| Error::InvalidObjectId(s.to_owned()))?;
243 Self::from_bytes(&bytes)
244 }
245}
246
247#[derive(Debug, Clone, Copy, PartialEq, Eq)]
249pub enum ObjectKind {
250 Blob,
252 Tree,
254 Commit,
256 Tag,
258}
259
260impl ObjectKind {
261 pub fn from_bytes(b: &[u8]) -> Result<Self> {
267 match b {
268 b"blob" => Ok(Self::Blob),
269 b"tree" => Ok(Self::Tree),
270 b"commit" => Ok(Self::Commit),
271 b"tag" => Ok(Self::Tag),
272 other => Err(Error::UnknownObjectType(
273 String::from_utf8_lossy(other).into_owned(),
274 )),
275 }
276 }
277
278 #[must_use]
283 pub fn from_tag_type_field(line: &[u8]) -> Option<Self> {
284 fn keyword_matches(canonical: &[u8], field: &[u8]) -> bool {
285 if field.is_empty() {
286 return false;
287 }
288 for (i, &bc) in field.iter().enumerate() {
289 let sc = canonical.get(i).copied().unwrap_or(0);
290 if sc != bc {
291 return false;
292 }
293 }
294 canonical.get(field.len()).copied().unwrap_or(0) == 0
295 }
296
297 const NAMES: &[(ObjectKind, &[u8])] = &[
298 (ObjectKind::Blob, b"blob"),
299 (ObjectKind::Tree, b"tree"),
300 (ObjectKind::Commit, b"commit"),
301 (ObjectKind::Tag, b"tag"),
302 ];
303 for &(kind, name) in NAMES {
304 if keyword_matches(name, line) {
305 return Some(kind);
306 }
307 }
308 None
309 }
310
311 #[must_use]
313 pub fn as_str(&self) -> &'static str {
314 match self {
315 Self::Blob => "blob",
316 Self::Tree => "tree",
317 Self::Commit => "commit",
318 Self::Tag => "tag",
319 }
320 }
321}
322
323impl fmt::Display for ObjectKind {
324 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
325 f.write_str(self.as_str())
326 }
327}
328
329impl FromStr for ObjectKind {
330 type Err = Error;
331
332 fn from_str(s: &str) -> Result<Self> {
333 Self::from_bytes(s.as_bytes())
334 }
335}
336
337#[derive(Debug, Clone)]
339pub struct Object {
340 pub kind: ObjectKind,
342 pub data: Vec<u8>,
344}
345
346impl Object {
347 #[must_use]
349 pub fn new(kind: ObjectKind, data: Vec<u8>) -> Self {
350 Self { kind, data }
351 }
352
353 #[must_use]
355 pub fn to_store_bytes(&self) -> Vec<u8> {
356 let header = format!("{} {}\0", self.kind, self.data.len());
357 let mut out = Vec::with_capacity(header.len() + self.data.len());
358 out.extend_from_slice(header.as_bytes());
359 out.extend_from_slice(&self.data);
360 out
361 }
362}
363
364#[derive(Debug, Clone, PartialEq, Eq)]
366pub struct TreeEntry {
367 pub mode: u32,
369 pub name: Vec<u8>,
371 pub oid: ObjectId,
373}
374
375impl TreeEntry {
376 #[must_use]
380 pub fn mode_str(&self) -> String {
381 if self.mode == 0o040000 {
383 "40000".to_owned()
384 } else {
385 format!("{:o}", self.mode)
386 }
387 }
388}
389
390pub fn parse_tree(data: &[u8]) -> Result<Vec<TreeEntry>> {
401 match parse_tree_with_oid_len(data, HashAlgo::Sha1.len()) {
407 Ok(entries) => Ok(entries),
408 Err(sha1_err) => parse_tree_with_oid_len(data, HashAlgo::Sha256.len()).map_err(|_| sha1_err),
409 }
410}
411
412pub fn parse_tree_with_oid_len(data: &[u8], oid_len: usize) -> Result<Vec<TreeEntry>> {
417 let mut entries = Vec::new();
418 let mut pos = 0;
419
420 while pos < data.len() {
421 let sp = data[pos..]
423 .iter()
424 .position(|&b| b == b' ')
425 .ok_or_else(|| Error::CorruptObject("tree entry missing space".to_owned()))?;
426 let mode_bytes = &data[pos..pos + sp];
427 let mode = std::str::from_utf8(mode_bytes)
428 .ok()
429 .and_then(|s| u32::from_str_radix(s, 8).ok())
430 .ok_or_else(|| {
431 Error::CorruptObject(format!(
432 "invalid tree mode: {}",
433 String::from_utf8_lossy(mode_bytes)
434 ))
435 })?;
436 pos += sp + 1;
437
438 let nul = data[pos..]
440 .iter()
441 .position(|&b| b == 0)
442 .ok_or_else(|| Error::CorruptObject("tree entry missing NUL".to_owned()))?;
443 let name = data[pos..pos + nul].to_vec();
444 pos += nul + 1;
445
446 if pos + oid_len > data.len() {
447 return Err(Error::CorruptObject("tree entry truncated SHA".to_owned()));
448 }
449 let oid = ObjectId::from_bytes(&data[pos..pos + oid_len])?;
450 pos += oid_len;
451
452 entries.push(TreeEntry { mode, name, oid });
453 }
454
455 Ok(entries)
456}
457
458#[must_use]
463pub fn serialize_tree(entries: &[TreeEntry]) -> Vec<u8> {
464 let mut out = Vec::new();
465 for e in entries {
466 out.extend_from_slice(e.mode_str().as_bytes());
467 out.push(b' ');
468 out.extend_from_slice(&e.name);
469 out.push(0);
470 out.extend_from_slice(e.oid.as_bytes());
471 }
472 out
473}
474
/// Orders two tree entries by name, treating a directory's name as if it
/// ended in `/`.
///
/// The shared prefix of both names is compared bytewise. If it is equal, the
/// next byte of each name decides; a name that has run out contributes `/`
/// when it is a tree and NUL otherwise.
#[must_use]
pub fn tree_entry_cmp(
    a_name: &[u8],
    a_is_tree: bool,
    b_name: &[u8],
    b_is_tree: bool,
) -> std::cmp::Ordering {
    /// Virtual byte that follows an exhausted name.
    fn trailer(is_tree: bool) -> u8 {
        if is_tree {
            b'/'
        } else {
            0
        }
    }

    let shared = a_name.len().min(b_name.len());
    let (a_head, a_tail) = a_name.split_at(shared);
    let (b_head, b_tail) = b_name.split_at(shared);

    a_head.cmp(b_head).then_with(|| {
        let a_next = a_tail.first().copied().unwrap_or_else(|| trailer(a_is_tree));
        let b_next = b_tail.first().copied().unwrap_or_else(|| trailer(b_is_tree));
        a_next.cmp(&b_next)
    })
}
507
/// Parsed contents of a commit object, as produced by `parse_commit` and
/// consumed by `serialize_commit`.
#[derive(Debug, Clone)]
pub struct CommitData {
    /// Id from the `tree` header.
    pub tree: ObjectId,
    /// Ids from the `parent` headers, in the order they appear.
    pub parents: Vec<ObjectId>,
    /// `author` header value decoded to text.
    pub author: String,
    /// `committer` header value decoded to text.
    pub committer: String,
    /// `author` header value as raw bytes. When non-empty, `serialize_commit`
    /// writes these bytes instead of `author`.
    pub author_raw: Vec<u8>,
    /// `committer` header value as raw bytes. When non-empty,
    /// `serialize_commit` writes these bytes instead of `committer`.
    pub committer_raw: Vec<u8>,
    /// Value of the `encoding` header, if one is present.
    pub encoding: Option<String>,
    /// Message body (everything after the first blank line) decoded to text.
    pub message: String,
    // `parse_commit` fills this in when the body is non-empty and either a
    // non-UTF-8 encoding is declared, the body is not valid UTF-8, or the
    // body does not end in a newline. `serialize_commit` prefers it over
    // `message` whenever it is present.
    #[doc = "Optional raw message bytes for non-UTF-8 messages."]
    pub raw_message: Option<Vec<u8>>,
}
534
535pub fn parse_commit(data: &[u8]) -> Result<CommitData> {
541 #[derive(Clone, Copy)]
545 enum Continuation {
546 Author,
547 Committer,
548 Multiline,
549 Ignore,
550 }
551
552 let mut pos = 0usize;
553 let mut tree = None;
554 let mut parents = Vec::new();
555 let mut author_raw: Option<Vec<u8>> = None;
556 let mut committer_raw: Option<Vec<u8>> = None;
557 let mut encoding: Option<String> = None;
558 let mut cont = Continuation::Ignore;
559
560 while pos < data.len() {
561 let line_start = pos;
562 let mut line_end = pos;
563 while line_end < data.len() && data[line_end] != b'\n' {
564 line_end += 1;
565 }
566 let line = &data[line_start..line_end];
567 let after_nl = line_end.saturating_add(1);
568 if line.is_empty() {
569 let body = data.get(after_nl..).unwrap_or_default();
570 let message = commit_encoding::decode_bytes(encoding.as_deref(), body);
571 let has_non_utf8_encoding = encoding.as_deref().is_some_and(|label| {
575 !label.eq_ignore_ascii_case("utf-8") && !label.eq_ignore_ascii_case("utf8")
576 });
577 let raw_message = if body.is_empty() {
578 None
579 } else if has_non_utf8_encoding
580 || std::str::from_utf8(body).is_err()
581 || !body.ends_with(b"\n")
582 {
583 Some(body.to_vec())
584 } else {
585 None
586 };
587 let author_bytes = author_raw
588 .ok_or_else(|| Error::CorruptObject("commit missing author header".to_owned()))?;
589 let committer_bytes = committer_raw.ok_or_else(|| {
590 Error::CorruptObject("commit missing committer header".to_owned())
591 })?;
592 let author = commit_encoding::decode_bytes(encoding.as_deref(), &author_bytes);
593 let committer = commit_encoding::decode_bytes(encoding.as_deref(), &committer_bytes);
594 return Ok(CommitData {
595 tree: tree
596 .ok_or_else(|| Error::CorruptObject("commit missing tree header".to_owned()))?,
597 parents,
598 author,
599 committer,
600 author_raw: author_bytes,
601 committer_raw: committer_bytes,
602 encoding,
603 message,
604 raw_message,
605 });
606 }
607
608 if line.first() == Some(&b' ') {
609 let rest = line.get(1..).unwrap_or_default();
610 match cont {
611 Continuation::Author => {
612 let a = author_raw.as_mut().ok_or_else(|| {
613 Error::CorruptObject("orphan header continuation".to_owned())
614 })?;
615 a.extend_from_slice(rest);
616 }
617 Continuation::Committer => {
618 let c = committer_raw.as_mut().ok_or_else(|| {
619 Error::CorruptObject("orphan header continuation".to_owned())
620 })?;
621 c.extend_from_slice(rest);
622 }
623 Continuation::Multiline | Continuation::Ignore => {}
624 }
625 pos = after_nl;
626 continue;
627 }
628
629 let key_end = line
630 .iter()
631 .position(|&b| b == b' ')
632 .ok_or_else(|| Error::CorruptObject("malformed commit header line".to_owned()))?;
633 let key = &line[..key_end];
634 let rest = line.get(key_end + 1..).unwrap_or_default();
635
636 match key {
637 b"tree" => {
638 let line_str = std::str::from_utf8(rest).map_err(|_| {
639 Error::CorruptObject("commit tree line is not valid UTF-8".to_owned())
640 })?;
641 tree = Some(line_str.trim().parse::<ObjectId>()?);
642 cont = Continuation::Ignore;
643 }
644 b"parent" => {
645 let line_str = std::str::from_utf8(rest).map_err(|_| {
646 Error::CorruptObject("commit parent line is not valid UTF-8".to_owned())
647 })?;
648 parents.push(line_str.trim().parse::<ObjectId>()?);
649 cont = Continuation::Ignore;
650 }
651 b"author" => {
652 author_raw = Some(rest.to_vec());
653 cont = Continuation::Author;
654 }
655 b"committer" => {
656 committer_raw = Some(rest.to_vec());
657 cont = Continuation::Committer;
658 }
659 b"encoding" => {
660 let line_str = std::str::from_utf8(rest).map_err(|_| {
661 Error::CorruptObject("commit encoding line is not valid UTF-8".to_owned())
662 })?;
663 encoding = Some(line_str.to_owned());
664 cont = Continuation::Ignore;
665 }
666 _ => {
667 cont = Continuation::Multiline;
668 }
669 }
670 pos = after_nl;
671 }
672
673 Err(Error::CorruptObject(
674 "commit missing blank line before message".to_owned(),
675 ))
676}
677
/// Finds the first header line that starts with `prefix` and returns what
/// follows it.
///
/// Only lines before the first blank line are examined. The value is decoded
/// as lossy UTF-8 and trimmed of surrounding whitespace. Returns `None` when
/// no header line carries the prefix.
#[must_use]
pub fn tag_header_field(data: &[u8], prefix: &[u8]) -> Option<String> {
    data.split(|&byte| byte == b'\n')
        .take_while(|line| !line.is_empty())
        .find_map(|line| line.strip_prefix(prefix))
        .map(|value| String::from_utf8_lossy(value).trim().to_owned())
}
700
701#[must_use]
703pub fn tag_object_line_oid(data: &[u8]) -> Option<ObjectId> {
704 let s = tag_header_field(data, b"object ")?;
705 s.parse().ok()
706}
707
/// Parsed contents of an annotated tag object, as produced by `parse_tag`
/// and consumed by `serialize_tag`.
#[derive(Debug, Clone)]
pub struct TagData {
    /// Id from the `object` header.
    pub object: ObjectId,
    /// Value of the `type` header, trimmed. `parse_tag` only accepts values
    /// that `ObjectKind::from_tag_type_field` recognizes.
    pub object_type: String,
    /// Value of the `tag` header, trimmed.
    pub tag: String,
    /// Value of the `tagger` header exactly as written (not trimmed), or
    /// `None` when the header is absent.
    pub tagger: Option<String>,
    /// Text following the first blank line.
    pub message: String,
}
722
723pub fn parse_tag(data: &[u8]) -> Result<TagData> {
729 let text = std::str::from_utf8(data)
730 .map_err(|_| Error::CorruptObject("tag is not valid UTF-8".to_owned()))?;
731
732 let mut object = None;
733 let mut object_type = None;
734 let mut tag_name = None;
735 let mut tagger = None;
736 let mut message = String::new();
737 let mut in_message = false;
738
739 for line in text.split('\n') {
740 if in_message {
741 message.push_str(line);
742 message.push('\n');
743 continue;
744 }
745 if line.is_empty() {
746 in_message = true;
747 continue;
748 }
749 if let Some(rest) = line.strip_prefix("object ") {
750 object = Some(rest.trim().parse::<ObjectId>()?);
751 } else if let Some(rest) = line.strip_prefix("type ") {
752 let typ = rest.trim();
753 if ObjectKind::from_tag_type_field(typ.as_bytes()).is_none() {
754 return Err(Error::CorruptObject(format!(
755 "invalid 'type' value in tag: {typ}"
756 )));
757 }
758 object_type = Some(typ.to_owned());
759 } else if let Some(rest) = line.strip_prefix("tag ") {
760 tag_name = Some(rest.trim().to_owned());
761 } else if let Some(rest) = line.strip_prefix("tagger ") {
762 tagger = Some(rest.to_owned());
763 }
764 }
765
766 if message.ends_with('\n') {
768 message.pop();
769 }
770
771 Ok(TagData {
772 object: object
773 .ok_or_else(|| Error::CorruptObject("tag missing object header".to_owned()))?,
774 object_type: object_type
775 .ok_or_else(|| Error::CorruptObject("tag missing type header".to_owned()))?,
776 tag: tag_name.ok_or_else(|| Error::CorruptObject("tag missing tag header".to_owned()))?,
777 tagger,
778 message,
779 })
780}
781
782#[must_use]
787pub fn serialize_tag(t: &TagData) -> Vec<u8> {
788 let mut out = String::new();
789 out.push_str(&format!("object {}\n", t.object));
790 out.push_str(&format!("type {}\n", t.object_type));
791 out.push_str(&format!("tag {}\n", t.tag));
792 if let Some(ref tagger) = t.tagger {
793 out.push_str(&format!("tagger {tagger}\n"));
794 }
795 out.push('\n');
796 let msg = t.message.trim_end_matches('\n');
798 if !msg.is_empty() {
799 out.push_str(msg);
800 out.push('\n');
801 }
802 out.into_bytes()
803}
804
805#[must_use]
813pub fn serialize_commit(c: &CommitData) -> Vec<u8> {
814 let mut out = Vec::new();
815 out.extend_from_slice(format!("tree {}\n", c.tree).as_bytes());
816 for p in &c.parents {
817 out.extend_from_slice(format!("parent {p}\n").as_bytes());
818 }
819 out.extend_from_slice(b"author ");
820 if c.author_raw.is_empty() {
821 out.extend_from_slice(c.author.as_bytes());
822 } else {
823 out.extend_from_slice(&c.author_raw);
824 }
825 out.push(b'\n');
826 out.extend_from_slice(b"committer ");
827 if c.committer_raw.is_empty() {
828 out.extend_from_slice(c.committer.as_bytes());
829 } else {
830 out.extend_from_slice(&c.committer_raw);
831 }
832 out.push(b'\n');
833 if let Some(enc) = &c.encoding {
834 out.extend_from_slice(format!("encoding {enc}\n").as_bytes());
835 }
836 out.push(b'\n');
837 if let Some(raw) = &c.raw_message {
838 out.extend_from_slice(raw);
839 } else if !c.message.is_empty() {
840 out.extend_from_slice(c.message.as_bytes());
841 }
842 out
843}
844
#[cfg(test)]
mod commit_parse_tests {
    use super::*;

    /// A multi-line `gpgsig` header (continuation lines start with a space)
    /// must be skipped without disturbing the tree id or the message.
    #[test]
    fn parse_commit_skips_multiline_gpgsig_continuation() {
        // The trailing empty element makes the joined text end in a newline.
        let lines = [
            "tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904",
            "author A U Thor <author@example.com> 1 +0000",
            "committer C O Mitter <committer@example.com> 1 +0000",
            "gpgsig -----BEGIN PGP SIGNATURE-----",
            " abcdef",
            " -----END PGP SIGNATURE-----",
            "",
            "msg",
            "",
        ];
        let raw = lines.join("\n");

        let commit = parse_commit(raw.as_bytes()).expect("parse signed commit");

        assert_eq!(
            commit.tree.to_hex(),
            "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
        );
        assert_eq!(commit.message, "msg\n");
    }
}