1use std::fmt;
22use std::str::FromStr;
23
24use crate::commit_encoding;
25use crate::error::{Error, Result};
26
/// A 20-byte object identifier held as raw bytes.
///
/// Equality, ordering and hashing are derived from the underlying byte array.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ObjectId([u8; 20]);
30
31impl ObjectId {
32 #[must_use]
37 pub const fn zero() -> Self {
38 Self([0u8; 20])
39 }
40
41 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
47 let arr: [u8; 20] = bytes
48 .try_into()
49 .map_err(|_| Error::InvalidObjectId(hex::encode(bytes)))?;
50 Ok(Self(arr))
51 }
52
53 #[must_use]
55 pub fn as_bytes(&self) -> &[u8; 20] {
56 &self.0
57 }
58
59 #[must_use]
61 pub fn is_zero(&self) -> bool {
62 self.0 == [0u8; 20]
63 }
64
65 #[must_use]
67 pub fn to_hex(&self) -> String {
68 hex::encode(self.0)
69 }
70
71 #[must_use]
75 pub fn loose_prefix(&self) -> String {
76 hex::encode(&self.0[..1])
77 }
78
79 pub fn from_hex(s: &str) -> Result<Self> {
86 s.parse()
87 }
88
89 #[must_use]
91 pub fn loose_suffix(&self) -> String {
92 hex::encode(&self.0[1..])
93 }
94}
95
96impl fmt::Display for ObjectId {
97 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98 f.write_str(&self.to_hex())
99 }
100}
101
102impl fmt::Debug for ObjectId {
103 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
104 write!(f, "ObjectId({})", self.to_hex())
105 }
106}
107
108impl FromStr for ObjectId {
109 type Err = Error;
110
111 fn from_str(s: &str) -> Result<Self> {
112 if s.len() != 40 {
113 return Err(Error::InvalidObjectId(s.to_owned()));
114 }
115 let bytes = hex::decode(s).map_err(|_| Error::InvalidObjectId(s.to_owned()))?;
116 Self::from_bytes(&bytes)
117 }
118}
119
/// The four object kinds recognised by this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectKind {
    /// Spelled `blob` in textual form.
    Blob,
    /// Spelled `tree` in textual form.
    Tree,
    /// Spelled `commit` in textual form.
    Commit,
    /// Spelled `tag` in textual form.
    Tag,
}
132
133impl ObjectKind {
134 pub fn from_bytes(b: &[u8]) -> Result<Self> {
140 match b {
141 b"blob" => Ok(Self::Blob),
142 b"tree" => Ok(Self::Tree),
143 b"commit" => Ok(Self::Commit),
144 b"tag" => Ok(Self::Tag),
145 other => Err(Error::UnknownObjectType(
146 String::from_utf8_lossy(other).into_owned(),
147 )),
148 }
149 }
150
151 #[must_use]
156 pub fn from_tag_type_field(line: &[u8]) -> Option<Self> {
157 fn keyword_matches(canonical: &[u8], field: &[u8]) -> bool {
158 if field.is_empty() {
159 return false;
160 }
161 for (i, &bc) in field.iter().enumerate() {
162 let sc = canonical.get(i).copied().unwrap_or(0);
163 if sc != bc {
164 return false;
165 }
166 }
167 canonical.get(field.len()).copied().unwrap_or(0) == 0
168 }
169
170 const NAMES: &[(ObjectKind, &[u8])] = &[
171 (ObjectKind::Blob, b"blob"),
172 (ObjectKind::Tree, b"tree"),
173 (ObjectKind::Commit, b"commit"),
174 (ObjectKind::Tag, b"tag"),
175 ];
176 for &(kind, name) in NAMES {
177 if keyword_matches(name, line) {
178 return Some(kind);
179 }
180 }
181 None
182 }
183
184 #[must_use]
186 pub fn as_str(&self) -> &'static str {
187 match self {
188 Self::Blob => "blob",
189 Self::Tree => "tree",
190 Self::Commit => "commit",
191 Self::Tag => "tag",
192 }
193 }
194}
195
196impl fmt::Display for ObjectKind {
197 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
198 f.write_str(self.as_str())
199 }
200}
201
202impl FromStr for ObjectKind {
203 type Err = Error;
204
205 fn from_str(s: &str) -> Result<Self> {
206 Self::from_bytes(s.as_bytes())
207 }
208}
209
/// An object's kind paired with its payload bytes.
#[derive(Debug, Clone)]
pub struct Object {
    /// Kind named in the storage header.
    pub kind: ObjectKind,
    /// Payload bytes that follow the storage header.
    pub data: Vec<u8>,
}
218
219impl Object {
220 #[must_use]
222 pub fn new(kind: ObjectKind, data: Vec<u8>) -> Self {
223 Self { kind, data }
224 }
225
226 #[must_use]
228 pub fn to_store_bytes(&self) -> Vec<u8> {
229 let header = format!("{} {}\0", self.kind, self.data.len());
230 let mut out = Vec::with_capacity(header.len() + self.data.len());
231 out.extend_from_slice(header.as_bytes());
232 out.extend_from_slice(&self.data);
233 out
234 }
235}
236
/// One entry of a tree object: mode, name and target identifier.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeEntry {
    /// Numeric mode; parsed from and written as octal text.
    pub mode: u32,
    /// Entry name as raw bytes (no UTF-8 requirement is enforced here).
    pub name: Vec<u8>,
    /// Identifier of the object the entry points at.
    pub oid: ObjectId,
}
247
248impl TreeEntry {
249 #[must_use]
253 pub fn mode_str(&self) -> String {
254 if self.mode == 0o040000 {
256 "40000".to_owned()
257 } else {
258 format!("{:o}", self.mode)
259 }
260 }
261}
262
263pub fn parse_tree(data: &[u8]) -> Result<Vec<TreeEntry>> {
274 let mut entries = Vec::new();
275 let mut pos = 0;
276
277 while pos < data.len() {
278 let sp = data[pos..]
280 .iter()
281 .position(|&b| b == b' ')
282 .ok_or_else(|| Error::CorruptObject("tree entry missing space".to_owned()))?;
283 let mode_bytes = &data[pos..pos + sp];
284 let mode = std::str::from_utf8(mode_bytes)
285 .ok()
286 .and_then(|s| u32::from_str_radix(s, 8).ok())
287 .ok_or_else(|| {
288 Error::CorruptObject(format!(
289 "invalid tree mode: {}",
290 String::from_utf8_lossy(mode_bytes)
291 ))
292 })?;
293 pos += sp + 1;
294
295 let nul = data[pos..]
297 .iter()
298 .position(|&b| b == 0)
299 .ok_or_else(|| Error::CorruptObject("tree entry missing NUL".to_owned()))?;
300 let name = data[pos..pos + nul].to_vec();
301 pos += nul + 1;
302
303 if pos + 20 > data.len() {
304 return Err(Error::CorruptObject("tree entry truncated SHA".to_owned()));
305 }
306 let oid = ObjectId::from_bytes(&data[pos..pos + 20])?;
307 pos += 20;
308
309 entries.push(TreeEntry { mode, name, oid });
310 }
311
312 Ok(entries)
313}
314
315#[must_use]
320pub fn serialize_tree(entries: &[TreeEntry]) -> Vec<u8> {
321 let mut out = Vec::new();
322 for e in entries {
323 out.extend_from_slice(e.mode_str().as_bytes());
324 out.push(b' ');
325 out.extend_from_slice(&e.name);
326 out.push(0);
327 out.extend_from_slice(e.oid.as_bytes());
328 }
329 out
330}
331
/// Orders two tree entry names, treating a tree's name as if it ended in `/`.
///
/// The shared-length prefixes are compared bytewise first. On a tie, the next
/// byte of each name decides; a name that has run out contributes `/` when it
/// is a tree and NUL otherwise.
#[must_use]
pub fn tree_entry_cmp(
    a_name: &[u8],
    a_is_tree: bool,
    b_name: &[u8],
    b_is_tree: bool,
) -> std::cmp::Ordering {
    // First byte past the shared prefix, or the implicit terminator.
    fn deciding_byte(tail: &[u8], is_tree: bool) -> u8 {
        match tail.first() {
            Some(&byte) => byte,
            None if is_tree => b'/',
            None => 0,
        }
    }

    let shared = a_name.len().min(b_name.len());
    let (a_head, a_tail) = a_name.split_at(shared);
    let (b_head, b_tail) = b_name.split_at(shared);

    a_head
        .cmp(b_head)
        .then_with(|| deciding_byte(a_tail, a_is_tree).cmp(&deciding_byte(b_tail, b_is_tree)))
}
364
/// The parsed fields of a commit object.
#[derive(Debug, Clone)]
pub struct CommitData {
    /// Identifier from the `tree` header.
    pub tree: ObjectId,
    /// Identifiers from the `parent` headers, in the order they appear.
    pub parents: Vec<ObjectId>,
    /// Decoded text of the `author` header value.
    pub author: String,
    /// Decoded text of the `committer` header value.
    pub committer: String,
    /// Raw bytes of the `author` header value; when non-empty these are
    /// written back verbatim on serialisation instead of `author`.
    pub author_raw: Vec<u8>,
    /// Raw bytes of the `committer` header value; when non-empty these are
    /// written back verbatim on serialisation instead of `committer`.
    pub committer_raw: Vec<u8>,
    /// Value of the `encoding` header, if the commit has one.
    pub encoding: Option<String>,
    /// Decoded commit message (everything after the first blank line).
    pub message: String,
    #[doc = "Optional raw message bytes for non-UTF-8 messages."]
    pub raw_message: Option<Vec<u8>>,
}
391
/// Parses the body of a commit object into a [`CommitData`].
///
/// Header lines are read one at a time until the first empty line; everything
/// after that empty line is the message. Recognised headers are `tree`,
/// `parent`, `author`, `committer` and `encoding`; any other header is
/// skipped together with its continuation lines (lines starting with a
/// space). Continuation lines that follow `author` or `committer` are
/// appended to that header's raw bytes with the leading space removed.
///
/// # Errors
///
/// Returns [`Error::CorruptObject`] when:
/// - the `author`, `committer` or `tree` header is missing (checked in that
///   order once the blank line is reached),
/// - a header line contains no space,
/// - the value of `tree`, `parent` or `encoding` is not valid UTF-8,
/// - no blank line is found before the data ends.
///
/// Errors from parsing the `tree`/`parent` identifiers are propagated as-is.
pub fn parse_commit(data: &[u8]) -> Result<CommitData> {
    // Which header the most recent non-continuation line belonged to, so a
    // following continuation line knows where (or whether) to append.
    #[derive(Clone, Copy)]
    enum Continuation {
        Author,
        Committer,
        Multiline,
        Ignore,
    }

    let mut pos = 0usize;
    let mut tree = None;
    let mut parents = Vec::new();
    let mut author_raw: Option<Vec<u8>> = None;
    let mut committer_raw: Option<Vec<u8>> = None;
    let mut encoding: Option<String> = None;
    let mut cont = Continuation::Ignore;

    while pos < data.len() {
        // Locate the current line: [line_start, line_end) excludes the '\n'.
        let line_start = pos;
        let mut line_end = pos;
        while line_end < data.len() && data[line_end] != b'\n' {
            line_end += 1;
        }
        let line = &data[line_start..line_end];
        let after_nl = line_end.saturating_add(1);
        if line.is_empty() {
            // Blank line: the headers are done and the rest is the message.
            let body = data.get(after_nl..).unwrap_or_default();
            let message = commit_encoding::decode_bytes(encoding.as_deref(), body);
            // True only when an encoding header exists and is not a UTF-8 label.
            let has_non_utf8_encoding = encoding.as_deref().is_some_and(|label| {
                !label.eq_ignore_ascii_case("utf-8") && !label.eq_ignore_ascii_case("utf8")
            });
            // Keep the exact body bytes whenever the decoded `message` might
            // not reproduce them: declared non-UTF-8 encoding, invalid UTF-8,
            // or a body that lacks a trailing newline.
            let raw_message = if body.is_empty() {
                None
            } else if has_non_utf8_encoding
                || std::str::from_utf8(body).is_err()
                || !body.ends_with(b"\n")
            {
                Some(body.to_vec())
            } else {
                None
            };
            let author_bytes = author_raw
                .ok_or_else(|| Error::CorruptObject("commit missing author header".to_owned()))?;
            let committer_bytes = committer_raw.ok_or_else(|| {
                Error::CorruptObject("commit missing committer header".to_owned())
            })?;
            let author = commit_encoding::decode_bytes(encoding.as_deref(), &author_bytes);
            let committer = commit_encoding::decode_bytes(encoding.as_deref(), &committer_bytes);
            return Ok(CommitData {
                tree: tree
                    .ok_or_else(|| Error::CorruptObject("commit missing tree header".to_owned()))?,
                parents,
                author,
                committer,
                author_raw: author_bytes,
                committer_raw: committer_bytes,
                encoding,
                message,
                raw_message,
            });
        }

        if line.first() == Some(&b' ') {
            // Continuation line: strip the leading space and route the rest
            // according to the header it continues.
            let rest = line.get(1..).unwrap_or_default();
            match cont {
                Continuation::Author => {
                    let a = author_raw.as_mut().ok_or_else(|| {
                        Error::CorruptObject("orphan header continuation".to_owned())
                    })?;
                    a.extend_from_slice(rest);
                }
                Continuation::Committer => {
                    let c = committer_raw.as_mut().ok_or_else(|| {
                        Error::CorruptObject("orphan header continuation".to_owned())
                    })?;
                    c.extend_from_slice(rest);
                }
                // Continuations of unknown or single-line headers are dropped.
                Continuation::Multiline | Continuation::Ignore => {}
            }
            pos = after_nl;
            continue;
        }

        // Regular header line: `<key> <value>`, split at the first space.
        let key_end = line
            .iter()
            .position(|&b| b == b' ')
            .ok_or_else(|| Error::CorruptObject("malformed commit header line".to_owned()))?;
        let key = &line[..key_end];
        let rest = line.get(key_end + 1..).unwrap_or_default();

        match key {
            b"tree" => {
                let line_str = std::str::from_utf8(rest).map_err(|_| {
                    Error::CorruptObject("commit tree line is not valid UTF-8".to_owned())
                })?;
                // A later `tree` header overwrites an earlier one.
                tree = Some(line_str.trim().parse::<ObjectId>()?);
                cont = Continuation::Ignore;
            }
            b"parent" => {
                let line_str = std::str::from_utf8(rest).map_err(|_| {
                    Error::CorruptObject("commit parent line is not valid UTF-8".to_owned())
                })?;
                parents.push(line_str.trim().parse::<ObjectId>()?);
                cont = Continuation::Ignore;
            }
            b"author" => {
                // Stored as raw bytes; decoding happens once the encoding is known.
                author_raw = Some(rest.to_vec());
                cont = Continuation::Author;
            }
            b"committer" => {
                committer_raw = Some(rest.to_vec());
                cont = Continuation::Committer;
            }
            b"encoding" => {
                let line_str = std::str::from_utf8(rest).map_err(|_| {
                    Error::CorruptObject("commit encoding line is not valid UTF-8".to_owned())
                })?;
                // The label is stored exactly as written (not trimmed).
                encoding = Some(line_str.to_owned());
                cont = Continuation::Ignore;
            }
            _ => {
                // Unknown header (for example a signature): skip it and mark
                // that its continuation lines should be skipped as well.
                cont = Continuation::Multiline;
            }
        }
        pos = after_nl;
    }

    // Ran out of data without ever seeing the blank separator line.
    Err(Error::CorruptObject(
        "commit missing blank line before message".to_owned(),
    ))
}
534
/// Looks up a header value in raw object data by its line prefix.
///
/// Lines are scanned from the start until the first empty line (or the end
/// of `data`). The first line that begins with `prefix` wins; the bytes after
/// the prefix are converted lossily to UTF-8 and trimmed of surrounding
/// whitespace. Returns `None` when no header line carries the prefix.
#[must_use]
pub fn tag_header_field(data: &[u8], prefix: &[u8]) -> Option<String> {
    data.split(|&byte| byte == b'\n')
        // The header block ends at the first blank line.
        .take_while(|line| !line.is_empty())
        .find_map(|line| line.strip_prefix(prefix))
        .map(|value| String::from_utf8_lossy(value).trim().to_owned())
}
557
558#[must_use]
560pub fn tag_object_line_oid(data: &[u8]) -> Option<ObjectId> {
561 let s = tag_header_field(data, b"object ")?;
562 s.parse().ok()
563}
564
/// The parsed fields of an annotated tag object.
#[derive(Debug, Clone)]
pub struct TagData {
    /// Identifier from the `object` header.
    pub object: ObjectId,
    /// Text of the `type` header value.
    pub object_type: String,
    /// Text of the `tag` header value.
    pub tag: String,
    /// Text following `tagger `, when that header is present.
    pub tagger: Option<String>,
    /// Tag message (the text after the first blank line).
    pub message: String,
}
579
580pub fn parse_tag(data: &[u8]) -> Result<TagData> {
586 let text = std::str::from_utf8(data)
587 .map_err(|_| Error::CorruptObject("tag is not valid UTF-8".to_owned()))?;
588
589 let mut object = None;
590 let mut object_type = None;
591 let mut tag_name = None;
592 let mut tagger = None;
593 let mut message = String::new();
594 let mut in_message = false;
595
596 for line in text.split('\n') {
597 if in_message {
598 message.push_str(line);
599 message.push('\n');
600 continue;
601 }
602 if line.is_empty() {
603 in_message = true;
604 continue;
605 }
606 if let Some(rest) = line.strip_prefix("object ") {
607 object = Some(rest.trim().parse::<ObjectId>()?);
608 } else if let Some(rest) = line.strip_prefix("type ") {
609 let typ = rest.trim();
610 if ObjectKind::from_tag_type_field(typ.as_bytes()).is_none() {
611 return Err(Error::CorruptObject(format!(
612 "invalid 'type' value in tag: {typ}"
613 )));
614 }
615 object_type = Some(typ.to_owned());
616 } else if let Some(rest) = line.strip_prefix("tag ") {
617 tag_name = Some(rest.trim().to_owned());
618 } else if let Some(rest) = line.strip_prefix("tagger ") {
619 tagger = Some(rest.to_owned());
620 }
621 }
622
623 if message.ends_with('\n') {
625 message.pop();
626 }
627
628 Ok(TagData {
629 object: object
630 .ok_or_else(|| Error::CorruptObject("tag missing object header".to_owned()))?,
631 object_type: object_type
632 .ok_or_else(|| Error::CorruptObject("tag missing type header".to_owned()))?,
633 tag: tag_name.ok_or_else(|| Error::CorruptObject("tag missing tag header".to_owned()))?,
634 tagger,
635 message,
636 })
637}
638
639#[must_use]
644pub fn serialize_tag(t: &TagData) -> Vec<u8> {
645 let mut out = String::new();
646 out.push_str(&format!("object {}\n", t.object));
647 out.push_str(&format!("type {}\n", t.object_type));
648 out.push_str(&format!("tag {}\n", t.tag));
649 if let Some(ref tagger) = t.tagger {
650 out.push_str(&format!("tagger {tagger}\n"));
651 }
652 out.push('\n');
653 let msg = t.message.trim_end_matches('\n');
655 if !msg.is_empty() {
656 out.push_str(msg);
657 out.push('\n');
658 }
659 out.into_bytes()
660}
661
662#[must_use]
670pub fn serialize_commit(c: &CommitData) -> Vec<u8> {
671 let mut out = Vec::new();
672 out.extend_from_slice(format!("tree {}\n", c.tree).as_bytes());
673 for p in &c.parents {
674 out.extend_from_slice(format!("parent {p}\n").as_bytes());
675 }
676 out.extend_from_slice(b"author ");
677 if c.author_raw.is_empty() {
678 out.extend_from_slice(c.author.as_bytes());
679 } else {
680 out.extend_from_slice(&c.author_raw);
681 }
682 out.push(b'\n');
683 out.extend_from_slice(b"committer ");
684 if c.committer_raw.is_empty() {
685 out.extend_from_slice(c.committer.as_bytes());
686 } else {
687 out.extend_from_slice(&c.committer_raw);
688 }
689 out.push(b'\n');
690 if let Some(enc) = &c.encoding {
691 out.extend_from_slice(format!("encoding {enc}\n").as_bytes());
692 }
693 out.push(b'\n');
694 if let Some(raw) = &c.raw_message {
695 out.extend_from_slice(raw);
696 } else if !c.message.is_empty() {
697 out.extend_from_slice(c.message.as_bytes());
698 }
699 out
700}
701
#[cfg(test)]
mod commit_parse_tests {
    use super::*;

    /// A commit carrying a multi-line `gpgsig` header must still parse, with
    /// the signature's continuation lines skipped.
    #[test]
    fn parse_commit_skips_multiline_gpgsig_continuation() {
        const EMPTY_TREE: &str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
        const SIGNED_COMMIT: &str = concat!(
            "tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904\n",
            "author A U Thor <author@example.com> 1 +0000\n",
            "committer C O Mitter <committer@example.com> 1 +0000\n",
            "gpgsig -----BEGIN PGP SIGNATURE-----\n",
            " abcdef\n",
            " -----END PGP SIGNATURE-----\n",
            "\n",
            "msg\n",
        );

        let parsed = parse_commit(SIGNED_COMMIT.as_bytes()).expect("parse signed commit");

        assert_eq!(parsed.tree.to_hex(), EMPTY_TREE);
        assert_eq!(parsed.message, "msg\n");
    }
}