1use std::fmt;
22use std::str::FromStr;
23
24use crate::commit_encoding;
25use crate::error::{Error, Result};
26
/// A 20-byte object identifier.
///
/// The bytes are stored raw; the textual form is the 40-character lowercase
/// hex encoding produced by [`ObjectId::to_hex`].
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ObjectId([u8; 20]);
30
31impl ObjectId {
32 #[must_use]
37 pub const fn zero() -> Self {
38 Self([0u8; 20])
39 }
40
41 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
47 let arr: [u8; 20] = bytes
48 .try_into()
49 .map_err(|_| Error::InvalidObjectId(hex::encode(bytes)))?;
50 Ok(Self(arr))
51 }
52
53 #[must_use]
55 pub fn as_bytes(&self) -> &[u8; 20] {
56 &self.0
57 }
58
59 #[must_use]
61 pub fn is_zero(&self) -> bool {
62 self.0 == [0u8; 20]
63 }
64
65 #[must_use]
67 pub fn to_hex(&self) -> String {
68 hex::encode(self.0)
69 }
70
71 #[must_use]
75 pub fn loose_prefix(&self) -> String {
76 hex::encode(&self.0[..1])
77 }
78
79 pub fn from_hex(s: &str) -> Result<Self> {
86 s.parse()
87 }
88
89 #[must_use]
91 pub fn loose_suffix(&self) -> String {
92 hex::encode(&self.0[1..])
93 }
94}
95
96impl fmt::Display for ObjectId {
97 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98 f.write_str(&self.to_hex())
99 }
100}
101
102impl fmt::Debug for ObjectId {
103 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
104 write!(f, "ObjectId({})", self.to_hex())
105 }
106}
107
108impl FromStr for ObjectId {
109 type Err = Error;
110
111 fn from_str(s: &str) -> Result<Self> {
112 if s.len() != 40 {
113 return Err(Error::InvalidObjectId(s.to_owned()));
114 }
115 let bytes = hex::decode(s).map_err(|_| Error::InvalidObjectId(s.to_owned()))?;
116 Self::from_bytes(&bytes)
117 }
118}
119
/// The kind of a stored object.
///
/// Each variant has a canonical lowercase keyword (see [`ObjectKind::as_str`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectKind {
    /// Keyword `blob`.
    Blob,
    /// Keyword `tree`.
    Tree,
    /// Keyword `commit`.
    Commit,
    /// Keyword `tag`.
    Tag,
}
132
133impl ObjectKind {
134 pub fn from_bytes(b: &[u8]) -> Result<Self> {
140 match b {
141 b"blob" => Ok(Self::Blob),
142 b"tree" => Ok(Self::Tree),
143 b"commit" => Ok(Self::Commit),
144 b"tag" => Ok(Self::Tag),
145 other => Err(Error::UnknownObjectType(
146 String::from_utf8_lossy(other).into_owned(),
147 )),
148 }
149 }
150
151 #[must_use]
156 pub fn from_tag_type_field(line: &[u8]) -> Option<Self> {
157 fn keyword_matches(canonical: &[u8], field: &[u8]) -> bool {
158 if field.is_empty() {
159 return false;
160 }
161 for (i, &bc) in field.iter().enumerate() {
162 let sc = canonical.get(i).copied().unwrap_or(0);
163 if sc != bc {
164 return false;
165 }
166 }
167 canonical.get(field.len()).copied().unwrap_or(0) == 0
168 }
169
170 const NAMES: &[(ObjectKind, &[u8])] = &[
171 (ObjectKind::Blob, b"blob"),
172 (ObjectKind::Tree, b"tree"),
173 (ObjectKind::Commit, b"commit"),
174 (ObjectKind::Tag, b"tag"),
175 ];
176 for &(kind, name) in NAMES {
177 if keyword_matches(name, line) {
178 return Some(kind);
179 }
180 }
181 None
182 }
183
184 #[must_use]
186 pub fn as_str(&self) -> &'static str {
187 match self {
188 Self::Blob => "blob",
189 Self::Tree => "tree",
190 Self::Commit => "commit",
191 Self::Tag => "tag",
192 }
193 }
194}
195
196impl fmt::Display for ObjectKind {
197 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
198 f.write_str(self.as_str())
199 }
200}
201
202impl FromStr for ObjectKind {
203 type Err = Error;
204
205 fn from_str(s: &str) -> Result<Self> {
206 Self::from_bytes(s.as_bytes())
207 }
208}
209
/// An object held in memory: its kind plus its payload bytes.
#[derive(Debug, Clone)]
pub struct Object {
    /// The kind of this object.
    pub kind: ObjectKind,
    /// The payload, without the `<kind> <len>\0` header that
    /// `to_store_bytes` prepends.
    pub data: Vec<u8>,
}
218
219impl Object {
220 #[must_use]
222 pub fn new(kind: ObjectKind, data: Vec<u8>) -> Self {
223 Self { kind, data }
224 }
225
226 #[must_use]
228 pub fn to_store_bytes(&self) -> Vec<u8> {
229 let header = format!("{} {}\0", self.kind, self.data.len());
230 let mut out = Vec::with_capacity(header.len() + self.data.len());
231 out.extend_from_slice(header.as_bytes());
232 out.extend_from_slice(&self.data);
233 out
234 }
235}
236
/// One entry of a tree object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeEntry {
    /// Numeric mode; it is read and written as octal text.
    pub mode: u32,
    /// Entry name as raw bytes (not required to be UTF-8).
    pub name: Vec<u8>,
    /// Id of the object this entry refers to.
    pub oid: ObjectId,
}
247
248impl TreeEntry {
249 #[must_use]
253 pub fn mode_str(&self) -> String {
254 if self.mode == 0o040000 {
256 "40000".to_owned()
257 } else {
258 format!("{:o}", self.mode)
259 }
260 }
261}
262
263pub fn parse_tree(data: &[u8]) -> Result<Vec<TreeEntry>> {
274 let mut entries = Vec::new();
275 let mut pos = 0;
276
277 while pos < data.len() {
278 let sp = data[pos..]
280 .iter()
281 .position(|&b| b == b' ')
282 .ok_or_else(|| Error::CorruptObject("tree entry missing space".to_owned()))?;
283 let mode_bytes = &data[pos..pos + sp];
284 let mode = std::str::from_utf8(mode_bytes)
285 .ok()
286 .and_then(|s| u32::from_str_radix(s, 8).ok())
287 .ok_or_else(|| {
288 Error::CorruptObject(format!(
289 "invalid tree mode: {}",
290 String::from_utf8_lossy(mode_bytes)
291 ))
292 })?;
293 pos += sp + 1;
294
295 let nul = data[pos..]
297 .iter()
298 .position(|&b| b == 0)
299 .ok_or_else(|| Error::CorruptObject("tree entry missing NUL".to_owned()))?;
300 let name = data[pos..pos + nul].to_vec();
301 pos += nul + 1;
302
303 if pos + 20 > data.len() {
304 return Err(Error::CorruptObject("tree entry truncated SHA".to_owned()));
305 }
306 let oid = ObjectId::from_bytes(&data[pos..pos + 20])?;
307 pos += 20;
308
309 entries.push(TreeEntry { mode, name, oid });
310 }
311
312 Ok(entries)
313}
314
315#[must_use]
320pub fn serialize_tree(entries: &[TreeEntry]) -> Vec<u8> {
321 let mut out = Vec::new();
322 for e in entries {
323 out.extend_from_slice(e.mode_str().as_bytes());
324 out.push(b' ');
325 out.extend_from_slice(&e.name);
326 out.push(0);
327 out.extend_from_slice(e.oid.as_bytes());
328 }
329 out
330}
331
/// Orders two tree entry names, treating tree names as if they ended in `/`.
///
/// The names are compared bytewise over their common length. If that prefix
/// is equal, the next byte of each name decides; a name that has run out
/// contributes `/` when it is a tree and NUL otherwise.
#[must_use]
pub fn tree_entry_cmp(
    a_name: &[u8],
    a_is_tree: bool,
    b_name: &[u8],
    b_is_tree: bool,
) -> std::cmp::Ordering {
    let shared = a_name.len().min(b_name.len());

    // Byte that follows the shared prefix, substituting the implicit trailer
    // once the name is exhausted.
    let next_byte = |name: &[u8], is_tree: bool| -> u8 {
        match name.get(shared) {
            Some(&byte) => byte,
            None if is_tree => b'/',
            None => 0,
        }
    };

    a_name[..shared]
        .cmp(&b_name[..shared])
        .then_with(|| next_byte(a_name, a_is_tree).cmp(&next_byte(b_name, b_is_tree)))
}
364
/// The parsed contents of a commit object.
#[derive(Debug, Clone)]
pub struct CommitData {
    /// Id taken from the `tree` header.
    pub tree: ObjectId,
    /// Ids from the `parent` headers, in the order they appear.
    pub parents: Vec<ObjectId>,
    /// `author` header value, decoded to text.
    pub author: String,
    /// `committer` header value, decoded to text.
    pub committer: String,
    /// `author` header value as raw bytes; `serialize_commit` writes these
    /// instead of `author` unless they are empty.
    pub author_raw: Vec<u8>,
    /// `committer` header value as raw bytes; `serialize_commit` writes these
    /// instead of `committer` unless they are empty.
    pub committer_raw: Vec<u8>,
    /// Value of the `encoding` header, if present.
    pub encoding: Option<String>,
    /// Message body, decoded to text.
    pub message: String,
    // `parse_commit` also fills this in when a non-empty body does not end
    // with a newline, not only when the body is not valid UTF-8.
    #[doc = "Optional raw message bytes for non-UTF-8 messages."]
    pub raw_message: Option<Vec<u8>>,
}
391
392pub fn parse_commit(data: &[u8]) -> Result<CommitData> {
398 #[derive(Clone, Copy)]
402 enum Continuation {
403 Author,
404 Committer,
405 Multiline,
406 Ignore,
407 }
408
409 let mut pos = 0usize;
410 let mut tree = None;
411 let mut parents = Vec::new();
412 let mut author_raw: Option<Vec<u8>> = None;
413 let mut committer_raw: Option<Vec<u8>> = None;
414 let mut encoding = None;
415 let mut cont = Continuation::Ignore;
416
417 while pos < data.len() {
418 let line_start = pos;
419 let mut line_end = pos;
420 while line_end < data.len() && data[line_end] != b'\n' {
421 line_end += 1;
422 }
423 let line = &data[line_start..line_end];
424 let after_nl = line_end.saturating_add(1);
425 if line.is_empty() {
426 let body = data.get(after_nl..).unwrap_or_default();
427 let message = commit_encoding::decode_bytes(encoding.as_deref(), body);
428 let raw_message = if body.is_empty() {
432 None
433 } else if std::str::from_utf8(body).is_err() || !body.ends_with(b"\n") {
434 Some(body.to_vec())
435 } else {
436 None
437 };
438 let author_bytes = author_raw
439 .ok_or_else(|| Error::CorruptObject("commit missing author header".to_owned()))?;
440 let committer_bytes = committer_raw.ok_or_else(|| {
441 Error::CorruptObject("commit missing committer header".to_owned())
442 })?;
443 let author = commit_encoding::decode_bytes(encoding.as_deref(), &author_bytes);
444 let committer = commit_encoding::decode_bytes(encoding.as_deref(), &committer_bytes);
445 return Ok(CommitData {
446 tree: tree
447 .ok_or_else(|| Error::CorruptObject("commit missing tree header".to_owned()))?,
448 parents,
449 author,
450 committer,
451 author_raw: author_bytes,
452 committer_raw: committer_bytes,
453 encoding,
454 message,
455 raw_message,
456 });
457 }
458
459 if line.first() == Some(&b' ') {
460 let rest = line.get(1..).unwrap_or_default();
461 match cont {
462 Continuation::Author => {
463 let a = author_raw.as_mut().ok_or_else(|| {
464 Error::CorruptObject("orphan header continuation".to_owned())
465 })?;
466 a.extend_from_slice(rest);
467 }
468 Continuation::Committer => {
469 let c = committer_raw.as_mut().ok_or_else(|| {
470 Error::CorruptObject("orphan header continuation".to_owned())
471 })?;
472 c.extend_from_slice(rest);
473 }
474 Continuation::Multiline | Continuation::Ignore => {}
475 }
476 pos = after_nl;
477 continue;
478 }
479
480 let key_end = line
481 .iter()
482 .position(|&b| b == b' ')
483 .ok_or_else(|| Error::CorruptObject("malformed commit header line".to_owned()))?;
484 let key = &line[..key_end];
485 let rest = line.get(key_end + 1..).unwrap_or_default();
486
487 match key {
488 b"tree" => {
489 let line_str = std::str::from_utf8(rest).map_err(|_| {
490 Error::CorruptObject("commit tree line is not valid UTF-8".to_owned())
491 })?;
492 tree = Some(line_str.trim().parse::<ObjectId>()?);
493 cont = Continuation::Ignore;
494 }
495 b"parent" => {
496 let line_str = std::str::from_utf8(rest).map_err(|_| {
497 Error::CorruptObject("commit parent line is not valid UTF-8".to_owned())
498 })?;
499 parents.push(line_str.trim().parse::<ObjectId>()?);
500 cont = Continuation::Ignore;
501 }
502 b"author" => {
503 author_raw = Some(rest.to_vec());
504 cont = Continuation::Author;
505 }
506 b"committer" => {
507 committer_raw = Some(rest.to_vec());
508 cont = Continuation::Committer;
509 }
510 b"encoding" => {
511 let line_str = std::str::from_utf8(rest).map_err(|_| {
512 Error::CorruptObject("commit encoding line is not valid UTF-8".to_owned())
513 })?;
514 encoding = Some(line_str.to_owned());
515 cont = Continuation::Ignore;
516 }
517 _ => {
518 cont = Continuation::Multiline;
519 }
520 }
521 pos = after_nl;
522 }
523
524 Err(Error::CorruptObject(
525 "commit missing blank line before message".to_owned(),
526 ))
527}
528
/// Looks up a header field in the header section of a tag-style payload.
///
/// Lines are scanned from the start until the first empty line (or the end of
/// `data`). The first line beginning with `prefix` wins; the remainder of that
/// line is converted lossily to UTF-8, trimmed of surrounding whitespace, and
/// returned. Returns `None` when no header line starts with `prefix`.
#[must_use]
pub fn tag_header_field(data: &[u8], prefix: &[u8]) -> Option<String> {
    data.split(|&byte| byte == b'\n')
        // An empty line ends the header section.
        .take_while(|line| !line.is_empty())
        .find_map(|line| line.strip_prefix(prefix))
        .map(|value| String::from_utf8_lossy(value).trim().to_owned())
}
551
552#[must_use]
554pub fn tag_object_line_oid(data: &[u8]) -> Option<ObjectId> {
555 let s = tag_header_field(data, b"object ")?;
556 s.parse().ok()
557}
558
/// The parsed contents of a tag object.
#[derive(Debug, Clone)]
pub struct TagData {
    /// Id taken from the `object` header.
    pub object: ObjectId,
    /// Value of the `type` header.
    pub object_type: String,
    /// Value of the `tag` header.
    pub tag: String,
    /// Value of the `tagger` header, if present.
    pub tagger: Option<String>,
    /// Message text that follows the blank line after the headers.
    pub message: String,
}
573
574pub fn parse_tag(data: &[u8]) -> Result<TagData> {
580 let text = std::str::from_utf8(data)
581 .map_err(|_| Error::CorruptObject("tag is not valid UTF-8".to_owned()))?;
582
583 let mut object = None;
584 let mut object_type = None;
585 let mut tag_name = None;
586 let mut tagger = None;
587 let mut message = String::new();
588 let mut in_message = false;
589
590 for line in text.split('\n') {
591 if in_message {
592 message.push_str(line);
593 message.push('\n');
594 continue;
595 }
596 if line.is_empty() {
597 in_message = true;
598 continue;
599 }
600 if let Some(rest) = line.strip_prefix("object ") {
601 object = Some(rest.trim().parse::<ObjectId>()?);
602 } else if let Some(rest) = line.strip_prefix("type ") {
603 let typ = rest.trim();
604 if ObjectKind::from_tag_type_field(typ.as_bytes()).is_none() {
605 return Err(Error::CorruptObject(format!(
606 "invalid 'type' value in tag: {typ}"
607 )));
608 }
609 object_type = Some(typ.to_owned());
610 } else if let Some(rest) = line.strip_prefix("tag ") {
611 tag_name = Some(rest.trim().to_owned());
612 } else if let Some(rest) = line.strip_prefix("tagger ") {
613 tagger = Some(rest.to_owned());
614 }
615 }
616
617 if message.ends_with('\n') {
619 message.pop();
620 }
621
622 Ok(TagData {
623 object: object
624 .ok_or_else(|| Error::CorruptObject("tag missing object header".to_owned()))?,
625 object_type: object_type
626 .ok_or_else(|| Error::CorruptObject("tag missing type header".to_owned()))?,
627 tag: tag_name.ok_or_else(|| Error::CorruptObject("tag missing tag header".to_owned()))?,
628 tagger,
629 message,
630 })
631}
632
633#[must_use]
638pub fn serialize_tag(t: &TagData) -> Vec<u8> {
639 let mut out = String::new();
640 out.push_str(&format!("object {}\n", t.object));
641 out.push_str(&format!("type {}\n", t.object_type));
642 out.push_str(&format!("tag {}\n", t.tag));
643 if let Some(ref tagger) = t.tagger {
644 out.push_str(&format!("tagger {tagger}\n"));
645 }
646 out.push('\n');
647 let msg = t.message.trim_end_matches('\n');
649 if !msg.is_empty() {
650 out.push_str(msg);
651 out.push('\n');
652 }
653 out.into_bytes()
654}
655
656#[must_use]
664pub fn serialize_commit(c: &CommitData) -> Vec<u8> {
665 let mut out = Vec::new();
666 out.extend_from_slice(format!("tree {}\n", c.tree).as_bytes());
667 for p in &c.parents {
668 out.extend_from_slice(format!("parent {p}\n").as_bytes());
669 }
670 out.extend_from_slice(b"author ");
671 if c.author_raw.is_empty() {
672 out.extend_from_slice(c.author.as_bytes());
673 } else {
674 out.extend_from_slice(&c.author_raw);
675 }
676 out.push(b'\n');
677 out.extend_from_slice(b"committer ");
678 if c.committer_raw.is_empty() {
679 out.extend_from_slice(c.committer.as_bytes());
680 } else {
681 out.extend_from_slice(&c.committer_raw);
682 }
683 out.push(b'\n');
684 if let Some(enc) = &c.encoding {
685 out.extend_from_slice(format!("encoding {enc}\n").as_bytes());
686 }
687 out.push(b'\n');
688 if let Some(raw) = &c.raw_message {
689 out.extend_from_slice(raw);
690 } else if !c.message.is_empty() {
691 out.extend_from_slice(c.message.as_bytes());
692 }
693 out
694}
695
#[cfg(test)]
mod commit_parse_tests {
    use super::*;

    /// A multi-line `gpgsig` header and its space-prefixed continuation lines
    /// must not disturb the tree id or the message.
    #[test]
    fn parse_commit_skips_multiline_gpgsig_continuation() {
        // The trailing empty element yields the final newline after "msg".
        let lines = [
            "tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904",
            "author A U Thor <author@example.com> 1 +0000",
            "committer C O Mitter <committer@example.com> 1 +0000",
            "gpgsig -----BEGIN PGP SIGNATURE-----",
            " abcdef",
            " -----END PGP SIGNATURE-----",
            "",
            "msg",
            "",
        ];
        let raw = lines.join("\n");

        let parsed = parse_commit(raw.as_bytes()).expect("parse signed commit");
        assert_eq!(
            parsed.tree.to_hex(),
            "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
        );
        assert_eq!(parsed.message, "msg\n");
    }
}