1use std::fmt;
22use std::str::FromStr;
23
24use crate::commit_encoding;
25use crate::error::{Error, Result};
26
27#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
29pub struct ObjectId([u8; 20]);
30
31impl ObjectId {
32 #[must_use]
37 pub const fn zero() -> Self {
38 Self([0u8; 20])
39 }
40
41 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
47 let arr: [u8; 20] = bytes
48 .try_into()
49 .map_err(|_| Error::InvalidObjectId(hex::encode(bytes)))?;
50 Ok(Self(arr))
51 }
52
53 #[must_use]
55 pub fn as_bytes(&self) -> &[u8; 20] {
56 &self.0
57 }
58
59 #[must_use]
61 pub fn is_zero(&self) -> bool {
62 self.0 == [0u8; 20]
63 }
64
65 #[must_use]
67 pub fn to_hex(&self) -> String {
68 hex::encode(self.0)
69 }
70
71 #[must_use]
75 pub fn loose_prefix(&self) -> String {
76 hex::encode(&self.0[..1])
77 }
78
79 pub fn from_hex(s: &str) -> Result<Self> {
86 s.parse()
87 }
88
89 #[must_use]
91 pub fn loose_suffix(&self) -> String {
92 hex::encode(&self.0[1..])
93 }
94}
95
96impl fmt::Display for ObjectId {
97 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98 f.write_str(&self.to_hex())
99 }
100}
101
102impl fmt::Debug for ObjectId {
103 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
104 write!(f, "ObjectId({})", self.to_hex())
105 }
106}
107
108impl FromStr for ObjectId {
109 type Err = Error;
110
111 fn from_str(s: &str) -> Result<Self> {
112 if s.len() != 40 {
113 return Err(Error::InvalidObjectId(s.to_owned()));
114 }
115 let bytes = hex::decode(s).map_err(|_| Error::InvalidObjectId(s.to_owned()))?;
116 Self::from_bytes(&bytes)
117 }
118}
119
120#[derive(Debug, Clone, Copy, PartialEq, Eq)]
122pub enum ObjectKind {
123 Blob,
125 Tree,
127 Commit,
129 Tag,
131}
132
133impl ObjectKind {
134 pub fn from_bytes(b: &[u8]) -> Result<Self> {
140 match b {
141 b"blob" => Ok(Self::Blob),
142 b"tree" => Ok(Self::Tree),
143 b"commit" => Ok(Self::Commit),
144 b"tag" => Ok(Self::Tag),
145 other => Err(Error::UnknownObjectType(
146 String::from_utf8_lossy(other).into_owned(),
147 )),
148 }
149 }
150
151 #[must_use]
153 pub fn as_str(&self) -> &'static str {
154 match self {
155 Self::Blob => "blob",
156 Self::Tree => "tree",
157 Self::Commit => "commit",
158 Self::Tag => "tag",
159 }
160 }
161}
162
163impl fmt::Display for ObjectKind {
164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 f.write_str(self.as_str())
166 }
167}
168
169impl FromStr for ObjectKind {
170 type Err = Error;
171
172 fn from_str(s: &str) -> Result<Self> {
173 Self::from_bytes(s.as_bytes())
174 }
175}
176
177#[derive(Debug, Clone)]
179pub struct Object {
180 pub kind: ObjectKind,
182 pub data: Vec<u8>,
184}
185
186impl Object {
187 #[must_use]
189 pub fn new(kind: ObjectKind, data: Vec<u8>) -> Self {
190 Self { kind, data }
191 }
192
193 #[must_use]
195 pub fn to_store_bytes(&self) -> Vec<u8> {
196 let header = format!("{} {}\0", self.kind, self.data.len());
197 let mut out = Vec::with_capacity(header.len() + self.data.len());
198 out.extend_from_slice(header.as_bytes());
199 out.extend_from_slice(&self.data);
200 out
201 }
202}
203
204#[derive(Debug, Clone, PartialEq, Eq)]
206pub struct TreeEntry {
207 pub mode: u32,
209 pub name: Vec<u8>,
211 pub oid: ObjectId,
213}
214
215impl TreeEntry {
216 #[must_use]
220 pub fn mode_str(&self) -> String {
221 if self.mode == 0o040000 {
223 "40000".to_owned()
224 } else {
225 format!("{:o}", self.mode)
226 }
227 }
228}
229
230pub fn parse_tree(data: &[u8]) -> Result<Vec<TreeEntry>> {
241 let mut entries = Vec::new();
242 let mut pos = 0;
243
244 while pos < data.len() {
245 let sp = data[pos..]
247 .iter()
248 .position(|&b| b == b' ')
249 .ok_or_else(|| Error::CorruptObject("tree entry missing space".to_owned()))?;
250 let mode_bytes = &data[pos..pos + sp];
251 let mode = std::str::from_utf8(mode_bytes)
252 .ok()
253 .and_then(|s| u32::from_str_radix(s, 8).ok())
254 .ok_or_else(|| {
255 Error::CorruptObject(format!(
256 "invalid tree mode: {}",
257 String::from_utf8_lossy(mode_bytes)
258 ))
259 })?;
260 pos += sp + 1;
261
262 let nul = data[pos..]
264 .iter()
265 .position(|&b| b == 0)
266 .ok_or_else(|| Error::CorruptObject("tree entry missing NUL".to_owned()))?;
267 let name = data[pos..pos + nul].to_vec();
268 pos += nul + 1;
269
270 if pos + 20 > data.len() {
271 return Err(Error::CorruptObject("tree entry truncated SHA".to_owned()));
272 }
273 let oid = ObjectId::from_bytes(&data[pos..pos + 20])?;
274 pos += 20;
275
276 entries.push(TreeEntry { mode, name, oid });
277 }
278
279 Ok(entries)
280}
281
282#[must_use]
287pub fn serialize_tree(entries: &[TreeEntry]) -> Vec<u8> {
288 let mut out = Vec::new();
289 for e in entries {
290 out.extend_from_slice(e.mode_str().as_bytes());
291 out.push(b' ');
292 out.extend_from_slice(&e.name);
293 out.push(0);
294 out.extend_from_slice(e.oid.as_bytes());
295 }
296 out
297}
298
/// Orders two tree entry names, treating tree names as if they ended in `/`.
///
/// The names are compared bytewise over their common length. If that part is
/// equal, the next byte of each name decides; a name that has run out
/// contributes `b'/'` when it is a tree and `0` otherwise.
#[must_use]
pub fn tree_entry_cmp(
    a_name: &[u8],
    a_is_tree: bool,
    b_name: &[u8],
    b_is_tree: bool,
) -> std::cmp::Ordering {
    /// Byte that follows the shared prefix, or the implicit terminator.
    fn next_byte(tail: &[u8], is_tree: bool) -> u8 {
        match tail.first() {
            Some(&byte) => byte,
            None if is_tree => b'/',
            None => 0,
        }
    }

    let shared = a_name.len().min(b_name.len());
    let (a_head, a_tail) = a_name.split_at(shared);
    let (b_head, b_tail) = b_name.split_at(shared);

    match a_head.cmp(b_head) {
        std::cmp::Ordering::Equal => {
            next_byte(a_tail, a_is_tree).cmp(&next_byte(b_tail, b_is_tree))
        }
        decided => decided,
    }
}
331
332#[derive(Debug, Clone)]
334pub struct CommitData {
335 pub tree: ObjectId,
337 pub parents: Vec<ObjectId>,
339 pub author: String,
341 pub committer: String,
343 pub author_raw: Vec<u8>,
347 pub committer_raw: Vec<u8>,
349 pub encoding: Option<String>,
351 pub message: String,
353 #[doc = "Optional raw message bytes for non-UTF-8 messages."]
356 pub raw_message: Option<Vec<u8>>,
357}
358
359pub fn parse_commit(data: &[u8]) -> Result<CommitData> {
365 #[derive(Clone, Copy)]
368 enum Continuation {
369 Author,
370 Committer,
371 Ignore,
372 }
373
374 let mut pos = 0usize;
375 let mut tree = None;
376 let mut parents = Vec::new();
377 let mut author_raw: Option<Vec<u8>> = None;
378 let mut committer_raw: Option<Vec<u8>> = None;
379 let mut encoding = None;
380 let mut cont = Continuation::Ignore;
381
382 while pos < data.len() {
383 let line_start = pos;
384 let mut line_end = pos;
385 while line_end < data.len() && data[line_end] != b'\n' {
386 line_end += 1;
387 }
388 let line = &data[line_start..line_end];
389 let after_nl = line_end.saturating_add(1);
390 if line.is_empty() {
391 let body = data.get(after_nl..).unwrap_or_default();
392 let message = commit_encoding::decode_bytes(encoding.as_deref(), body);
393 let raw_message = if body.is_empty() {
397 None
398 } else if std::str::from_utf8(body).is_err() || !body.ends_with(b"\n") {
399 Some(body.to_vec())
400 } else {
401 None
402 };
403 let author_bytes = author_raw
404 .ok_or_else(|| Error::CorruptObject("commit missing author header".to_owned()))?;
405 let committer_bytes = committer_raw.ok_or_else(|| {
406 Error::CorruptObject("commit missing committer header".to_owned())
407 })?;
408 let author = commit_encoding::decode_bytes(encoding.as_deref(), &author_bytes);
409 let committer = commit_encoding::decode_bytes(encoding.as_deref(), &committer_bytes);
410 return Ok(CommitData {
411 tree: tree
412 .ok_or_else(|| Error::CorruptObject("commit missing tree header".to_owned()))?,
413 parents,
414 author,
415 committer,
416 author_raw: author_bytes,
417 committer_raw: committer_bytes,
418 encoding,
419 message,
420 raw_message,
421 });
422 }
423
424 if line.first() == Some(&b' ') {
425 let rest = line.get(1..).unwrap_or_default();
426 match cont {
427 Continuation::Author => {
428 let a = author_raw.as_mut().ok_or_else(|| {
429 Error::CorruptObject("orphan header continuation".to_owned())
430 })?;
431 a.extend_from_slice(rest);
432 }
433 Continuation::Committer => {
434 let c = committer_raw.as_mut().ok_or_else(|| {
435 Error::CorruptObject("orphan header continuation".to_owned())
436 })?;
437 c.extend_from_slice(rest);
438 }
439 Continuation::Ignore => {}
440 }
441 pos = after_nl;
442 continue;
443 }
444
445 let key_end = line
446 .iter()
447 .position(|&b| b == b' ')
448 .ok_or_else(|| Error::CorruptObject("malformed commit header line".to_owned()))?;
449 let key = &line[..key_end];
450 let rest = line.get(key_end + 1..).unwrap_or_default();
451
452 match key {
453 b"tree" => {
454 let line_str = std::str::from_utf8(rest).map_err(|_| {
455 Error::CorruptObject("commit tree line is not valid UTF-8".to_owned())
456 })?;
457 tree = Some(line_str.trim().parse::<ObjectId>()?);
458 cont = Continuation::Ignore;
459 }
460 b"parent" => {
461 let line_str = std::str::from_utf8(rest).map_err(|_| {
462 Error::CorruptObject("commit parent line is not valid UTF-8".to_owned())
463 })?;
464 parents.push(line_str.trim().parse::<ObjectId>()?);
465 cont = Continuation::Ignore;
466 }
467 b"author" => {
468 author_raw = Some(rest.to_vec());
469 cont = Continuation::Author;
470 }
471 b"committer" => {
472 committer_raw = Some(rest.to_vec());
473 cont = Continuation::Committer;
474 }
475 b"encoding" => {
476 let line_str = std::str::from_utf8(rest).map_err(|_| {
477 Error::CorruptObject("commit encoding line is not valid UTF-8".to_owned())
478 })?;
479 encoding = Some(line_str.to_owned());
480 cont = Continuation::Ignore;
481 }
482 _ => {
483 cont = Continuation::Ignore;
484 }
485 }
486 pos = after_nl;
487 }
488
489 Err(Error::CorruptObject(
490 "commit missing blank line before message".to_owned(),
491 ))
492}
493
494#[derive(Debug, Clone)]
496pub struct TagData {
497 pub object: ObjectId,
499 pub object_type: String,
501 pub tag: String,
503 pub tagger: Option<String>,
505 pub message: String,
507}
508
509pub fn parse_tag(data: &[u8]) -> Result<TagData> {
515 let text = std::str::from_utf8(data)
516 .map_err(|_| Error::CorruptObject("tag is not valid UTF-8".to_owned()))?;
517
518 let mut object = None;
519 let mut object_type = None;
520 let mut tag_name = None;
521 let mut tagger = None;
522 let mut message = String::new();
523 let mut in_message = false;
524
525 for line in text.split('\n') {
526 if in_message {
527 message.push_str(line);
528 message.push('\n');
529 continue;
530 }
531 if line.is_empty() {
532 in_message = true;
533 continue;
534 }
535 if let Some(rest) = line.strip_prefix("object ") {
536 object = Some(rest.trim().parse::<ObjectId>()?);
537 } else if let Some(rest) = line.strip_prefix("type ") {
538 object_type = Some(rest.trim().to_owned());
539 } else if let Some(rest) = line.strip_prefix("tag ") {
540 tag_name = Some(rest.trim().to_owned());
541 } else if let Some(rest) = line.strip_prefix("tagger ") {
542 tagger = Some(rest.to_owned());
543 }
544 }
545
546 if message.ends_with('\n') {
548 message.pop();
549 }
550
551 Ok(TagData {
552 object: object
553 .ok_or_else(|| Error::CorruptObject("tag missing object header".to_owned()))?,
554 object_type: object_type
555 .ok_or_else(|| Error::CorruptObject("tag missing type header".to_owned()))?,
556 tag: tag_name.ok_or_else(|| Error::CorruptObject("tag missing tag header".to_owned()))?,
557 tagger,
558 message,
559 })
560}
561
562#[must_use]
567pub fn serialize_tag(t: &TagData) -> Vec<u8> {
568 let mut out = String::new();
569 out.push_str(&format!("object {}\n", t.object));
570 out.push_str(&format!("type {}\n", t.object_type));
571 out.push_str(&format!("tag {}\n", t.tag));
572 if let Some(ref tagger) = t.tagger {
573 out.push_str(&format!("tagger {tagger}\n"));
574 }
575 out.push('\n');
576 let msg = t.message.trim_end_matches('\n');
578 if !msg.is_empty() {
579 out.push_str(msg);
580 out.push('\n');
581 }
582 out.into_bytes()
583}
584
585#[must_use]
593pub fn serialize_commit(c: &CommitData) -> Vec<u8> {
594 let mut out = Vec::new();
595 out.extend_from_slice(format!("tree {}\n", c.tree).as_bytes());
596 for p in &c.parents {
597 out.extend_from_slice(format!("parent {p}\n").as_bytes());
598 }
599 out.extend_from_slice(b"author ");
600 if c.author_raw.is_empty() {
601 out.extend_from_slice(c.author.as_bytes());
602 } else {
603 out.extend_from_slice(&c.author_raw);
604 }
605 out.push(b'\n');
606 out.extend_from_slice(b"committer ");
607 if c.committer_raw.is_empty() {
608 out.extend_from_slice(c.committer.as_bytes());
609 } else {
610 out.extend_from_slice(&c.committer_raw);
611 }
612 out.push(b'\n');
613 if let Some(enc) = &c.encoding {
614 out.extend_from_slice(format!("encoding {enc}\n").as_bytes());
615 }
616 out.push(b'\n');
617 if let Some(raw) = &c.raw_message {
618 out.extend_from_slice(raw);
619 } else if !c.message.is_empty() {
620 out.extend_from_slice(c.message.as_bytes());
621 }
622 out
623}