use std::fmt;
use std::str::FromStr;
use crate::error::{Error, Result};
/// A 20-byte object identifier.
///
/// Equality, ordering and hashing are derived and therefore operate on the
/// raw bytes. `Display` and `Debug` are implemented by hand and render the
/// id as hex text.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ObjectId([u8; 20]);
impl ObjectId {
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
let arr: [u8; 20] = bytes
.try_into()
.map_err(|_| Error::InvalidObjectId(hex::encode(bytes)))?;
Ok(Self(arr))
}
#[must_use]
pub fn as_bytes(&self) -> &[u8; 20] {
&self.0
}
#[must_use]
pub fn to_hex(&self) -> String {
hex::encode(self.0)
}
#[must_use]
pub fn loose_prefix(&self) -> String {
hex::encode(&self.0[..1])
}
pub fn from_hex(s: &str) -> Result<Self> {
s.parse()
}
#[must_use]
pub fn loose_suffix(&self) -> String {
hex::encode(&self.0[1..])
}
}
impl fmt::Display for ObjectId {
    /// Writes the id as the hex text produced by [`ObjectId::to_hex`].
    ///
    /// The text is written directly, so width/fill flags on the formatter
    /// have no effect.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hex_text = self.to_hex();
        f.write_str(hex_text.as_str())
    }
}
impl fmt::Debug for ObjectId {
    /// Renders the id as `ObjectId(<hex>)` instead of a raw byte array.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hex_text = self.to_hex();
        write!(f, "ObjectId({hex_text})")
    }
}
impl FromStr for ObjectId {
type Err = Error;
fn from_str(s: &str) -> Result<Self> {
if s.len() != 40 {
return Err(Error::InvalidObjectId(s.to_owned()));
}
let bytes = hex::decode(s).map_err(|_| Error::InvalidObjectId(s.to_owned()))?;
Self::from_bytes(&bytes)
}
}
/// The kind of an object.
///
/// Each variant has a lowercase textual name (`blob`, `tree`, `commit`,
/// `tag`) used by `ObjectKind::from_bytes` and `ObjectKind::as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectKind {
/// Named `blob` in textual form.
Blob,
/// Named `tree` in textual form.
Tree,
/// Named `commit` in textual form.
Commit,
/// Named `tag` in textual form.
Tag,
}
impl ObjectKind {
pub fn from_bytes(b: &[u8]) -> Result<Self> {
match b {
b"blob" => Ok(Self::Blob),
b"tree" => Ok(Self::Tree),
b"commit" => Ok(Self::Commit),
b"tag" => Ok(Self::Tag),
other => Err(Error::UnknownObjectType(
String::from_utf8_lossy(other).into_owned(),
)),
}
}
#[must_use]
pub fn as_str(&self) -> &'static str {
match self {
Self::Blob => "blob",
Self::Tree => "tree",
Self::Commit => "commit",
Self::Tag => "tag",
}
}
}
impl fmt::Display for ObjectKind {
    /// Writes the same text that [`ObjectKind::as_str`] returns.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label: &'static str = self.as_str();
        f.write_str(label)
    }
}
impl FromStr for ObjectKind {
type Err = Error;
fn from_str(s: &str) -> Result<Self> {
Self::from_bytes(s.as_bytes())
}
}
/// An object: a kind tag plus its payload bytes.
#[derive(Debug, Clone)]
pub struct Object {
/// The kind of object the payload represents.
pub kind: ObjectKind,
/// The payload bytes; `to_store_bytes` prepends a header to these.
pub data: Vec<u8>,
}
impl Object {
#[must_use]
pub fn new(kind: ObjectKind, data: Vec<u8>) -> Self {
Self { kind, data }
}
#[must_use]
pub fn to_store_bytes(&self) -> Vec<u8> {
let header = format!("{} {}\0", self.kind, self.data.len());
let mut out = Vec::with_capacity(header.len() + self.data.len());
out.extend_from_slice(header.as_bytes());
out.extend_from_slice(&self.data);
out
}
}
/// One entry of a tree object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeEntry {
/// Numeric mode; `parse_tree` reads it from octal text.
pub mode: u32,
/// Entry name as raw bytes (not required to be UTF-8).
pub name: Vec<u8>,
/// Id of the object this entry refers to.
pub oid: ObjectId,
}
impl TreeEntry {
    /// Renders the mode as octal text without leading zeros.
    ///
    /// `0o040000` therefore becomes `"40000"` and `0o100644` becomes
    /// `"100644"`. No special case is needed for `0o040000`: the `{:o}`
    /// formatter never emits leading zeros, so it already yields `"40000"`.
    #[must_use]
    pub fn mode_str(&self) -> String {
        format!("{:o}", self.mode)
    }
}
pub fn parse_tree(data: &[u8]) -> Result<Vec<TreeEntry>> {
let mut entries = Vec::new();
let mut pos = 0;
while pos < data.len() {
let sp = data[pos..]
.iter()
.position(|&b| b == b' ')
.ok_or_else(|| Error::CorruptObject("tree entry missing space".to_owned()))?;
let mode_bytes = &data[pos..pos + sp];
let mode = std::str::from_utf8(mode_bytes)
.ok()
.and_then(|s| u32::from_str_radix(s, 8).ok())
.ok_or_else(|| {
Error::CorruptObject(format!(
"invalid tree mode: {}",
String::from_utf8_lossy(mode_bytes)
))
})?;
pos += sp + 1;
let nul = data[pos..]
.iter()
.position(|&b| b == 0)
.ok_or_else(|| Error::CorruptObject("tree entry missing NUL".to_owned()))?;
let name = data[pos..pos + nul].to_vec();
pos += nul + 1;
if pos + 20 > data.len() {
return Err(Error::CorruptObject("tree entry truncated SHA".to_owned()));
}
let oid = ObjectId::from_bytes(&data[pos..pos + 20])?;
pos += 20;
entries.push(TreeEntry { mode, name, oid });
}
Ok(entries)
}
/// Serializes tree entries into a tree payload.
///
/// Each entry is written as `<mode_str> <name>\0<20 raw id bytes>`, in the
/// order given. The entries are not sorted here.
#[must_use]
pub fn serialize_tree(entries: &[TreeEntry]) -> Vec<u8> {
    entries.iter().fold(Vec::<u8>::new(), |mut buf, entry| {
        buf.extend_from_slice(entry.mode_str().as_bytes());
        buf.push(b' ');
        buf.extend_from_slice(&entry.name);
        buf.push(0);
        buf.extend_from_slice(entry.oid.as_bytes());
        buf
    })
}
/// Orders two tree entry names, treating tree names as if they ended in `/`.
///
/// The names are first compared bytewise over their common length. If that
/// prefix is equal, the next byte of each name decides; a name that has
/// already ended contributes `b'/'` when it is a tree and `0` otherwise.
///
/// NOTE(review): this appears intended to match Git's canonical tree entry
/// ordering — confirm against the callers that sort entries.
#[must_use]
pub fn tree_entry_cmp(
    a_name: &[u8],
    a_is_tree: bool,
    b_name: &[u8],
    b_is_tree: bool,
) -> std::cmp::Ordering {
    let shared = a_name.len().min(b_name.len());
    // Byte that follows the common prefix, or the virtual terminator.
    let next_byte = |name: &[u8], is_tree: bool| -> u8 {
        match name.get(shared) {
            Some(&byte) => byte,
            None if is_tree => b'/',
            None => 0u8,
        }
    };
    match a_name[..shared].cmp(&b_name[..shared]) {
        std::cmp::Ordering::Equal => {
            next_byte(a_name, a_is_tree).cmp(&next_byte(b_name, b_is_tree))
        }
        decided => decided,
    }
}
/// The fields of a commit object as read by `parse_commit` and written by
/// `serialize_commit`.
#[derive(Debug, Clone)]
pub struct CommitData {
/// Id from the `tree` header.
pub tree: ObjectId,
/// Ids from the `parent` headers, in the order they appear.
pub parents: Vec<ObjectId>,
/// Text of the `author` header after the `author ` prefix, kept as-is.
pub author: String,
/// Text of the `committer` header after the `committer ` prefix, kept as-is.
pub committer: String,
/// Text of the `encoding` header, when one is present.
pub encoding: Option<String>,
/// Everything following the first empty line of the commit.
pub message: String,
}
pub fn parse_commit(data: &[u8]) -> Result<CommitData> {
let text = std::str::from_utf8(data)
.map_err(|_| Error::CorruptObject("commit is not valid UTF-8".to_owned()))?;
let mut tree = None;
let mut parents = Vec::new();
let mut author = None;
let mut committer = None;
let mut encoding = None;
let mut message = String::new();
let mut in_message = false;
for line in text.split('\n') {
if in_message {
message.push_str(line);
message.push('\n');
continue;
}
if line.is_empty() {
in_message = true;
continue;
}
if let Some(rest) = line.strip_prefix("tree ") {
tree = Some(rest.trim().parse::<ObjectId>()?);
} else if let Some(rest) = line.strip_prefix("parent ") {
parents.push(rest.trim().parse::<ObjectId>()?);
} else if let Some(rest) = line.strip_prefix("author ") {
author = Some(rest.to_owned());
} else if let Some(rest) = line.strip_prefix("committer ") {
committer = Some(rest.to_owned());
} else if let Some(rest) = line.strip_prefix("encoding ") {
encoding = Some(rest.to_owned());
}
}
if message.ends_with('\n') {
message.pop();
}
Ok(CommitData {
tree: tree.ok_or_else(|| Error::CorruptObject("commit missing tree header".to_owned()))?,
parents,
author: author
.ok_or_else(|| Error::CorruptObject("commit missing author header".to_owned()))?,
committer: committer
.ok_or_else(|| Error::CorruptObject("commit missing committer header".to_owned()))?,
encoding,
message,
})
}
/// The fields of a tag object as read by `parse_tag` and written by
/// `serialize_tag`.
#[derive(Debug, Clone)]
pub struct TagData {
/// Id from the `object` header.
pub object: ObjectId,
/// Text of the `type` header, stored as a string (trimmed by `parse_tag`).
pub object_type: String,
/// Text of the `tag` header (trimmed by `parse_tag`).
pub tag: String,
/// Text of the `tagger` header, when one is present.
pub tagger: Option<String>,
/// Everything following the first empty line of the tag.
pub message: String,
}
pub fn parse_tag(data: &[u8]) -> Result<TagData> {
let text = std::str::from_utf8(data)
.map_err(|_| Error::CorruptObject("tag is not valid UTF-8".to_owned()))?;
let mut object = None;
let mut object_type = None;
let mut tag_name = None;
let mut tagger = None;
let mut message = String::new();
let mut in_message = false;
for line in text.split('\n') {
if in_message {
message.push_str(line);
message.push('\n');
continue;
}
if line.is_empty() {
in_message = true;
continue;
}
if let Some(rest) = line.strip_prefix("object ") {
object = Some(rest.trim().parse::<ObjectId>()?);
} else if let Some(rest) = line.strip_prefix("type ") {
object_type = Some(rest.trim().to_owned());
} else if let Some(rest) = line.strip_prefix("tag ") {
tag_name = Some(rest.trim().to_owned());
} else if let Some(rest) = line.strip_prefix("tagger ") {
tagger = Some(rest.to_owned());
}
}
if message.ends_with('\n') {
message.pop();
}
Ok(TagData {
object: object
.ok_or_else(|| Error::CorruptObject("tag missing object header".to_owned()))?,
object_type: object_type
.ok_or_else(|| Error::CorruptObject("tag missing type header".to_owned()))?,
tag: tag_name.ok_or_else(|| Error::CorruptObject("tag missing tag header".to_owned()))?,
tagger,
message,
})
}
/// Serializes a tag into its payload bytes.
///
/// Writes the `object`, `type` and `tag` headers, then `tagger` when
/// present, then an empty line and the message. A non-empty message that
/// does not end in a newline gets one appended; an empty message adds
/// nothing after the separator.
#[must_use]
pub fn serialize_tag(t: &TagData) -> Vec<u8> {
    let mut text = format!(
        "object {}\ntype {}\ntag {}\n",
        t.object, t.object_type, t.tag
    );
    if let Some(tagger) = t.tagger.as_deref() {
        text.push_str("tagger ");
        text.push_str(tagger);
        text.push('\n');
    }
    // Blank line separating headers from the message.
    text.push('\n');
    text.push_str(&t.message);
    let already_terminated = t.message.is_empty() || t.message.ends_with('\n');
    if !already_terminated {
        text.push('\n');
    }
    text.into_bytes()
}
/// Serializes a commit into its payload bytes.
///
/// Writes the `tree` header, one `parent` header per parent, `author`,
/// `committer`, then `encoding` when present, followed by an empty line and
/// the message. A non-empty message that does not end in a newline gets one
/// appended.
///
/// An empty message adds nothing after the separator line. This keeps an
/// empty-message commit read by `parse_commit` byte-identical when written
/// back, and matches the behaviour of `serialize_tag`.
#[must_use]
pub fn serialize_commit(c: &CommitData) -> Vec<u8> {
    let mut out = String::new();
    out.push_str(&format!("tree {}\n", c.tree));
    for p in &c.parents {
        out.push_str(&format!("parent {p}\n"));
    }
    out.push_str(&format!("author {}\n", c.author));
    out.push_str(&format!("committer {}\n", c.committer));
    if let Some(enc) = &c.encoding {
        out.push_str(&format!("encoding {enc}\n"));
    }
    // Blank line separating headers from the message.
    out.push('\n');
    out.push_str(&c.message);
    // Only terminate a message that exists; an empty one must stay empty so
    // the serialized bytes (and therefore the object's hash) are unchanged.
    if !c.message.is_empty() && !c.message.ends_with('\n') {
        out.push('\n');
    }
    out.into_bytes()
}