use thiserror::Error;
/// Spec URL that the `version` line of a supported pointer must carry;
/// `Pointer::parse` compares against it and `Pointer::write` emits it.
const VERSION_URL: &str = "https://git-lfs.github.com/spec/v1";
/// Largest input length, in bytes, that `Pointer::parse` and
/// `looks_like_pointer` accept (the limit itself is still allowed).
const MAX_POINTER_BYTES: usize = 1024;
/// The complete first line (`version <url>\n`) as raw bytes, used by
/// `looks_like_pointer` as a prefix test. It repeats `VERSION_URL` literally,
/// so the two constants have to be kept in sync by hand.
const VERSION_PREFIX: &[u8] = b"version https://git-lfs.github.com/spec/v1\n";
/// Parsed contents of a Git LFS pointer file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Pointer {
/// Raw 32-byte object id, decoded from the 64 hex digits that follow
/// `oid sha256:` in the pointer text.
pub oid: [u8; 32],
/// Numeric value of the `size` line.
pub size: u64,
/// Every `key value` line other than `oid` and `size`, stored as
/// `(key, value)` in the order the lines appeared in the input.
pub extensions: Vec<(String, String)>,
}
/// Reasons why `Pointer::parse` can reject its input.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum ParseError {
/// The input slice had length zero.
#[error("pointer is empty")]
Empty,
/// The input was longer than `MAX_POINTER_BYTES`; carries the actual length.
#[error("pointer too large: {0} bytes (max {MAX_POINTER_BYTES})")]
TooLarge(usize),
/// The first line does not start with `version `. `Pointer::parse` also
/// returns this when the input does not end in `\n` and when a later line
/// contains no space, because no more specific variant exists for those.
#[error("missing or invalid version line")]
BadVersion,
/// The `version` line was well-formed but named a URL other than
/// `VERSION_URL`; `found` is the value that was read.
#[error("unsupported pointer version: {found}")]
UnsupportedVersion { found: String },
/// No `oid` line was present, or its value was not `sha256:` followed by
/// exactly 64 lowercase hex digits.
#[error("missing or invalid oid line")]
BadOid,
/// No `size` line was present, or its value did not parse as a `u64`.
#[error("missing or invalid size line")]
BadSize,
/// The input contained at least one byte outside the ASCII range.
#[error("non-ASCII bytes in pointer")]
NonAscii,
/// A key appeared on more than one line; carries that key.
#[error("duplicate key: {0}")]
DuplicateKey(String),
/// The input contained a carriage-return (`\r`) byte anywhere.
#[error("CRLF line endings not allowed")]
CrlfLineEndings,
}
impl Pointer {
pub fn parse(bytes: &[u8]) -> Result<Self, ParseError> {
if bytes.is_empty() {
return Err(ParseError::Empty);
}
if bytes.len() > MAX_POINTER_BYTES {
return Err(ParseError::TooLarge(bytes.len()));
}
if !bytes.is_ascii() {
return Err(ParseError::NonAscii);
}
if bytes.contains(&b'\r') {
return Err(ParseError::CrlfLineEndings);
}
if !bytes.ends_with(b"\n") {
return Err(ParseError::BadVersion);
}
let text = std::str::from_utf8(bytes).map_err(|_| ParseError::NonAscii)?;
let mut lines = text.split('\n');
let version_line = lines.next().ok_or(ParseError::BadVersion)?;
let version_value = version_line
.strip_prefix("version ")
.ok_or(ParseError::BadVersion)?;
if version_value != VERSION_URL {
return Err(ParseError::UnsupportedVersion {
found: version_value.to_owned(),
});
}
let mut oid: Option<[u8; 32]> = None;
let mut size: Option<u64> = None;
let mut extensions: Vec<(String, String)> = Vec::new();
let mut seen_keys: Vec<String> = Vec::new();
for line in lines {
if line.is_empty() {
continue; }
let (key, value) = line.split_once(' ').ok_or(ParseError::BadVersion)?;
if seen_keys.iter().any(|k| k == key) {
return Err(ParseError::DuplicateKey(key.to_owned()));
}
seen_keys.push(key.to_owned());
match key {
"oid" => {
let hex = value.strip_prefix("sha256:").ok_or(ParseError::BadOid)?;
if hex.len() != 64 {
return Err(ParseError::BadOid);
}
let mut bytes = [0u8; 32];
for (i, byte) in bytes.iter_mut().enumerate() {
let hi = hex_digit(hex.as_bytes()[i * 2]).ok_or(ParseError::BadOid)?;
let lo = hex_digit(hex.as_bytes()[i * 2 + 1]).ok_or(ParseError::BadOid)?;
if hex.as_bytes()[i * 2].is_ascii_uppercase()
|| hex.as_bytes()[i * 2 + 1].is_ascii_uppercase()
{
return Err(ParseError::BadOid);
}
*byte = (hi << 4) | lo;
}
oid = Some(bytes);
}
"size" => {
let n: u64 = value.parse().map_err(|_| ParseError::BadSize)?;
size = Some(n);
}
_ => {
extensions.push((key.to_owned(), value.to_owned()));
}
}
}
let oid = oid.ok_or(ParseError::BadOid)?;
let size = size.ok_or(ParseError::BadSize)?;
Ok(Self {
oid,
size,
extensions,
})
}
#[must_use]
pub fn write(&self) -> Vec<u8> {
let mut keyed: Vec<(String, String)> = Vec::with_capacity(2 + self.extensions.len());
keyed.push(("oid".to_owned(), format!("sha256:{}", self.oid_hex())));
keyed.push(("size".to_owned(), self.size.to_string()));
for (k, v) in &self.extensions {
keyed.push((k.clone(), v.clone()));
}
keyed.sort_by(|a, b| a.0.cmp(&b.0));
let mut out = String::with_capacity(
VERSION_PREFIX.len()
+ keyed
.iter()
.map(|(k, v)| k.len() + v.len() + 2)
.sum::<usize>(),
);
out.push_str("version ");
out.push_str(VERSION_URL);
out.push('\n');
for (k, v) in &keyed {
out.push_str(k);
out.push(' ');
out.push_str(v);
out.push('\n');
}
out.into_bytes()
}
#[must_use]
pub fn oid_hex(&self) -> String {
let mut s = String::with_capacity(64);
for byte in &self.oid {
s.push(hex_char(byte >> 4));
s.push(hex_char(byte & 0x0f));
}
s
}
}
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f` or `A-F`) to its
/// numeric value, returning `None` for any other byte.
const fn hex_digit(b: u8) -> Option<u8> {
    if b.is_ascii_digit() {
        return Some(b - b'0');
    }
    // Case-folding lets a single range test cover both letter cases;
    // bytes that are not ASCII letters pass through unchanged.
    let folded = b.to_ascii_lowercase();
    if matches!(folded, b'a'..=b'f') {
        Some(folded - b'a' + 10)
    } else {
        None
    }
}
/// Maps a nibble (`0..=15`) to its lowercase hexadecimal character.
///
/// # Panics
///
/// Panics via `unreachable!()` if `n` is greater than 15.
fn hex_char(n: u8) -> char {
    const LOWER_HEX: &[u8; 16] = b"0123456789abcdef";
    match LOWER_HEX.get(usize::from(n)) {
        Some(&digit) => char::from(digit),
        None => unreachable!(),
    }
}
/// Cheap pre-check: returns `true` when `bytes` is no longer than
/// `MAX_POINTER_BYTES` and begins with the exact `VERSION_PREFIX` line.
///
/// This does not validate the rest of the content; use `Pointer::parse`
/// for that.
#[must_use]
pub fn looks_like_pointer(bytes: &[u8]) -> bool {
    if bytes.len() > MAX_POINTER_BYTES {
        return false;
    }
    bytes.starts_with(VERSION_PREFIX)
}
// Unit tests for `Pointer::parse`, `Pointer::write`, `Pointer::oid_hex` and
// `looks_like_pointer`. All fixtures are built from the sample oid/size below.
#[cfg(test)]
mod tests {
use super::*;
// 64 lowercase hex digits used as the oid in most fixtures.
const SAMPLE_OID_HEX: &str = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393";
// Size value used in most fixtures.
const SAMPLE_SIZE: u64 = 12345;
// Decodes `SAMPLE_OID_HEX` into the 32 raw bytes `Pointer::oid` should hold.
fn sample_oid() -> [u8; 32] {
let mut out = [0u8; 32];
for (i, byte) in out.iter_mut().enumerate() {
let hi = hex_digit(SAMPLE_OID_HEX.as_bytes()[i * 2]).expect("operation should succeed");
let lo =
hex_digit(SAMPLE_OID_HEX.as_bytes()[i * 2 + 1]).expect("operation should succeed");
*byte = (hi << 4) | lo;
}
out
}
// A pointer with version, oid and size lines in that order, newline-terminated.
fn sample_pointer_bytes() -> Vec<u8> {
format!(
"version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}\n"
)
.into_bytes()
}
// Parsing the sample and writing it back must reproduce the input bytes.
#[test]
fn roundtrip_canonical_pointer() {
let bytes = sample_pointer_bytes();
let p = Pointer::parse(&bytes).expect("operation should succeed");
assert_eq!(p.oid, sample_oid());
assert_eq!(p.size, SAMPLE_SIZE);
assert!(p.extensions.is_empty());
assert_eq!(p.oid_hex(), SAMPLE_OID_HEX);
assert_eq!(p.write(), bytes);
}
// `size` before `oid` is accepted on input; `write` still emits `oid` first.
#[test]
fn parse_keys_in_any_order_after_version() {
let bytes = format!(
"version https://git-lfs.github.com/spec/v1\nsize {SAMPLE_SIZE}\noid sha256:{SAMPLE_OID_HEX}\n"
);
let p = Pointer::parse(bytes.as_bytes()).expect("operation should succeed");
assert_eq!(p.size, SAMPLE_SIZE);
let out = p.write();
let text = std::str::from_utf8(&out).expect("operation should succeed");
let lines: Vec<&str> = text.lines().collect();
assert_eq!(lines[0], "version https://git-lfs.github.com/spec/v1");
assert!(lines[1].starts_with("oid "));
assert!(lines[2].starts_with("size "));
}
#[test]
fn empty_input_rejected() {
assert_eq!(Pointer::parse(b""), Err(ParseError::Empty));
}
// One byte over the limit; the content itself is irrelevant because the
// length check runs before any line parsing.
#[test]
fn too_large_rejected() {
let huge = vec![b'a'; MAX_POINTER_BYTES + 1];
assert!(matches!(
Pointer::parse(&huge),
Err(ParseError::TooLarge(_))
));
}
// The 0xff byte trips the ASCII check before the oid value is examined.
#[test]
fn non_ascii_rejected() {
let bytes = b"version https://git-lfs.github.com/spec/v1\nsize 1\noid sha256:\xff\n";
assert_eq!(Pointer::parse(bytes), Err(ParseError::NonAscii));
}
// The fixture has no oid line; the `\r` check fires before that matters.
#[test]
fn crlf_rejected() {
let bytes = b"version https://git-lfs.github.com/spec/v1\r\nsize 1\r\n";
assert_eq!(Pointer::parse(bytes), Err(ParseError::CrlfLineEndings));
}
// Only asserts that parsing fails, not which variant is returned.
#[test]
fn missing_trailing_newline_rejected() {
let bytes = format!(
"version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}"
);
assert!(Pointer::parse(bytes.as_bytes()).is_err());
}
// The version check fails first, so the malformed `oid sha256:0` is never reached.
#[test]
fn bad_version_url_rejected() {
let bytes = b"version https://example.com/v99\noid sha256:0\nsize 1\n";
assert!(matches!(
Pointer::parse(bytes),
Err(ParseError::UnsupportedVersion { .. })
));
}
// Without a leading `version ` line the first line (`oid ...`) is rejected.
#[test]
fn missing_version_rejected() {
let bytes = format!("oid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}\n");
assert_eq!(
Pointer::parse(bytes.as_bytes()),
Err(ParseError::BadVersion)
);
}
// Uppercase hex digits are valid hex but not accepted in an oid.
#[test]
fn uppercase_hex_rejected() {
let upper: String = SAMPLE_OID_HEX.to_ascii_uppercase();
let bytes = format!(
"version https://git-lfs.github.com/spec/v1\noid sha256:{upper}\nsize {SAMPLE_SIZE}\n"
);
assert_eq!(Pointer::parse(bytes.as_bytes()), Err(ParseError::BadOid));
}
// An oid must be exactly 64 hex digits.
#[test]
fn short_oid_rejected() {
let bytes = b"version https://git-lfs.github.com/spec/v1\noid sha256:abc\nsize 1\n";
assert_eq!(Pointer::parse(bytes), Err(ParseError::BadOid));
}
#[test]
fn non_numeric_size_rejected() {
let bytes = format!(
"version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize notanumber\n"
);
assert_eq!(Pointer::parse(bytes.as_bytes()), Err(ParseError::BadSize));
}
// A pointer without an oid line reports `BadOid`.
#[test]
fn missing_oid_rejected() {
let bytes = b"version https://git-lfs.github.com/spec/v1\nsize 1\n";
assert_eq!(Pointer::parse(bytes), Err(ParseError::BadOid));
}
// A pointer without a size line reports `BadSize`.
#[test]
fn missing_size_rejected() {
let bytes =
format!("version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\n");
assert_eq!(Pointer::parse(bytes.as_bytes()), Err(ParseError::BadSize));
}
// Two identical `oid` lines; the second one triggers the duplicate check.
#[test]
fn duplicate_key_rejected() {
let bytes = format!(
"version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\noid sha256:{SAMPLE_OID_HEX}\nsize 1\n"
);
assert!(matches!(
Pointer::parse(bytes.as_bytes()),
Err(ParseError::DuplicateKey(_))
));
}
// An unknown key is kept as an extension and written back; `extra` sorts
// before `oid`, so the output equals the input here.
#[test]
fn extensions_preserved_roundtrip() {
let bytes = format!(
"version https://git-lfs.github.com/spec/v1\nextra value-x\noid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}\n"
);
let p = Pointer::parse(bytes.as_bytes()).expect("operation should succeed");
assert_eq!(
p.extensions,
vec![("extra".to_owned(), "value-x".to_owned())]
);
let out = p.write();
let expected = format!(
"version https://git-lfs.github.com/spec/v1\nextra value-x\noid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}\n"
);
assert_eq!(out, expected.as_bytes());
}
#[test]
fn looks_like_pointer_positive() {
assert!(looks_like_pointer(&sample_pointer_bytes()));
}
// 2048 bytes cycling through 0..=255: too long and without the version prefix.
#[test]
fn looks_like_pointer_rejects_binary() {
let binary: Vec<u8> = (0..2048u16)
.map(|i| u8::try_from(i % 256).expect("value reduced below byte range"))
.collect();
assert!(!looks_like_pointer(&binary));
}
// Starting with `version ` is not enough; the full spec URL line must match.
#[test]
fn looks_like_pointer_rejects_text_starting_with_version() {
assert!(!looks_like_pointer(b"version 2.0 something else\n"));
}
// A correct prefix does not help once the input exceeds the size limit.
#[test]
fn looks_like_pointer_rejects_too_large_even_with_prefix() {
let mut buf = VERSION_PREFIX.to_vec();
buf.resize(MAX_POINTER_BYTES + 1, b'x');
assert!(!looks_like_pointer(&buf));
}
#[test]
fn size_zero_accepted() {
let bytes = format!(
"version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize 0\n"
);
let p = Pointer::parse(bytes.as_bytes()).expect("operation should succeed");
assert_eq!(p.size, 0);
}
// The largest representable size, `u64::MAX`, must still parse.
#[test]
fn large_size_accepted() {
let big = u64::MAX;
let bytes = format!(
"version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize {big}\n"
);
let p = Pointer::parse(bytes.as_bytes()).expect("operation should succeed");
assert_eq!(p.size, big);
}
}
#[cfg(test)]
mod interop_tests {
    use super::*;

    /// A hand-built `Pointer` must serialize to the exact expected bytes:
    /// version line, then `oid`, then `size`, each newline-terminated.
    #[test]
    fn matches_git_lfs_output() {
        let hex = "a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447";

        // Decode the hex text two characters at a time into the raw oid.
        let mut oid = [0u8; 32];
        for (slot, pair) in oid.iter_mut().zip(hex.as_bytes().chunks_exact(2)) {
            let hi = hex_digit(pair[0]).expect("operation should succeed");
            let lo = hex_digit(pair[1]).expect("operation should succeed");
            *slot = (hi << 4) | lo;
        }

        let pointer = Pointer {
            oid,
            size: 12,
            extensions: Vec::new(),
        };
        let expected = b"version https://git-lfs.github.com/spec/v1\noid sha256:a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447\nsize 12\n";
        assert_eq!(pointer.write(), expected);
    }
}