1use thiserror::Error;
21
/// Spec URL that the `version` line of a pointer must carry for `Pointer::parse` to accept it.
const VERSION_URL: &str = "https://git-lfs.github.com/spec/v1";
/// Largest input, in bytes, that `Pointer::parse` and `looks_like_pointer` will consider.
const MAX_POINTER_BYTES: usize = 1024;
/// The complete first line of a supported pointer (`version <url>\n`) as raw bytes.
/// Spells out the same URL as `VERSION_URL`; the two must be kept in sync by hand.
const VERSION_PREFIX: &[u8] = b"version https://git-lfs.github.com/spec/v1\n";
25
/// In-memory form of a Git LFS pointer file, as produced by `Pointer::parse` and
/// serialized by `Pointer::write`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Pointer {
    /// Object id as 32 raw bytes, decoded from the 64 hex digits of the `oid sha256:` line.
    pub oid: [u8; 32],
    /// Numeric value of the `size` line.
    // NOTE(review): presumably the byte length of the referenced object — confirm against the spec.
    pub size: u64,
    /// Every `key value` line other than `version`, `oid` and `size`, in the order the
    /// parser encountered them.
    pub extensions: Vec<(String, String)>,
}
38
/// Reasons `Pointer::parse` can reject its input.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum ParseError {
    /// The input was zero bytes long.
    #[error("pointer is empty")]
    Empty,
    /// The input was longer than `MAX_POINTER_BYTES`; the payload is the actual length.
    #[error("pointer too large: {0} bytes (max {MAX_POINTER_BYTES})")]
    TooLarge(usize),
    /// The first line does not start with `version `.
    ///
    /// `Pointer::parse` also returns this variant when the input does not end with a
    /// newline and when a later line contains no space separating key from value.
    #[error("missing or invalid version line")]
    BadVersion,
    /// The version line is well-formed but names a URL other than `VERSION_URL`;
    /// `found` is the value that was read.
    #[error("unsupported pointer version: {found}")]
    UnsupportedVersion { found: String },
    /// The `oid` line is absent, lacks the `sha256:` prefix, or is not exactly
    /// 64 lowercase hex digits.
    #[error("missing or invalid oid line")]
    BadOid,
    /// The `size` line is absent or its value is not a valid unsigned 64-bit decimal number.
    #[error("missing or invalid size line")]
    BadSize,
    /// The input contains a byte outside the ASCII range.
    #[error("non-ASCII bytes in pointer")]
    NonAscii,
    /// A key is repeated; the payload is the repeated key.
    #[error("duplicate key: {0}")]
    DuplicateKey(String),
    /// The input contains a carriage-return byte (any `\r`, not only as part of `\r\n`).
    #[error("CRLF line endings not allowed")]
    CrlfLineEndings,
}
60
61impl Pointer {
62 pub fn parse(bytes: &[u8]) -> Result<Self, ParseError> {
69 if bytes.is_empty() {
70 return Err(ParseError::Empty);
71 }
72 if bytes.len() > MAX_POINTER_BYTES {
73 return Err(ParseError::TooLarge(bytes.len()));
74 }
75 if !bytes.is_ascii() {
76 return Err(ParseError::NonAscii);
77 }
78 if bytes.contains(&b'\r') {
79 return Err(ParseError::CrlfLineEndings);
80 }
81 if !bytes.ends_with(b"\n") {
83 return Err(ParseError::BadVersion);
84 }
85
86 let text = std::str::from_utf8(bytes).map_err(|_| ParseError::NonAscii)?;
88
89 let mut lines = text.split('\n');
90 let version_line = lines.next().ok_or(ParseError::BadVersion)?;
92
93 let version_value = version_line
95 .strip_prefix("version ")
96 .ok_or(ParseError::BadVersion)?;
97 if version_value != VERSION_URL {
98 return Err(ParseError::UnsupportedVersion {
99 found: version_value.to_owned(),
100 });
101 }
102
103 let mut oid: Option<[u8; 32]> = None;
104 let mut size: Option<u64> = None;
105 let mut extensions: Vec<(String, String)> = Vec::new();
106 let mut seen_keys: Vec<String> = Vec::new();
107
108 for line in lines {
109 if line.is_empty() {
110 continue; }
112 let (key, value) = line.split_once(' ').ok_or(ParseError::BadVersion)?;
114 if seen_keys.iter().any(|k| k == key) {
115 return Err(ParseError::DuplicateKey(key.to_owned()));
116 }
117 seen_keys.push(key.to_owned());
118
119 match key {
120 "oid" => {
121 let hex = value.strip_prefix("sha256:").ok_or(ParseError::BadOid)?;
122 if hex.len() != 64 {
123 return Err(ParseError::BadOid);
124 }
125 let mut bytes = [0u8; 32];
126 for (i, byte) in bytes.iter_mut().enumerate() {
127 let hi = hex_digit(hex.as_bytes()[i * 2]).ok_or(ParseError::BadOid)?;
128 let lo = hex_digit(hex.as_bytes()[i * 2 + 1]).ok_or(ParseError::BadOid)?;
129 if hex.as_bytes()[i * 2].is_ascii_uppercase()
131 || hex.as_bytes()[i * 2 + 1].is_ascii_uppercase()
132 {
133 return Err(ParseError::BadOid);
134 }
135 *byte = (hi << 4) | lo;
136 }
137 oid = Some(bytes);
138 }
139 "size" => {
140 let n: u64 = value.parse().map_err(|_| ParseError::BadSize)?;
141 size = Some(n);
142 }
143 _ => {
144 extensions.push((key.to_owned(), value.to_owned()));
145 }
146 }
147 }
148
149 let oid = oid.ok_or(ParseError::BadOid)?;
150 let size = size.ok_or(ParseError::BadSize)?;
151
152 Ok(Self {
153 oid,
154 size,
155 extensions,
156 })
157 }
158
159 #[must_use]
161 pub fn write(&self) -> Vec<u8> {
162 let mut keyed: Vec<(String, String)> = Vec::with_capacity(2 + self.extensions.len());
165 keyed.push(("oid".to_owned(), format!("sha256:{}", self.oid_hex())));
166 keyed.push(("size".to_owned(), self.size.to_string()));
167 for (k, v) in &self.extensions {
168 keyed.push((k.clone(), v.clone()));
169 }
170 keyed.sort_by(|a, b| a.0.cmp(&b.0));
171
172 let mut out = String::with_capacity(
173 VERSION_PREFIX.len()
174 + keyed
175 .iter()
176 .map(|(k, v)| k.len() + v.len() + 2)
177 .sum::<usize>(),
178 );
179 out.push_str("version ");
180 out.push_str(VERSION_URL);
181 out.push('\n');
182 for (k, v) in &keyed {
183 out.push_str(k);
184 out.push(' ');
185 out.push_str(v);
186 out.push('\n');
187 }
188 out.into_bytes()
189 }
190
191 #[must_use]
192 pub fn oid_hex(&self) -> String {
193 let mut s = String::with_capacity(64);
194 for byte in &self.oid {
195 s.push(hex_char(byte >> 4));
196 s.push(hex_char(byte & 0x0f));
197 }
198 s
199 }
200}
201
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f` or `A-F`) to its numeric value.
///
/// Returns `None` for any other byte.
const fn hex_digit(b: u8) -> Option<u8> {
    // For each accepted range, pick the amount to subtract so the range's first
    // character lands on its value (0 for '0', 10 for 'a' and 'A').
    let base = match b {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(b - base)
}
210
/// Maps a nibble (`0..=15`) to its lowercase hexadecimal character.
///
/// # Panics
///
/// Panics if `n` is greater than 15.
fn hex_char(n: u8) -> char {
    const LOWER_HEX: &[u8; 16] = b"0123456789abcdef";
    match LOWER_HEX.get(usize::from(n)) {
        Some(&digit) => char::from(digit),
        None => unreachable!(),
    }
}
218
219#[must_use]
223pub fn looks_like_pointer(bytes: &[u8]) -> bool {
224 bytes.len() <= MAX_POINTER_BYTES && bytes.starts_with(VERSION_PREFIX)
225}
226
// Unit tests for `Pointer::parse`, `Pointer::write`, `Pointer::oid_hex` and
// `looks_like_pointer`. Fixtures are spelled out as literal pointer text so each test
// shows the exact bytes it feeds to the parser.
#[cfg(test)]
mod tests {
    use super::*;

    /// 64 lowercase hex digits used as the object id in most fixtures.
    const SAMPLE_OID_HEX: &str = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393";
    /// Size value used in most fixtures.
    const SAMPLE_SIZE: u64 = 12345;

    /// Decodes `SAMPLE_OID_HEX` into the 32 raw bytes the parser is expected to produce.
    fn sample_oid() -> [u8; 32] {
        let mut out = [0u8; 32];
        for (i, byte) in out.iter_mut().enumerate() {
            let hi = hex_digit(SAMPLE_OID_HEX.as_bytes()[i * 2]).expect("operation should succeed");
            let lo =
                hex_digit(SAMPLE_OID_HEX.as_bytes()[i * 2 + 1]).expect("operation should succeed");
            *byte = (hi << 4) | lo;
        }
        out
    }

    /// Builds the canonical pointer text (version, oid, size) for the sample values.
    fn sample_pointer_bytes() -> Vec<u8> {
        format!(
            "version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}\n"
        )
        .into_bytes()
    }

    /// Parsing the canonical text yields the sample values, and writing it back
    /// reproduces the input byte for byte.
    #[test]
    fn roundtrip_canonical_pointer() {
        let bytes = sample_pointer_bytes();
        let p = Pointer::parse(&bytes).expect("operation should succeed");
        assert_eq!(p.oid, sample_oid());
        assert_eq!(p.size, SAMPLE_SIZE);
        assert!(p.extensions.is_empty());
        assert_eq!(p.oid_hex(), SAMPLE_OID_HEX);
        assert_eq!(p.write(), bytes);
    }

    /// `size` before `oid` is accepted on input; output is still ordered oid, then size.
    #[test]
    fn parse_keys_in_any_order_after_version() {
        let bytes = format!(
            "version https://git-lfs.github.com/spec/v1\nsize {SAMPLE_SIZE}\noid sha256:{SAMPLE_OID_HEX}\n"
        );
        let p = Pointer::parse(bytes.as_bytes()).expect("operation should succeed");
        assert_eq!(p.size, SAMPLE_SIZE);
        let out = p.write();
        let text = std::str::from_utf8(&out).expect("operation should succeed");
        let lines: Vec<&str> = text.lines().collect();
        assert_eq!(lines[0], "version https://git-lfs.github.com/spec/v1");
        assert!(lines[1].starts_with("oid "));
        assert!(lines[2].starts_with("size "));
    }

    /// Zero-length input is reported as `Empty`.
    #[test]
    fn empty_input_rejected() {
        assert_eq!(Pointer::parse(b""), Err(ParseError::Empty));
    }

    /// One byte over `MAX_POINTER_BYTES` is reported as `TooLarge`.
    #[test]
    fn too_large_rejected() {
        let huge = vec![b'a'; MAX_POINTER_BYTES + 1];
        assert!(matches!(
            Pointer::parse(&huge),
            Err(ParseError::TooLarge(_))
        ));
    }

    /// A single 0xff byte anywhere in the input is reported as `NonAscii`.
    #[test]
    fn non_ascii_rejected() {
        let bytes = b"version https://git-lfs.github.com/spec/v1\nsize 1\noid sha256:\xff\n";
        assert_eq!(Pointer::parse(bytes), Err(ParseError::NonAscii));
    }

    /// `\r\n` line endings are reported as `CrlfLineEndings`; the fixture has no `oid`
    /// line, so this also shows the carriage-return check runs before key validation.
    #[test]
    fn crlf_rejected() {
        let bytes = b"version https://git-lfs.github.com/spec/v1\r\nsize 1\r\n";
        assert_eq!(Pointer::parse(bytes), Err(ParseError::CrlfLineEndings));
    }

    /// Input without a final newline is rejected (the specific variant is not pinned).
    #[test]
    fn missing_trailing_newline_rejected() {
        let bytes = format!(
            "version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}"
        );
        assert!(Pointer::parse(bytes.as_bytes()).is_err());
    }

    /// A well-formed version line with a different URL is `UnsupportedVersion`; the
    /// invalid `oid` in the fixture is never reached.
    #[test]
    fn bad_version_url_rejected() {
        let bytes = b"version https://example.com/v99\noid sha256:0\nsize 1\n";
        assert!(matches!(
            Pointer::parse(bytes),
            Err(ParseError::UnsupportedVersion { .. })
        ));
    }

    /// Input whose first line is not a `version` line is `BadVersion`.
    #[test]
    fn missing_version_rejected() {
        let bytes = format!("oid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}\n");
        assert_eq!(
            Pointer::parse(bytes.as_bytes()),
            Err(ParseError::BadVersion)
        );
    }

    /// Uppercase hex digits in the oid are `BadOid`.
    #[test]
    fn uppercase_hex_rejected() {
        let upper: String = SAMPLE_OID_HEX.to_ascii_uppercase();
        let bytes = format!(
            "version https://git-lfs.github.com/spec/v1\noid sha256:{upper}\nsize {SAMPLE_SIZE}\n"
        );
        assert_eq!(Pointer::parse(bytes.as_bytes()), Err(ParseError::BadOid));
    }

    /// An oid shorter than 64 hex digits is `BadOid`.
    #[test]
    fn short_oid_rejected() {
        let bytes = b"version https://git-lfs.github.com/spec/v1\noid sha256:abc\nsize 1\n";
        assert_eq!(Pointer::parse(bytes), Err(ParseError::BadOid));
    }

    /// A non-numeric size value is `BadSize`.
    #[test]
    fn non_numeric_size_rejected() {
        let bytes = format!(
            "version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize notanumber\n"
        );
        assert_eq!(Pointer::parse(bytes.as_bytes()), Err(ParseError::BadSize));
    }

    /// A pointer without an `oid` line is `BadOid`.
    #[test]
    fn missing_oid_rejected() {
        let bytes = b"version https://git-lfs.github.com/spec/v1\nsize 1\n";
        assert_eq!(Pointer::parse(bytes), Err(ParseError::BadOid));
    }

    /// A pointer without a `size` line is `BadSize`.
    #[test]
    fn missing_size_rejected() {
        let bytes =
            format!("version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\n");
        assert_eq!(Pointer::parse(bytes.as_bytes()), Err(ParseError::BadSize));
    }

    /// Two `oid` lines are `DuplicateKey`, even when both carry the same value.
    #[test]
    fn duplicate_key_rejected() {
        let bytes = format!(
            "version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\noid sha256:{SAMPLE_OID_HEX}\nsize 1\n"
        );
        assert!(matches!(
            Pointer::parse(bytes.as_bytes()),
            Err(ParseError::DuplicateKey(_))
        ));
    }

    /// An unknown key is kept as an extension and written back in key order
    /// (`extra` sorts before `oid`).
    #[test]
    fn extensions_preserved_roundtrip() {
        let bytes = format!(
            "version https://git-lfs.github.com/spec/v1\nextra value-x\noid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}\n"
        );
        let p = Pointer::parse(bytes.as_bytes()).expect("operation should succeed");
        assert_eq!(
            p.extensions,
            vec![("extra".to_owned(), "value-x".to_owned())]
        );
        let out = p.write();
        let expected = format!(
            "version https://git-lfs.github.com/spec/v1\nextra value-x\noid sha256:{SAMPLE_OID_HEX}\nsize {SAMPLE_SIZE}\n"
        );
        assert_eq!(out, expected.as_bytes());
    }

    /// The canonical pointer text passes the quick check.
    #[test]
    fn looks_like_pointer_positive() {
        assert!(looks_like_pointer(&sample_pointer_bytes()));
    }

    /// 2048 bytes cycling through 0..=255 fail the quick check. This input is both
    /// longer than `MAX_POINTER_BYTES` and missing the version prefix, so the test does
    /// not isolate either condition.
    #[test]
    fn looks_like_pointer_rejects_binary() {
        let binary: Vec<u8> = (0..2048u16)
            .map(|i| u8::try_from(i % 256).expect("value reduced below byte range"))
            .collect();
        assert!(!looks_like_pointer(&binary));
    }

    /// Text that merely starts with the word `version` fails the quick check.
    #[test]
    fn looks_like_pointer_rejects_text_starting_with_version() {
        assert!(!looks_like_pointer(b"version 2.0 something else\n"));
    }

    /// The correct prefix is not enough when the input exceeds `MAX_POINTER_BYTES`.
    #[test]
    fn looks_like_pointer_rejects_too_large_even_with_prefix() {
        let mut buf = VERSION_PREFIX.to_vec();
        buf.resize(MAX_POINTER_BYTES + 1, b'x');
        assert!(!looks_like_pointer(&buf));
    }

    /// A size of zero is valid.
    #[test]
    fn size_zero_accepted() {
        let bytes = format!(
            "version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize 0\n"
        );
        let p = Pointer::parse(bytes.as_bytes()).expect("operation should succeed");
        assert_eq!(p.size, 0);
    }

    /// The largest `u64` value is valid.
    #[test]
    fn large_size_accepted() {
        let big = u64::MAX;
        let bytes = format!(
            "version https://git-lfs.github.com/spec/v1\noid sha256:{SAMPLE_OID_HEX}\nsize {big}\n"
        );
        let p = Pointer::parse(bytes.as_bytes()).expect("operation should succeed");
        assert_eq!(p.size, big);
    }
}
442
// Pins the serialized form of a hand-built `Pointer` against a fixed expected byte string.
#[cfg(test)]
mod interop_tests {
    use super::*;

    /// Builds a `Pointer` directly (bypassing `parse`) and checks that `write` emits
    /// exactly the expected three-line pointer text.
    #[test]
    fn matches_git_lfs_output() {
        // NOTE(review): this looks like the SHA-256 of the 12-byte string "hello world\n",
        // which would match `size: 12` below — confirm against real `git lfs` output.
        let hex = "a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447";
        // Decode the hex string into the raw 32-byte oid, two digits per byte.
        let mut oid = [0u8; 32];
        for (i, byte) in oid.iter_mut().enumerate() {
            let hi = hex_digit(hex.as_bytes()[i * 2]).expect("operation should succeed");
            let lo = hex_digit(hex.as_bytes()[i * 2 + 1]).expect("operation should succeed");
            *byte = (hi << 4) | lo;
        }
        let p = Pointer {
            oid,
            size: 12,
            extensions: vec![],
        };
        let out = p.write();
        let expected = b"version https://git-lfs.github.com/spec/v1\noid sha256:a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447\nsize 12\n";
        assert_eq!(out, expected);
    }
}