1use std::fs::File;
18use std::io::{self, Read, Write};
19use std::path::{Path, PathBuf};
20
21use git_lfs_pointer::Oid;
22use sha2::{Digest, Sha256};
23use tempfile::NamedTempFile;
24
/// Path of the platform's null device; reading from it yields no bytes.
#[cfg(windows)]
const NULL_DEVICE: &str = "NUL";
/// Path of the platform's null device; reading from it yields no bytes.
#[cfg(not(windows))]
const NULL_DEVICE: &str = "/dev/null";

/// Size in bytes (64 KiB) of the scratch buffer used when streaming content.
const COPY_BUFFER: usize = 1 << 16;
29
/// Handle to an on-disk, content-addressed object store.
///
/// Holds only paths; cloning it copies the paths, not any stored data.
#[derive(Clone, Debug)]
pub struct Store {
    /// Directory under which the store keeps `objects/`, `tmp/` and
    /// `incomplete/`.
    root: PathBuf,
    /// Additional object directories, sharded as `aa/bb/<oid>`, that are
    /// consulted when an object is missing locally.
    references: Vec<PathBuf>,
}
45
46#[derive(Debug, thiserror::Error)]
47pub enum StoreError {
48 #[error(transparent)]
49 Io(#[from] io::Error),
50 #[error("expected OID {expected}, got {actual}")]
51 HashMismatch { expected: Oid, actual: Oid },
52}
53
54impl Store {
55 pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
58 Self {
59 root: lfs_dir.into(),
60 references: Vec::new(),
61 }
62 }
63
64 #[must_use]
73 pub fn with_references(mut self, refs: impl IntoIterator<Item = PathBuf>) -> Self {
74 self.references = refs.into_iter().collect();
75 self
76 }
77
78 pub fn root(&self) -> &Path {
80 &self.root
81 }
82
83 pub fn tmp_dir(&self) -> PathBuf {
85 self.root.join("tmp")
86 }
87
88 pub fn incomplete_dir(&self) -> PathBuf {
93 self.root.join("incomplete")
94 }
95
96 pub fn incomplete_path(&self, oid: Oid) -> PathBuf {
99 self.incomplete_dir().join(format!("{oid}.part"))
100 }
101
102 pub fn commit_partial(&self, oid: Oid, partial: &Path) -> io::Result<()> {
108 if oid == Oid::EMPTY {
109 return Ok(());
110 }
111 let dest = self.object_path(oid);
112 if let Some(parent) = dest.parent() {
113 std::fs::create_dir_all(parent)?;
114 }
115 std::fs::rename(partial, &dest)
116 }
117
118 pub fn cleanup_tmp_objects(&self) {
128 let dir = self.root.join("tmp").join("objects");
129 let Ok(entries) = std::fs::read_dir(&dir) else {
130 return;
131 };
132 for entry in entries.flatten() {
133 let name = entry.file_name();
134 let name_str = name.to_string_lossy();
135 if name_str.len() < 64 {
136 continue;
137 }
138 let oid_str = &name_str[..64];
145 let object_path = self
146 .root
147 .join("objects")
148 .join(&oid_str[0..2])
149 .join(&oid_str[2..4])
150 .join(oid_str);
151 if object_path.is_file() {
152 let _ = std::fs::remove_file(entry.path());
153 }
154 }
155 }
156
157 pub fn object_path(&self, oid: Oid) -> PathBuf {
163 if oid == Oid::EMPTY {
164 return PathBuf::from(NULL_DEVICE);
165 }
166 let hex = oid.to_string();
167 self.root
168 .join("objects")
169 .join(&hex[0..2])
170 .join(&hex[2..4])
171 .join(&hex)
172 }
173
174 pub fn contains(&self, oid: Oid) -> bool {
178 if oid == Oid::EMPTY {
179 return true;
180 }
181 if self.object_path(oid).is_file() {
182 return true;
183 }
184 self.materialize_from_reference(oid, None)
185 }
186
187 pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
192 if oid == Oid::EMPTY {
193 return size == 0;
194 }
195 let local = std::fs::metadata(self.object_path(oid))
196 .map(|m| m.is_file() && m.len() == size)
197 .unwrap_or(false);
198 if local {
199 return true;
200 }
201 self.materialize_from_reference(oid, Some(size))
202 }
203
204 fn materialize_from_reference(&self, oid: Oid, size: Option<u64>) -> bool {
209 if self.references.is_empty() {
210 return false;
211 }
212 let hex = oid.to_string();
213 for refdir in &self.references {
214 let src = refdir.join(&hex[0..2]).join(&hex[2..4]).join(&hex);
215 let Ok(meta) = std::fs::metadata(&src) else {
216 continue;
217 };
218 if !meta.is_file() {
219 continue;
220 }
221 if let Some(want) = size
222 && meta.len() != want
223 {
224 continue;
225 }
226 let dest = self.object_path(oid);
227 if let Some(parent) = dest.parent() {
228 let _ = std::fs::create_dir_all(parent);
229 }
230 if std::fs::hard_link(&src, &dest).is_ok() || std::fs::copy(&src, &dest).is_ok() {
234 return true;
235 }
236 }
237 false
238 }
239
240 pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
249 let objects_dir = self.root.join("objects");
250 if !objects_dir.exists() {
251 return Ok(Vec::new());
252 }
253 let mut out = Vec::new();
254 for aa in std::fs::read_dir(&objects_dir)? {
255 let aa = aa?;
256 if !aa.file_type()?.is_dir() {
257 continue;
258 }
259 for bb in std::fs::read_dir(aa.path())? {
260 let bb = bb?;
261 if !bb.file_type()?.is_dir() {
262 continue;
263 }
264 for entry in std::fs::read_dir(bb.path())? {
265 let entry = entry?;
266 let name = entry.file_name();
267 let Some(name_str) = name.to_str() else {
268 continue;
269 };
270 let Ok(oid) = name_str.parse::<Oid>() else {
271 continue;
272 };
273 let meta = entry.metadata()?;
274 if !meta.is_file() {
275 continue;
276 }
277 out.push((oid, meta.len()));
278 }
279 }
280 }
281 Ok(out)
282 }
283
284 pub fn open(&self, oid: Oid) -> io::Result<File> {
288 let path = self.object_path(oid);
289 match File::open(&path) {
290 Ok(f) => Ok(f),
291 Err(e) if e.kind() == io::ErrorKind::NotFound && oid != Oid::EMPTY => {
292 if self.materialize_from_reference(oid, None) {
293 File::open(&path)
294 } else {
295 Err(e)
296 }
297 }
298 Err(e) => Err(e),
299 }
300 }
301
302 pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
315 let (oid, size, tmp) = self.stream_to_tmp(src)?;
316 if oid != Oid::EMPTY && self.object_path(oid).is_file() {
317 drop(tmp);
318 return Ok((oid, size));
319 }
320 self.commit(oid, tmp)?;
321 Ok((oid, size))
322 }
323
324 pub fn insert_verified(&self, expected: Oid, src: &mut impl Read) -> Result<u64, StoreError> {
331 let (actual, size, tmp) = self.stream_to_tmp(src)?;
332 if actual != expected {
333 return Err(StoreError::HashMismatch { expected, actual });
335 }
336 self.commit(actual, tmp)?;
337 Ok(size)
338 }
339
340 fn stream_to_tmp(&self, src: &mut impl Read) -> io::Result<(Oid, u64, NamedTempFile)> {
341 std::fs::create_dir_all(self.tmp_dir())?;
342 let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
343 let mut hasher = Sha256::new();
344 let mut total: u64 = 0;
345 let mut buf = vec![0u8; COPY_BUFFER];
346 let file = tmp.as_file_mut();
347 loop {
348 let n = src.read(&mut buf)?;
349 if n == 0 {
350 break;
351 }
352 hasher.update(&buf[..n]);
353 file.write_all(&buf[..n])?;
354 total += n as u64;
355 }
356 file.flush()?;
357 let bytes: [u8; 32] = hasher.finalize().into();
358 Ok((Oid::from_bytes(bytes), total, tmp))
359 }
360
361 fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
362 if oid == Oid::EMPTY {
364 return Ok(());
365 }
366 let dest = self.object_path(oid);
367 if let Some(parent) = dest.parent() {
368 std::fs::create_dir_all(parent)?;
369 }
370 tmp.persist(&dest).map(|_| ()).map_err(|e| e.error)
376 }
377}
378
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// SHA-256 of the three bytes `abc`.
    const ABC_OID_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// A well-formed OID that none of the tests ever stores.
    const ABSENT_OID_HEX: &str = "0000000000000000000000000000000000000000000000000000000000000001";

    /// Parses a hex literal into an `Oid`, panicking on malformed input.
    fn parse_oid(hex: &str) -> Oid {
        hex.parse().unwrap()
    }

    fn abc_oid() -> Oid {
        parse_oid(ABC_OID_HEX)
    }

    /// A store rooted in a not-yet-created `lfs` directory inside a fresh
    /// temporary directory. The `TempDir` must be kept alive by the caller.
    fn fixture() -> (TempDir, Store) {
        let scratch = TempDir::new().unwrap();
        let lfs_root = scratch.path().join("lfs");
        (scratch, Store::new(lfs_root))
    }

    /// Reads the whole object `oid` out of `store`.
    fn slurp(store: &Store, oid: Oid) -> Vec<u8> {
        let mut bytes = Vec::new();
        store.open(oid).unwrap().read_to_end(&mut bytes).unwrap();
        bytes
    }

    #[test]
    fn object_path_is_sharded() {
        let (_guard, store) = fixture();
        let oid = parse_oid("4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393");
        let path = store.object_path(oid);
        let suffix = Path::new("objects")
            .join("4d")
            .join("7a")
            .join(oid.to_string());
        assert!(
            path.ends_with(&suffix),
            "{path:?} does not end with {suffix:?}"
        );
    }

    #[test]
    fn empty_oid_short_circuits() {
        let (_guard, store) = fixture();
        let null_device = PathBuf::from(NULL_DEVICE);
        assert_eq!(store.object_path(Oid::EMPTY), null_device);
        assert!(store.contains(Oid::EMPTY));
        assert!(store.contains_with_size(Oid::EMPTY, 0));
        assert!(!store.contains_with_size(Oid::EMPTY, 1));
        // Opening the empty object must work and read as zero bytes.
        assert!(slurp(&store, Oid::EMPTY).is_empty());
    }

    #[test]
    fn insert_round_trip() {
        let (_guard, store) = fixture();
        let payload = b"hello world!";
        let (oid, size) = store.insert(&mut payload.as_slice()).unwrap();
        assert_eq!(size, payload.len() as u64);
        assert!(store.contains(oid));
        assert!(store.contains_with_size(oid, size));
        assert_eq!(slurp(&store, oid), payload);
    }

    #[test]
    fn insert_computes_correct_sha256() {
        let (_guard, store) = fixture();
        let (computed, _size) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert_eq!(computed, abc_oid());
    }

    #[test]
    fn insert_empty_yields_empty_oid_and_no_object_file() {
        let (_guard, store) = fixture();
        let mut nothing: &[u8] = &[];
        let (oid, size) = store.insert(&mut nothing).unwrap();
        assert_eq!(oid, Oid::EMPTY);
        assert_eq!(size, 0);
        // The empty object is never written out.
        assert!(!store.root.join("objects").exists());
    }

    #[test]
    fn insert_idempotent() {
        let (_guard, store) = fixture();
        let (first, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        let (second, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert_eq!(first, second);
        assert!(store.contains(first));
    }

    #[test]
    fn insert_verified_succeeds_on_match() {
        let (_guard, store) = fixture();
        let expected = abc_oid();
        let written = store
            .insert_verified(expected, &mut b"abc".as_slice())
            .unwrap();
        assert_eq!(written, 3);
        assert!(store.contains(expected));
    }

    #[test]
    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
        let (_guard, store) = fixture();
        let wrong = parse_oid(ABSENT_OID_HEX);
        let outcome = store.insert_verified(wrong, &mut b"abc".as_slice());
        match outcome.unwrap_err() {
            StoreError::HashMismatch { expected, actual } => {
                assert_eq!(expected, wrong);
                assert_eq!(actual, abc_oid());
            }
            other => panic!("expected HashMismatch, got {other:?}"),
        }
        // Neither the claimed nor the real OID may have been stored.
        assert!(!store.contains(wrong));
        assert!(!store.contains(abc_oid()));
        // The staging file must be gone as well.
        let tmp_entries: Vec<std::fs::DirEntry> = std::fs::read_dir(store.tmp_dir())
            .unwrap()
            .map(|entry| entry.unwrap())
            .collect();
        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
    }

    #[test]
    fn open_missing_oid_is_not_found() {
        let (_guard, store) = fixture();
        let failure = store.open(parse_oid(ABSENT_OID_HEX)).unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn streaming_megabyte_input() {
        let (_guard, store) = fixture();
        // One MiB of non-repeating-looking bytes, far larger than the copy buffer.
        let payload: Vec<u8> = (0..1_048_576u32).map(|i| (i ^ (i >> 5)) as u8).collect();
        let (oid, size) = store.insert(&mut payload.as_slice()).unwrap();
        assert_eq!(size, payload.len() as u64);
        assert_eq!(slurp(&store, oid), payload);
    }

    #[test]
    fn each_object_returns_empty_when_no_objects_dir() {
        let (_guard, store) = fixture();
        let listed = store.each_object().unwrap();
        assert!(listed.is_empty());
    }

    #[test]
    fn each_object_finds_inserted_objects_with_correct_size() {
        let (_guard, store) = fixture();
        let (small, _) = store.insert(&mut b"hello".as_slice()).unwrap();
        let (large, _) = store.insert(&mut b"world!!!".as_slice()).unwrap();
        let mut listed = store.each_object().unwrap();
        // Directory order is unspecified; sort by size for a stable comparison.
        listed.sort_by_key(|&(_, len)| len);
        assert_eq!(listed, vec![(small, 5), (large, 8)]);
    }

    #[test]
    fn each_object_skips_unrecognized_filenames() {
        let (_guard, store) = fixture();
        let (oid, _) = store.insert(&mut b"hi".as_slice()).unwrap();
        let hex = oid.to_string();
        let shard_dir = store
            .root()
            .join("objects")
            .join(&hex[0..2])
            .join(&hex[2..4]);
        std::fs::write(shard_dir.join("README"), b"ignored").unwrap();
        let listed = store.each_object().unwrap();
        assert_eq!(listed.len(), 1);
        assert_eq!(listed[0].0, oid);
    }

    #[test]
    fn insert_verified_overwrites_corrupt_existing_file() {
        let (_guard, store) = fixture();
        // Plant a zero-length file where the real object belongs.
        let slot = store.object_path(abc_oid());
        std::fs::create_dir_all(slot.parent().unwrap()).unwrap();
        std::fs::write(&slot, b"").unwrap();
        assert_eq!(std::fs::metadata(&slot).unwrap().len(), 0);

        store
            .insert_verified(abc_oid(), &mut b"abc".as_slice())
            .unwrap();
        assert_eq!(std::fs::read(&slot).unwrap(), b"abc");
    }

    #[test]
    fn insert_creates_dirs_on_demand() {
        let (_guard, store) = fixture();
        assert!(!store.root.exists());
        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert!(store.tmp_dir().is_dir());
        assert!(store.object_path(oid).is_file());
    }

    /// Two stores in one temporary directory: `source` already holds `abc`,
    /// and `shared` is empty but lists `source`'s objects directory as a
    /// reference. Returns the OID of `abc` as the last element.
    fn shared_fixture() -> (TempDir, Store, Store, Oid) {
        let scratch = TempDir::new().unwrap();
        let source = Store::new(scratch.path().join("src/lfs"));
        let (oid, _) = source.insert(&mut b"abc".as_slice()).unwrap();
        let reference_dir = source.root().join("objects");
        let shared =
            Store::new(scratch.path().join("shared/lfs")).with_references([reference_dir]);
        (scratch, source, shared, oid)
    }

    #[test]
    fn contains_finds_object_via_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        assert!(shared.contains(oid));
        // The lookup must have materialized the object locally.
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn open_faults_in_from_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        assert_eq!(slurp(&shared, oid), b"abc");
        // Opening must have materialized the object locally.
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn contains_with_size_rejects_size_mismatch_in_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        assert!(!shared.contains_with_size(oid, 4));
        // A rejected candidate must not be pulled in.
        assert!(!shared.object_path(oid).is_file());
    }

    #[test]
    fn store_without_references_misses() {
        let (_guard, store) = fixture();
        let oid = abc_oid();
        assert!(!store.contains(oid));
        let failure = store.open(oid).unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::NotFound);
    }
}