1use std::fs::File;
18use std::io::{self, Read, Write};
19use std::path::{Path, PathBuf};
20
21use git_lfs_pointer::Oid;
22use sha2::{Digest, Sha256};
23use tempfile::NamedTempFile;
24
/// Path of the platform's null device; reading from it yields no bytes.
#[cfg(windows)]
const NULL_DEVICE: &str = "NUL";
/// Path of the platform's null device; reading from it yields no bytes.
#[cfg(not(windows))]
const NULL_DEVICE: &str = "/dev/null";
27
/// Size in bytes (64 KiB) of the scratch buffer used when streaming data.
const COPY_BUFFER: usize = 64 << 10;
29
/// Handle to an on-disk, content-addressed object store.
///
/// The handle only holds paths; cloning it does not copy anything on disk.
#[derive(Debug, Clone)]
pub struct Store {
    /// Base directory of the store. Objects are kept under `<root>/objects`
    /// and temporary files under `<root>/tmp`.
    root: PathBuf,
    /// Extra object directories that are probed when an object is missing
    /// from the local store. Empty unless set via `with_references`.
    references: Vec<PathBuf>,
}
45
/// Errors returned by the fallible [`Store`] insert operations.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// An underlying I/O operation failed; displayed exactly as the wrapped
    /// [`io::Error`] and convertible from it with `?`.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// The hash computed over the streamed content (`actual`) differs from
    /// the object id the caller asked for (`expected`).
    #[error("hash mismatch: expected {expected}, got {actual}")]
    HashMismatch { expected: Oid, actual: Oid },
}
53
54impl Store {
55 pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
58 Self {
59 root: lfs_dir.into(),
60 references: Vec::new(),
61 }
62 }
63
64 #[must_use]
73 pub fn with_references(mut self, refs: impl IntoIterator<Item = PathBuf>) -> Self {
74 self.references = refs.into_iter().collect();
75 self
76 }
77
78 pub fn root(&self) -> &Path {
80 &self.root
81 }
82
83 pub fn tmp_dir(&self) -> PathBuf {
85 self.root.join("tmp")
86 }
87
88 pub fn cleanup_tmp_objects(&self) {
98 let dir = self.root.join("tmp").join("objects");
99 let Ok(entries) = std::fs::read_dir(&dir) else {
100 return;
101 };
102 for entry in entries.flatten() {
103 let name = entry.file_name();
104 let name_str = name.to_string_lossy();
105 if name_str.len() < 64 {
106 continue;
107 }
108 let oid_str = &name_str[..64];
115 let object_path = self
116 .root
117 .join("objects")
118 .join(&oid_str[0..2])
119 .join(&oid_str[2..4])
120 .join(oid_str);
121 if object_path.is_file() {
122 let _ = std::fs::remove_file(entry.path());
123 }
124 }
125 }
126
127 pub fn object_path(&self, oid: Oid) -> PathBuf {
133 if oid == Oid::EMPTY {
134 return PathBuf::from(NULL_DEVICE);
135 }
136 let hex = oid.to_string();
137 self.root
138 .join("objects")
139 .join(&hex[0..2])
140 .join(&hex[2..4])
141 .join(&hex)
142 }
143
144 pub fn contains(&self, oid: Oid) -> bool {
148 if oid == Oid::EMPTY {
149 return true;
150 }
151 if self.object_path(oid).is_file() {
152 return true;
153 }
154 self.materialize_from_reference(oid, None)
155 }
156
157 pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
162 if oid == Oid::EMPTY {
163 return size == 0;
164 }
165 let local = std::fs::metadata(self.object_path(oid))
166 .map(|m| m.is_file() && m.len() == size)
167 .unwrap_or(false);
168 if local {
169 return true;
170 }
171 self.materialize_from_reference(oid, Some(size))
172 }
173
174 fn materialize_from_reference(&self, oid: Oid, size: Option<u64>) -> bool {
179 if self.references.is_empty() {
180 return false;
181 }
182 let hex = oid.to_string();
183 for refdir in &self.references {
184 let src = refdir.join(&hex[0..2]).join(&hex[2..4]).join(&hex);
185 let Ok(meta) = std::fs::metadata(&src) else {
186 continue;
187 };
188 if !meta.is_file() {
189 continue;
190 }
191 if let Some(want) = size
192 && meta.len() != want
193 {
194 continue;
195 }
196 let dest = self.object_path(oid);
197 if let Some(parent) = dest.parent() {
198 let _ = std::fs::create_dir_all(parent);
199 }
200 if std::fs::hard_link(&src, &dest).is_ok() || std::fs::copy(&src, &dest).is_ok() {
204 return true;
205 }
206 }
207 false
208 }
209
210 pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
219 let objects_dir = self.root.join("objects");
220 if !objects_dir.exists() {
221 return Ok(Vec::new());
222 }
223 let mut out = Vec::new();
224 for aa in std::fs::read_dir(&objects_dir)? {
225 let aa = aa?;
226 if !aa.file_type()?.is_dir() {
227 continue;
228 }
229 for bb in std::fs::read_dir(aa.path())? {
230 let bb = bb?;
231 if !bb.file_type()?.is_dir() {
232 continue;
233 }
234 for entry in std::fs::read_dir(bb.path())? {
235 let entry = entry?;
236 let name = entry.file_name();
237 let Some(name_str) = name.to_str() else {
238 continue;
239 };
240 let Ok(oid) = name_str.parse::<Oid>() else {
241 continue;
242 };
243 let meta = entry.metadata()?;
244 if !meta.is_file() {
245 continue;
246 }
247 out.push((oid, meta.len()));
248 }
249 }
250 }
251 Ok(out)
252 }
253
254 pub fn open(&self, oid: Oid) -> io::Result<File> {
258 let path = self.object_path(oid);
259 match File::open(&path) {
260 Ok(f) => Ok(f),
261 Err(e) if e.kind() == io::ErrorKind::NotFound && oid != Oid::EMPTY => {
262 if self.materialize_from_reference(oid, None) {
263 File::open(&path)
264 } else {
265 Err(e)
266 }
267 }
268 Err(e) => Err(e),
269 }
270 }
271
272 pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
285 let (oid, size, tmp) = self.stream_to_tmp(src)?;
286 if oid != Oid::EMPTY && self.object_path(oid).is_file() {
287 drop(tmp);
288 return Ok((oid, size));
289 }
290 self.commit(oid, tmp)?;
291 Ok((oid, size))
292 }
293
294 pub fn insert_verified(&self, expected: Oid, src: &mut impl Read) -> Result<u64, StoreError> {
301 let (actual, size, tmp) = self.stream_to_tmp(src)?;
302 if actual != expected {
303 return Err(StoreError::HashMismatch { expected, actual });
305 }
306 self.commit(actual, tmp)?;
307 Ok(size)
308 }
309
310 fn stream_to_tmp(&self, src: &mut impl Read) -> io::Result<(Oid, u64, NamedTempFile)> {
311 std::fs::create_dir_all(self.tmp_dir())?;
312 let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
313 let mut hasher = Sha256::new();
314 let mut total: u64 = 0;
315 let mut buf = vec![0u8; COPY_BUFFER];
316 let file = tmp.as_file_mut();
317 loop {
318 let n = src.read(&mut buf)?;
319 if n == 0 {
320 break;
321 }
322 hasher.update(&buf[..n]);
323 file.write_all(&buf[..n])?;
324 total += n as u64;
325 }
326 file.flush()?;
327 let bytes: [u8; 32] = hasher.finalize().into();
328 Ok((Oid::from_bytes(bytes), total, tmp))
329 }
330
331 fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
332 if oid == Oid::EMPTY {
334 return Ok(());
335 }
336 let dest = self.object_path(oid);
337 if let Some(parent) = dest.parent() {
338 std::fs::create_dir_all(parent)?;
339 }
340 tmp.persist(&dest).map(|_| ()).map_err(|e| e.error)
346 }
347}
348
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// SHA-256 of the three bytes `abc`.
    const ABC_OID_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// Builds a store under a fresh temporary directory. The `TempDir` guard
    /// is returned so the directory outlives the test body.
    fn fixture() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let lfs_root = dir.path().join("lfs");
        (dir, Store::new(lfs_root))
    }

    fn abc_oid() -> Oid {
        ABC_OID_HEX.parse::<Oid>().unwrap()
    }

    /// Opens `oid` in `store` and reads it to the end.
    fn read_all(store: &Store, oid: Oid) -> Vec<u8> {
        let mut file = store.open(oid).unwrap();
        let mut bytes = Vec::new();
        file.read_to_end(&mut bytes).unwrap();
        bytes
    }

    /// Source store holding `abc`, plus a second, empty store that lists the
    /// source's `objects` directory as a reference.
    fn shared_fixture() -> (TempDir, Store, Store, Oid) {
        let dir = TempDir::new().unwrap();
        let source = Store::new(dir.path().join("src/lfs"));
        let (oid, _) = source.insert(&mut b"abc".as_slice()).unwrap();
        let reference_dir = source.root().join("objects");
        let shared = Store::new(dir.path().join("shared/lfs")).with_references([reference_dir]);
        (dir, source, shared, oid)
    }

    #[test]
    fn object_path_is_sharded() {
        let (_guard, store) = fixture();
        let oid = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393"
            .parse::<Oid>()
            .unwrap();
        let suffix: PathBuf = ["objects", "4d", "7a", &oid.to_string()].iter().collect();
        let path = store.object_path(oid);
        assert!(
            path.ends_with(&suffix),
            "{path:?} does not end with {suffix:?}"
        );
    }

    #[test]
    fn empty_oid_short_circuits() {
        let (_guard, store) = fixture();
        assert_eq!(store.object_path(Oid::EMPTY), PathBuf::from(NULL_DEVICE));
        assert!(store.contains(Oid::EMPTY));
        assert!(store.contains_with_size(Oid::EMPTY, 0));
        assert!(!store.contains_with_size(Oid::EMPTY, 1));
        // Opening the empty object must succeed and read back nothing.
        let bytes = read_all(&store, Oid::EMPTY);
        assert!(bytes.is_empty());
    }

    #[test]
    fn insert_round_trip() {
        let (_guard, store) = fixture();
        let content: &[u8] = b"hello world!";
        let mut reader = content;
        let (oid, size) = store.insert(&mut reader).unwrap();
        assert_eq!(size, content.len() as u64);
        assert!(store.contains(oid));
        assert!(store.contains_with_size(oid, size));
        assert_eq!(read_all(&store, oid), content);
    }

    #[test]
    fn insert_computes_correct_sha256() {
        let (_guard, store) = fixture();
        let mut reader: &[u8] = b"abc";
        let (oid, _) = store.insert(&mut reader).unwrap();
        assert_eq!(oid, abc_oid());
    }

    #[test]
    fn insert_empty_yields_empty_oid_and_no_object_file() {
        let (_guard, store) = fixture();
        let mut nothing: &[u8] = &[];
        let (oid, size) = store.insert(&mut nothing).unwrap();
        assert_eq!(oid, Oid::EMPTY);
        assert_eq!(size, 0);
        // The empty object is never written, so no objects tree may appear.
        let objects_dir = store.root().join("objects");
        assert!(!objects_dir.exists());
    }

    #[test]
    fn insert_idempotent() {
        let (_guard, store) = fixture();
        let (first, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        let (second, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert_eq!(first, second);
        assert!(store.contains(first));
    }

    #[test]
    fn insert_verified_succeeds_on_match() {
        let (_guard, store) = fixture();
        let mut reader: &[u8] = b"abc";
        let size = store.insert_verified(abc_oid(), &mut reader).unwrap();
        assert_eq!(size, 3);
        assert!(store.contains(abc_oid()));
    }

    #[test]
    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
        let (_guard, store) = fixture();
        let wrong = "0000000000000000000000000000000000000000000000000000000000000001"
            .parse::<Oid>()
            .unwrap();
        let mut reader: &[u8] = b"abc";
        let err = store.insert_verified(wrong, &mut reader).unwrap_err();
        let (expected, actual) = match err {
            StoreError::HashMismatch { expected, actual } => (expected, actual),
            other => panic!("expected HashMismatch, got {other:?}"),
        };
        assert_eq!(expected, wrong);
        assert_eq!(actual, abc_oid());
        // Neither the requested nor the actual id may have been stored.
        assert!(!store.contains(wrong));
        assert!(!store.contains(abc_oid()));
        // The temporary file must be gone as well.
        let tmp_entries = std::fs::read_dir(store.tmp_dir())
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
    }

    #[test]
    fn open_missing_oid_is_not_found() {
        let (_guard, store) = fixture();
        let missing = "0000000000000000000000000000000000000000000000000000000000000001"
            .parse::<Oid>()
            .unwrap();
        let err = store.open(missing).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn streaming_megabyte_input() {
        let (_guard, store) = fixture();
        // 1 MiB of non-repeating-looking bytes, many times the copy buffer.
        let content: Vec<u8> = (0..1_048_576u32).map(|i| (i ^ (i >> 5)) as u8).collect();
        let mut reader = content.as_slice();
        let (oid, size) = store.insert(&mut reader).unwrap();
        assert_eq!(size, content.len() as u64);
        assert_eq!(read_all(&store, oid), content);
    }

    #[test]
    fn each_object_returns_empty_when_no_objects_dir() {
        let (_guard, store) = fixture();
        let listed = store.each_object().unwrap();
        assert!(listed.is_empty());
    }

    #[test]
    fn each_object_finds_inserted_objects_with_correct_size() {
        let (_guard, store) = fixture();
        let (oid_a, _) = store.insert(&mut b"hello".as_slice()).unwrap();
        let (oid_b, _) = store.insert(&mut b"world!!!".as_slice()).unwrap();
        let mut got = store.each_object().unwrap();
        // Listing order is unspecified; order by size for a stable compare.
        got.sort_by_key(|&(_, size)| size);
        assert_eq!(got, vec![(oid_a, 5), (oid_b, 8)]);
    }

    #[test]
    fn each_object_skips_unrecognized_filenames() {
        let (_guard, store) = fixture();
        let (oid, _) = store.insert(&mut b"hi".as_slice()).unwrap();
        let hex = oid.to_string();
        let shard = store
            .root()
            .join("objects")
            .join(&hex[0..2])
            .join(&hex[2..4]);
        std::fs::write(shard.join("README"), b"ignored").unwrap();
        let got = store.each_object().unwrap();
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].0, oid);
    }

    #[test]
    fn insert_verified_overwrites_corrupt_existing_file() {
        let (_guard, store) = fixture();
        // Plant a zero-length file where the object belongs.
        let dest = store.object_path(abc_oid());
        std::fs::create_dir_all(dest.parent().unwrap()).unwrap();
        File::create(&dest).unwrap();
        assert_eq!(std::fs::metadata(&dest).unwrap().len(), 0);

        let mut reader: &[u8] = b"abc";
        store.insert_verified(abc_oid(), &mut reader).unwrap();
        assert_eq!(std::fs::read(&dest).unwrap(), b"abc");
    }

    #[test]
    fn insert_creates_dirs_on_demand() {
        let (_guard, store) = fixture();
        assert!(!store.root().exists());
        let mut reader: &[u8] = b"abc";
        let (oid, _) = store.insert(&mut reader).unwrap();
        assert!(store.tmp_dir().is_dir());
        assert!(store.object_path(oid).is_file());
    }

    #[test]
    fn contains_finds_object_via_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        assert!(shared.contains(oid));
        // The lookup must have pulled the object into the local store.
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn open_faults_in_from_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        assert_eq!(read_all(&shared, oid), b"abc");
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn contains_with_size_rejects_size_mismatch_in_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        // The reference copy is 3 bytes, so asking for 4 must miss...
        assert!(!shared.contains_with_size(oid, 4));
        // ...and must not have materialised anything locally.
        assert!(!shared.object_path(oid).is_file());
    }

    #[test]
    fn store_without_references_misses() {
        let (_guard, store) = fixture();
        let oid = abc_oid();
        assert!(!store.contains(oid));
        let err = store.open(oid).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }
}