1use std::fs::File;
18use std::io::{self, Read, Write};
19use std::path::{Path, PathBuf};
20
21use git_lfs_pointer::Oid;
22use sha2::{Digest, Sha256};
23use tempfile::NamedTempFile;
24
/// Path of the platform's null device: `NUL` on Windows, `/dev/null`
/// everywhere else.
#[cfg(windows)]
const NULL_DEVICE: &str = "NUL";
#[cfg(not(windows))]
const NULL_DEVICE: &str = "/dev/null";

/// Size in bytes (64 KiB) of the scratch buffer used for chunked copies.
const COPY_BUFFER: usize = 1 << 16;
29
/// Handle to an on-disk, content-addressed object store.
///
/// The handle only holds paths, so cloning it never touches the filesystem.
#[derive(Clone, Debug)]
pub struct Store {
    /// Base directory of the store; objects and scratch files live in
    /// sub-directories of it.
    root: PathBuf,
    /// Extra object directories that may be consulted when an object is not
    /// present under `root`.
    references: Vec<PathBuf>,
}
45
46#[derive(Debug, thiserror::Error)]
47pub enum StoreError {
48 #[error(transparent)]
49 Io(#[from] io::Error),
50 #[error("hash mismatch: expected {expected}, got {actual}")]
51 HashMismatch { expected: Oid, actual: Oid },
52}
53
54impl Store {
55 pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
58 Self {
59 root: lfs_dir.into(),
60 references: Vec::new(),
61 }
62 }
63
64 #[must_use]
73 pub fn with_references(mut self, refs: impl IntoIterator<Item = PathBuf>) -> Self {
74 self.references = refs.into_iter().collect();
75 self
76 }
77
78 pub fn root(&self) -> &Path {
80 &self.root
81 }
82
83 pub fn tmp_dir(&self) -> PathBuf {
85 self.root.join("tmp")
86 }
87
88 pub fn object_path(&self, oid: Oid) -> PathBuf {
94 if oid == Oid::EMPTY {
95 return PathBuf::from(NULL_DEVICE);
96 }
97 let hex = oid.to_string();
98 self.root
99 .join("objects")
100 .join(&hex[0..2])
101 .join(&hex[2..4])
102 .join(&hex)
103 }
104
105 pub fn contains(&self, oid: Oid) -> bool {
109 if oid == Oid::EMPTY {
110 return true;
111 }
112 if self.object_path(oid).is_file() {
113 return true;
114 }
115 self.materialize_from_reference(oid, None)
116 }
117
118 pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
123 if oid == Oid::EMPTY {
124 return size == 0;
125 }
126 let local = std::fs::metadata(self.object_path(oid))
127 .map(|m| m.is_file() && m.len() == size)
128 .unwrap_or(false);
129 if local {
130 return true;
131 }
132 self.materialize_from_reference(oid, Some(size))
133 }
134
135 fn materialize_from_reference(&self, oid: Oid, size: Option<u64>) -> bool {
140 if self.references.is_empty() {
141 return false;
142 }
143 let hex = oid.to_string();
144 for refdir in &self.references {
145 let src = refdir.join(&hex[0..2]).join(&hex[2..4]).join(&hex);
146 let Ok(meta) = std::fs::metadata(&src) else {
147 continue;
148 };
149 if !meta.is_file() {
150 continue;
151 }
152 if let Some(want) = size
153 && meta.len() != want
154 {
155 continue;
156 }
157 let dest = self.object_path(oid);
158 if let Some(parent) = dest.parent() {
159 let _ = std::fs::create_dir_all(parent);
160 }
161 if std::fs::hard_link(&src, &dest).is_ok() || std::fs::copy(&src, &dest).is_ok() {
165 return true;
166 }
167 }
168 false
169 }
170
171 pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
180 let objects_dir = self.root.join("objects");
181 if !objects_dir.exists() {
182 return Ok(Vec::new());
183 }
184 let mut out = Vec::new();
185 for aa in std::fs::read_dir(&objects_dir)? {
186 let aa = aa?;
187 if !aa.file_type()?.is_dir() {
188 continue;
189 }
190 for bb in std::fs::read_dir(aa.path())? {
191 let bb = bb?;
192 if !bb.file_type()?.is_dir() {
193 continue;
194 }
195 for entry in std::fs::read_dir(bb.path())? {
196 let entry = entry?;
197 let name = entry.file_name();
198 let Some(name_str) = name.to_str() else {
199 continue;
200 };
201 let Ok(oid) = name_str.parse::<Oid>() else {
202 continue;
203 };
204 let meta = entry.metadata()?;
205 if !meta.is_file() {
206 continue;
207 }
208 out.push((oid, meta.len()));
209 }
210 }
211 }
212 Ok(out)
213 }
214
215 pub fn open(&self, oid: Oid) -> io::Result<File> {
219 let path = self.object_path(oid);
220 match File::open(&path) {
221 Ok(f) => Ok(f),
222 Err(e) if e.kind() == io::ErrorKind::NotFound && oid != Oid::EMPTY => {
223 if self.materialize_from_reference(oid, None) {
224 File::open(&path)
225 } else {
226 Err(e)
227 }
228 }
229 Err(e) => Err(e),
230 }
231 }
232
233 pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
239 let (oid, size, tmp) = self.stream_to_tmp(src)?;
240 self.commit(oid, tmp)?;
241 Ok((oid, size))
242 }
243
244 pub fn insert_verified(&self, expected: Oid, src: &mut impl Read) -> Result<u64, StoreError> {
251 let (actual, size, tmp) = self.stream_to_tmp(src)?;
252 if actual != expected {
253 return Err(StoreError::HashMismatch { expected, actual });
255 }
256 self.commit(actual, tmp)?;
257 Ok(size)
258 }
259
260 fn stream_to_tmp(&self, src: &mut impl Read) -> io::Result<(Oid, u64, NamedTempFile)> {
261 std::fs::create_dir_all(self.tmp_dir())?;
262 let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
263 let mut hasher = Sha256::new();
264 let mut total: u64 = 0;
265 let mut buf = vec![0u8; COPY_BUFFER];
266 let file = tmp.as_file_mut();
267 loop {
268 let n = src.read(&mut buf)?;
269 if n == 0 {
270 break;
271 }
272 hasher.update(&buf[..n]);
273 file.write_all(&buf[..n])?;
274 total += n as u64;
275 }
276 file.flush()?;
277 let bytes: [u8; 32] = hasher.finalize().into();
278 Ok((Oid::from_bytes(bytes), total, tmp))
279 }
280
281 fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
282 if oid == Oid::EMPTY {
284 return Ok(());
285 }
286 let dest = self.object_path(oid);
287 if let Some(parent) = dest.parent() {
288 std::fs::create_dir_all(parent)?;
289 }
290 tmp.persist(&dest).map(|_| ()).map_err(|e| e.error)
296 }
297}
298
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// SHA-256 of the three bytes `abc`.
    const ABC_OID_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// An id that no test ever inserts content for.
    const UNKNOWN_OID_HEX: &str =
        "0000000000000000000000000000000000000000000000000000000000000001";

    /// Parses a hex object id, panicking on malformed input.
    fn oid_from(hex: &str) -> Oid {
        hex.parse().unwrap()
    }

    fn abc_oid() -> Oid {
        oid_from(ABC_OID_HEX)
    }

    /// A store under a fresh temp directory. The `TempDir` is returned so the
    /// directory outlives the test body.
    fn fixture() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let lfs_root = dir.path().join("lfs");
        (dir, Store::new(lfs_root))
    }

    /// Inserts `bytes` and returns the resulting `(oid, size)`.
    fn put(store: &Store, mut bytes: &[u8]) -> (Oid, u64) {
        store.insert(&mut bytes).unwrap()
    }

    /// Opens `oid` and reads it to the end.
    fn read_all(store: &Store, oid: Oid) -> Vec<u8> {
        let mut file = store.open(oid).unwrap();
        let mut bytes = Vec::new();
        file.read_to_end(&mut bytes).unwrap();
        bytes
    }

    #[test]
    fn object_path_is_sharded() {
        let (_tmp, store) = fixture();
        let oid = oid_from("4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393");
        let path = store.object_path(oid);
        let suffix = Path::new("objects")
            .join("4d")
            .join("7a")
            .join(oid.to_string());
        assert!(
            path.ends_with(&suffix),
            "{path:?} does not end with {suffix:?}"
        );
    }

    #[test]
    fn empty_oid_short_circuits() {
        let (_tmp, store) = fixture();
        assert_eq!(store.object_path(Oid::EMPTY), PathBuf::from(NULL_DEVICE));
        assert!(store.contains(Oid::EMPTY));
        assert!(store.contains_with_size(Oid::EMPTY, 0));
        assert!(!store.contains_with_size(Oid::EMPTY, 1));
        // Opening the empty object reads the null device, i.e. nothing.
        assert!(read_all(&store, Oid::EMPTY).is_empty());
    }

    #[test]
    fn insert_round_trip() {
        let (_tmp, store) = fixture();
        let content: &[u8] = b"hello world!";
        let (oid, size) = put(&store, content);
        assert_eq!(size, content.len() as u64);
        assert!(store.contains(oid));
        assert!(store.contains_with_size(oid, size));
        assert_eq!(read_all(&store, oid), content);
    }

    #[test]
    fn insert_computes_correct_sha256() {
        let (_tmp, store) = fixture();
        let (oid, _) = put(&store, b"abc");
        assert_eq!(oid, abc_oid());
    }

    #[test]
    fn insert_empty_yields_empty_oid_and_no_object_file() {
        let (_tmp, store) = fixture();
        let (oid, size) = put(&store, &[]);
        assert_eq!(oid, Oid::EMPTY);
        assert_eq!(size, 0);
        // The empty object must not cause any object directory to appear.
        assert!(!store.root.join("objects").exists());
    }

    #[test]
    fn insert_idempotent() {
        let (_tmp, store) = fixture();
        let (first, _) = put(&store, b"abc");
        let (second, _) = put(&store, b"abc");
        assert_eq!(first, second);
        assert!(store.contains(first));
    }

    #[test]
    fn insert_verified_succeeds_on_match() {
        let (_tmp, store) = fixture();
        let mut input: &[u8] = b"abc";
        let size = store.insert_verified(abc_oid(), &mut input).unwrap();
        assert_eq!(size, 3);
        assert!(store.contains(abc_oid()));
    }

    #[test]
    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
        let (_tmp, store) = fixture();
        let wrong = oid_from(UNKNOWN_OID_HEX);
        let mut input: &[u8] = b"abc";
        let err = store.insert_verified(wrong, &mut input).unwrap_err();
        match &err {
            StoreError::HashMismatch { expected, actual } => {
                assert_eq!(*expected, wrong);
                assert_eq!(*actual, abc_oid());
            }
            other => panic!("expected HashMismatch, got {other:?}"),
        }
        // Neither the requested id nor the real one may have been stored.
        assert!(!store.contains(wrong));
        assert!(!store.contains(abc_oid()));
        // The staged temp file must have been cleaned up as well.
        let tmp_entries = std::fs::read_dir(store.tmp_dir())
            .unwrap()
            .map(|entry| entry.unwrap())
            .collect::<Vec<_>>();
        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
    }

    #[test]
    fn open_missing_oid_is_not_found() {
        let (_tmp, store) = fixture();
        let missing = oid_from(UNKNOWN_OID_HEX);
        let err = store.open(missing).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn streaming_megabyte_input() {
        let (_tmp, store) = fixture();
        // One MiB of non-trivial bytes, so the copy loop runs many times.
        let content: Vec<u8> = (0u32..1 << 20).map(|i| (i ^ (i >> 5)) as u8).collect();
        let (oid, size) = put(&store, &content);
        assert_eq!(size, content.len() as u64);
        assert_eq!(read_all(&store, oid), content);
    }

    #[test]
    fn each_object_returns_empty_when_no_objects_dir() {
        let (_tmp, store) = fixture();
        let listed = store.each_object().unwrap();
        assert!(listed.is_empty());
    }

    #[test]
    fn each_object_finds_inserted_objects_with_correct_size() {
        let (_tmp, store) = fixture();
        let (oid_a, _) = put(&store, b"hello");
        let (oid_b, _) = put(&store, b"world!!!");
        let mut listed = store.each_object().unwrap();
        // Directory order is unspecified; order by size for a stable comparison.
        listed.sort_by_key(|&(_, size)| size);
        assert_eq!(listed, vec![(oid_a, 5), (oid_b, 8)]);
    }

    #[test]
    fn each_object_skips_unrecognized_filenames() {
        let (_tmp, store) = fixture();
        let (oid, _) = put(&store, b"hi");
        let hex = oid.to_string();
        let shard = store
            .root()
            .join("objects")
            .join(&hex[0..2])
            .join(&hex[2..4]);
        std::fs::write(shard.join("README"), b"ignored").unwrap();
        let listed: Vec<Oid> = store
            .each_object()
            .unwrap()
            .into_iter()
            .map(|(found, _)| found)
            .collect();
        assert_eq!(listed, vec![oid]);
    }

    #[test]
    fn insert_verified_overwrites_corrupt_existing_file() {
        let (_tmp, store) = fixture();
        // Plant a zero-length file where the real object belongs.
        let dest = store.object_path(abc_oid());
        std::fs::create_dir_all(dest.parent().unwrap()).unwrap();
        drop(File::create(&dest).unwrap());
        assert_eq!(std::fs::metadata(&dest).unwrap().len(), 0);

        let mut input: &[u8] = b"abc";
        store.insert_verified(abc_oid(), &mut input).unwrap();
        assert_eq!(std::fs::read(&dest).unwrap(), b"abc");
    }

    #[test]
    fn insert_creates_dirs_on_demand() {
        let (_tmp, store) = fixture();
        assert!(!store.root.exists());
        let (oid, _) = put(&store, b"abc");
        assert!(store.tmp_dir().is_dir());
        assert!(store.object_path(oid).is_file());
    }

    /// Two stores in one temp directory: `source` holds the `abc` object and
    /// `shared` lists `source`'s objects directory as a reference.
    fn shared_fixture() -> (TempDir, Store, Store, Oid) {
        let dir = TempDir::new().unwrap();
        let source = Store::new(dir.path().join("src/lfs"));
        let (oid, _) = put(&source, b"abc");
        let reference = source.root().join("objects");
        let shared = Store::new(dir.path().join("shared/lfs")).with_references([reference]);
        (dir, source, shared, oid)
    }

    #[test]
    fn contains_finds_object_via_reference() {
        let (_tmp, _source, shared, oid) = shared_fixture();
        assert!(shared.contains(oid));
        // The lookup must have materialized the object locally.
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn open_faults_in_from_reference() {
        let (_tmp, _source, shared, oid) = shared_fixture();
        assert_eq!(read_all(&shared, oid), b"abc");
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn contains_with_size_rejects_size_mismatch_in_reference() {
        let (_tmp, _source, shared, oid) = shared_fixture();
        assert!(!shared.contains_with_size(oid, 4));
        // A rejected candidate must not have been pulled in.
        assert!(!shared.object_path(oid).is_file());
    }

    #[test]
    fn store_without_references_misses() {
        let (_tmp, store) = fixture();
        let oid = abc_oid();
        assert!(!store.contains(oid));
        let err = store.open(oid).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }
}