Skip to main content

git_lfs_store/
lib.rs

1//! Local content-addressable object store for git-lfs.
2//!
3//! Objects live under `<lfs_dir>/objects/aa/bb/aabbcc…` where `aabbcc…` is
4//! the SHA-256 hex of the content (sharded by the first two hex bytes — see
5//! `docs/spec.md`). Writes go through a tmp file in `<lfs_dir>/tmp/` and are
6//! atomically renamed into place once their hash is known.
7//!
8//! ```no_run
9//! use git_lfs_store::Store;
10//! let store = Store::new(".git/lfs");
11//! let mut input: &[u8] = b"hello world";
12//! let (oid, size) = store.insert(&mut input).unwrap();
13//! assert!(store.contains(oid));
14//! # let _ = size;
15//! ```
16
17use std::fs::File;
18use std::io::{self, Read, Write};
19use std::path::{Path, PathBuf};
20
21use git_lfs_pointer::Oid;
22use sha2::{Digest, Sha256};
23use tempfile::NamedTempFile;
24
/// Path of the platform's null device. `object_path` hands this back for
/// [`Oid::EMPTY`], so the empty object never needs a real file on disk.
#[cfg(windows)]
const NULL_DEVICE: &str = "NUL";
/// Path of the platform's null device. `object_path` hands this back for
/// [`Oid::EMPTY`], so the empty object never needs a real file on disk.
#[cfg(not(windows))]
const NULL_DEVICE: &str = "/dev/null";

/// Size in bytes (64 KiB) of the scratch buffer used when streaming
/// content into a temp file.
const COPY_BUFFER: usize = 1 << 16;
29
/// Handle to an on-disk LFS object store whose root is `<lfs_dir>`
/// (usually `.git/lfs`).
///
/// A store can carry a list of alternate object directories — for
/// example the LFS objects of the repository a `git clone --shared` was
/// made from. When a lookup misses locally, a hit in one of those
/// alternates is materialized into the local store on demand; see
/// [`Store::with_references`].
#[derive(Clone, Debug)]
pub struct Store {
    /// The LFS directory itself; `objects/` and `tmp/` live below it.
    root: PathBuf,
    /// Alternate `lfs/objects/` directories, one per
    /// `.git/objects/info/alternates` entry. On a local miss,
    /// [`Store::contains_with_size`] / [`Store::open`] probe these in
    /// order and hardlink (or copy) the first hit into `root`.
    references: Vec<PathBuf>,
}
45
46#[derive(Debug, thiserror::Error)]
47pub enum StoreError {
48    #[error(transparent)]
49    Io(#[from] io::Error),
50    #[error("hash mismatch: expected {expected}, got {actual}")]
51    HashMismatch { expected: Oid, actual: Oid },
52}
53
54impl Store {
55    /// Create a store rooted at the given LFS directory. The directory is not
56    /// created eagerly; subdirectories are created on demand as objects land.
57    pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
58        Self {
59            root: lfs_dir.into(),
60            references: Vec::new(),
61        }
62    }
63
64    /// Attach alternate `lfs/objects/` directories that the store may
65    /// hardlink-or-copy from when a local lookup misses. Used by
66    /// `git clone --shared` setups so the new repo can read the
67    /// source's existing LFS objects without re-downloading.
68    ///
69    /// Pass [`git_lfs_git::lfs_alternate_dirs`](https://docs.rs/git-lfs-git)
70    /// (`<git-dir>/objects/info/alternates` resolved to LFS-objects
71    /// dirs) at construction.
72    #[must_use]
73    pub fn with_references(mut self, refs: impl IntoIterator<Item = PathBuf>) -> Self {
74        self.references = refs.into_iter().collect();
75        self
76    }
77
78    /// Root LFS directory.
79    pub fn root(&self) -> &Path {
80        &self.root
81    }
82
83    /// Directory holding temp files for in-flight inserts.
84    pub fn tmp_dir(&self) -> PathBuf {
85        self.root.join("tmp")
86    }
87
88    /// Sweep `<root>/tmp/objects/` (upstream's path for in-flight
89    /// download temp files: `<oid>-<random>`) and remove any whose
90    /// leading 64-char OID is already complete in the store.
91    ///
92    /// Best-effort — the dir not existing, or any individual remove
93    /// failing, is silently ignored. Intended to run once per
94    /// command invocation, before the command's main work, so an
95    /// interrupted prior run doesn't leak temp files indefinitely
96    /// (matches upstream's `lfs.cleanupTempFiles` startup task).
97    pub fn cleanup_tmp_objects(&self) {
98        let dir = self.root.join("tmp").join("objects");
99        let Ok(entries) = std::fs::read_dir(&dir) else {
100            return;
101        };
102        for entry in entries.flatten() {
103            let name = entry.file_name();
104            let name_str = name.to_string_lossy();
105            if name_str.len() < 64 {
106                continue;
107            }
108            // Slice the leading 64 chars and reconstruct the
109            // object's sharded path purely as a string (no hex
110            // validation): upstream's cleanup is filesystem-level
111            // and accepts any 64-char prefix, which matters because
112            // the upstream test exercises this with non-hex
113            // sentinel strings like `good...` / `bad...`.
114            let oid_str = &name_str[..64];
115            let object_path = self
116                .root
117                .join("objects")
118                .join(&oid_str[0..2])
119                .join(&oid_str[2..4])
120                .join(oid_str);
121            if object_path.is_file() {
122                let _ = std::fs::remove_file(entry.path());
123            }
124        }
125    }
126
127    /// Where the object with this OID lives on disk.
128    ///
129    /// For [`Oid::EMPTY`] this returns the platform null device, mirroring
130    /// upstream's behavior so callers can `open` an empty object without
131    /// special-casing.
132    pub fn object_path(&self, oid: Oid) -> PathBuf {
133        if oid == Oid::EMPTY {
134            return PathBuf::from(NULL_DEVICE);
135        }
136        let hex = oid.to_string();
137        self.root
138            .join("objects")
139            .join(&hex[0..2])
140            .join(&hex[2..4])
141            .join(&hex)
142    }
143
144    /// `true` if this object is present locally as a regular file. The empty
145    /// OID is always considered present. If the local copy is missing but
146    /// an alternate store has the object, materializes it locally first.
147    pub fn contains(&self, oid: Oid) -> bool {
148        if oid == Oid::EMPTY {
149            return true;
150        }
151        if self.object_path(oid).is_file() {
152            return true;
153        }
154        self.materialize_from_reference(oid, None)
155    }
156
157    /// `true` if the object is present and its on-disk size matches `size`.
158    /// Used to detect partial/corrupted local copies. Like
159    /// [`contains`](Self::contains), will fault in a matching alternate-store
160    /// object on demand.
161    pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
162        if oid == Oid::EMPTY {
163            return size == 0;
164        }
165        let local = std::fs::metadata(self.object_path(oid))
166            .map(|m| m.is_file() && m.len() == size)
167            .unwrap_or(false);
168        if local {
169            return true;
170        }
171        self.materialize_from_reference(oid, Some(size))
172    }
173
174    /// Walk reference stores looking for `oid`; the first hit (matching
175    /// `size` if specified) is hardlinked — or copied, on cross-device
176    /// fallback — into the local store. Returns `true` if the object
177    /// is now present locally as a result.
178    fn materialize_from_reference(&self, oid: Oid, size: Option<u64>) -> bool {
179        if self.references.is_empty() {
180            return false;
181        }
182        let hex = oid.to_string();
183        for refdir in &self.references {
184            let src = refdir.join(&hex[0..2]).join(&hex[2..4]).join(&hex);
185            let Ok(meta) = std::fs::metadata(&src) else {
186                continue;
187            };
188            if !meta.is_file() {
189                continue;
190            }
191            if let Some(want) = size
192                && meta.len() != want
193            {
194                continue;
195            }
196            let dest = self.object_path(oid);
197            if let Some(parent) = dest.parent() {
198                let _ = std::fs::create_dir_all(parent);
199            }
200            // Hardlink first (free, O(1), shares inode); fall back to
201            // copy on EXDEV / NotSupported (e.g. alternate on a
202            // different filesystem).
203            if std::fs::hard_link(&src, &dest).is_ok() || std::fs::copy(&src, &dest).is_ok() {
204                return true;
205            }
206        }
207        false
208    }
209
210    /// Walk every object file in the store, yielding (oid, size_on_disk).
211    ///
212    /// Traverses the sharded `objects/<aa>/<bb>/<oid>` layout. Filenames
213    /// that don't parse as 64-char SHA-256 hex are silently skipped, as
214    /// are unexpected directories. The store directory not existing is
215    /// not an error — the result is just empty.
216    ///
217    /// Used by `git lfs prune` and (eventually) `fsck --orphaned`.
218    pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
219        let objects_dir = self.root.join("objects");
220        if !objects_dir.exists() {
221            return Ok(Vec::new());
222        }
223        let mut out = Vec::new();
224        for aa in std::fs::read_dir(&objects_dir)? {
225            let aa = aa?;
226            if !aa.file_type()?.is_dir() {
227                continue;
228            }
229            for bb in std::fs::read_dir(aa.path())? {
230                let bb = bb?;
231                if !bb.file_type()?.is_dir() {
232                    continue;
233                }
234                for entry in std::fs::read_dir(bb.path())? {
235                    let entry = entry?;
236                    let name = entry.file_name();
237                    let Some(name_str) = name.to_str() else {
238                        continue;
239                    };
240                    let Ok(oid) = name_str.parse::<Oid>() else {
241                        continue;
242                    };
243                    let meta = entry.metadata()?;
244                    if !meta.is_file() {
245                        continue;
246                    }
247                    out.push((oid, meta.len()));
248                }
249            }
250        }
251        Ok(out)
252    }
253
254    /// Open an object for reading. Errors with [`io::ErrorKind::NotFound`]
255    /// if the object isn't in the store. Faults in from a reference
256    /// store if needed.
257    pub fn open(&self, oid: Oid) -> io::Result<File> {
258        let path = self.object_path(oid);
259        match File::open(&path) {
260            Ok(f) => Ok(f),
261            Err(e) if e.kind() == io::ErrorKind::NotFound && oid != Oid::EMPTY => {
262                if self.materialize_from_reference(oid, None) {
263                    File::open(&path)
264                } else {
265                    Err(e)
266                }
267            }
268            Err(e) => Err(e),
269        }
270    }
271
272    /// Stream `src` into the store, computing SHA-256 as we go.
273    /// Returns the resulting OID and byte count.
274    ///
275    /// This is the clean-filter path: we don't know the OID until after the
276    /// content is hashed.
277    ///
278    /// If the resulting OID is already present locally, the temp file is
279    /// dropped without persisting. The store is content-addressed, so an
280    /// existing file at that path is necessarily the same bytes; skipping
281    /// `tmp.persist` here preserves any hardlink already at the
282    /// destination (a rename swaps a fresh inode in, which would break
283    /// the link to the alternate-store source).
284    pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
285        let (oid, size, tmp) = self.stream_to_tmp(src)?;
286        if oid != Oid::EMPTY && self.object_path(oid).is_file() {
287            drop(tmp);
288            return Ok((oid, size));
289        }
290        self.commit(oid, tmp)?;
291        Ok((oid, size))
292    }
293
294    /// Stream `src` into the store, requiring the resulting hash to equal
295    /// `expected`. On mismatch, returns [`StoreError::HashMismatch`] and the
296    /// temp file is dropped without being committed.
297    ///
298    /// This is the download path: we know the OID upfront and must verify
299    /// what the server sent.
300    pub fn insert_verified(&self, expected: Oid, src: &mut impl Read) -> Result<u64, StoreError> {
301        let (actual, size, tmp) = self.stream_to_tmp(src)?;
302        if actual != expected {
303            // Drop the tmp file; it goes away on Drop.
304            return Err(StoreError::HashMismatch { expected, actual });
305        }
306        self.commit(actual, tmp)?;
307        Ok(size)
308    }
309
310    fn stream_to_tmp(&self, src: &mut impl Read) -> io::Result<(Oid, u64, NamedTempFile)> {
311        std::fs::create_dir_all(self.tmp_dir())?;
312        let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
313        let mut hasher = Sha256::new();
314        let mut total: u64 = 0;
315        let mut buf = vec![0u8; COPY_BUFFER];
316        let file = tmp.as_file_mut();
317        loop {
318            let n = src.read(&mut buf)?;
319            if n == 0 {
320                break;
321            }
322            hasher.update(&buf[..n]);
323            file.write_all(&buf[..n])?;
324            total += n as u64;
325        }
326        file.flush()?;
327        let bytes: [u8; 32] = hasher.finalize().into();
328        Ok((Oid::from_bytes(bytes), total, tmp))
329    }
330
331    fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
332        // The empty object lives at /dev/null — never persist it.
333        if oid == Oid::EMPTY {
334            return Ok(());
335        }
336        let dest = self.object_path(oid);
337        if let Some(parent) = dest.parent() {
338            std::fs::create_dir_all(parent)?;
339        }
340        // Atomic rename, *clobbering* any existing file at the target
341        // path. The store is content-addressed: anything already there
342        // is either the same content (no-op overwrite) or corrupt
343        // (truncated, half-written) — and the latter is exactly what
344        // `git lfs fetch --refetch` exists to recover from.
345        tmp.persist(&dest).map(|_| ()).map_err(|e| e.error)
346    }
347}
348
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A store rooted in a fresh temp dir. The `TempDir` is returned so it
    /// stays alive (and the files on disk with it) for the test's duration.
    fn fixture() -> (TempDir, Store) {
        let tmp = TempDir::new().unwrap();
        let store = Store::new(tmp.path().join("lfs"));
        (tmp, store)
    }

    /// Sample non-empty OID used across tests (SHA-256 of "abc").
    const ABC_OID_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    fn abc_oid() -> Oid {
        ABC_OID_HEX.parse().unwrap()
    }

    #[test]
    fn object_path_is_sharded() {
        let (_tmp, store) = fixture();
        let oid: Oid = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393"
            .parse()
            .unwrap();
        let path = store.object_path(oid);
        let suffix: PathBuf = ["objects", "4d", "7a", &oid.to_string()].iter().collect();
        assert!(
            path.ends_with(&suffix),
            "{path:?} does not end with {suffix:?}"
        );
    }

    #[test]
    fn empty_oid_short_circuits() {
        let (_tmp, store) = fixture();
        assert_eq!(store.object_path(Oid::EMPTY), PathBuf::from(NULL_DEVICE));
        assert!(store.contains(Oid::EMPTY));
        assert!(store.contains_with_size(Oid::EMPTY, 0));
        assert!(!store.contains_with_size(Oid::EMPTY, 1));
        // Opening the empty OID yields zero bytes.
        let mut buf = Vec::new();
        store
            .open(Oid::EMPTY)
            .unwrap()
            .read_to_end(&mut buf)
            .unwrap();
        assert!(buf.is_empty());
    }

    #[test]
    fn insert_round_trip() {
        let (_tmp, store) = fixture();
        let content = b"hello world!";
        let (oid, size) = store.insert(&mut content.as_slice()).unwrap();
        assert_eq!(size, content.len() as u64);
        assert!(store.contains(oid));
        assert!(store.contains_with_size(oid, size));
        let mut readback = Vec::new();
        store.open(oid).unwrap().read_to_end(&mut readback).unwrap();
        assert_eq!(readback, content);
    }

    #[test]
    fn insert_computes_correct_sha256() {
        let (_tmp, store) = fixture();
        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert_eq!(oid, abc_oid());
    }

    #[test]
    fn insert_empty_yields_empty_oid_and_no_object_file() {
        let (_tmp, store) = fixture();
        let (oid, size) = store.insert(&mut [].as_slice()).unwrap();
        assert_eq!(oid, Oid::EMPTY);
        assert_eq!(size, 0);
        // Critically: nothing was persisted under objects/.
        assert!(!store.root.join("objects").exists());
    }

    #[test]
    fn insert_idempotent() {
        let (_tmp, store) = fixture();
        let (oid1, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        let (oid2, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert_eq!(oid1, oid2);
        assert!(store.contains(oid1));
    }

    #[test]
    fn insert_verified_succeeds_on_match() {
        let (_tmp, store) = fixture();
        let size = store
            .insert_verified(abc_oid(), &mut b"abc".as_slice())
            .unwrap();
        assert_eq!(size, 3);
        assert!(store.contains(abc_oid()));
    }

    #[test]
    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
        let (_tmp, store) = fixture();
        let wrong: Oid = "0000000000000000000000000000000000000000000000000000000000000001"
            .parse()
            .unwrap();
        let err = store
            .insert_verified(wrong, &mut b"abc".as_slice())
            .unwrap_err();
        match err {
            StoreError::HashMismatch { expected, actual } => {
                assert_eq!(expected, wrong);
                assert_eq!(actual, abc_oid());
            }
            other => panic!("expected HashMismatch, got {other:?}"),
        }
        // Neither the wrong OID nor the actual OID should be present —
        // a failed verify must not leak a half-committed file.
        assert!(!store.contains(wrong));
        assert!(!store.contains(abc_oid()));
        // And no leftover tmp file.
        let tmp_entries: Vec<_> = std::fs::read_dir(store.tmp_dir())
            .unwrap()
            .collect::<Result<_, _>>()
            .unwrap();
        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
    }

    #[test]
    fn open_missing_oid_is_not_found() {
        let (_tmp, store) = fixture();
        let oid: Oid = "0000000000000000000000000000000000000000000000000000000000000001"
            .parse()
            .unwrap();
        let err = store.open(oid).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn streaming_megabyte_input() {
        let (_tmp, store) = fixture();
        // ~1 MiB to exercise the streaming loop across many buffer fills.
        let content: Vec<u8> = (0..1_048_576u32).map(|i| (i ^ (i >> 5)) as u8).collect();
        let (oid, size) = store.insert(&mut content.as_slice()).unwrap();
        assert_eq!(size, content.len() as u64);
        let mut readback = Vec::new();
        store.open(oid).unwrap().read_to_end(&mut readback).unwrap();
        assert_eq!(readback, content);
    }

    #[test]
    fn each_object_returns_empty_when_no_objects_dir() {
        let (_tmp, store) = fixture();
        // Store dir doesn't exist yet.
        assert!(store.each_object().unwrap().is_empty());
    }

    #[test]
    fn each_object_finds_inserted_objects_with_correct_size() {
        let (_tmp, store) = fixture();
        let (oid_a, _) = store.insert(&mut b"hello".as_slice()).unwrap();
        let (oid_b, _) = store.insert(&mut b"world!!!".as_slice()).unwrap();
        let mut got = store.each_object().unwrap();
        got.sort_by_key(|(_, size)| *size);
        assert_eq!(got.len(), 2);
        // Order by size: "hello" (5 bytes) first, then "world!!!" (8 bytes).
        assert_eq!(got[0].0, oid_a);
        assert_eq!(got[0].1, 5);
        assert_eq!(got[1].0, oid_b);
        assert_eq!(got[1].1, 8);
    }

    #[test]
    fn each_object_skips_unrecognized_filenames() {
        let (_tmp, store) = fixture();
        let (oid, _) = store.insert(&mut b"hi".as_slice()).unwrap();
        // Drop a stray file in the same shard directory that isn't a
        // 64-char hex name — must not crash or be reported.
        let shard = store
            .root()
            .join("objects")
            .join(&oid.to_string()[0..2])
            .join(&oid.to_string()[2..4]);
        std::fs::write(shard.join("README"), b"ignored").unwrap();
        let got = store.each_object().unwrap();
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].0, oid);
    }

    #[test]
    fn insert_verified_overwrites_corrupt_existing_file() {
        // Mirrors the scenario t-fetch's `--refetch` test exercises:
        // a previous fetch landed an object, then the file got
        // truncated (cp /dev/null over it). A subsequent verified
        // insert must replace the corrupt file rather than silently
        // skipping the write.
        let (_tmp, store) = fixture();
        let dest = store.object_path(abc_oid());
        std::fs::create_dir_all(dest.parent().unwrap()).unwrap();
        std::fs::write(&dest, b"").unwrap();
        assert_eq!(std::fs::metadata(&dest).unwrap().len(), 0);

        store
            .insert_verified(abc_oid(), &mut b"abc".as_slice())
            .unwrap();
        let bytes = std::fs::read(&dest).unwrap();
        assert_eq!(bytes, b"abc");
    }

    #[test]
    fn insert_creates_dirs_on_demand() {
        let (_tmp, store) = fixture();
        // Before any insert, neither objects/ nor tmp/ exists.
        assert!(!store.root.exists());
        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert!(store.tmp_dir().is_dir());
        assert!(store.object_path(oid).is_file());
    }

    #[test]
    fn cleanup_tmp_objects_removes_temp_files_for_completed_objects() {
        let (_tmp, store) = fixture();
        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        let tmp_objects = store.tmp_dir().join("objects");
        std::fs::create_dir_all(&tmp_objects).unwrap();

        // Temp file whose OID prefix is already complete in the store.
        let done = tmp_objects.join(format!("{oid}-12345"));
        // Temp file for an object that has not landed yet.
        let absent_hex = "0000000000000000000000000000000000000000000000000000000000000001";
        let pending = tmp_objects.join(format!("{absent_hex}-12345"));
        // Name too short to carry a 64-char OID prefix.
        let short = tmp_objects.join("short-name");
        for path in [&done, &pending, &short] {
            std::fs::write(path, b"partial").unwrap();
        }

        store.cleanup_tmp_objects();

        assert!(!done.exists(), "temp file of a completed object survived");
        assert!(pending.is_file(), "temp file of a pending object was removed");
        assert!(short.is_file(), "unrelated short-named file was removed");
        // The object itself is untouched.
        assert!(store.object_path(oid).is_file());
    }

    #[test]
    fn cleanup_tmp_objects_tolerates_missing_tmp_dir() {
        let (_tmp, store) = fixture();
        // Nothing exists on disk yet; the sweep must be a silent no-op
        // and must not create anything.
        store.cleanup_tmp_objects();
        assert!(!store.root().exists());
    }

    /// Build a "source" store with an object pre-installed, plus an
    /// empty "shared" store that references it. Mirrors the
    /// `git clone --shared` setup from t-fetch's init.
    fn shared_fixture() -> (TempDir, Store, Store, Oid) {
        let tmp = TempDir::new().unwrap();
        let source = Store::new(tmp.path().join("src/lfs"));
        let (oid, _) = source.insert(&mut b"abc".as_slice()).unwrap();
        let shared = Store::new(tmp.path().join("shared/lfs"))
            .with_references([source.root().join("objects")]);
        (tmp, source, shared, oid)
    }

    #[test]
    fn contains_finds_object_via_reference() {
        let (_tmp, _source, shared, oid) = shared_fixture();
        // Object lives only in the source's lfs/objects/ at this
        // point — `contains` should report it as present (and fault
        // it in along the way).
        assert!(shared.contains(oid));
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn open_faults_in_from_reference() {
        let (_tmp, _source, shared, oid) = shared_fixture();
        let mut buf = Vec::new();
        shared.open(oid).unwrap().read_to_end(&mut buf).unwrap();
        assert_eq!(buf, b"abc");
        // After open, the object is materialized locally so future
        // reads are independent of the alternate.
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn contains_with_size_rejects_size_mismatch_in_reference() {
        let (_tmp, _source, shared, oid) = shared_fixture();
        // Real size is 3; ask for 4 → reference hit gets rejected.
        assert!(!shared.contains_with_size(oid, 4));
        assert!(!shared.object_path(oid).is_file());
    }

    #[test]
    fn store_without_references_misses() {
        // Sanity: same OID that the shared fixture finds via
        // alternates is genuinely absent in a plain store.
        let (_tmp, store) = fixture();
        let oid = abc_oid();
        assert!(!store.contains(oid));
        assert!(matches!(
            store.open(oid).unwrap_err().kind(),
            io::ErrorKind::NotFound,
        ));
    }
}