Skip to main content

git_lfs_store/
lib.rs

//! Local content-addressable object store for git-lfs.
//!
//! Objects live under `<lfs_dir>/objects/aa/bb/aabbcc…`, where `aabbcc…` is
//! the SHA-256 hex digest of the content and `aa` / `bb` are its first two
//! bytes (two hex digits each), used as shard directories — see
//! `docs/spec.md`. Writes go through a temp file in `<lfs_dir>/tmp/`, which is
//! atomically renamed into place once its hash is known.
//!
//! ```no_run
//! use git_lfs_store::Store;
//! let store = Store::new(".git/lfs");
//! let mut input: &[u8] = b"hello world";
//! let (oid, size) = store.insert(&mut input).unwrap();
//! assert!(store.contains(oid));
//! # let _ = size;
//! ```
16
17use std::fs::File;
18use std::io::{self, Read, Write};
19use std::path::{Path, PathBuf};
20
21use git_lfs_pointer::Oid;
22use sha2::{Digest, Sha256};
23use tempfile::NamedTempFile;
24
/// Path of the platform's null device. `object_path` hands this back for
/// [`Oid::EMPTY`] instead of a path inside the store.
#[cfg(windows)]
const NULL_DEVICE: &str = "NUL";

/// Path of the platform's null device. `object_path` hands this back for
/// [`Oid::EMPTY`] instead of a path inside the store.
#[cfg(not(windows))]
const NULL_DEVICE: &str = "/dev/null";
27
/// Size in bytes (64 KiB) of the scratch buffer used when streaming content
/// into a temp file.
const COPY_BUFFER: usize = 1 << 16;
29
/// Handle to the LFS object store rooted at `<lfs_dir>` (usually `.git/lfs`).
///
/// Besides its own objects, a store can be pointed at any number of
/// alternate stores — in practice the LFS objects of the repository a
/// `git clone --shared` was made from. When a lookup misses locally and an
/// alternate has the object, it is materialized into the local store on
/// demand. See [`Store::with_references`].
#[derive(Clone, Debug)]
pub struct Store {
    /// The LFS directory itself; `objects/` and `tmp/` live beneath it.
    root: PathBuf,
    /// Alternate `lfs/objects/` directories, one per
    /// `.git/objects/info/alternates` entry. On a local miss,
    /// [`Store::contains_with_size`] / [`Store::open`] try these in order
    /// and hardlink (or copy) the first hit into `root`.
    references: Vec<PathBuf>,
}
45
46#[derive(Debug, thiserror::Error)]
47pub enum StoreError {
48    #[error(transparent)]
49    Io(#[from] io::Error),
50    #[error("hash mismatch: expected {expected}, got {actual}")]
51    HashMismatch { expected: Oid, actual: Oid },
52}
53
54impl Store {
55    /// Create a store rooted at the given LFS directory. The directory is not
56    /// created eagerly; subdirectories are created on demand as objects land.
57    pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
58        Self {
59            root: lfs_dir.into(),
60            references: Vec::new(),
61        }
62    }
63
64    /// Attach alternate `lfs/objects/` directories that the store may
65    /// hardlink-or-copy from when a local lookup misses. Used by
66    /// `git clone --shared` setups so the new repo can read the
67    /// source's existing LFS objects without re-downloading.
68    ///
69    /// Pass [`git_lfs_git::lfs_alternate_dirs`](https://docs.rs/git-lfs-git)
70    /// (`<git-dir>/objects/info/alternates` resolved to LFS-objects
71    /// dirs) at construction.
72    #[must_use]
73    pub fn with_references(mut self, refs: impl IntoIterator<Item = PathBuf>) -> Self {
74        self.references = refs.into_iter().collect();
75        self
76    }
77
78    /// Root LFS directory.
79    pub fn root(&self) -> &Path {
80        &self.root
81    }
82
83    /// Directory holding temp files for in-flight inserts.
84    pub fn tmp_dir(&self) -> PathBuf {
85        self.root.join("tmp")
86    }
87
88    /// Where the object with this OID lives on disk.
89    ///
90    /// For [`Oid::EMPTY`] this returns the platform null device, mirroring
91    /// upstream's behavior so callers can `open` an empty object without
92    /// special-casing.
93    pub fn object_path(&self, oid: Oid) -> PathBuf {
94        if oid == Oid::EMPTY {
95            return PathBuf::from(NULL_DEVICE);
96        }
97        let hex = oid.to_string();
98        self.root
99            .join("objects")
100            .join(&hex[0..2])
101            .join(&hex[2..4])
102            .join(&hex)
103    }
104
105    /// `true` if this object is present locally as a regular file. The empty
106    /// OID is always considered present. If the local copy is missing but
107    /// an alternate store has the object, materializes it locally first.
108    pub fn contains(&self, oid: Oid) -> bool {
109        if oid == Oid::EMPTY {
110            return true;
111        }
112        if self.object_path(oid).is_file() {
113            return true;
114        }
115        self.materialize_from_reference(oid, None)
116    }
117
118    /// `true` if the object is present and its on-disk size matches `size`.
119    /// Used to detect partial/corrupted local copies. Like
120    /// [`contains`](Self::contains), will fault in a matching alternate-store
121    /// object on demand.
122    pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
123        if oid == Oid::EMPTY {
124            return size == 0;
125        }
126        let local = std::fs::metadata(self.object_path(oid))
127            .map(|m| m.is_file() && m.len() == size)
128            .unwrap_or(false);
129        if local {
130            return true;
131        }
132        self.materialize_from_reference(oid, Some(size))
133    }
134
135    /// Walk reference stores looking for `oid`; the first hit (matching
136    /// `size` if specified) is hardlinked — or copied, on cross-device
137    /// fallback — into the local store. Returns `true` if the object
138    /// is now present locally as a result.
139    fn materialize_from_reference(&self, oid: Oid, size: Option<u64>) -> bool {
140        if self.references.is_empty() {
141            return false;
142        }
143        let hex = oid.to_string();
144        for refdir in &self.references {
145            let src = refdir.join(&hex[0..2]).join(&hex[2..4]).join(&hex);
146            let Ok(meta) = std::fs::metadata(&src) else {
147                continue;
148            };
149            if !meta.is_file() {
150                continue;
151            }
152            if let Some(want) = size
153                && meta.len() != want
154            {
155                continue;
156            }
157            let dest = self.object_path(oid);
158            if let Some(parent) = dest.parent() {
159                let _ = std::fs::create_dir_all(parent);
160            }
161            // Hardlink first (free, O(1), shares inode); fall back to
162            // copy on EXDEV / NotSupported (e.g. alternate on a
163            // different filesystem).
164            if std::fs::hard_link(&src, &dest).is_ok() || std::fs::copy(&src, &dest).is_ok() {
165                return true;
166            }
167        }
168        false
169    }
170
171    /// Walk every object file in the store, yielding (oid, size_on_disk).
172    ///
173    /// Traverses the sharded `objects/<aa>/<bb>/<oid>` layout. Filenames
174    /// that don't parse as 64-char SHA-256 hex are silently skipped, as
175    /// are unexpected directories. The store directory not existing is
176    /// not an error — the result is just empty.
177    ///
178    /// Used by `git lfs prune` and (eventually) `fsck --orphaned`.
179    pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
180        let objects_dir = self.root.join("objects");
181        if !objects_dir.exists() {
182            return Ok(Vec::new());
183        }
184        let mut out = Vec::new();
185        for aa in std::fs::read_dir(&objects_dir)? {
186            let aa = aa?;
187            if !aa.file_type()?.is_dir() {
188                continue;
189            }
190            for bb in std::fs::read_dir(aa.path())? {
191                let bb = bb?;
192                if !bb.file_type()?.is_dir() {
193                    continue;
194                }
195                for entry in std::fs::read_dir(bb.path())? {
196                    let entry = entry?;
197                    let name = entry.file_name();
198                    let Some(name_str) = name.to_str() else {
199                        continue;
200                    };
201                    let Ok(oid) = name_str.parse::<Oid>() else {
202                        continue;
203                    };
204                    let meta = entry.metadata()?;
205                    if !meta.is_file() {
206                        continue;
207                    }
208                    out.push((oid, meta.len()));
209                }
210            }
211        }
212        Ok(out)
213    }
214
215    /// Open an object for reading. Errors with [`io::ErrorKind::NotFound`]
216    /// if the object isn't in the store. Faults in from a reference
217    /// store if needed.
218    pub fn open(&self, oid: Oid) -> io::Result<File> {
219        let path = self.object_path(oid);
220        match File::open(&path) {
221            Ok(f) => Ok(f),
222            Err(e) if e.kind() == io::ErrorKind::NotFound && oid != Oid::EMPTY => {
223                if self.materialize_from_reference(oid, None) {
224                    File::open(&path)
225                } else {
226                    Err(e)
227                }
228            }
229            Err(e) => Err(e),
230        }
231    }
232
233    /// Stream `src` into the store, computing SHA-256 as we go.
234    /// Returns the resulting OID and byte count.
235    ///
236    /// This is the clean-filter path: we don't know the OID until after the
237    /// content is hashed.
238    pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
239        let (oid, size, tmp) = self.stream_to_tmp(src)?;
240        self.commit(oid, tmp)?;
241        Ok((oid, size))
242    }
243
244    /// Stream `src` into the store, requiring the resulting hash to equal
245    /// `expected`. On mismatch, returns [`StoreError::HashMismatch`] and the
246    /// temp file is dropped without being committed.
247    ///
248    /// This is the download path: we know the OID upfront and must verify
249    /// what the server sent.
250    pub fn insert_verified(&self, expected: Oid, src: &mut impl Read) -> Result<u64, StoreError> {
251        let (actual, size, tmp) = self.stream_to_tmp(src)?;
252        if actual != expected {
253            // Drop the tmp file; it goes away on Drop.
254            return Err(StoreError::HashMismatch { expected, actual });
255        }
256        self.commit(actual, tmp)?;
257        Ok(size)
258    }
259
260    fn stream_to_tmp(&self, src: &mut impl Read) -> io::Result<(Oid, u64, NamedTempFile)> {
261        std::fs::create_dir_all(self.tmp_dir())?;
262        let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
263        let mut hasher = Sha256::new();
264        let mut total: u64 = 0;
265        let mut buf = vec![0u8; COPY_BUFFER];
266        let file = tmp.as_file_mut();
267        loop {
268            let n = src.read(&mut buf)?;
269            if n == 0 {
270                break;
271            }
272            hasher.update(&buf[..n]);
273            file.write_all(&buf[..n])?;
274            total += n as u64;
275        }
276        file.flush()?;
277        let bytes: [u8; 32] = hasher.finalize().into();
278        Ok((Oid::from_bytes(bytes), total, tmp))
279    }
280
281    fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
282        // The empty object lives at /dev/null — never persist it.
283        if oid == Oid::EMPTY {
284            return Ok(());
285        }
286        let dest = self.object_path(oid);
287        if let Some(parent) = dest.parent() {
288            std::fs::create_dir_all(parent)?;
289        }
290        // Atomic rename, *clobbering* any existing file at the target
291        // path. The store is content-addressed: anything already there
292        // is either the same content (no-op overwrite) or corrupt
293        // (truncated, half-written) — and the latter is exactly what
294        // `git lfs fetch --refetch` exists to recover from.
295        tmp.persist(&dest).map(|_| ()).map_err(|e| e.error)
296    }
297}
298
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Hex SHA-256 of the bytes "abc" — the non-empty OID most tests use.
    const ABC_OID_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// A store rooted at `<tempdir>/lfs`. The `TempDir` guard is returned so
    /// the directory outlives the test body.
    fn fixture() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let lfs_root = dir.path().join("lfs");
        (dir, Store::new(lfs_root))
    }

    fn abc_oid() -> Oid {
        ABC_OID_HEX.parse().unwrap()
    }

    /// A well-formed OID that no test ever inserts.
    fn never_inserted_oid() -> Oid {
        "0000000000000000000000000000000000000000000000000000000000000001"
            .parse()
            .unwrap()
    }

    /// Open `oid` in `store` and read it to the end, panicking on any error.
    fn read_object(store: &Store, oid: Oid) -> Vec<u8> {
        let mut bytes = Vec::new();
        store.open(oid).unwrap().read_to_end(&mut bytes).unwrap();
        bytes
    }

    #[test]
    fn object_path_is_sharded() {
        let (_guard, store) = fixture();
        let oid: Oid = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393"
            .parse()
            .unwrap();
        let rendered = oid.to_string();
        let suffix = Path::new("objects").join("4d").join("7a").join(&rendered);
        let path = store.object_path(oid);
        assert!(
            path.ends_with(&suffix),
            "{path:?} does not end with {suffix:?}"
        );
    }

    #[test]
    fn empty_oid_short_circuits() {
        let (_guard, store) = fixture();
        let null_device = PathBuf::from(NULL_DEVICE);
        assert_eq!(store.object_path(Oid::EMPTY), null_device);
        assert!(store.contains(Oid::EMPTY));
        assert!(store.contains_with_size(Oid::EMPTY, 0));
        assert!(!store.contains_with_size(Oid::EMPTY, 1));
        // Reading the empty OID produces no bytes at all.
        assert!(read_object(&store, Oid::EMPTY).is_empty());
    }

    #[test]
    fn insert_round_trip() {
        let (_guard, store) = fixture();
        let content = b"hello world!";
        let mut reader = content.as_slice();
        let (oid, size) = store.insert(&mut reader).unwrap();
        assert_eq!(size, content.len() as u64);
        assert!(store.contains(oid));
        assert!(store.contains_with_size(oid, size));
        assert_eq!(read_object(&store, oid), content);
    }

    #[test]
    fn insert_computes_correct_sha256() {
        let (_guard, store) = fixture();
        let mut reader: &[u8] = b"abc";
        let (oid, _size) = store.insert(&mut reader).unwrap();
        assert_eq!(oid, abc_oid());
    }

    #[test]
    fn insert_empty_yields_empty_oid_and_no_object_file() {
        let (_guard, store) = fixture();
        let mut nothing: &[u8] = &[];
        let (oid, size) = store.insert(&mut nothing).unwrap();
        assert_eq!(oid, Oid::EMPTY);
        assert_eq!(size, 0);
        // The important part: objects/ was never even created.
        assert!(!store.root().join("objects").exists());
    }

    #[test]
    fn insert_idempotent() {
        let (_guard, store) = fixture();
        let mut first: &[u8] = b"abc";
        let mut second: &[u8] = b"abc";
        let (oid1, _) = store.insert(&mut first).unwrap();
        let (oid2, _) = store.insert(&mut second).unwrap();
        assert_eq!(oid1, oid2);
        assert!(store.contains(oid1));
    }

    #[test]
    fn insert_verified_succeeds_on_match() {
        let (_guard, store) = fixture();
        let mut reader: &[u8] = b"abc";
        let size = store.insert_verified(abc_oid(), &mut reader).unwrap();
        assert_eq!(size, 3);
        assert!(store.contains(abc_oid()));
    }

    #[test]
    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
        let (_guard, store) = fixture();
        let wrong = never_inserted_oid();
        let mut reader: &[u8] = b"abc";
        let err = store.insert_verified(wrong, &mut reader).unwrap_err();
        match err {
            StoreError::HashMismatch { expected, actual } => {
                assert_eq!(expected, wrong);
                assert_eq!(actual, abc_oid());
            }
            other => panic!("expected HashMismatch, got {other:?}"),
        }
        // A failed verify must not leave anything behind under either the
        // requested OID or the OID the content really hashes to.
        assert!(!store.contains(wrong));
        assert!(!store.contains(abc_oid()));
        // The temp file must be gone as well.
        let tmp_entries: Vec<_> = std::fs::read_dir(store.tmp_dir())
            .unwrap()
            .collect::<Result<_, _>>()
            .unwrap();
        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
    }

    #[test]
    fn open_missing_oid_is_not_found() {
        let (_guard, store) = fixture();
        let failure = store.open(never_inserted_oid()).unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn streaming_megabyte_input() {
        let (_guard, store) = fixture();
        // Roughly 1 MiB, so the copy loop has to refill its buffer many times.
        let content: Vec<u8> = (0..1_048_576u32).map(|i| (i ^ (i >> 5)) as u8).collect();
        let mut reader = content.as_slice();
        let (oid, size) = store.insert(&mut reader).unwrap();
        assert_eq!(size, content.len() as u64);
        assert_eq!(read_object(&store, oid), content);
    }

    #[test]
    fn each_object_returns_empty_when_no_objects_dir() {
        // Nothing has been inserted, so the store directory doesn't exist.
        let (_guard, store) = fixture();
        let listing = store.each_object().unwrap();
        assert!(listing.is_empty());
    }

    #[test]
    fn each_object_finds_inserted_objects_with_correct_size() {
        let (_guard, store) = fixture();
        let mut small: &[u8] = b"hello";
        let mut large: &[u8] = b"world!!!";
        let (oid_a, _) = store.insert(&mut small).unwrap();
        let (oid_b, _) = store.insert(&mut large).unwrap();
        let mut listing = store.each_object().unwrap();
        listing.sort_by_key(|(_, size)| *size);
        assert_eq!(listing.len(), 2);
        // Sorted by size: the 5-byte "hello" precedes the 8-byte "world!!!".
        let (first_oid, first_size) = listing[0];
        let (second_oid, second_size) = listing[1];
        assert_eq!(first_oid, oid_a);
        assert_eq!(first_size, 5);
        assert_eq!(second_oid, oid_b);
        assert_eq!(second_size, 8);
    }

    #[test]
    fn each_object_skips_unrecognized_filenames() {
        let (_guard, store) = fixture();
        let mut reader: &[u8] = b"hi";
        let (oid, _) = store.insert(&mut reader).unwrap();
        // Put a stray, non-hex-named file next to the real object; it must
        // neither break the walk nor show up in the listing.
        let hex = oid.to_string();
        let shard = store
            .root()
            .join("objects")
            .join(&hex[0..2])
            .join(&hex[2..4]);
        std::fs::write(shard.join("README"), b"ignored").unwrap();
        let listing = store.each_object().unwrap();
        assert_eq!(listing.len(), 1);
        assert_eq!(listing[0].0, oid);
    }

    #[test]
    fn insert_verified_overwrites_corrupt_existing_file() {
        // Same situation as t-fetch's `--refetch` test: an object was
        // fetched earlier and then truncated (cp /dev/null over it). The
        // next verified insert has to replace that corrupt file, not
        // quietly skip the write.
        let (_guard, store) = fixture();
        let dest = store.object_path(abc_oid());
        let shard_dir = dest.parent().unwrap();
        std::fs::create_dir_all(shard_dir).unwrap();
        std::fs::write(&dest, b"").unwrap();
        assert_eq!(std::fs::metadata(&dest).unwrap().len(), 0);

        let mut reader: &[u8] = b"abc";
        store.insert_verified(abc_oid(), &mut reader).unwrap();
        let bytes = std::fs::read(&dest).unwrap();
        assert_eq!(bytes, b"abc");
    }

    #[test]
    fn insert_creates_dirs_on_demand() {
        let (_guard, store) = fixture();
        // Nothing exists up front — not the root, so neither objects/ nor tmp/.
        assert!(!store.root().exists());
        let mut reader: &[u8] = b"abc";
        let (oid, _) = store.insert(&mut reader).unwrap();
        assert!(store.tmp_dir().is_dir());
        assert!(store.object_path(oid).is_file());
    }

    /// A "source" store that already holds one object, and an empty
    /// "shared" store whose only reference is the source's `objects/`
    /// directory — the `git clone --shared` arrangement from t-fetch's init.
    fn shared_fixture() -> (TempDir, Store, Store, Oid) {
        let dir = TempDir::new().unwrap();
        let source = Store::new(dir.path().join("src/lfs"));
        let mut reader: &[u8] = b"abc";
        let (oid, _) = source.insert(&mut reader).unwrap();
        let alternates = vec![source.root().join("objects")];
        let shared = Store::new(dir.path().join("shared/lfs")).with_references(alternates);
        (dir, source, shared, oid)
    }

    #[test]
    fn contains_finds_object_via_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        // Only the source store holds the object so far; `contains` must
        // still say yes and pull the object into the shared store.
        assert!(shared.contains(oid));
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn open_faults_in_from_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        assert_eq!(read_object(&shared, oid), b"abc");
        // Opening materialized the object locally, so later reads no
        // longer depend on the alternate.
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn contains_with_size_rejects_size_mismatch_in_reference() {
        let (_guard, _source, shared, oid) = shared_fixture();
        // The object is 3 bytes long; asking for 4 must reject the reference hit.
        assert!(!shared.contains_with_size(oid, 4));
        assert!(!shared.object_path(oid).is_file());
    }

    #[test]
    fn store_without_references_misses() {
        // Control case: the OID the shared fixture resolves through its
        // alternate really is missing from a plain store.
        let (_guard, store) = fixture();
        let oid = abc_oid();
        assert!(!store.contains(oid));
        let failure = store.open(oid).unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::NotFound);
    }
}
569}