Skip to main content

git_lfs_store/
lib.rs

1//! Local content-addressable object store for git-lfs.
2//!
3//! Objects live under `<lfs_dir>/objects/aa/bb/aabbcc…` where `aabbcc…` is
4//! the SHA-256 hex of the content (sharded by the first two hex bytes — see
5//! `docs/spec.md`). Writes go through a tmp file in `<lfs_dir>/tmp/` and are
6//! atomically renamed into place once their hash is known.
7//!
8//! ```no_run
9//! use git_lfs_store::Store;
10//! let store = Store::new(".git/lfs");
11//! let mut input: &[u8] = b"hello world";
12//! let (oid, size) = store.insert(&mut input).unwrap();
13//! assert!(store.contains(oid));
14//! # let _ = size;
15//! ```
16
17use std::fs::File;
18use std::io::{self, Read, Write};
19use std::path::{Path, PathBuf};
20
21use git_lfs_pointer::Oid;
22use sha2::{Digest, Sha256};
23use tempfile::NamedTempFile;
24
/// Path of the platform's null device (`NUL` on Windows, `/dev/null`
/// elsewhere). This is what `object_path` hands back for [`Oid::EMPTY`].
const NULL_DEVICE: &str = match cfg!(windows) {
    true => "NUL",
    false => "/dev/null",
};
27
/// Size, in bytes, of the scratch buffer used while streaming content into a
/// temp file (64 KiB).
const COPY_BUFFER: usize = 1 << 16;
29
/// Handle to a local LFS object store.
///
/// The handle holds nothing but the path of the LFS directory (`<lfs_dir>`,
/// typically `.git/lfs`); cloning it just copies that path.
#[derive(Clone, Debug)]
pub struct Store {
    /// The `<lfs_dir>` that object and temp paths are derived from.
    root: PathBuf,
}
35
/// Errors produced by the insert operations on [`Store`].
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// An underlying I/O operation failed. Marked `transparent`, so the
    /// message shown is that of the wrapped [`io::Error`]; `#[from]` lets `?`
    /// convert an `io::Error` into this variant.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// The streamed content hashed to `actual`, but the caller required
    /// `expected`.
    #[error("hash mismatch: expected {expected}, got {actual}")]
    HashMismatch { expected: Oid, actual: Oid },
}
43
44impl Store {
45    /// Create a store rooted at the given LFS directory. The directory is not
46    /// created eagerly; subdirectories are created on demand as objects land.
47    pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
48        Self {
49            root: lfs_dir.into(),
50        }
51    }
52
53    /// Root LFS directory.
54    pub fn root(&self) -> &Path {
55        &self.root
56    }
57
58    /// Directory holding temp files for in-flight inserts.
59    pub fn tmp_dir(&self) -> PathBuf {
60        self.root.join("tmp")
61    }
62
63    /// Where the object with this OID lives on disk.
64    ///
65    /// For [`Oid::EMPTY`] this returns the platform null device, mirroring
66    /// upstream's behavior so callers can `open` an empty object without
67    /// special-casing.
68    pub fn object_path(&self, oid: Oid) -> PathBuf {
69        if oid == Oid::EMPTY {
70            return PathBuf::from(NULL_DEVICE);
71        }
72        let hex = oid.to_string();
73        self.root
74            .join("objects")
75            .join(&hex[0..2])
76            .join(&hex[2..4])
77            .join(&hex)
78    }
79
80    /// `true` if this object is present locally as a regular file. The empty
81    /// OID is always considered present.
82    pub fn contains(&self, oid: Oid) -> bool {
83        if oid == Oid::EMPTY {
84            return true;
85        }
86        self.object_path(oid).is_file()
87    }
88
89    /// `true` if the object is present and its on-disk size matches `size`.
90    /// Used to detect partial/corrupted local copies.
91    pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
92        if oid == Oid::EMPTY {
93            return size == 0;
94        }
95        std::fs::metadata(self.object_path(oid))
96            .map(|m| m.is_file() && m.len() == size)
97            .unwrap_or(false)
98    }
99
100    /// Walk every object file in the store, yielding (oid, size_on_disk).
101    ///
102    /// Traverses the sharded `objects/<aa>/<bb>/<oid>` layout. Filenames
103    /// that don't parse as 64-char SHA-256 hex are silently skipped, as
104    /// are unexpected directories. The store directory not existing is
105    /// not an error — the result is just empty.
106    ///
107    /// Used by `git lfs prune` and (eventually) `fsck --orphaned`.
108    pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
109        let objects_dir = self.root.join("objects");
110        if !objects_dir.exists() {
111            return Ok(Vec::new());
112        }
113        let mut out = Vec::new();
114        for aa in std::fs::read_dir(&objects_dir)? {
115            let aa = aa?;
116            if !aa.file_type()?.is_dir() {
117                continue;
118            }
119            for bb in std::fs::read_dir(aa.path())? {
120                let bb = bb?;
121                if !bb.file_type()?.is_dir() {
122                    continue;
123                }
124                for entry in std::fs::read_dir(bb.path())? {
125                    let entry = entry?;
126                    let name = entry.file_name();
127                    let Some(name_str) = name.to_str() else { continue };
128                    let Ok(oid) = name_str.parse::<Oid>() else { continue };
129                    let meta = entry.metadata()?;
130                    if !meta.is_file() {
131                        continue;
132                    }
133                    out.push((oid, meta.len()));
134                }
135            }
136        }
137        Ok(out)
138    }
139
140    /// Open an object for reading. Errors with [`io::ErrorKind::NotFound`]
141    /// if the object isn't in the store.
142    pub fn open(&self, oid: Oid) -> io::Result<File> {
143        File::open(self.object_path(oid))
144    }
145
146    /// Stream `src` into the store, computing SHA-256 as we go.
147    /// Returns the resulting OID and byte count.
148    ///
149    /// This is the clean-filter path: we don't know the OID until after the
150    /// content is hashed.
151    pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
152        let (oid, size, tmp) = self.stream_to_tmp(src)?;
153        self.commit(oid, tmp)?;
154        Ok((oid, size))
155    }
156
157    /// Stream `src` into the store, requiring the resulting hash to equal
158    /// `expected`. On mismatch, returns [`StoreError::HashMismatch`] and the
159    /// temp file is dropped without being committed.
160    ///
161    /// This is the download path: we know the OID upfront and must verify
162    /// what the server sent.
163    pub fn insert_verified(
164        &self,
165        expected: Oid,
166        src: &mut impl Read,
167    ) -> Result<u64, StoreError> {
168        let (actual, size, tmp) = self.stream_to_tmp(src)?;
169        if actual != expected {
170            // Drop the tmp file; it goes away on Drop.
171            return Err(StoreError::HashMismatch { expected, actual });
172        }
173        self.commit(actual, tmp)?;
174        Ok(size)
175    }
176
177    fn stream_to_tmp(
178        &self,
179        src: &mut impl Read,
180    ) -> io::Result<(Oid, u64, NamedTempFile)> {
181        std::fs::create_dir_all(self.tmp_dir())?;
182        let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
183        let mut hasher = Sha256::new();
184        let mut total: u64 = 0;
185        let mut buf = vec![0u8; COPY_BUFFER];
186        let file = tmp.as_file_mut();
187        loop {
188            let n = src.read(&mut buf)?;
189            if n == 0 {
190                break;
191            }
192            hasher.update(&buf[..n]);
193            file.write_all(&buf[..n])?;
194            total += n as u64;
195        }
196        file.flush()?;
197        let bytes: [u8; 32] = hasher.finalize().into();
198        Ok((Oid::from_bytes(bytes), total, tmp))
199    }
200
201    fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
202        // The empty object lives at /dev/null — never persist it.
203        if oid == Oid::EMPTY {
204            return Ok(());
205        }
206        let dest = self.object_path(oid);
207        // Idempotent: if the target already exists with the same OID, the
208        // content must match (content-addressed). Drop the temp and succeed.
209        if dest.is_file() {
210            return Ok(());
211        }
212        if let Some(parent) = dest.parent() {
213            std::fs::create_dir_all(parent)?;
214        }
215        match tmp.persist_noclobber(&dest) {
216            Ok(_) => Ok(()),
217            Err(e) if e.error.kind() == io::ErrorKind::AlreadyExists => {
218                // A concurrent writer beat us. Same content; treat as success.
219                Ok(())
220            }
221            Err(e) => Err(e.error),
222        }
223    }
224}
225
226#[cfg(test)]
227mod tests {
228    use super::*;
229    use tempfile::TempDir;
230
231    fn fixture() -> (TempDir, Store) {
232        let tmp = TempDir::new().unwrap();
233        let store = Store::new(tmp.path().join("lfs"));
234        (tmp, store)
235    }
236
237    /// Sample non-empty OID used across tests (SHA-256 of "abc").
238    const ABC_OID_HEX: &str =
239        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";
240
241    fn abc_oid() -> Oid {
242        ABC_OID_HEX.parse().unwrap()
243    }
244
245    #[test]
246    fn object_path_is_sharded() {
247        let (_tmp, store) = fixture();
248        let oid: Oid = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393"
249            .parse()
250            .unwrap();
251        let path = store.object_path(oid);
252        let suffix: PathBuf = ["objects", "4d", "7a", &oid.to_string()].iter().collect();
253        assert!(path.ends_with(&suffix), "{path:?} does not end with {suffix:?}");
254    }
255
256    #[test]
257    fn empty_oid_short_circuits() {
258        let (_tmp, store) = fixture();
259        assert_eq!(store.object_path(Oid::EMPTY), PathBuf::from(NULL_DEVICE));
260        assert!(store.contains(Oid::EMPTY));
261        assert!(store.contains_with_size(Oid::EMPTY, 0));
262        assert!(!store.contains_with_size(Oid::EMPTY, 1));
263        // Opening the empty OID yields zero bytes.
264        let mut buf = Vec::new();
265        store.open(Oid::EMPTY).unwrap().read_to_end(&mut buf).unwrap();
266        assert!(buf.is_empty());
267    }
268
269    #[test]
270    fn insert_round_trip() {
271        let (_tmp, store) = fixture();
272        let content = b"hello world!";
273        let (oid, size) = store.insert(&mut content.as_slice()).unwrap();
274        assert_eq!(size, content.len() as u64);
275        assert!(store.contains(oid));
276        assert!(store.contains_with_size(oid, size));
277        let mut readback = Vec::new();
278        store.open(oid).unwrap().read_to_end(&mut readback).unwrap();
279        assert_eq!(readback, content);
280    }
281
282    #[test]
283    fn insert_computes_correct_sha256() {
284        let (_tmp, store) = fixture();
285        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
286        assert_eq!(oid, abc_oid());
287    }
288
289    #[test]
290    fn insert_empty_yields_empty_oid_and_no_object_file() {
291        let (_tmp, store) = fixture();
292        let (oid, size) = store.insert(&mut [].as_slice()).unwrap();
293        assert_eq!(oid, Oid::EMPTY);
294        assert_eq!(size, 0);
295        // Critically: nothing was persisted under objects/.
296        assert!(!store.root.join("objects").exists());
297    }
298
299    #[test]
300    fn insert_idempotent() {
301        let (_tmp, store) = fixture();
302        let (oid1, _) = store.insert(&mut b"abc".as_slice()).unwrap();
303        let (oid2, _) = store.insert(&mut b"abc".as_slice()).unwrap();
304        assert_eq!(oid1, oid2);
305        assert!(store.contains(oid1));
306    }
307
308    #[test]
309    fn insert_verified_succeeds_on_match() {
310        let (_tmp, store) = fixture();
311        let size = store
312            .insert_verified(abc_oid(), &mut b"abc".as_slice())
313            .unwrap();
314        assert_eq!(size, 3);
315        assert!(store.contains(abc_oid()));
316    }
317
318    #[test]
319    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
320        let (_tmp, store) = fixture();
321        let wrong: Oid = "0000000000000000000000000000000000000000000000000000000000000001"
322            .parse()
323            .unwrap();
324        let err = store
325            .insert_verified(wrong, &mut b"abc".as_slice())
326            .unwrap_err();
327        match err {
328            StoreError::HashMismatch { expected, actual } => {
329                assert_eq!(expected, wrong);
330                assert_eq!(actual, abc_oid());
331            }
332            other => panic!("expected HashMismatch, got {other:?}"),
333        }
334        // Neither the wrong OID nor the actual OID should be present —
335        // a failed verify must not leak a half-committed file.
336        assert!(!store.contains(wrong));
337        assert!(!store.contains(abc_oid()));
338        // And no leftover tmp file.
339        let tmp_entries: Vec<_> = std::fs::read_dir(store.tmp_dir())
340            .unwrap()
341            .collect::<Result<_, _>>()
342            .unwrap();
343        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
344    }
345
346    #[test]
347    fn open_missing_oid_is_not_found() {
348        let (_tmp, store) = fixture();
349        let oid: Oid = "0000000000000000000000000000000000000000000000000000000000000001"
350            .parse()
351            .unwrap();
352        let err = store.open(oid).unwrap_err();
353        assert_eq!(err.kind(), io::ErrorKind::NotFound);
354    }
355
356    #[test]
357    fn streaming_megabyte_input() {
358        let (_tmp, store) = fixture();
359        // ~1 MiB to exercise the streaming loop across many buffer fills.
360        let content: Vec<u8> = (0..1_048_576u32).map(|i| (i ^ (i >> 5)) as u8).collect();
361        let (oid, size) = store.insert(&mut content.as_slice()).unwrap();
362        assert_eq!(size, content.len() as u64);
363        let mut readback = Vec::new();
364        store.open(oid).unwrap().read_to_end(&mut readback).unwrap();
365        assert_eq!(readback, content);
366    }
367
368    #[test]
369    fn each_object_returns_empty_when_no_objects_dir() {
370        let (_tmp, store) = fixture();
371        // Store dir doesn't exist yet.
372        assert!(store.each_object().unwrap().is_empty());
373    }
374
375    #[test]
376    fn each_object_finds_inserted_objects_with_correct_size() {
377        let (_tmp, store) = fixture();
378        let (oid_a, _) = store.insert(&mut b"hello".as_slice()).unwrap();
379        let (oid_b, _) = store.insert(&mut b"world!!!".as_slice()).unwrap();
380        let mut got = store.each_object().unwrap();
381        got.sort_by_key(|(_, size)| *size);
382        assert_eq!(got.len(), 2);
383        // Order by size: "hello" (5 bytes) first, then "world!!!" (8 bytes).
384        assert_eq!(got[0].0, oid_a);
385        assert_eq!(got[0].1, 5);
386        assert_eq!(got[1].0, oid_b);
387        assert_eq!(got[1].1, 8);
388    }
389
390    #[test]
391    fn each_object_skips_unrecognized_filenames() {
392        let (_tmp, store) = fixture();
393        let (oid, _) = store.insert(&mut b"hi".as_slice()).unwrap();
394        // Drop a stray file in the same shard directory that isn't a
395        // 64-char hex name — must not crash or be reported.
396        let shard = store
397            .root()
398            .join("objects")
399            .join(&oid.to_string()[0..2])
400            .join(&oid.to_string()[2..4]);
401        std::fs::write(shard.join("README"), b"ignored").unwrap();
402        let got = store.each_object().unwrap();
403        assert_eq!(got.len(), 1);
404        assert_eq!(got[0].0, oid);
405    }
406
407    #[test]
408    fn insert_creates_dirs_on_demand() {
409        let (_tmp, store) = fixture();
410        // Before any insert, neither objects/ nor tmp/ exists.
411        assert!(!store.root.exists());
412        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
413        assert!(store.tmp_dir().is_dir());
414        assert!(store.object_path(oid).is_file());
415    }
416}