Skip to main content

maw_lfs/
store.rs

1//! Content-addressed local LFS object store under `.git/lfs/objects/`.
2//!
3//! Layout: `<git_dir>/lfs/objects/<sha[0:2]>/<sha[2:4]>/<full-sha>`.
4//! Bit-identical to git-lfs's on-disk layout for full interoperability.
5//!
//! All writes are atomic (tempfile staged under `<git_dir>/lfs/tmp`, then
//! renamed into the objects tree on the same filesystem) and streaming
//! (constant memory usage regardless of file size).
8
9use std::fs;
10use std::io::{self, Read, Write};
11use std::path::{Path, PathBuf};
12
13use sha2::{Digest, Sha256};
14use thiserror::Error;
15
16use crate::pointer::Pointer;
17
/// Size in bytes (64 KiB) of the buffer used when streaming object data
/// from a reader into the store.
const BUF_SIZE: usize = 64 * 1024;
19
/// A handle to the local LFS object store.
///
/// The handle only holds the path of the LFS root, so it is cheap to clone
/// and can be printed with `{:?}` for diagnostics.
#[derive(Debug, Clone)]
pub struct Store {
    /// The LFS root: `<git_dir>/lfs`.
    root: PathBuf,
}
25
/// Errors produced by the local LFS object store.
#[derive(Debug, Error)]
pub enum StoreError {
    /// A filesystem or reader operation failed.
    #[error("io error at {path}: {source}")]
    Io {
        /// Path the store associated with the failing operation.
        path: PathBuf,
        /// The underlying I/O error.
        #[source]
        source: io::Error,
    },
    /// Streamed content did not match the oid and/or size the caller expected.
    #[error(
        "content mismatch: expected oid {expected} size {expected_size}, got oid {got} size {got_size}"
    )]
    ContentMismatch {
        /// Expected oid as lowercase hex.
        expected: String,
        /// Expected object size in bytes.
        expected_size: u64,
        /// Oid actually computed, as lowercase hex. Empty when streaming was
        /// aborted early because the input exceeded `expected_size`, since no
        /// final hash exists at that point.
        got: String,
        /// Number of bytes actually received.
        got_size: u64,
    },
}
44
45fn io_err(path: impl Into<PathBuf>, source: io::Error) -> StoreError {
46    StoreError::Io {
47        path: path.into(),
48        source,
49    }
50}
51
52fn oid_hex(oid: &[u8; 32]) -> String {
53    let mut s = String::with_capacity(64);
54    for b in oid {
55        s.push(hex_char(b >> 4));
56        s.push(hex_char(b & 0x0f));
57    }
58    s
59}
60
/// Map a nibble (`0..=15`) to its lowercase hexadecimal digit.
///
/// # Panics
/// Panics if `n` is greater than 15.
const fn hex_char(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        unreachable!()
    }
}
68
69impl Store {
70    /// Open (and create if missing) the LFS store under `<git_dir>/lfs`.
71    ///
72    /// # Errors
73    /// Returns an error if the store directory cannot be created.
74    pub fn open(git_dir: &Path) -> Result<Self, StoreError> {
75        let root = git_dir.join("lfs");
76        let objects = root.join("objects");
77        fs::create_dir_all(&objects).map_err(|e| io_err(&objects, e))?;
78        Ok(Self { root })
79    }
80
81    /// Path to the real file for a given oid (may or may not exist).
82    #[must_use]
83    pub fn object_path(&self, oid: &[u8; 32]) -> PathBuf {
84        let hex = oid_hex(oid);
85        self.root
86            .join("objects")
87            .join(&hex[0..2])
88            .join(&hex[2..4])
89            .join(&hex)
90    }
91
92    /// Is this object present in the local store?
93    #[must_use]
94    pub fn contains(&self, oid: &[u8; 32]) -> bool {
95        self.object_path(oid).is_file()
96    }
97
98    /// Open a reader for the real bytes, or None if the object is missing.
99    ///
100    /// # Errors
101    /// Returns an error if the object exists but cannot be opened.
102    pub fn open_object(&self, oid: &[u8; 32]) -> Result<Option<Box<dyn Read + Send>>, StoreError> {
103        let path = self.object_path(oid);
104        match fs::File::open(&path) {
105            Ok(f) => Ok(Some(Box::new(f))),
106            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(None),
107            Err(e) => Err(io_err(&path, e)),
108        }
109    }
110
111    /// Stream bytes in from a reader, hashing as we go, then atomically
112    /// land the file at its content-addressed final path.
113    ///
114    /// Returns the built pointer and the byte count.
115    /// Idempotent: if an object with the computed oid already exists, the
116    /// temp file is dropped and the existing file is kept.
117    ///
118    /// # Errors
119    /// Returns an error if the input cannot be read, the temp object cannot be
120    /// written, or the completed object cannot be moved into the store.
121    pub fn insert_from_reader<R: Read>(&self, reader: R) -> Result<(Pointer, u64), StoreError> {
122        let (oid, size, tmp_path) = self.stream_to_tmp(reader, None, None)?;
123        let final_path = self.object_path(&oid);
124        Self::commit_tmp(&tmp_path, &final_path)?;
125        Ok((
126            Pointer {
127                oid,
128                size,
129                extensions: Vec::new(),
130            },
131            size,
132        ))
133    }
134
135    /// Stream bytes in while verifying them against a known oid + size.
136    /// On mismatch, the temp file is cleaned up.
137    ///
138    /// # Errors
139    /// Returns an error if streaming fails, verification fails, or the completed
140    /// object cannot be moved into the store.
141    pub fn insert_from_stream<R: Read>(
142        &self,
143        expected_oid: &[u8; 32],
144        expected_size: u64,
145        reader: R,
146    ) -> Result<(), StoreError> {
147        let (oid, size, tmp_path) =
148            self.stream_to_tmp(reader, Some(*expected_oid), Some(expected_size))?;
149        if oid != *expected_oid || size != expected_size {
150            // stream_to_tmp already cleaned up on mismatch. Belt-and-braces:
151            let _ = fs::remove_file(&tmp_path);
152            return Err(StoreError::ContentMismatch {
153                expected: oid_hex(expected_oid),
154                expected_size,
155                got: oid_hex(&oid),
156                got_size: size,
157            });
158        }
159        let final_path = self.object_path(&oid);
160        Self::commit_tmp(&tmp_path, &final_path)?;
161        Ok(())
162    }
163
164    /// Stream `reader` to a temp file inside the lfs tmp directory, hashing
165    /// on the fly. If `expected_*` are set and verification fails, the
166    /// temp file is removed before returning.
167    fn stream_to_tmp<R: Read>(
168        &self,
169        mut reader: R,
170        expected_oid: Option<[u8; 32]>,
171        expected_size: Option<u64>,
172    ) -> Result<([u8; 32], u64, PathBuf), StoreError> {
173        let tmp_dir = self.root.join("tmp");
174        fs::create_dir_all(&tmp_dir).map_err(|e| io_err(&tmp_dir, e))?;
175
176        // Create an anonymous temp file in the lfs tmp dir. Same filesystem
177        // as the final destination so rename is atomic.
178        let mut named =
179            tempfile::NamedTempFile::new_in(&tmp_dir).map_err(|e| io_err(&tmp_dir, e))?;
180        let tmp_path = named.path().to_owned();
181
182        let mut hasher = Sha256::new();
183        let mut buf = vec![0u8; BUF_SIZE];
184        let mut total: u64 = 0;
185
186        loop {
187            let n = match reader.read(&mut buf) {
188                Ok(0) => break,
189                Ok(n) => n,
190                Err(e) => {
191                    let _ = named.close();
192                    return Err(io_err(&tmp_path, e));
193                }
194            };
195            hasher.update(&buf[..n]);
196            if let Err(e) = named.as_file_mut().write_all(&buf[..n]) {
197                let _ = named.close();
198                return Err(io_err(&tmp_path, e));
199            }
200            total += n as u64;
201
202            // Early abort on size overflow if verifying.
203            if let Some(es) = expected_size
204                && total > es
205            {
206                let _ = named.close();
207                return Err(StoreError::ContentMismatch {
208                    expected: expected_oid.as_ref().map(oid_hex).unwrap_or_default(),
209                    expected_size: es,
210                    got: String::new(),
211                    got_size: total,
212                });
213            }
214        }
215
216        if let Err(e) = named.as_file_mut().sync_all() {
217            let _ = named.close();
218            return Err(io_err(&tmp_path, e));
219        }
220
221        let oid_bytes: [u8; 32] = hasher.finalize().into();
222
223        // Persist keeps the file around. We rename in commit_tmp().
224        // Detach from NamedTempFile so its Drop doesn't delete.
225        let (_file, persisted) = named.keep().map_err(|e| io_err(&tmp_path, e.error))?;
226
227        if let (Some(eo), Some(es)) = (expected_oid, expected_size)
228            && (oid_bytes != eo || total != es)
229        {
230            let _ = fs::remove_file(&persisted);
231            return Err(StoreError::ContentMismatch {
232                expected: oid_hex(&eo),
233                expected_size: es,
234                got: oid_hex(&oid_bytes),
235                got_size: total,
236            });
237        }
238
239        Ok((oid_bytes, total, persisted))
240    }
241
242    /// Move a completed temp file to its final content-addressed path.
243    /// Idempotent: if the target already exists, drop the tmp file.
244    fn commit_tmp(tmp: &Path, final_path: &Path) -> Result<(), StoreError> {
245        if final_path.exists() {
246            let _ = fs::remove_file(tmp);
247            return Ok(());
248        }
249        // Ensure parent dirs exist.
250        if let Some(parent) = final_path.parent() {
251            fs::create_dir_all(parent).map_err(|e| io_err(parent, e))?;
252        }
253        match fs::rename(tmp, final_path) {
254            Ok(()) => Ok(()),
255            Err(_) if final_path.exists() => {
256                // Lost a race to another writer — tmp already gone OR the
257                // rename succeeded under us. Either way target is present.
258                let _ = fs::remove_file(tmp);
259                Ok(())
260            }
261            Err(e) => Err(io_err(final_path, e)),
262        }
263    }
264}
265
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Create a fresh store rooted in a throwaway directory. The `TempDir`
    /// guard is returned so the directory outlives the test body.
    fn new_store() -> (tempfile::TempDir, Store) {
        let tmp = tempfile::tempdir().expect("operation should succeed");
        let store = Store::open(tmp.path()).expect("operation should succeed");
        (tmp, store)
    }

    /// sha256 of `b"hello world\n"`.
    const HELLO_OID_HEX: &str = "a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447";

    /// Decode a 64-character hex string into a 32-byte oid.
    fn hex_to_oid(hex: &str) -> [u8; 32] {
        assert_eq!(hex.len(), 64, "oid hex must be 64 characters");
        let mut out = [0u8; 32];
        for (byte, pair) in out.iter_mut().zip(hex.as_bytes().chunks_exact(2)) {
            let pair = std::str::from_utf8(pair).expect("operation should succeed");
            *byte = u8::from_str_radix(pair, 16).expect("operation should succeed");
        }
        out
    }

    /// Number of entries left in the store's staging directory
    /// (0 if the directory does not exist).
    fn staged_entries(git_dir: &Path) -> usize {
        fs::read_dir(git_dir.join("lfs").join("tmp"))
            .map(|entries| entries.count())
            .unwrap_or(0)
    }

    #[test]
    fn insert_from_reader_computes_correct_oid() {
        let (tmp, store) = new_store();
        let (pointer, size) = store
            .insert_from_reader(Cursor::new(b"hello world\n".to_vec()))
            .expect("operation should succeed");
        assert_eq!(size, 12);
        assert_eq!(pointer.oid_hex(), HELLO_OID_HEX);
        assert!(store.contains(&pointer.oid));
        // The staged file was moved into place, not copied.
        assert_eq!(staged_entries(tmp.path()), 0);
    }

    #[test]
    fn object_path_layout_matches_git_lfs() {
        let (_tmp, store) = new_store();
        let oid = hex_to_oid(HELLO_OID_HEX);
        let path = store.object_path(&oid);
        // <git_dir>/lfs/objects/a9/48/a948904f...
        // Compare path components rather than a string with hard-coded `/`
        // so the check also holds on platforms with a different separator.
        let expected_tail: PathBuf = ["lfs", "objects", "a9", "48", HELLO_OID_HEX]
            .iter()
            .collect();
        assert!(path.ends_with(&expected_tail));
    }

    #[test]
    fn contains_false_when_absent() {
        let (_tmp, store) = new_store();
        let oid = [0u8; 32];
        assert!(!store.contains(&oid));
    }

    #[test]
    fn open_object_returns_none_for_missing() {
        let (_tmp, store) = new_store();
        let oid = [0u8; 32];
        assert!(
            store
                .open_object(&oid)
                .expect("operation should succeed")
                .is_none()
        );
    }

    #[test]
    fn open_object_returns_bytes_after_insert() {
        let (_tmp, store) = new_store();
        let (p, _) = store
            .insert_from_reader(Cursor::new(b"hello world\n".to_vec()))
            .expect("operation should succeed");
        let mut reader = store
            .open_object(&p.oid)
            .expect("operation should succeed")
            .expect("operation should succeed");
        let mut out = Vec::new();
        reader
            .read_to_end(&mut out)
            .expect("operation should succeed");
        assert_eq!(out, b"hello world\n");
    }

    #[test]
    fn insert_twice_same_content_is_idempotent() {
        let (tmp, store) = new_store();
        let (p1, _) = store
            .insert_from_reader(Cursor::new(b"same".to_vec()))
            .expect("operation should succeed");
        let (p2, _) = store
            .insert_from_reader(Cursor::new(b"same".to_vec()))
            .expect("operation should succeed");
        assert_eq!(p1.oid, p2.oid);
        assert!(store.contains(&p1.oid));
        // The second insert's temp file must have been dropped.
        assert_eq!(staged_entries(tmp.path()), 0);
    }

    #[test]
    fn insert_from_stream_verifies_match() {
        let (_tmp, store) = new_store();
        let data = b"hello world\n";
        let oid = hex_to_oid(HELLO_OID_HEX);
        store
            .insert_from_stream(&oid, 12, Cursor::new(data.to_vec()))
            .expect("operation should succeed");
        assert!(store.contains(&oid));
    }

    #[test]
    fn insert_from_stream_rejects_wrong_size() {
        let (tmp, store) = new_store();
        let data = b"hello world\n";
        let oid = hex_to_oid(HELLO_OID_HEX);
        let err = store
            .insert_from_stream(&oid, 999, Cursor::new(data.to_vec()))
            .expect_err("operation should fail");
        assert!(matches!(err, StoreError::ContentMismatch { .. }));
        assert!(!store.contains(&oid));
        assert_eq!(staged_entries(tmp.path()), 0);
    }

    #[test]
    fn insert_from_stream_rejects_wrong_oid() {
        let (tmp, store) = new_store();
        let data = b"different content";
        let fake_oid = hex_to_oid(HELLO_OID_HEX);
        let err = store
            .insert_from_stream(&fake_oid, 17, Cursor::new(data.to_vec()))
            .expect_err("operation should fail");
        assert!(matches!(err, StoreError::ContentMismatch { .. }));
        assert!(!store.contains(&fake_oid));
        assert_eq!(staged_entries(tmp.path()), 0);
    }

    #[test]
    fn insert_from_stream_early_aborts_oversize() {
        let (tmp, store) = new_store();
        let data = vec![b'x'; 100];
        let oid = [0u8; 32];
        let err = store
            .insert_from_stream(&oid, 10, Cursor::new(data))
            .expect_err("operation should fail");
        assert!(matches!(err, StoreError::ContentMismatch { .. }));
        assert!(!store.contains(&oid));
        assert_eq!(staged_entries(tmp.path()), 0);
    }

    #[test]
    fn concurrent_insert_same_content_is_safe() {
        use std::sync::Arc;
        use std::thread;

        let (tmp, _) = new_store();
        let git_dir = Arc::new(tmp.path().to_owned());
        // Deliberately reopen from each thread to exercise the full race.
        let data = Arc::new(vec![b'y'; 10 * 1024 * 1024]); // 10 MiB

        // Collect eagerly so every thread is spawned before any is joined.
        let handles: Vec<_> = (0..4)
            .map(|_| {
                let gd = Arc::clone(&git_dir);
                let d = Arc::clone(&data);
                thread::spawn(move || {
                    let store = Store::open(&gd).expect("operation should succeed");
                    // Read straight from the shared buffer; no per-thread copy.
                    store
                        .insert_from_reader(Cursor::new(d.as_slice()))
                        .expect("operation should succeed")
                })
            })
            .collect();
        let results: Vec<_> = handles
            .into_iter()
            .map(|h| h.join().expect("operation should succeed"))
            .collect();
        // All threads compute the same oid.
        let first_oid = results[0].0.oid;
        for (p, _) in &results {
            assert_eq!(p.oid, first_oid);
        }
        let store = Store::open(&git_dir).expect("operation should succeed");
        assert!(store.contains(&first_oid));
        // File content is intact.
        let mut reader = store
            .open_object(&first_oid)
            .expect("operation should succeed")
            .expect("operation should succeed");
        let mut out = Vec::new();
        reader
            .read_to_end(&mut out)
            .expect("operation should succeed");
        assert_eq!(out.len(), data.len());
        // `assert!` rather than `assert_eq!` to avoid dumping 10 MiB on failure.
        assert!(out == *data, "stored bytes differ from the inserted bytes");
    }

    #[test]
    fn large_file_streams_without_full_load() {
        // 10,000,000 bytes of pseudo-random-ish data; the length is not a
        // multiple of the 64 KiB streaming buffer.
        let (_tmp, store) = new_store();
        let data: Vec<u8> = (0..10_000_000u32).map(|i| (i % 251) as u8).collect();
        let (p, size) = store
            .insert_from_reader(Cursor::new(data.as_slice()))
            .expect("operation should succeed");
        assert_eq!(size, data.len() as u64);
        // Round-trip equality.
        let mut out = Vec::new();
        store
            .open_object(&p.oid)
            .expect("operation should succeed")
            .expect("operation should succeed")
            .read_to_end(&mut out)
            .expect("operation should succeed");
        assert!(out == data, "round-tripped bytes differ from the input");
    }

    #[test]
    fn git_lfs_written_object_readable_by_maw() {
        // Simulate a file written by git-lfs by hand-placing it at the
        // expected path. maw's contains/open_object must find it.
        let (_tmp, store) = new_store();
        let oid = hex_to_oid(HELLO_OID_HEX);
        let path = store.object_path(&oid);
        fs::create_dir_all(path.parent().expect("operation should succeed"))
            .expect("operation should succeed");
        fs::write(&path, b"hello world\n").expect("operation should succeed");
        assert!(store.contains(&oid));
        let mut reader = store
            .open_object(&oid)
            .expect("operation should succeed")
            .expect("operation should succeed");
        let mut buf = Vec::new();
        reader
            .read_to_end(&mut buf)
            .expect("operation should succeed");
        assert_eq!(buf, b"hello world\n");
    }

    #[test]
    fn empty_file_is_valid() {
        let (_tmp, store) = new_store();
        let (p, size) = store
            .insert_from_reader(Cursor::new(Vec::<u8>::new()))
            .expect("operation should succeed");
        assert_eq!(size, 0);
        // sha256 of empty: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
        assert_eq!(
            p.oid_hex(),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
        assert!(store.contains(&p.oid));
    }
}