Skip to main content

git_lfs_store/
lib.rs

1//! Content-addressable on-disk store for Git LFS objects.
2//!
3//! Git LFS keeps large files outside git's object database, leaving
4//! small pointer blobs committed to git in their place. This crate
5//! owns the local half of that split: where the actual file bytes
6//! live on disk, how they get there, and how they're served back out.
7//!
8//! Objects live under `<lfs_dir>/objects/aa/bb/aabbcc…` where the
9//! hex string is the SHA-256 of the content, sharded by the first
10//! two bytes (see [`docs/spec.md`]). Writes go through a tempfile in
11//! `<lfs_dir>/tmp/` and are atomically renamed into place once their
12//! hash is known.
13//!
14//! Two insert paths cover the two callers: [`Store::insert`] hashes
15//! bytes as they're written (the clean-filter path: bytes in, OID
16//! out), and [`Store::insert_verified`] checks the resulting hash
17//! against a caller-supplied expected OID (the download path: the
18//! server names the OID, we confirm what arrived).
19//!
20//! In-progress downloads stage as `.part` files at
21//! [`Store::incomplete_path`] and rename into place via
22//! [`Store::commit_partial`], so an interrupted transfer resumes
23//! with a `Range:` request rather than restarting. Alternate object
24//! stores attached via [`Store::with_references`] are hardlinked or
25//! copied into the primary on a miss (the LFS analogue of
26//! `git clone --shared`). File and directory modes follow
27//! `core.sharedRepository`, see [`Store::with_shared_repository`].
28//!
29//! ```
30//! use git_lfs_store::Store;
31//!
32//! # let _tmp = tempfile::TempDir::new().unwrap();
33//! # let lfs_dir = _tmp.path().join("lfs");
34//! let store = Store::new(&lfs_dir);
35//! let (oid, size) = store.insert(&mut &b"hello world"[..]).unwrap();
36//! assert!(store.contains(oid));
37//! assert_eq!(size, 11);
38//! ```
39//!
40//! [`docs/spec.md`]: https://gitlab.com/rustutils/git-lfs/-/blob/master/docs/spec.md
41
42use std::collections::HashMap;
43use std::fs::File;
44use std::io::{self, Read, Write};
45use std::path::{Path, PathBuf};
46
47use git_lfs_pointer::Oid;
48use sha2::{Digest, Sha256};
49use tempfile::NamedTempFile;
50
/// Platform null device.
///
/// What `object_path` returns for [`Oid::EMPTY`].
const NULL_DEVICE: &str = if cfg!(windows) { "NUL" } else { "/dev/null" };

/// Size in bytes (64 KiB) of the scratch buffer `stream_to_tmp`
/// allocates to shuttle data from the source reader into the tempfile.
const COPY_BUFFER: usize = 64 * 1024;
57
/// A local LFS object store rooted at `<lfs_dir>` (typically `.git/lfs`).
///
/// May reference any number of alternate stores (typically the LFS
/// objects of a `git clone --shared` source) and will materialize a
/// hit from one of them into the local store on demand. See
/// [`Store::with_references`].
#[derive(Debug, Clone)]
pub struct Store {
    /// Root LFS directory. The `objects/`, `tmp/` and `incomplete/`
    /// subdirectories are all resolved relative to it.
    root: PathBuf,
    /// Paths to alternate `lfs/objects/` directories. Each maps to a
    /// `.git/objects/info/alternates` entry: when the local store
    /// misses, [`Store::contains`] / [`Store::contains_with_size`] /
    /// [`Store::open`] walk these in order and hardlink (or copy) any
    /// hit into `root`.
    references: Vec<PathBuf>,
    /// File/directory mode policy for objects committed into the
    /// store. Defaults to "honor process umask"; set via
    /// [`Store::with_shared_repository`] to override (e.g. to apply
    /// `core.sharedRepository=group` semantics).
    mode_policy: ModePolicy,
}
78
/// File-mode rule used when committing objects and creating their
/// containing directories. Mirrors git's `core.sharedRepository`
/// semantics, see `config/config.go::getMask` upstream.
#[derive(Debug, Clone, Copy)]
struct ModePolicy {
    /// Bits to mask off `0o666` when chmoding a committed object;
    /// resolved eagerly at construction (from the process umask or
    /// from `core.sharedRepository`). An explicit chmod is always
    /// applied because the `tempfile` crate creates files at 0o600
    /// regardless of umask. The directory mode is derived from the
    /// resulting file mode rather than stored separately.
    mask: u32,
}
91
92impl ModePolicy {
93    fn from_umask() -> Self {
94        Self {
95            mask: process_umask(),
96        }
97    }
98
99    /// Parse a `core.sharedRepository` config value into a mask.
100    /// Recognized: `umask`/`false`/`0`/unset → process umask;
101    /// `group`/`true`/`1` → 0o007; `all`/`world`/`everybody`/`2` →
102    /// 0o002; any other octal value N → `0o666 & !N`. Unrecognized
103    /// strings fall back to umask.
104    fn from_shared_repository(value: &str) -> Self {
105        let v = value.trim().to_ascii_lowercase();
106        let mask = match v.as_str() {
107            "group" | "true" | "1" => 0o007,
108            "all" | "world" | "everybody" | "2" => 0o002,
109            "umask" | "false" | "0" | "" => process_umask(),
110            other => {
111                // Try octal interpretation. Strip any leading `0` to
112                // match git's `strconv.ParseInt(v, 8, ...)` behavior.
113                match u32::from_str_radix(other.trim_start_matches('0'), 8) {
114                    Ok(mode) if mode <= 0o777 => 0o666 & !mode,
115                    _ => process_umask(),
116                }
117            }
118        };
119        Self { mask: mask & 0o777 }
120    }
121
122    /// Target file mode for committed objects (and the temp files
123    /// they're persisted from).
124    fn file_mode(self) -> u32 {
125        0o666 & !self.mask & 0o777
126    }
127
128    /// Target directory mode. Matches git's
129    /// `tools.ExecutablePermissions`: copy read bits to execute bits.
130    fn dir_mode(self) -> u32 {
131        let f = self.file_mode();
132        (f | ((f & 0o444) >> 2)) & 0o777
133    }
134}
135
/// Read the process umask without permanently changing it.
///
/// The `umask` call can only report the current mask by installing a
/// new one and returning the previous value, so the mask is captured
/// by installing a temporary `0o022` and immediately restoring the
/// value that call returned. Only the low nine permission bits are
/// reported.
#[cfg(unix)]
fn process_umask() -> u32 {
    // SAFETY: `libc::umask` takes and returns plain integers, so there
    // is no memory-safety precondition to uphold. The swap is not
    // atomic, however: a file created by another thread between the
    // two calls is subject to the temporary `0o022` mask rather than
    // the real one. The original author's expectation is that stores
    // live for the duration of a single command and are constructed
    // before any worker threads spawn (not verifiable from this file).
    unsafe {
        let prev = libc::umask(0o022);
        libc::umask(prev);
        (prev as u32) & 0o777
    }
}
152
/// Fallback for non-unix targets, where no umask is queried: report
/// a fixed `0o022`.
#[cfg(not(unix))]
fn process_umask() -> u32 {
    0o022
}
157
/// Things that can go wrong while inserting an object.
///
/// Reads from the store ([`Store::open`], [`Store::contains`], and others)
/// return a plain [`io::Error`]. This enum is only surfaced by the
/// insert paths because they have a non-IO failure mode (hash
/// mismatch) that needs its own variant.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// Filesystem-level failure.
    ///
    /// Surfaced by operations like tempfile creation, write, rename,
    /// permission, etc. Converts from [`io::Error`] via `?` and
    /// displays as the underlying error.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// [`Store::insert_verified`] received bytes that hashed to
    /// something other than the OID the caller asserted.
    ///
    /// `expected` is the OID the caller passed in; `actual` is the
    /// SHA-256 of the bytes that were actually read. The tempfile is
    /// dropped, so no half-committed object is left behind.
    #[error("expected OID {expected}, got {actual}")]
    HashMismatch { expected: Oid, actual: Oid },
}
179
180impl Store {
181    /// Create a store rooted at the given LFS directory. The directory is not
182    /// created eagerly; subdirectories are created on demand as objects land.
183    pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
184        Self {
185            root: lfs_dir.into(),
186            references: Vec::new(),
187            mode_policy: ModePolicy::from_umask(),
188        }
189    }
190
191    /// Apply `core.sharedRepository` semantics to objects this store
192    /// commits.
193    ///
194    /// `value` is the literal string from `git config`
195    /// (`group`, `everybody`, octal `0660`, etc). Unrecognized values
196    /// fall back to honoring the process umask. Resets any prior
197    /// policy on this `Store`.
198    #[must_use]
199    pub fn with_shared_repository(mut self, value: &str) -> Self {
200        self.mode_policy = ModePolicy::from_shared_repository(value);
201        self
202    }
203
204    /// Attach alternate `lfs/objects/` directories that the store may
205    /// hardlink-or-copy from when a local lookup misses.
206    ///
207    /// Used by `git clone --shared` setups so the new repo can read the
208    /// source's existing LFS objects without re-downloading.
209    ///
210    /// Pass [`git_lfs_git::lfs_alternate_dirs`](https://docs.rs/git-lfs-git)
211    /// (`<git-dir>/objects/info/alternates` resolved to LFS-objects
212    /// dirs) at construction.
213    #[must_use]
214    pub fn with_references(mut self, refs: impl IntoIterator<Item = PathBuf>) -> Self {
215        self.references = refs.into_iter().collect();
216        self
217    }
218
219    /// Root LFS directory.
220    pub fn root(&self) -> &Path {
221        &self.root
222    }
223
224    /// Directory holding temp files for in-flight inserts.
225    pub fn tmp_dir(&self) -> PathBuf {
226        self.root.join("tmp")
227    }
228
229    /// Directory holding partial or in-progress downloads.
230    ///
231    /// Files are named `<oid>.part` and persist across process
232    /// invocations so a later attempt can pick up where a prior
233    /// one left off (issuing a `Range:` request). Mirrors upstream's
234    /// `incomplete/` layout.
235    pub fn incomplete_dir(&self) -> PathBuf {
236        self.root.join("incomplete")
237    }
238
239    /// Path to the partial-download file for `oid`.
240    ///
241    /// The file may not exist; the caller is responsible for creating
242    /// and writing it.
243    pub fn incomplete_path(&self, oid: Oid) -> PathBuf {
244        self.incomplete_dir().join(format!("{oid}.part"))
245    }
246
247    /// Atomically move a fully-downloaded partial file into its final
248    /// object-path location.
249    ///
250    /// The caller is responsible for confirming
251    /// the file's bytes hash to `oid` first; this is a pure rename.
252    /// Clobbers any existing file at the destination, see
253    /// [`insert_verified`](Self::insert_verified) for the rationale.
254    pub fn commit_partial(&self, oid: Oid, partial: &Path) -> io::Result<()> {
255        if oid == Oid::EMPTY {
256            return Ok(());
257        }
258        let dest = self.object_path(oid);
259        if let Some(parent) = dest.parent() {
260            self.create_dir_all_with_mode(parent)?;
261        }
262        std::fs::rename(partial, &dest)?;
263        self.set_file_mode(&dest)?;
264        Ok(())
265    }
266
267    /// Sweep `<root>/tmp/` for stale temp files left behind by
268    /// interrupted prior runs.
269    ///
270    /// Filenames matching `<64-hex>-<random>`
271    /// whose object is already complete in the store are removed
272    /// unconditionally (upstream's in-flight download tempfile shape);
273    /// everything else older than an hour is pruned.
274    ///
275    /// Best-effort: the dir not existing, or any individual remove
276    /// failing, is silently ignored. Intended to run once per
277    /// command invocation, before the command's main work, so an
278    /// interrupted prior run doesn't leak temp files indefinitely
279    /// (matches upstream's `lfs.cleanupTempFiles` startup task in
280    /// `fs/cleanup.go`).
281    ///
282    /// Per-file rules, mirroring upstream:
283    /// 1. Filenames starting with `<64-hex>-` whose object is already
284    ///    complete in the store are removed unconditionally
285    ///    (interrupted-rename leftovers).
286    /// 2. Otherwise, files older than 1 hour are removed *unless*
287    ///    they live in a subdirectory whose own mtime is fresher than
288    ///    1 hour, since active processes may have stale-looking files
289    ///    they still hold open (hard-linked across repos). Files
290    ///    directly under `tmp/` are exempt from the subdir-age
291    ///    short-circuit since we modify the top-level tmp dir often
292    ///    enough that it would never expire.
293    pub fn cleanup_tmp_objects(&self) {
294        let tmp = self.root.join("tmp");
295        if !tmp.exists() {
296            return;
297        }
298        let cutoff =
299            match std::time::SystemTime::now().checked_sub(std::time::Duration::from_secs(3600)) {
300                Some(t) => t,
301                None => return,
302            };
303        // Cache subdir mtimes so the 1-hour exemption check doesn't
304        // re-stat the same dir per file.
305        let mut dir_mtimes: HashMap<PathBuf, std::time::SystemTime> = HashMap::new();
306        self.walk_tmp(&tmp, &tmp, cutoff, &mut dir_mtimes);
307    }
308
309    fn walk_tmp(
310        &self,
311        root: &Path,
312        dir: &Path,
313        cutoff: std::time::SystemTime,
314        dir_mtimes: &mut HashMap<PathBuf, std::time::SystemTime>,
315    ) {
316        let Ok(entries) = std::fs::read_dir(dir) else {
317            return;
318        };
319        for entry in entries.flatten() {
320            let path = entry.path();
321            let Ok(file_type) = entry.file_type() else {
322                continue;
323            };
324            if file_type.is_dir() {
325                self.walk_tmp(root, &path, cutoff, dir_mtimes);
326                continue;
327            }
328            let name = entry.file_name();
329            let name_str = name.to_string_lossy();
330            // Rule 1: "<oid>-..." file whose object is already complete.
331            // The filesystem-level cleanup accepts any 64-char prefix
332            // (no hex validation) so upstream test sentinels like
333            // `good...` / `bad...` round-trip.
334            if name_str.len() > 64 && name_str.as_bytes().get(64) == Some(&b'-') {
335                let oid_str = &name_str[..64];
336                let object_path = self
337                    .root
338                    .join("objects")
339                    .join(&oid_str[0..2])
340                    .join(&oid_str[2..4])
341                    .join(oid_str);
342                if object_path.is_file() {
343                    let _ = std::fs::remove_file(&path);
344                    continue;
345                }
346            }
347            // Rule 2a: skip files in young subdirectories. The
348            // top-level tmp/ itself is exempt (otherwise it'd never
349            // expire). Cache the dir's mtime so we don't restat per
350            // file.
351            if dir != root {
352                let dir_mtime = *dir_mtimes.entry(dir.to_path_buf()).or_insert_with(|| {
353                    std::fs::metadata(dir)
354                        .and_then(|m| m.modified())
355                        .unwrap_or(std::time::UNIX_EPOCH)
356                });
357                if dir_mtime > cutoff {
358                    continue;
359                }
360            }
361            // Rule 2b: remove file if older than the cutoff.
362            let Ok(meta) = entry.metadata() else { continue };
363            let Ok(mtime) = meta.modified() else { continue };
364            if mtime < cutoff {
365                let _ = std::fs::remove_file(&path);
366            }
367        }
368    }
369
370    /// Where the object with this OID lives on disk.
371    ///
372    /// For [`Oid::EMPTY`] this returns the platform null device, mirroring
373    /// upstream's behavior so callers can `open` an empty object without
374    /// special-casing.
375    pub fn object_path(&self, oid: Oid) -> PathBuf {
376        if oid == Oid::EMPTY {
377            return PathBuf::from(NULL_DEVICE);
378        }
379        let hex = oid.to_string();
380        self.root
381            .join("objects")
382            .join(&hex[0..2])
383            .join(&hex[2..4])
384            .join(&hex)
385    }
386
387    /// Check if this object is present locally as a regular file.
388    ///
389    /// The empty OID is always considered present. If the local copy
390    /// is missing but an alternate store has the object, materializes
391    /// it locally first.
392    pub fn contains(&self, oid: Oid) -> bool {
393        if oid == Oid::EMPTY {
394            return true;
395        }
396        if self.object_path(oid).is_file() {
397            return true;
398        }
399        self.materialize_from_reference(oid, None)
400    }
401
402    /// Check if the object is present and its on-disk size matches `size`.
403    ///
404    /// Used to detect partial/corrupted local copies. Like
405    /// [`contains`](Self::contains), will fault in a matching alternate-store
406    /// object on demand.
407    pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
408        if oid == Oid::EMPTY {
409            return size == 0;
410        }
411        let local = std::fs::metadata(self.object_path(oid))
412            .map(|m| m.is_file() && m.len() == size)
413            .unwrap_or(false);
414        if local {
415            return true;
416        }
417        self.materialize_from_reference(oid, Some(size))
418    }
419
420    /// Materialize the object from a reference store, if one is available.
421    ///
422    /// Walk reference stores looking for `oid`; the first hit (matching
423    /// `size` if specified) is hardlinked (or copied, on cross-device
424    /// fallback) into the local store. Returns `true` if the object
425    /// is now present locally as a result.
426    fn materialize_from_reference(&self, oid: Oid, size: Option<u64>) -> bool {
427        if self.references.is_empty() {
428            return false;
429        }
430        let hex = oid.to_string();
431        for refdir in &self.references {
432            let src = refdir.join(&hex[0..2]).join(&hex[2..4]).join(&hex);
433            let Ok(meta) = std::fs::metadata(&src) else {
434                continue;
435            };
436            if !meta.is_file() {
437                continue;
438            }
439            if let Some(want) = size
440                && meta.len() != want
441            {
442                continue;
443            }
444            let dest = self.object_path(oid);
445            if let Some(parent) = dest.parent() {
446                let _ = self.create_dir_all_with_mode(parent);
447            }
448            // Hardlink first (free, O(1), shares inode); fall back to
449            // copy on EXDEV / NotSupported (e.g. alternate on a
450            // different filesystem).
451            if std::fs::hard_link(&src, &dest).is_ok() || std::fs::copy(&src, &dest).is_ok() {
452                let _ = self.set_file_mode(&dest);
453                return true;
454            }
455        }
456        false
457    }
458
459    /// Walk every object file in the store, yielding (oid, size_on_disk).
460    ///
461    /// Traverses the sharded `objects/<aa>/<bb>/<oid>` layout. Filenames
462    /// that don't parse as 64-char SHA-256 hex are silently skipped, as
463    /// are unexpected directories. The store directory not existing is
464    /// not an error; the result is just empty.
465    pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
466        let objects_dir = self.root.join("objects");
467        if !objects_dir.exists() {
468            return Ok(Vec::new());
469        }
470        let mut out = Vec::new();
471        for aa in std::fs::read_dir(&objects_dir)? {
472            let aa = aa?;
473            if !aa.file_type()?.is_dir() {
474                continue;
475            }
476            for bb in std::fs::read_dir(aa.path())? {
477                let bb = bb?;
478                if !bb.file_type()?.is_dir() {
479                    continue;
480                }
481                for entry in std::fs::read_dir(bb.path())? {
482                    let entry = entry?;
483                    let name = entry.file_name();
484                    let Some(name_str) = name.to_str() else {
485                        continue;
486                    };
487                    let Ok(oid) = name_str.parse::<Oid>() else {
488                        continue;
489                    };
490                    let meta = entry.metadata()?;
491                    if !meta.is_file() {
492                        continue;
493                    }
494                    out.push((oid, meta.len()));
495                }
496            }
497        }
498        Ok(out)
499    }
500
501    /// Open an object for reading.
502    ///
503    /// Errors with [`io::ErrorKind::NotFound`] if the object isn't in the store.
504    /// Faults in from a reference store if needed.
505    pub fn open(&self, oid: Oid) -> io::Result<File> {
506        let path = self.object_path(oid);
507        match File::open(&path) {
508            Ok(f) => Ok(f),
509            Err(e) if e.kind() == io::ErrorKind::NotFound && oid != Oid::EMPTY => {
510                if self.materialize_from_reference(oid, None) {
511                    File::open(&path)
512                } else {
513                    Err(e)
514                }
515            }
516            Err(e) => Err(e),
517        }
518    }
519
520    /// Stream `src` into the store, computing SHA-256 as we go, returning
521    /// the resulting OID and byte count.
522    ///
523    /// This is the clean-filter path: the OID isn't known until the
524    /// content has been hashed. Inserting bytes that already exist
525    /// locally under the same OID is a no-op; in particular, the
526    /// existing on-disk file (which may be a hardlink into an
527    /// alternate store) is left untouched.
528    pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
529        let (oid, size, tmp) = self.stream_to_tmp(src)?;
530        if oid != Oid::EMPTY && self.object_path(oid).is_file() {
531            drop(tmp);
532            return Ok((oid, size));
533        }
534        self.commit(oid, tmp)?;
535        Ok((oid, size))
536    }
537
538    /// Stream `src` into the store, requiring the resulting hash to equal
539    /// `expected`.
540    ///
541    /// On mismatch, returns [`StoreError::HashMismatch`] and the
542    /// temp file is dropped without being committed.
543    ///
544    /// This is the download path: we know the OID upfront and must verify
545    /// what the server sent.
546    pub fn insert_verified(&self, expected: Oid, src: &mut impl Read) -> Result<u64, StoreError> {
547        let (actual, size, tmp) = self.stream_to_tmp(src)?;
548        if actual != expected {
549            // Drop the tmp file; it goes away on Drop.
550            return Err(StoreError::HashMismatch { expected, actual });
551        }
552        self.commit(actual, tmp)?;
553        Ok(size)
554    }
555
556    fn stream_to_tmp(&self, src: &mut impl Read) -> io::Result<(Oid, u64, NamedTempFile)> {
557        self.create_dir_all_with_mode(&self.tmp_dir())?;
558        let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
559        let mut hasher = Sha256::new();
560        let mut total: u64 = 0;
561        let mut buf = vec![0u8; COPY_BUFFER];
562        let file = tmp.as_file_mut();
563        loop {
564            let n = src.read(&mut buf)?;
565            if n == 0 {
566                break;
567            }
568            hasher.update(&buf[..n]);
569            file.write_all(&buf[..n])?;
570            total += n as u64;
571        }
572        file.flush()?;
573        let bytes: [u8; 32] = hasher.finalize().into();
574        Ok((Oid::from_bytes(bytes), total, tmp))
575    }
576
577    fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
578        // The empty object lives at /dev/null, never persist it.
579        if oid == Oid::EMPTY {
580            return Ok(());
581        }
582        let dest = self.object_path(oid);
583        if let Some(parent) = dest.parent() {
584            self.create_dir_all_with_mode(parent)?;
585        }
586        // Atomic rename, *clobbering* any existing file at the target
587        // path. The store is content-addressed: anything already there
588        // is either the same content (no-op overwrite) or corrupt
589        // (truncated, half-written), and the latter is exactly what
590        // `git lfs fetch --refetch` exists to recover from.
591        tmp.persist(&dest).map_err(|e| e.error)?;
592        self.set_file_mode(&dest)?;
593        Ok(())
594    }
595
596    /// Create the directory `target` and its parents, setting the mode
597    /// to the configured mode policy.
598    ///
599    /// `mkdir -p` walking the path, chmoding each directory under
600    /// `root` to the configured `mode_policy.dir_mode()`. Components
601    /// outside `root` (e.g. the user's home directory) are left
602    /// alone: we only own the LFS subtree.
603    fn create_dir_all_with_mode(&self, target: &Path) -> io::Result<()> {
604        std::fs::create_dir_all(target)?;
605        #[cfg(unix)]
606        {
607            use std::os::unix::fs::PermissionsExt;
608            let mode = self.mode_policy.dir_mode();
609            // Walk from root → target, chmoding each component that
610            // exists under our LFS root. The check `starts_with(root)`
611            // guards against calls with an unrelated path.
612            let mut cursor = self.root.clone();
613            if cursor.is_dir() {
614                let _ = std::fs::set_permissions(&cursor, std::fs::Permissions::from_mode(mode));
615            }
616            if let Ok(rel) = target.strip_prefix(&self.root) {
617                for component in rel.components() {
618                    cursor.push(component);
619                    if cursor.is_dir() {
620                        let _ = std::fs::set_permissions(
621                            &cursor,
622                            std::fs::Permissions::from_mode(mode),
623                        );
624                    }
625                }
626            }
627        }
628        Ok(())
629    }
630
631    /// Ensure `<root>/incomplete/` exists with the configured
632    /// directory mode.
633    ///
634    /// Call before staging `.part` files yourself
635    /// so the resulting directory honors any `core.sharedRepository`
636    /// policy on this `Store`.
637    pub fn prepare_incomplete_dir(&self) -> io::Result<()> {
638        self.create_dir_all_with_mode(&self.incomplete_dir())
639    }
640
641    /// Chmod a committed object file to the configured file mode.
642    ///
643    /// No-op on non-unix.
644    fn set_file_mode(&self, path: &Path) -> io::Result<()> {
645        #[cfg(unix)]
646        {
647            use std::os::unix::fs::PermissionsExt;
648            let mode = self.mode_policy.file_mode();
649            std::fs::set_permissions(path, std::fs::Permissions::from_mode(mode))?;
650        }
651        #[cfg(not(unix))]
652        {
653            let _ = path;
654        }
655        Ok(())
656    }
657}
658
659#[cfg(test)]
660mod tests {
661    use super::*;
662    use tempfile::TempDir;
663
664    fn fixture() -> (TempDir, Store) {
665        let tmp = TempDir::new().unwrap();
666        let store = Store::new(tmp.path().join("lfs"));
667        (tmp, store)
668    }
669
670    /// Sample non-empty OID used across tests (SHA-256 of "abc").
671    const ABC_OID_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";
672
673    fn abc_oid() -> Oid {
674        ABC_OID_HEX.parse().unwrap()
675    }
676
677    #[test]
678    fn object_path_is_sharded() {
679        let (_tmp, store) = fixture();
680        let oid: Oid = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393"
681            .parse()
682            .unwrap();
683        let path = store.object_path(oid);
684        let suffix: PathBuf = ["objects", "4d", "7a", &oid.to_string()].iter().collect();
685        assert!(
686            path.ends_with(&suffix),
687            "{path:?} does not end with {suffix:?}"
688        );
689    }
690
691    #[test]
692    fn empty_oid_short_circuits() {
693        let (_tmp, store) = fixture();
694        assert_eq!(store.object_path(Oid::EMPTY), PathBuf::from(NULL_DEVICE));
695        assert!(store.contains(Oid::EMPTY));
696        assert!(store.contains_with_size(Oid::EMPTY, 0));
697        assert!(!store.contains_with_size(Oid::EMPTY, 1));
698        // Opening the empty OID yields zero bytes.
699        let mut buf = Vec::new();
700        store
701            .open(Oid::EMPTY)
702            .unwrap()
703            .read_to_end(&mut buf)
704            .unwrap();
705        assert!(buf.is_empty());
706    }
707
708    #[test]
709    fn insert_round_trip() {
710        let (_tmp, store) = fixture();
711        let content = b"hello world!";
712        let (oid, size) = store.insert(&mut content.as_slice()).unwrap();
713        assert_eq!(size, content.len() as u64);
714        assert!(store.contains(oid));
715        assert!(store.contains_with_size(oid, size));
716        let mut readback = Vec::new();
717        store.open(oid).unwrap().read_to_end(&mut readback).unwrap();
718        assert_eq!(readback, content);
719    }
720
721    #[test]
722    fn insert_computes_correct_sha256() {
723        let (_tmp, store) = fixture();
724        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
725        assert_eq!(oid, abc_oid());
726    }
727
728    #[test]
729    fn insert_empty_yields_empty_oid_and_no_object_file() {
730        let (_tmp, store) = fixture();
731        let (oid, size) = store.insert(&mut [].as_slice()).unwrap();
732        assert_eq!(oid, Oid::EMPTY);
733        assert_eq!(size, 0);
734        // Critically: nothing was persisted under objects/.
735        assert!(!store.root.join("objects").exists());
736    }
737
738    #[test]
739    fn insert_idempotent() {
740        let (_tmp, store) = fixture();
741        let (oid1, _) = store.insert(&mut b"abc".as_slice()).unwrap();
742        let (oid2, _) = store.insert(&mut b"abc".as_slice()).unwrap();
743        assert_eq!(oid1, oid2);
744        assert!(store.contains(oid1));
745    }
746
747    #[test]
748    fn insert_verified_succeeds_on_match() {
749        let (_tmp, store) = fixture();
750        let size = store
751            .insert_verified(abc_oid(), &mut b"abc".as_slice())
752            .unwrap();
753        assert_eq!(size, 3);
754        assert!(store.contains(abc_oid()));
755    }
756
757    #[test]
758    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
759        let (_tmp, store) = fixture();
760        let wrong: Oid = "0000000000000000000000000000000000000000000000000000000000000001"
761            .parse()
762            .unwrap();
763        let err = store
764            .insert_verified(wrong, &mut b"abc".as_slice())
765            .unwrap_err();
766        match err {
767            StoreError::HashMismatch { expected, actual } => {
768                assert_eq!(expected, wrong);
769                assert_eq!(actual, abc_oid());
770            }
771            other => panic!("expected HashMismatch, got {other:?}"),
772        }
773        // Neither the wrong OID nor the actual OID should be present:
774        // a failed verify must not leak a half-committed file.
775        assert!(!store.contains(wrong));
776        assert!(!store.contains(abc_oid()));
777        // And no leftover tmp file.
778        let tmp_entries: Vec<_> = std::fs::read_dir(store.tmp_dir())
779            .unwrap()
780            .collect::<Result<_, _>>()
781            .unwrap();
782        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
783    }
784
785    #[test]
786    fn open_missing_oid_is_not_found() {
787        let (_tmp, store) = fixture();
788        let oid: Oid = "0000000000000000000000000000000000000000000000000000000000000001"
789            .parse()
790            .unwrap();
791        let err = store.open(oid).unwrap_err();
792        assert_eq!(err.kind(), io::ErrorKind::NotFound);
793    }
794
795    #[test]
796    fn streaming_megabyte_input() {
797        let (_tmp, store) = fixture();
798        // ~1 MiB to exercise the streaming loop across many buffer fills.
799        let content: Vec<u8> = (0..1_048_576u32).map(|i| (i ^ (i >> 5)) as u8).collect();
800        let (oid, size) = store.insert(&mut content.as_slice()).unwrap();
801        assert_eq!(size, content.len() as u64);
802        let mut readback = Vec::new();
803        store.open(oid).unwrap().read_to_end(&mut readback).unwrap();
804        assert_eq!(readback, content);
805    }
806
807    #[test]
808    fn each_object_returns_empty_when_no_objects_dir() {
809        let (_tmp, store) = fixture();
810        // Store dir doesn't exist yet.
811        assert!(store.each_object().unwrap().is_empty());
812    }
813
814    #[test]
815    fn each_object_finds_inserted_objects_with_correct_size() {
816        let (_tmp, store) = fixture();
817        let (oid_a, _) = store.insert(&mut b"hello".as_slice()).unwrap();
818        let (oid_b, _) = store.insert(&mut b"world!!!".as_slice()).unwrap();
819        let mut got = store.each_object().unwrap();
820        got.sort_by_key(|(_, size)| *size);
821        assert_eq!(got.len(), 2);
822        // Order by size: "hello" (5 bytes) first, then "world!!!" (8 bytes).
823        assert_eq!(got[0].0, oid_a);
824        assert_eq!(got[0].1, 5);
825        assert_eq!(got[1].0, oid_b);
826        assert_eq!(got[1].1, 8);
827    }
828
829    #[test]
830    fn each_object_skips_unrecognized_filenames() {
831        let (_tmp, store) = fixture();
832        let (oid, _) = store.insert(&mut b"hi".as_slice()).unwrap();
833        // Drop a stray file in the same shard directory that isn't a
834        // 64-char hex name: must not crash or be reported.
835        let shard = store
836            .root()
837            .join("objects")
838            .join(&oid.to_string()[0..2])
839            .join(&oid.to_string()[2..4]);
840        std::fs::write(shard.join("README"), b"ignored").unwrap();
841        let got = store.each_object().unwrap();
842        assert_eq!(got.len(), 1);
843        assert_eq!(got[0].0, oid);
844    }
845
846    #[test]
847    fn insert_verified_overwrites_corrupt_existing_file() {
848        // Mirrors the scenario t-fetch's `--refetch` test exercises:
849        // a previous fetch landed an object, then the file got
850        // truncated (cp /dev/null over it). A subsequent verified
851        // insert must replace the corrupt file rather than silently
852        // skipping the write.
853        let (_tmp, store) = fixture();
854        let dest = store.object_path(abc_oid());
855        std::fs::create_dir_all(dest.parent().unwrap()).unwrap();
856        std::fs::write(&dest, b"").unwrap();
857        assert_eq!(std::fs::metadata(&dest).unwrap().len(), 0);
858
859        store
860            .insert_verified(abc_oid(), &mut b"abc".as_slice())
861            .unwrap();
862        let bytes = std::fs::read(&dest).unwrap();
863        assert_eq!(bytes, b"abc");
864    }
865
866    #[test]
867    fn insert_creates_dirs_on_demand() {
868        let (_tmp, store) = fixture();
869        // Before any insert, neither objects/ nor tmp/ exists.
870        assert!(!store.root.exists());
871        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
872        assert!(store.tmp_dir().is_dir());
873        assert!(store.object_path(oid).is_file());
874    }
875
876    /// Build a "source" store with an object pre-installed, plus an
877    /// empty "shared" store that references it. Mirrors the
878    /// `git clone --shared` setup from t-fetch's init.
879    fn shared_fixture() -> (TempDir, Store, Store, Oid) {
880        let tmp = TempDir::new().unwrap();
881        let source = Store::new(tmp.path().join("src/lfs"));
882        let (oid, _) = source.insert(&mut b"abc".as_slice()).unwrap();
883        let shared = Store::new(tmp.path().join("shared/lfs"))
884            .with_references([source.root().join("objects")]);
885        (tmp, source, shared, oid)
886    }
887
888    #[test]
889    fn contains_finds_object_via_reference() {
890        let (_tmp, _source, shared, oid) = shared_fixture();
891        // Object lives only in the source's lfs/objects/ at this
892        // point. `contains` should report it as present (and fault
893        // it in along the way).
894        assert!(shared.contains(oid));
895        assert!(shared.object_path(oid).is_file());
896    }
897
898    #[test]
899    fn open_faults_in_from_reference() {
900        let (_tmp, _source, shared, oid) = shared_fixture();
901        let mut buf = Vec::new();
902        shared.open(oid).unwrap().read_to_end(&mut buf).unwrap();
903        assert_eq!(buf, b"abc");
904        // After open, the object is materialized locally so future
905        // reads are independent of the alternate.
906        assert!(shared.object_path(oid).is_file());
907    }
908
909    #[test]
910    fn contains_with_size_rejects_size_mismatch_in_reference() {
911        let (_tmp, _source, shared, oid) = shared_fixture();
912        // Real size is 3; ask for 4 → reference hit gets rejected.
913        assert!(!shared.contains_with_size(oid, 4));
914        assert!(!shared.object_path(oid).is_file());
915    }
916
917    #[test]
918    fn store_without_references_misses() {
919        // Sanity: same OID that the shared fixture finds via
920        // alternates is genuinely absent in a plain store.
921        let (_tmp, store) = fixture();
922        let oid = abc_oid();
923        assert!(!store.contains(oid));
924        assert!(matches!(
925            store.open(oid).unwrap_err().kind(),
926            io::ErrorKind::NotFound,
927        ));
928    }
929}