git_lfs_store/lib.rs
1//! Content-addressable on-disk store for Git LFS objects.
2//!
3//! Git LFS keeps large files outside git's object database, leaving
4//! small pointer blobs committed to git in their place. This crate
5//! owns the local half of that split: where the actual file bytes
6//! live on disk, how they get there, and how they're served back out.
7//!
8//! Objects live under `<lfs_dir>/objects/aa/bb/aabbcc…` where the
9//! hex string is the SHA-256 of the content, sharded by the first
10//! two bytes (see [`docs/spec.md`]). Writes go through a tempfile in
11//! `<lfs_dir>/tmp/` and are atomically renamed into place once their
12//! hash is known.
13//!
14//! Two insert paths cover the two callers: [`Store::insert`] hashes
15//! bytes as they're written (the clean-filter path: bytes in, OID
16//! out), and [`Store::insert_verified`] checks the resulting hash
17//! against a caller-supplied expected OID (the download path: the
18//! server names the OID, we confirm what arrived).
19//!
20//! In-progress downloads stage as `.part` files at
21//! [`Store::incomplete_path`] and rename into place via
22//! [`Store::commit_partial`], so an interrupted transfer resumes
23//! with a `Range:` request rather than restarting. Alternate object
24//! stores attached via [`Store::with_references`] are hardlinked or
25//! copied into the primary on a miss (the LFS analogue of
26//! `git clone --shared`). File and directory modes follow
27//! `core.sharedRepository`, see [`Store::with_shared_repository`].
28//!
29//! ```
30//! use git_lfs_store::Store;
31//!
32//! # let _tmp = tempfile::TempDir::new().unwrap();
33//! # let lfs_dir = _tmp.path().join("lfs");
34//! let store = Store::new(&lfs_dir);
35//! let (oid, size) = store.insert(&mut &b"hello world"[..]).unwrap();
36//! assert!(store.contains(oid));
37//! assert_eq!(size, 11);
38//! ```
39//!
40//! [`docs/spec.md`]: https://gitlab.com/rustutils/git-lfs/-/blob/master/docs/spec.md
41
42use std::collections::HashMap;
43use std::fs::File;
44use std::io::{self, Read, Write};
45use std::path::{Path, PathBuf};
46
47use git_lfs_pointer::Oid;
48use sha2::{Digest, Sha256};
49use tempfile::NamedTempFile;
50
/// Platform null device.
///
/// What `object_path` returns for [`Oid::EMPTY`]: `NUL` on Windows,
/// `/dev/null` everywhere else.
const NULL_DEVICE: &str = if cfg!(windows) { "NUL" } else { "/dev/null" };

/// Size in bytes (64 KiB) of the scratch buffer used when streaming
/// content into a temp file while hashing it.
const COPY_BUFFER: usize = 64 * 1024;
57
/// A local LFS object store rooted at `<lfs_dir>` (typically `.git/lfs`).
///
/// May reference any number of alternate stores (typically the LFS
/// objects of a `git clone --shared` source) and will materialize a
/// hit from one of them into the local store on demand. See
/// [`Store::with_references`].
#[derive(Debug, Clone)]
pub struct Store {
    /// The LFS directory itself; `objects/`, `tmp/` and `incomplete/`
    /// are all resolved relative to it.
    root: PathBuf,
    /// Paths to alternate `lfs/objects/` directories. Each maps to a
    /// `.git/objects/info/alternates` entry: when the local store
    /// misses, [`Store::contains_with_size`] / [`Store::open`] walk
    /// these in order and hardlink (or copy) any hit into `root`.
    references: Vec<PathBuf>,
    /// File/directory mode policy for objects committed into the
    /// store. Defaults to "honor process umask"; set via
    /// [`Store::with_shared_repository`] to override (e.g. to apply
    /// `core.sharedRepository=group` semantics).
    mode_policy: ModePolicy,
}
78
/// File-mode rule used when committing objects and creating their
/// containing directories. Mirrors git's `core.sharedRepository`
/// semantics, see `config/config.go::getMask` upstream.
///
/// The policy is a single umask-style value: file modes are derived
/// by clearing its bits from `0o666`, and directory modes are derived
/// from the resulting file mode.
#[derive(Debug, Clone, Copy)]
struct ModePolicy {
    /// Bits to mask off `0o666` when chmoding a committed object;
    /// resolved eagerly at construction (from the process umask or
    /// from `core.sharedRepository`). An explicit chmod is always
    /// applied because the `tempfile` crate creates files at 0o600
    /// regardless of umask.
    mask: u32,
}
91
92impl ModePolicy {
93 fn from_umask() -> Self {
94 Self {
95 mask: process_umask(),
96 }
97 }
98
99 /// Parse a `core.sharedRepository` config value into a mask.
100 /// Recognized: `umask`/`false`/`0`/unset → process umask;
101 /// `group`/`true`/`1` → 0o007; `all`/`world`/`everybody`/`2` →
102 /// 0o002; any other octal value N → `0o666 & !N`. Unrecognized
103 /// strings fall back to umask.
104 fn from_shared_repository(value: &str) -> Self {
105 let v = value.trim().to_ascii_lowercase();
106 let mask = match v.as_str() {
107 "group" | "true" | "1" => 0o007,
108 "all" | "world" | "everybody" | "2" => 0o002,
109 "umask" | "false" | "0" | "" => process_umask(),
110 other => {
111 // Try octal interpretation. Strip any leading `0` to
112 // match git's `strconv.ParseInt(v, 8, ...)` behavior.
113 match u32::from_str_radix(other.trim_start_matches('0'), 8) {
114 Ok(mode) if mode <= 0o777 => 0o666 & !mode,
115 _ => process_umask(),
116 }
117 }
118 };
119 Self { mask: mask & 0o777 }
120 }
121
122 /// Target file mode for committed objects (and the temp files
123 /// they're persisted from).
124 fn file_mode(self) -> u32 {
125 0o666 & !self.mask & 0o777
126 }
127
128 /// Target directory mode. Matches git's
129 /// `tools.ExecutablePermissions`: copy read bits to execute bits.
130 fn dir_mode(self) -> u32 {
131 let f = self.file_mode();
132 (f | ((f & 0o444) >> 2)) & 0o777
133 }
134}
135
/// Read the process umask without permanently changing it.
///
/// POSIX's `umask` call can only report the current mask by installing
/// a new one, so this installs a temporary mask (`0o022`), captures the
/// previous value it returns, and immediately puts that value back.
/// Only the low nine permission bits are returned.
#[cfg(unix)]
fn process_umask() -> u32 {
    // SAFETY: `libc::umask` takes and returns a plain integer and
    // dereferences no memory. Between the two calls the process-wide
    // umask is `0o022` instead of the caller's value, which only
    // matters if another thread creates a file in that interval.
    // NOTE(review): the original comment relied on stores being
    // constructed before any worker threads spawn; confirm that still
    // holds for every caller.
    unsafe {
        let prev = libc::umask(0o022);
        libc::umask(prev);
        (prev as u32) & 0o777
    }
}

/// Non-unix stand-in: there is no process umask to query, so report
/// the conventional `0o022`.
#[cfg(not(unix))]
fn process_umask() -> u32 {
    0o022
}
157
/// Things that can go wrong while inserting an object.
///
/// Reads from the store ([`Store::open`], [`Store::contains`], and others)
/// return a plain [`io::Error`]. This enum is only surfaced by the
/// insert paths because they have a non-IO failure mode (hash
/// mismatch) that needs its own variant.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// Filesystem-level failure.
    ///
    /// Surfaced by operations like tempfile creation, write, rename,
    /// permission, etc.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// [`Store::insert_verified`] received bytes that hashed to
    /// something other than the OID the caller asserted.
    ///
    /// The tempfile is dropped, so no half-committed object is left behind.
    #[error("expected OID {expected}, got {actual}")]
    HashMismatch {
        /// The OID the caller said the content would have.
        expected: Oid,
        /// The OID actually computed from the streamed bytes.
        actual: Oid,
    },
}
179
180impl Store {
181 /// Create a store rooted at the given LFS directory. The directory is not
182 /// created eagerly; subdirectories are created on demand as objects land.
183 pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
184 Self {
185 root: lfs_dir.into(),
186 references: Vec::new(),
187 mode_policy: ModePolicy::from_umask(),
188 }
189 }
190
191 /// Apply `core.sharedRepository` semantics to objects this store
192 /// commits.
193 ///
194 /// `value` is the literal string from `git config`
195 /// (`group`, `everybody`, octal `0660`, etc). Unrecognized values
196 /// fall back to honoring the process umask. Resets any prior
197 /// policy on this `Store`.
198 #[must_use]
199 pub fn with_shared_repository(mut self, value: &str) -> Self {
200 self.mode_policy = ModePolicy::from_shared_repository(value);
201 self
202 }
203
204 /// Attach alternate `lfs/objects/` directories that the store may
205 /// hardlink-or-copy from when a local lookup misses.
206 ///
207 /// Used by `git clone --shared` setups so the new repo can read the
208 /// source's existing LFS objects without re-downloading.
209 ///
210 /// Pass [`git_lfs_git::lfs_alternate_dirs`](https://docs.rs/git-lfs-git)
211 /// (`<git-dir>/objects/info/alternates` resolved to LFS-objects
212 /// dirs) at construction.
213 #[must_use]
214 pub fn with_references(mut self, refs: impl IntoIterator<Item = PathBuf>) -> Self {
215 self.references = refs.into_iter().collect();
216 self
217 }
218
219 /// Root LFS directory.
220 pub fn root(&self) -> &Path {
221 &self.root
222 }
223
224 /// Directory holding temp files for in-flight inserts.
225 pub fn tmp_dir(&self) -> PathBuf {
226 self.root.join("tmp")
227 }
228
229 /// Directory holding partial or in-progress downloads.
230 ///
231 /// Files are named `<oid>.part` and persist across process
232 /// invocations so a later attempt can pick up where a prior
233 /// one left off (issuing a `Range:` request). Mirrors upstream's
234 /// `incomplete/` layout.
235 pub fn incomplete_dir(&self) -> PathBuf {
236 self.root.join("incomplete")
237 }
238
239 /// Path to the partial-download file for `oid`.
240 ///
241 /// The file may not exist; the caller is responsible for creating
242 /// and writing it.
243 pub fn incomplete_path(&self, oid: Oid) -> PathBuf {
244 self.incomplete_dir().join(format!("{oid}.part"))
245 }
246
247 /// Atomically move a fully-downloaded partial file into its final
248 /// object-path location.
249 ///
250 /// The caller is responsible for confirming
251 /// the file's bytes hash to `oid` first; this is a pure rename.
252 /// Clobbers any existing file at the destination, see
253 /// [`insert_verified`](Self::insert_verified) for the rationale.
254 pub fn commit_partial(&self, oid: Oid, partial: &Path) -> io::Result<()> {
255 if oid == Oid::EMPTY {
256 return Ok(());
257 }
258 let dest = self.object_path(oid);
259 if let Some(parent) = dest.parent() {
260 self.create_dir_all_with_mode(parent)?;
261 }
262 std::fs::rename(partial, &dest)?;
263 self.set_file_mode(&dest)?;
264 Ok(())
265 }
266
267 /// Sweep `<root>/tmp/` for stale temp files left behind by
268 /// interrupted prior runs.
269 ///
270 /// Filenames matching `<64-hex>-<random>`
271 /// whose object is already complete in the store are removed
272 /// unconditionally (upstream's in-flight download tempfile shape);
273 /// everything else older than an hour is pruned.
274 ///
275 /// Best-effort: the dir not existing, or any individual remove
276 /// failing, is silently ignored. Intended to run once per
277 /// command invocation, before the command's main work, so an
278 /// interrupted prior run doesn't leak temp files indefinitely
279 /// (matches upstream's `lfs.cleanupTempFiles` startup task in
280 /// `fs/cleanup.go`).
281 ///
282 /// Per-file rules, mirroring upstream:
283 /// 1. Filenames starting with `<64-hex>-` whose object is already
284 /// complete in the store are removed unconditionally
285 /// (interrupted-rename leftovers).
286 /// 2. Otherwise, files older than 1 hour are removed *unless*
287 /// they live in a subdirectory whose own mtime is fresher than
288 /// 1 hour, since active processes may have stale-looking files
289 /// they still hold open (hard-linked across repos). Files
290 /// directly under `tmp/` are exempt from the subdir-age
291 /// short-circuit since we modify the top-level tmp dir often
292 /// enough that it would never expire.
293 pub fn cleanup_tmp_objects(&self) {
294 let tmp = self.root.join("tmp");
295 if !tmp.exists() {
296 return;
297 }
298 let cutoff =
299 match std::time::SystemTime::now().checked_sub(std::time::Duration::from_secs(3600)) {
300 Some(t) => t,
301 None => return,
302 };
303 // Cache subdir mtimes so the 1-hour exemption check doesn't
304 // re-stat the same dir per file.
305 let mut dir_mtimes: HashMap<PathBuf, std::time::SystemTime> = HashMap::new();
306 self.walk_tmp(&tmp, &tmp, cutoff, &mut dir_mtimes);
307 }
308
309 fn walk_tmp(
310 &self,
311 root: &Path,
312 dir: &Path,
313 cutoff: std::time::SystemTime,
314 dir_mtimes: &mut HashMap<PathBuf, std::time::SystemTime>,
315 ) {
316 let Ok(entries) = std::fs::read_dir(dir) else {
317 return;
318 };
319 for entry in entries.flatten() {
320 let path = entry.path();
321 let Ok(file_type) = entry.file_type() else {
322 continue;
323 };
324 if file_type.is_dir() {
325 self.walk_tmp(root, &path, cutoff, dir_mtimes);
326 continue;
327 }
328 let name = entry.file_name();
329 let name_str = name.to_string_lossy();
330 // Rule 1: "<oid>-..." file whose object is already complete.
331 // The filesystem-level cleanup accepts any 64-char prefix
332 // (no hex validation) so upstream test sentinels like
333 // `good...` / `bad...` round-trip.
334 if name_str.len() > 64 && name_str.as_bytes().get(64) == Some(&b'-') {
335 let oid_str = &name_str[..64];
336 let object_path = self
337 .root
338 .join("objects")
339 .join(&oid_str[0..2])
340 .join(&oid_str[2..4])
341 .join(oid_str);
342 if object_path.is_file() {
343 let _ = std::fs::remove_file(&path);
344 continue;
345 }
346 }
347 // Rule 2a: skip files in young subdirectories. The
348 // top-level tmp/ itself is exempt (otherwise it'd never
349 // expire). Cache the dir's mtime so we don't restat per
350 // file.
351 if dir != root {
352 let dir_mtime = *dir_mtimes.entry(dir.to_path_buf()).or_insert_with(|| {
353 std::fs::metadata(dir)
354 .and_then(|m| m.modified())
355 .unwrap_or(std::time::UNIX_EPOCH)
356 });
357 if dir_mtime > cutoff {
358 continue;
359 }
360 }
361 // Rule 2b: remove file if older than the cutoff.
362 let Ok(meta) = entry.metadata() else { continue };
363 let Ok(mtime) = meta.modified() else { continue };
364 if mtime < cutoff {
365 let _ = std::fs::remove_file(&path);
366 }
367 }
368 }
369
370 /// Where the object with this OID lives on disk.
371 ///
372 /// For [`Oid::EMPTY`] this returns the platform null device, mirroring
373 /// upstream's behavior so callers can `open` an empty object without
374 /// special-casing.
375 pub fn object_path(&self, oid: Oid) -> PathBuf {
376 if oid == Oid::EMPTY {
377 return PathBuf::from(NULL_DEVICE);
378 }
379 let hex = oid.to_string();
380 self.root
381 .join("objects")
382 .join(&hex[0..2])
383 .join(&hex[2..4])
384 .join(&hex)
385 }
386
387 /// Check if this object is present locally as a regular file.
388 ///
389 /// The empty OID is always considered present. If the local copy
390 /// is missing but an alternate store has the object, materializes
391 /// it locally first.
392 pub fn contains(&self, oid: Oid) -> bool {
393 if oid == Oid::EMPTY {
394 return true;
395 }
396 if self.object_path(oid).is_file() {
397 return true;
398 }
399 self.materialize_from_reference(oid, None)
400 }
401
402 /// Check if the object is present and its on-disk size matches `size`.
403 ///
404 /// Used to detect partial/corrupted local copies. Like
405 /// [`contains`](Self::contains), will fault in a matching alternate-store
406 /// object on demand.
407 pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
408 if oid == Oid::EMPTY {
409 return size == 0;
410 }
411 let local = std::fs::metadata(self.object_path(oid))
412 .map(|m| m.is_file() && m.len() == size)
413 .unwrap_or(false);
414 if local {
415 return true;
416 }
417 self.materialize_from_reference(oid, Some(size))
418 }
419
420 /// Materialize the object from a reference store, if one is available.
421 ///
422 /// Walk reference stores looking for `oid`; the first hit (matching
423 /// `size` if specified) is hardlinked (or copied, on cross-device
424 /// fallback) into the local store. Returns `true` if the object
425 /// is now present locally as a result.
426 fn materialize_from_reference(&self, oid: Oid, size: Option<u64>) -> bool {
427 if self.references.is_empty() {
428 return false;
429 }
430 let hex = oid.to_string();
431 for refdir in &self.references {
432 let src = refdir.join(&hex[0..2]).join(&hex[2..4]).join(&hex);
433 let Ok(meta) = std::fs::metadata(&src) else {
434 continue;
435 };
436 if !meta.is_file() {
437 continue;
438 }
439 if let Some(want) = size
440 && meta.len() != want
441 {
442 continue;
443 }
444 let dest = self.object_path(oid);
445 if let Some(parent) = dest.parent() {
446 let _ = self.create_dir_all_with_mode(parent);
447 }
448 // Hardlink first (free, O(1), shares inode); fall back to
449 // copy on EXDEV / NotSupported (e.g. alternate on a
450 // different filesystem).
451 if std::fs::hard_link(&src, &dest).is_ok() || std::fs::copy(&src, &dest).is_ok() {
452 let _ = self.set_file_mode(&dest);
453 return true;
454 }
455 }
456 false
457 }
458
459 /// Walk every object file in the store, yielding (oid, size_on_disk).
460 ///
461 /// Traverses the sharded `objects/<aa>/<bb>/<oid>` layout. Filenames
462 /// that don't parse as 64-char SHA-256 hex are silently skipped, as
463 /// are unexpected directories. The store directory not existing is
464 /// not an error; the result is just empty.
465 pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
466 let objects_dir = self.root.join("objects");
467 if !objects_dir.exists() {
468 return Ok(Vec::new());
469 }
470 let mut out = Vec::new();
471 for aa in std::fs::read_dir(&objects_dir)? {
472 let aa = aa?;
473 if !aa.file_type()?.is_dir() {
474 continue;
475 }
476 for bb in std::fs::read_dir(aa.path())? {
477 let bb = bb?;
478 if !bb.file_type()?.is_dir() {
479 continue;
480 }
481 for entry in std::fs::read_dir(bb.path())? {
482 let entry = entry?;
483 let name = entry.file_name();
484 let Some(name_str) = name.to_str() else {
485 continue;
486 };
487 let Ok(oid) = name_str.parse::<Oid>() else {
488 continue;
489 };
490 let meta = entry.metadata()?;
491 if !meta.is_file() {
492 continue;
493 }
494 out.push((oid, meta.len()));
495 }
496 }
497 }
498 Ok(out)
499 }
500
501 /// Open an object for reading.
502 ///
503 /// Errors with [`io::ErrorKind::NotFound`] if the object isn't in the store.
504 /// Faults in from a reference store if needed.
505 pub fn open(&self, oid: Oid) -> io::Result<File> {
506 let path = self.object_path(oid);
507 match File::open(&path) {
508 Ok(f) => Ok(f),
509 Err(e) if e.kind() == io::ErrorKind::NotFound && oid != Oid::EMPTY => {
510 if self.materialize_from_reference(oid, None) {
511 File::open(&path)
512 } else {
513 Err(e)
514 }
515 }
516 Err(e) => Err(e),
517 }
518 }
519
520 /// Stream `src` into the store, computing SHA-256 as we go, returning
521 /// the resulting OID and byte count.
522 ///
523 /// This is the clean-filter path: the OID isn't known until the
524 /// content has been hashed. Inserting bytes that already exist
525 /// locally under the same OID is a no-op; in particular, the
526 /// existing on-disk file (which may be a hardlink into an
527 /// alternate store) is left untouched.
528 pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
529 let (oid, size, tmp) = self.stream_to_tmp(src)?;
530 if oid != Oid::EMPTY && self.object_path(oid).is_file() {
531 drop(tmp);
532 return Ok((oid, size));
533 }
534 self.commit(oid, tmp)?;
535 Ok((oid, size))
536 }
537
538 /// Stream `src` into the store, requiring the resulting hash to equal
539 /// `expected`.
540 ///
541 /// On mismatch, returns [`StoreError::HashMismatch`] and the
542 /// temp file is dropped without being committed.
543 ///
544 /// This is the download path: we know the OID upfront and must verify
545 /// what the server sent.
546 pub fn insert_verified(&self, expected: Oid, src: &mut impl Read) -> Result<u64, StoreError> {
547 let (actual, size, tmp) = self.stream_to_tmp(src)?;
548 if actual != expected {
549 // Drop the tmp file; it goes away on Drop.
550 return Err(StoreError::HashMismatch { expected, actual });
551 }
552 self.commit(actual, tmp)?;
553 Ok(size)
554 }
555
556 fn stream_to_tmp(&self, src: &mut impl Read) -> io::Result<(Oid, u64, NamedTempFile)> {
557 self.create_dir_all_with_mode(&self.tmp_dir())?;
558 let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
559 let mut hasher = Sha256::new();
560 let mut total: u64 = 0;
561 let mut buf = vec![0u8; COPY_BUFFER];
562 let file = tmp.as_file_mut();
563 loop {
564 let n = src.read(&mut buf)?;
565 if n == 0 {
566 break;
567 }
568 hasher.update(&buf[..n]);
569 file.write_all(&buf[..n])?;
570 total += n as u64;
571 }
572 file.flush()?;
573 let bytes: [u8; 32] = hasher.finalize().into();
574 Ok((Oid::from_bytes(bytes), total, tmp))
575 }
576
577 fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
578 // The empty object lives at /dev/null, never persist it.
579 if oid == Oid::EMPTY {
580 return Ok(());
581 }
582 let dest = self.object_path(oid);
583 if let Some(parent) = dest.parent() {
584 self.create_dir_all_with_mode(parent)?;
585 }
586 // Atomic rename, *clobbering* any existing file at the target
587 // path. The store is content-addressed: anything already there
588 // is either the same content (no-op overwrite) or corrupt
589 // (truncated, half-written), and the latter is exactly what
590 // `git lfs fetch --refetch` exists to recover from.
591 tmp.persist(&dest).map_err(|e| e.error)?;
592 self.set_file_mode(&dest)?;
593 Ok(())
594 }
595
596 /// Create the directory `target` and its parents, setting the mode
597 /// to the configured mode policy.
598 ///
599 /// `mkdir -p` walking the path, chmoding each directory under
600 /// `root` to the configured `mode_policy.dir_mode()`. Components
601 /// outside `root` (e.g. the user's home directory) are left
602 /// alone: we only own the LFS subtree.
603 fn create_dir_all_with_mode(&self, target: &Path) -> io::Result<()> {
604 std::fs::create_dir_all(target)?;
605 #[cfg(unix)]
606 {
607 use std::os::unix::fs::PermissionsExt;
608 let mode = self.mode_policy.dir_mode();
609 // Walk from root → target, chmoding each component that
610 // exists under our LFS root. The check `starts_with(root)`
611 // guards against calls with an unrelated path.
612 let mut cursor = self.root.clone();
613 if cursor.is_dir() {
614 let _ = std::fs::set_permissions(&cursor, std::fs::Permissions::from_mode(mode));
615 }
616 if let Ok(rel) = target.strip_prefix(&self.root) {
617 for component in rel.components() {
618 cursor.push(component);
619 if cursor.is_dir() {
620 let _ = std::fs::set_permissions(
621 &cursor,
622 std::fs::Permissions::from_mode(mode),
623 );
624 }
625 }
626 }
627 }
628 Ok(())
629 }
630
631 /// Ensure `<root>/incomplete/` exists with the configured
632 /// directory mode.
633 ///
634 /// Call before staging `.part` files yourself
635 /// so the resulting directory honors any `core.sharedRepository`
636 /// policy on this `Store`.
637 pub fn prepare_incomplete_dir(&self) -> io::Result<()> {
638 self.create_dir_all_with_mode(&self.incomplete_dir())
639 }
640
641 /// Chmod a committed object file to the configured file mode.
642 ///
643 /// No-op on non-unix.
644 fn set_file_mode(&self, path: &Path) -> io::Result<()> {
645 #[cfg(unix)]
646 {
647 use std::os::unix::fs::PermissionsExt;
648 let mode = self.mode_policy.file_mode();
649 std::fs::set_permissions(path, std::fs::Permissions::from_mode(mode))?;
650 }
651 #[cfg(not(unix))]
652 {
653 let _ = path;
654 }
655 Ok(())
656 }
657}
658
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Sample non-empty OID used across tests (SHA-256 of "abc").
    const ABC_OID_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// A store rooted in a fresh scratch directory. The `TempDir` is
    /// returned too so it outlives the store in the calling test.
    fn fixture() -> (TempDir, Store) {
        let scratch = TempDir::new().unwrap();
        let lfs_dir = scratch.path().join("lfs");
        (scratch, Store::new(lfs_dir))
    }

    fn abc_oid() -> Oid {
        ABC_OID_HEX.parse::<Oid>().unwrap()
    }

    #[test]
    fn object_path_is_sharded() {
        let (_scratch, store) = fixture();
        let oid: Oid = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393"
            .parse()
            .unwrap();
        let path = store.object_path(oid);
        let suffix = Path::new("objects")
            .join("4d")
            .join("7a")
            .join(oid.to_string());
        assert!(
            path.ends_with(&suffix),
            "{path:?} does not end with {suffix:?}"
        );
    }

    #[test]
    fn empty_oid_short_circuits() {
        let (_scratch, store) = fixture();
        assert_eq!(store.object_path(Oid::EMPTY), PathBuf::from(NULL_DEVICE));
        assert!(store.contains(Oid::EMPTY));
        assert!(store.contains_with_size(Oid::EMPTY, 0));
        assert!(!store.contains_with_size(Oid::EMPTY, 1));
        // Reading the empty OID must produce no bytes at all.
        let mut file = store.open(Oid::EMPTY).unwrap();
        let mut buf = Vec::new();
        file.read_to_end(&mut buf).unwrap();
        assert!(buf.is_empty());
    }

    #[test]
    fn insert_round_trip() {
        let (_scratch, store) = fixture();
        let content = b"hello world!";
        let (oid, size) = store.insert(&mut content.as_slice()).unwrap();
        assert_eq!(size, content.len() as u64);
        assert!(store.contains(oid));
        assert!(store.contains_with_size(oid, size));
        let mut file = store.open(oid).unwrap();
        let mut readback = Vec::new();
        file.read_to_end(&mut readback).unwrap();
        assert_eq!(readback, content);
    }

    #[test]
    fn insert_computes_correct_sha256() {
        let (_scratch, store) = fixture();
        let (oid, _size) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert_eq!(oid, abc_oid());
    }

    #[test]
    fn insert_empty_yields_empty_oid_and_no_object_file() {
        let (_scratch, store) = fixture();
        let (oid, size) = store.insert(&mut b"".as_slice()).unwrap();
        assert_eq!(oid, Oid::EMPTY);
        assert_eq!(size, 0);
        // The important part: objects/ was never even created.
        assert!(!store.root.join("objects").exists());
    }

    #[test]
    fn insert_idempotent() {
        let (_scratch, store) = fixture();
        let (first, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        let (second, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert_eq!(first, second);
        assert!(store.contains(first));
    }

    #[test]
    fn insert_verified_succeeds_on_match() {
        let (_scratch, store) = fixture();
        let written = store
            .insert_verified(abc_oid(), &mut b"abc".as_slice())
            .unwrap();
        assert_eq!(written, 3);
        assert!(store.contains(abc_oid()));
    }

    #[test]
    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
        let (_scratch, store) = fixture();
        let wrong: Oid = "0000000000000000000000000000000000000000000000000000000000000001"
            .parse()
            .unwrap();
        let outcome = store.insert_verified(wrong, &mut b"abc".as_slice());
        match outcome.unwrap_err() {
            StoreError::HashMismatch { expected, actual } => {
                assert_eq!(expected, wrong);
                assert_eq!(actual, abc_oid());
            }
            other => panic!("expected HashMismatch, got {other:?}"),
        }
        // A failed verification must not leave an object behind under
        // either the asserted or the computed OID.
        assert!(!store.contains(wrong));
        assert!(!store.contains(abc_oid()));
        // The staging area must be clean as well.
        let tmp_entries: Vec<_> = std::fs::read_dir(store.tmp_dir())
            .unwrap()
            .collect::<Result<_, _>>()
            .unwrap();
        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
    }

    #[test]
    fn open_missing_oid_is_not_found() {
        let (_scratch, store) = fixture();
        let absent: Oid = "0000000000000000000000000000000000000000000000000000000000000001"
            .parse()
            .unwrap();
        let err = store.open(absent).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn streaming_megabyte_input() {
        let (_scratch, store) = fixture();
        // 1 MiB of patterned bytes, i.e. many fills of the copy buffer.
        let content: Vec<u8> = (0..1_048_576u32)
            .map(|i| (i ^ (i >> 5)) as u8)
            .collect();
        let (oid, size) = store.insert(&mut content.as_slice()).unwrap();
        assert_eq!(size, content.len() as u64);
        let mut file = store.open(oid).unwrap();
        let mut readback = Vec::new();
        file.read_to_end(&mut readback).unwrap();
        assert_eq!(readback, content);
    }

    #[test]
    fn each_object_returns_empty_when_no_objects_dir() {
        let (_scratch, store) = fixture();
        // Nothing has been written, so the store dir doesn't exist yet.
        let listed = store.each_object().unwrap();
        assert!(listed.is_empty());
    }

    #[test]
    fn each_object_finds_inserted_objects_with_correct_size() {
        let (_scratch, store) = fixture();
        let (oid_a, _) = store.insert(&mut b"hello".as_slice()).unwrap();
        let (oid_b, _) = store.insert(&mut b"world!!!".as_slice()).unwrap();
        let mut got = store.each_object().unwrap();
        got.sort_by_key(|&(_, size)| size);
        // Sorted by size: "hello" (5 bytes) before "world!!!" (8 bytes).
        assert_eq!(got, vec![(oid_a, 5), (oid_b, 8)]);
    }

    #[test]
    fn each_object_skips_unrecognized_filenames() {
        let (_scratch, store) = fixture();
        let (oid, _) = store.insert(&mut b"hi".as_slice()).unwrap();
        // Plant a file whose name isn't a 64-char hex OID next to the
        // real object; it must be neither reported nor cause an error.
        let hex = oid.to_string();
        let shard = store
            .root()
            .join("objects")
            .join(&hex[0..2])
            .join(&hex[2..4]);
        std::fs::write(shard.join("README"), b"ignored").unwrap();
        let got = store.each_object().unwrap();
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].0, oid);
    }

    #[test]
    fn insert_verified_overwrites_corrupt_existing_file() {
        // Same situation as t-fetch's `--refetch` test: an object was
        // fetched earlier and its file later got truncated (cp
        // /dev/null over it). The next verified insert has to replace
        // the damaged file instead of quietly skipping the write.
        let (_scratch, store) = fixture();
        let dest = store.object_path(abc_oid());
        let shard = dest.parent().unwrap();
        std::fs::create_dir_all(shard).unwrap();
        std::fs::write(&dest, b"").unwrap();
        assert_eq!(std::fs::metadata(&dest).unwrap().len(), 0);

        store
            .insert_verified(abc_oid(), &mut b"abc".as_slice())
            .unwrap();
        let on_disk = std::fs::read(&dest).unwrap();
        assert_eq!(on_disk, b"abc");
    }

    #[test]
    fn insert_creates_dirs_on_demand() {
        let (_scratch, store) = fixture();
        // A fresh store has not created its root, let alone objects/ or tmp/.
        assert!(!store.root.exists());
        let (oid, _) = store.insert(&mut b"abc".as_slice()).unwrap();
        assert!(store.tmp_dir().is_dir());
        assert!(store.object_path(oid).is_file());
    }

    /// A "source" store that already holds one object, and an empty
    /// "shared" store that lists the source as an alternate. This is
    /// the `git clone --shared` arrangement from t-fetch's init.
    fn shared_fixture() -> (TempDir, Store, Store, Oid) {
        let scratch = TempDir::new().unwrap();
        let source = Store::new(scratch.path().join("src/lfs"));
        let (oid, _) = source.insert(&mut b"abc".as_slice()).unwrap();
        let alternates = [source.root().join("objects")];
        let shared = Store::new(scratch.path().join("shared/lfs")).with_references(alternates);
        (scratch, source, shared, oid)
    }

    #[test]
    fn contains_finds_object_via_reference() {
        let (_scratch, _source, shared, oid) = shared_fixture();
        // So far the object exists only under the source's
        // lfs/objects/. `contains` must say yes and, as a side effect,
        // bring the object into the shared store.
        assert!(shared.contains(oid));
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn open_faults_in_from_reference() {
        let (_scratch, _source, shared, oid) = shared_fixture();
        let mut file = shared.open(oid).unwrap();
        let mut buf = Vec::new();
        file.read_to_end(&mut buf).unwrap();
        assert_eq!(buf, b"abc");
        // Opening materialized the object locally, so later reads no
        // longer depend on the alternate.
        assert!(shared.object_path(oid).is_file());
    }

    #[test]
    fn contains_with_size_rejects_size_mismatch_in_reference() {
        let (_scratch, _source, shared, oid) = shared_fixture();
        // The object is 3 bytes; asking for 4 must reject the
        // alternate's copy and leave nothing behind locally.
        assert!(!shared.contains_with_size(oid, 4));
        assert!(!shared.object_path(oid).is_file());
    }

    #[test]
    fn store_without_references_misses() {
        // Control case: the OID the shared fixture resolves through
        // its alternate really is absent from a plain store.
        let (_scratch, store) = fixture();
        let oid = abc_oid();
        assert!(!store.contains(oid));
        let err = store.open(oid).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }
}