1use std::fs::File;
18use std::io::{self, Read, Write};
19use std::path::{Path, PathBuf};
20
21use git_lfs_pointer::Oid;
22use sha2::{Digest, Sha256};
23use tempfile::NamedTempFile;
24
/// Path of the platform's null device; `Store::object_path` returns it for
/// `Oid::EMPTY` instead of a path inside the store.
#[cfg(windows)]
const NULL_DEVICE: &str = "NUL";
/// Path of the platform's null device; `Store::object_path` returns it for
/// `Oid::EMPTY` instead of a path inside the store.
#[cfg(not(windows))]
const NULL_DEVICE: &str = "/dev/null";

/// Size in bytes (64 KiB) of the scratch buffer used when streaming input
/// into a temporary file.
const COPY_BUFFER: usize = 1 << 16;
29
/// Handle to an on-disk, content-addressed object store.
///
/// The handle is nothing more than the root directory path; all state lives
/// on the filesystem beneath it.
#[derive(Clone, Debug)]
pub struct Store {
    /// Directory beneath which the `objects/` and `tmp/` subtrees are placed.
    root: PathBuf,
}
35
36#[derive(Debug, thiserror::Error)]
37pub enum StoreError {
38 #[error(transparent)]
39 Io(#[from] io::Error),
40 #[error("hash mismatch: expected {expected}, got {actual}")]
41 HashMismatch { expected: Oid, actual: Oid },
42}
43
44impl Store {
45 pub fn new(lfs_dir: impl Into<PathBuf>) -> Self {
48 Self {
49 root: lfs_dir.into(),
50 }
51 }
52
53 pub fn root(&self) -> &Path {
55 &self.root
56 }
57
58 pub fn tmp_dir(&self) -> PathBuf {
60 self.root.join("tmp")
61 }
62
63 pub fn object_path(&self, oid: Oid) -> PathBuf {
69 if oid == Oid::EMPTY {
70 return PathBuf::from(NULL_DEVICE);
71 }
72 let hex = oid.to_string();
73 self.root
74 .join("objects")
75 .join(&hex[0..2])
76 .join(&hex[2..4])
77 .join(&hex)
78 }
79
80 pub fn contains(&self, oid: Oid) -> bool {
83 if oid == Oid::EMPTY {
84 return true;
85 }
86 self.object_path(oid).is_file()
87 }
88
89 pub fn contains_with_size(&self, oid: Oid, size: u64) -> bool {
92 if oid == Oid::EMPTY {
93 return size == 0;
94 }
95 std::fs::metadata(self.object_path(oid))
96 .map(|m| m.is_file() && m.len() == size)
97 .unwrap_or(false)
98 }
99
100 pub fn each_object(&self) -> io::Result<Vec<(Oid, u64)>> {
109 let objects_dir = self.root.join("objects");
110 if !objects_dir.exists() {
111 return Ok(Vec::new());
112 }
113 let mut out = Vec::new();
114 for aa in std::fs::read_dir(&objects_dir)? {
115 let aa = aa?;
116 if !aa.file_type()?.is_dir() {
117 continue;
118 }
119 for bb in std::fs::read_dir(aa.path())? {
120 let bb = bb?;
121 if !bb.file_type()?.is_dir() {
122 continue;
123 }
124 for entry in std::fs::read_dir(bb.path())? {
125 let entry = entry?;
126 let name = entry.file_name();
127 let Some(name_str) = name.to_str() else { continue };
128 let Ok(oid) = name_str.parse::<Oid>() else { continue };
129 let meta = entry.metadata()?;
130 if !meta.is_file() {
131 continue;
132 }
133 out.push((oid, meta.len()));
134 }
135 }
136 }
137 Ok(out)
138 }
139
140 pub fn open(&self, oid: Oid) -> io::Result<File> {
143 File::open(self.object_path(oid))
144 }
145
146 pub fn insert(&self, src: &mut impl Read) -> Result<(Oid, u64), StoreError> {
152 let (oid, size, tmp) = self.stream_to_tmp(src)?;
153 self.commit(oid, tmp)?;
154 Ok((oid, size))
155 }
156
157 pub fn insert_verified(
164 &self,
165 expected: Oid,
166 src: &mut impl Read,
167 ) -> Result<u64, StoreError> {
168 let (actual, size, tmp) = self.stream_to_tmp(src)?;
169 if actual != expected {
170 return Err(StoreError::HashMismatch { expected, actual });
172 }
173 self.commit(actual, tmp)?;
174 Ok(size)
175 }
176
177 fn stream_to_tmp(
178 &self,
179 src: &mut impl Read,
180 ) -> io::Result<(Oid, u64, NamedTempFile)> {
181 std::fs::create_dir_all(self.tmp_dir())?;
182 let mut tmp = NamedTempFile::new_in(self.tmp_dir())?;
183 let mut hasher = Sha256::new();
184 let mut total: u64 = 0;
185 let mut buf = vec![0u8; COPY_BUFFER];
186 let file = tmp.as_file_mut();
187 loop {
188 let n = src.read(&mut buf)?;
189 if n == 0 {
190 break;
191 }
192 hasher.update(&buf[..n]);
193 file.write_all(&buf[..n])?;
194 total += n as u64;
195 }
196 file.flush()?;
197 let bytes: [u8; 32] = hasher.finalize().into();
198 Ok((Oid::from_bytes(bytes), total, tmp))
199 }
200
201 fn commit(&self, oid: Oid, tmp: NamedTempFile) -> io::Result<()> {
202 if oid == Oid::EMPTY {
204 return Ok(());
205 }
206 let dest = self.object_path(oid);
207 if dest.is_file() {
210 return Ok(());
211 }
212 if let Some(parent) = dest.parent() {
213 std::fs::create_dir_all(parent)?;
214 }
215 match tmp.persist_noclobber(&dest) {
216 Ok(_) => Ok(()),
217 Err(e) if e.error.kind() == io::ErrorKind::AlreadyExists => {
218 Ok(())
220 }
221 Err(e) => Err(e.error),
222 }
223 }
224}
225
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Hex OID that `insert` is expected to produce for the bytes `abc`.
    const ABC_OID_HEX: &str =
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// Builds a store rooted at `<tempdir>/lfs`. The returned `TempDir` must
    /// be kept alive for as long as the store is used.
    fn new_store() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let root = dir.path().join("lfs");
        (dir, Store::new(root))
    }

    /// Parses a hex string into an `Oid`, panicking on malformed input.
    fn oid_from_hex(hex: &str) -> Oid {
        hex.parse().unwrap()
    }

    fn abc_oid() -> Oid {
        oid_from_hex(ABC_OID_HEX)
    }

    /// Opens `oid` in `store` and reads the whole object into memory.
    fn read_back(store: &Store, oid: Oid) -> Vec<u8> {
        let mut bytes = Vec::new();
        store.open(oid).unwrap().read_to_end(&mut bytes).unwrap();
        bytes
    }

    #[test]
    fn object_path_is_sharded() {
        let (_guard, store) = new_store();
        let oid =
            oid_from_hex("4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393");
        let hex = oid.to_string();
        let suffix: PathBuf = ["objects", "4d", "7a", hex.as_str()].iter().collect();
        let path = store.object_path(oid);
        assert!(path.ends_with(&suffix), "{path:?} does not end with {suffix:?}");
    }

    #[test]
    fn empty_oid_short_circuits() {
        let (_guard, store) = new_store();
        assert_eq!(store.object_path(Oid::EMPTY), PathBuf::from(NULL_DEVICE));
        assert!(store.contains(Oid::EMPTY));
        assert!(store.contains_with_size(Oid::EMPTY, 0));
        assert!(!store.contains_with_size(Oid::EMPTY, 1));
        // Opening the empty OID must succeed and yield no bytes.
        let buf = read_back(&store, Oid::EMPTY);
        assert!(buf.is_empty());
    }

    #[test]
    fn insert_round_trip() {
        let (_guard, store) = new_store();
        let payload: &[u8] = b"hello world!";
        let mut reader = payload;
        let (oid, size) = store.insert(&mut reader).unwrap();
        assert_eq!(size, payload.len() as u64);
        assert!(store.contains(oid));
        assert!(store.contains_with_size(oid, size));
        assert_eq!(read_back(&store, oid), payload);
    }

    #[test]
    fn insert_computes_correct_sha256() {
        let (_guard, store) = new_store();
        let mut reader: &[u8] = b"abc";
        let (oid, _) = store.insert(&mut reader).unwrap();
        assert_eq!(oid, abc_oid());
    }

    #[test]
    fn insert_empty_yields_empty_oid_and_no_object_file() {
        let (_guard, store) = new_store();
        let mut reader: &[u8] = &[];
        let (oid, size) = store.insert(&mut reader).unwrap();
        assert_eq!(oid, Oid::EMPTY);
        assert_eq!(size, 0);
        // The empty object is never materialised, so not even the objects
        // directory should have been created.
        assert!(!store.root.join("objects").exists());
    }

    #[test]
    fn insert_idempotent() {
        let (_guard, store) = new_store();
        let mut first_reader: &[u8] = b"abc";
        let mut second_reader: &[u8] = b"abc";
        let (first, _) = store.insert(&mut first_reader).unwrap();
        let (second, _) = store.insert(&mut second_reader).unwrap();
        assert_eq!(first, second);
        assert!(store.contains(first));
    }

    #[test]
    fn insert_verified_succeeds_on_match() {
        let (_guard, store) = new_store();
        let mut reader: &[u8] = b"abc";
        let size = store.insert_verified(abc_oid(), &mut reader).unwrap();
        assert_eq!(size, 3);
        assert!(store.contains(abc_oid()));
    }

    #[test]
    fn insert_verified_errors_on_mismatch_and_leaves_no_file() {
        let (_guard, store) = new_store();
        let wrong =
            oid_from_hex("0000000000000000000000000000000000000000000000000000000000000001");
        let mut reader: &[u8] = b"abc";
        let err = store.insert_verified(wrong, &mut reader).unwrap_err();
        match err {
            StoreError::HashMismatch { expected, actual } => {
                assert_eq!(expected, wrong);
                assert_eq!(actual, abc_oid());
            }
            other => panic!("expected HashMismatch, got {other:?}"),
        }
        // Neither the requested OID nor the real one may have been stored.
        assert!(!store.contains(wrong));
        assert!(!store.contains(abc_oid()));
        // The staging file must be gone as well.
        let tmp_entries: Vec<_> = std::fs::read_dir(store.tmp_dir())
            .unwrap()
            .collect::<Result<_, _>>()
            .unwrap();
        assert!(tmp_entries.is_empty(), "tmp dir not empty: {tmp_entries:?}");
    }

    #[test]
    fn open_missing_oid_is_not_found() {
        let (_guard, store) = new_store();
        let missing =
            oid_from_hex("0000000000000000000000000000000000000000000000000000000000000001");
        let err = store.open(missing).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn streaming_megabyte_input() {
        let (_guard, store) = new_store();
        // 1 MiB of non-trivial bytes, well beyond a single copy buffer.
        let content: Vec<u8> = (0..1_048_576u32).map(|i| (i ^ (i >> 5)) as u8).collect();
        let mut reader = content.as_slice();
        let (oid, size) = store.insert(&mut reader).unwrap();
        assert_eq!(size, content.len() as u64);
        assert_eq!(read_back(&store, oid), content);
    }

    #[test]
    fn each_object_returns_empty_when_no_objects_dir() {
        let (_guard, store) = new_store();
        let listed = store.each_object().unwrap();
        assert!(listed.is_empty());
    }

    #[test]
    fn each_object_finds_inserted_objects_with_correct_size() {
        let (_guard, store) = new_store();
        let mut reader_a: &[u8] = b"hello";
        let mut reader_b: &[u8] = b"world!!!";
        let (oid_a, _) = store.insert(&mut reader_a).unwrap();
        let (oid_b, _) = store.insert(&mut reader_b).unwrap();
        let mut got = store.each_object().unwrap();
        // Listing order is unspecified; order by size so the two objects
        // land at known indices.
        got.sort_by_key(|entry| entry.1);
        assert_eq!(got.len(), 2);
        assert_eq!(got[0].0, oid_a);
        assert_eq!(got[0].1, 5);
        assert_eq!(got[1].0, oid_b);
        assert_eq!(got[1].1, 8);
    }

    #[test]
    fn each_object_skips_unrecognized_filenames() {
        let (_guard, store) = new_store();
        let mut reader: &[u8] = b"hi";
        let (oid, _) = store.insert(&mut reader).unwrap();
        // Drop a stray file next to the real object, inside its shard dir.
        let hex = oid.to_string();
        let shard = store
            .root()
            .join("objects")
            .join(&hex[0..2])
            .join(&hex[2..4]);
        std::fs::write(shard.join("README"), b"ignored").unwrap();
        let got = store.each_object().unwrap();
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].0, oid);
    }

    #[test]
    fn insert_creates_dirs_on_demand() {
        let (_guard, store) = new_store();
        assert!(!store.root.exists());
        let mut reader: &[u8] = b"abc";
        let (oid, _) = store.insert(&mut reader).unwrap();
        assert!(store.tmp_dir().is_dir());
        assert!(store.object_path(oid).is_file());
    }
}