1use std::fs;
4use std::io::{self, Read, Write};
5use std::path::Path;
6use std::process::{Command, Stdio};
7
8use git_lfs_pointer::{Oid, Pointer};
9use git_lfs_store::Store;
10use sha2::{Digest, Sha256};
11use tempfile::NamedTempFile;
12
13use crate::FetchError;
14use crate::detect_pointer;
15
/// Size in bytes (64 KiB) of the scratch buffer that `hash_and_write` and
/// `hash_and_copy` allocate for their read / hash / write loops.
const COPY_BUFFER: usize = 64 * 1024;
17
/// A smudge-side extension as supplied by the caller.
///
/// The smudge chain matches these entries against the extensions recorded in
/// a pointer by `name` and runs `command` for every match.
///
/// Equality is field-wise, so two values compare equal only when name,
/// priority and command all match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmudgeExtension {
    /// Name used to look this extension up from a pointer's extension list.
    pub name: String,
    /// Priority value carried with the extension. The smudge code in this
    /// file never reads it; presumably the configuration layer uses it for
    /// ordering (TODO confirm against the code that builds these values).
    pub priority: u8,
    /// Command line to run for this extension. It is split on whitespace into
    /// a program and its arguments, and `%f` is replaced with the path of the
    /// file being smudged.
    pub command: String,
}
28
/// What [`smudge`] / [`smudge_with_fetch`] did with the input they were given.
#[derive(Debug)]
pub enum SmudgeOutcome {
    /// The input was not recognised as a pointer; its bytes were copied to the
    /// output unchanged.
    Passthrough,
    /// The input was the contained pointer. For an empty pointer nothing was
    /// written; otherwise the object's (smudged) content was written to the
    /// output.
    Resolved(Pointer),
}
41
/// Errors returned by the smudge functions in this file.
#[derive(Debug, thiserror::Error)]
pub enum SmudgeError {
    /// An I/O failure propagated with `?`; displayed as the underlying error.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// `Store::contains_with_size` reported that the pointer's object (with
    /// the pointer's size) is not in the local store. The pointer is carried
    /// along so the caller can fetch it, as `smudge_with_fetch` does.
    #[error("object {} (size {}) is not present in the local store", .0.oid, .0.size)]
    ObjectMissing(Pointer),
    /// The fetch callback passed to `smudge_with_fetch` returned an error.
    #[error("fetch failed: {0}")]
    FetchFailed(FetchError),
    /// The pointer names an extension that has no entry with the same name in
    /// the `extensions` slice.
    #[error("extension {name:?} is not configured")]
    ExtensionNotConfigured { name: String },
    /// The matching extension's `command` is empty or only whitespace.
    #[error("extension {name:?} has no smudge command configured")]
    ExtensionMissingCommand { name: String },
    /// The extension's process could not be started.
    #[error("failed to spawn extension {name:?}: {source}")]
    ExtensionSpawnFailed {
        name: String,
        #[source]
        source: io::Error,
    },
    /// The extension's process finished unsuccessfully. `status` is whatever
    /// `ExitStatus::code` returned (`None` when there is no exit code, e.g. on
    /// Unix when the process was terminated by a signal).
    #[error("extension {name:?} exited with status {status:?}")]
    ExtensionFailed { name: String, status: Option<i32> },
    /// The SHA-256 of the data seen at `stage` (the stored object, or the
    /// output of one extension) differs from the OID the pointer records.
    #[error("OID mismatch for {stage}: expected {expected}, got {actual}")]
    OidMismatch {
        stage: String,
        expected: Oid,
        actual: Oid,
    },
}
84
85pub fn smudge<R: Read, W: Write>(
98 store: &Store,
99 input: &mut R,
100 output: &mut W,
101 path: &str,
102 extensions: &[SmudgeExtension],
103) -> Result<SmudgeOutcome, SmudgeError> {
104 let (head, maybe_pointer) = detect_pointer(input)?;
105
106 let Some(pointer) = maybe_pointer else {
107 output.write_all(&head)?;
109 io::copy(input, output)?;
110 return Ok(SmudgeOutcome::Passthrough);
111 };
112
113 if pointer.is_empty() {
114 return Ok(SmudgeOutcome::Resolved(pointer));
115 }
116
117 if !store.contains_with_size(pointer.oid, pointer.size) {
121 return Err(SmudgeError::ObjectMissing(pointer));
122 }
123
124 smudge_object_to(store, &pointer, output, path, extensions, None)?;
125 Ok(SmudgeOutcome::Resolved(pointer))
126}
127
128pub fn smudge_with_fetch<R, W, F>(
137 store: &Store,
138 input: &mut R,
139 output: &mut W,
140 path: &str,
141 extensions: &[SmudgeExtension],
142 mut fetch: F,
143) -> Result<SmudgeOutcome, SmudgeError>
144where
145 R: Read,
146 W: Write,
147 F: FnMut(&Pointer) -> Result<(), FetchError>,
148{
149 match smudge(store, input, output, path, extensions) {
150 Err(SmudgeError::ObjectMissing(pointer)) => {
151 fetch(&pointer).map_err(SmudgeError::FetchFailed)?;
152 if !store.contains_with_size(pointer.oid, pointer.size) {
153 return Err(SmudgeError::ObjectMissing(pointer));
154 }
155 smudge_object_to(store, &pointer, output, path, extensions, None)?;
156 Ok(SmudgeOutcome::Resolved(pointer))
157 }
158 other => other,
159 }
160}
161
162pub fn smudge_object_to<W: Write>(
173 store: &Store,
174 pointer: &Pointer,
175 output: &mut W,
176 path: &str,
177 extensions: &[SmudgeExtension],
178 spawn_cwd: Option<&Path>,
179) -> Result<(), SmudgeError> {
180 if pointer.extensions.is_empty() {
181 let mut file = store.open(pointer.oid)?;
182 io::copy(&mut file, output)?;
183 return Ok(());
184 }
185 apply_smudge_chain(store, pointer, output, path, extensions, spawn_cwd)
186}
187
188fn apply_smudge_chain<W: Write>(
189 store: &Store,
190 pointer: &Pointer,
191 output: &mut W,
192 path: &str,
193 extensions: &[SmudgeExtension],
194 spawn_cwd: Option<&Path>,
195) -> Result<(), SmudgeError> {
196 let mut chain: Vec<(&SmudgeExtension, Oid)> = Vec::with_capacity(pointer.extensions.len());
200 for ptr_ext in &pointer.extensions {
201 let registered = extensions
202 .iter()
203 .find(|e| e.name == ptr_ext.name)
204 .ok_or_else(|| SmudgeError::ExtensionNotConfigured {
205 name: ptr_ext.name.clone(),
206 })?;
207 if registered.command.trim().is_empty() {
208 return Err(SmudgeError::ExtensionMissingCommand {
209 name: registered.name.clone(),
210 });
211 }
212 chain.push((registered, ptr_ext.oid));
213 }
214 chain.reverse();
215
216 let tmp_dir = store.tmp_dir();
217 fs::create_dir_all(&tmp_dir)?;
218
219 let mut current_tmp = NamedTempFile::new_in(&tmp_dir)?;
224 let mut store_file = store.open(pointer.oid)?;
225 let initial_oid = hash_and_write(&mut store_file, current_tmp.as_file_mut())?;
226 if initial_oid != pointer.oid {
227 return Err(SmudgeError::OidMismatch {
228 stage: format!("stored object {}", pointer.oid),
229 expected: pointer.oid,
230 actual: initial_oid,
231 });
232 }
233
234 for (i, (ext, expected_out_oid)) in chain.iter().enumerate() {
235 let cmd_str = ext.command.replace("%f", path);
236 let mut parts = cmd_str.split_whitespace();
237 let prog = parts
238 .next()
239 .ok_or_else(|| SmudgeError::ExtensionMissingCommand {
240 name: ext.name.clone(),
241 })?;
242 let args: Vec<&str> = parts.collect();
243
244 let stdin_file = std::fs::File::open(current_tmp.path())?;
245 let mut command = Command::new(prog);
246 command
247 .args(&args)
248 .stdin(stdin_file)
249 .stdout(Stdio::piped())
250 .stderr(Stdio::inherit());
251 if let Some(dir) = spawn_cwd {
252 command.current_dir(dir);
253 }
254 let mut child = command
255 .spawn()
256 .map_err(|e| SmudgeError::ExtensionSpawnFailed {
257 name: ext.name.clone(),
258 source: e,
259 })?;
260 let mut stdout = child.stdout.take().expect("piped stdout");
261
262 let is_last = i + 1 == chain.len();
263 if is_last {
264 let actual_oid = hash_and_copy(&mut stdout, output)?;
265 let status = child.wait()?;
266 if !status.success() {
267 return Err(SmudgeError::ExtensionFailed {
268 name: ext.name.clone(),
269 status: status.code(),
270 });
271 }
272 if actual_oid != *expected_out_oid {
273 return Err(SmudgeError::OidMismatch {
274 stage: format!("smudge output of extension {:?}", ext.name),
275 expected: *expected_out_oid,
276 actual: actual_oid,
277 });
278 }
279 return Ok(());
280 }
281
282 let mut next_tmp = NamedTempFile::new_in(&tmp_dir)?;
283 let actual_oid = hash_and_write(&mut stdout, next_tmp.as_file_mut())?;
284 let status = child.wait()?;
285 if !status.success() {
286 return Err(SmudgeError::ExtensionFailed {
287 name: ext.name.clone(),
288 status: status.code(),
289 });
290 }
291 if actual_oid != *expected_out_oid {
292 return Err(SmudgeError::OidMismatch {
293 stage: format!("smudge output of extension {:?}", ext.name),
294 expected: *expected_out_oid,
295 actual: actual_oid,
296 });
297 }
298 current_tmp = next_tmp;
299 }
300 unreachable!("smudge chain exited without writing output")
301}
302
303fn hash_and_write<R: Read>(src: &mut R, dst: &mut std::fs::File) -> io::Result<Oid> {
304 let mut hasher = Sha256::new();
305 let mut buf = vec![0u8; COPY_BUFFER];
306 loop {
307 let n = src.read(&mut buf)?;
308 if n == 0 {
309 break;
310 }
311 hasher.update(&buf[..n]);
312 dst.write_all(&buf[..n])?;
313 }
314 dst.flush()?;
315 let bytes: [u8; 32] = hasher.finalize().into();
316 Ok(Oid::from_bytes(bytes))
317}
318
319fn hash_and_copy<R: Read, W: Write>(src: &mut R, dst: &mut W) -> io::Result<Oid> {
320 let mut hasher = Sha256::new();
321 let mut buf = vec![0u8; COPY_BUFFER];
322 loop {
323 let n = src.read(&mut buf)?;
324 if n == 0 {
325 break;
326 }
327 hasher.update(&buf[..n]);
328 dst.write_all(&buf[..n])?;
329 }
330 let bytes: [u8; 32] = hasher.finalize().into();
331 Ok(Oid::from_bytes(bytes))
332}
333
// Unit tests for `smudge`, `smudge_with_fetch` and the extension chain.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::clean;
    use git_lfs_pointer::VERSION_LATEST;
    use tempfile::TempDir;

    /// Creates a temp directory and a `Store` rooted at `<tmp>/lfs`. The
    /// `TempDir` is returned too so the caller keeps the directory alive for
    /// the duration of the test.
    fn fixture() -> (TempDir, Store) {
        let tmp = TempDir::new().unwrap();
        let store = Store::new(tmp.path().join("lfs"));
        (tmp, store)
    }

    /// Runs `smudge` over `input` with an empty path and no extensions,
    /// returning its result together with everything it wrote.
    fn run(store: &Store, input: &[u8]) -> (Result<SmudgeOutcome, SmudgeError>, Vec<u8>) {
        let mut out = Vec::new();
        // `{ input }` yields a temporary copy of the slice reference that
        // `Read` can advance without touching the caller's slice.
        let outcome = smudge(store, &mut { input }, &mut out, "", &[]);
        (outcome, out)
    }

    /// Runs `clean` over `content` with an empty path and no extensions and
    /// returns what it wrote, which the tests then feed to `smudge` as
    /// pointer text.
    // NOTE(review): the tests below also rely on `clean` having stored
    // `content` in `store` — confirm against `clean`.
    fn clean_into(store: &Store, content: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        clean(store, &mut { content }, &mut out, "", &[]).unwrap();
        out
    }

    // A pointer produced by `clean` resolves back to the original bytes.
    #[test]
    fn pointer_resolves_from_store() {
        let (_t, store) = fixture();
        let content = b"smudge a\n";
        let pointer_text = clean_into(&store, content);

        let (outcome, out) = run(&store, &pointer_text);
        let p = match outcome.unwrap() {
            SmudgeOutcome::Resolved(p) => p,
            o => panic!("expected Resolved, got {o:?}"),
        };
        assert_eq!(p.size, content.len() as u64);
        assert_eq!(out, content);
    }

    // Empty input is expected to resolve to an empty pointer and produce no
    // output.
    #[test]
    fn empty_pointer_writes_nothing() {
        let (_t, store) = fixture();
        let (outcome, out) = run(&store, b"");
        match outcome.unwrap() {
            SmudgeOutcome::Resolved(p) => assert!(p.is_empty()),
            o => panic!("expected Resolved(empty), got {o:?}"),
        }
        assert!(out.is_empty());
    }

    // clean followed by smudge must reproduce the input byte-for-byte, for
    // empty, textual and binary content (including every byte value 0..=255).
    #[test]
    fn clean_smudge_round_trip_preserves_bytes() {
        let (_t, store) = fixture();
        for content in [
            &b""[..],
            &b"hello"[..],
            &b"binary \x00\x01\xff data"[..],
            &(0..256u16).map(|i| i as u8).collect::<Vec<_>>(),
        ] {
            let pointer_text = clean_into(&store, content);
            let mut out = Vec::new();
            smudge(&store, &mut { &pointer_text[..] }, &mut out, "", &[]).unwrap();
            assert_eq!(out, content, "round-trip failed for {content:?}");
        }
    }

    // Short inputs that are not valid pointers are passed through unchanged.
    #[test]
    fn invalid_pointer_passes_through_short() {
        let (_t, store) = fixture();
        for input in [&b"wat"[..], b"not a git-lfs file", b"version "] {
            let (outcome, out) = run(&store, input);
            assert!(matches!(outcome.unwrap(), SmudgeOutcome::Passthrough));
            assert_eq!(out, input);
        }
    }

    // A 2048-byte non-pointer input is passed through in full, i.e. both the
    // bytes consumed during detection and the remainder reach the output.
    #[test]
    fn long_non_pointer_passes_through() {
        let (_t, store) = fixture();
        let content: Vec<u8> = (0..2048u32).map(|i| (i ^ (i >> 3)) as u8).collect();
        let (outcome, out) = run(&store, &content);
        assert!(matches!(outcome.unwrap(), SmudgeOutcome::Passthrough));
        assert_eq!(out, content);
    }

    // A well-formed pointer whose object is not in the store yields
    // `ObjectMissing` carrying the parsed pointer, and writes nothing.
    #[test]
    fn missing_object_errors() {
        let (_t, store) = fixture();
        let unknown_oid = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown_oid}\nsize 5\n");
        let (outcome, out) = run(&store, pointer_text.as_bytes());
        match outcome.unwrap_err() {
            SmudgeError::ObjectMissing(pointer) => {
                assert_eq!(pointer.oid.to_string(), unknown_oid);
                assert_eq!(pointer.size, 5);
            }
            e => panic!("expected ObjectMissing, got {e:?}"),
        }
        assert!(out.is_empty(), "no partial output on miss");
    }

    // The object is in the store, but the pointer is edited to claim a
    // different size; `contains_with_size` must then report it as missing.
    #[test]
    fn size_mismatch_treated_as_missing() {
        let (_t, store) = fixture();
        let pointer_text = clean_into(&store, b"abc"); let tampered = String::from_utf8(pointer_text)
            .unwrap()
            .replace("size 3", "size 99");
        let (outcome, _) = run(&store, tampered.as_bytes());
        match outcome.unwrap_err() {
            SmudgeError::ObjectMissing(p) => assert_eq!(p.size, 99),
            e => panic!("expected ObjectMissing, got {e:?}"),
        }
    }

    // The object is removed from the store after cleaning; the fetch callback
    // re-inserts it, after which `smudge_with_fetch` streams the content.
    #[test]
    fn fetch_populates_store_then_streams() {
        let (_t, store) = fixture();
        let content = b"to be fetched\n";
        let pointer_text = clean_into(&store, content);
        let parsed = git_lfs_pointer::Pointer::parse(&pointer_text).unwrap();
        // Simulate an object that has not been downloaded yet.
        std::fs::remove_file(store.object_path(parsed.oid)).unwrap();
        assert!(!store.contains(parsed.oid));

        let mut out = Vec::new();
        let store_ref = &store;
        let outcome = smudge_with_fetch(
            &store,
            &mut { &pointer_text[..] },
            &mut out,
            "",
            &[],
            |p: &Pointer| {
                store_ref.insert(&mut { &content[..] }).unwrap();
                assert_eq!(p.size, content.len() as u64);
                Ok(())
            },
        );
        assert!(matches!(outcome.unwrap(), SmudgeOutcome::Resolved(_)));
        assert_eq!(out, content);
    }

    // An error from the fetch callback is reported as `FetchFailed` with the
    // callback's message, and nothing is written.
    #[test]
    fn fetch_failure_surfaces_as_fetch_failed() {
        let (_t, store) = fixture();
        let unknown = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown}\nsize 5\n");
        let mut out = Vec::new();
        let outcome = smudge_with_fetch(
            &store,
            &mut { pointer_text.as_bytes() },
            &mut out,
            "",
            &[],
            |_p: &Pointer| Err("server is on fire".into()),
        );
        match outcome.unwrap_err() {
            SmudgeError::FetchFailed(e) => {
                assert!(e.to_string().contains("server is on fire"));
            }
            other => panic!("expected FetchFailed, got {other:?}"),
        }
        assert!(out.is_empty());
    }

    // A fetch callback that claims success without populating the store must
    // still end in `ObjectMissing` (the store is re-checked after fetching).
    #[test]
    fn fetch_returning_ok_but_not_inserting_still_errors() {
        let (_t, store) = fixture();
        let unknown = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown}\nsize 5\n");
        let mut out = Vec::new();
        let outcome = smudge_with_fetch(
            &store,
            &mut { pointer_text.as_bytes() },
            &mut out,
            "",
            &[],
            |_p: &Pointer| Ok(()),
        );
        assert!(matches!(
            outcome.unwrap_err(),
            SmudgeError::ObjectMissing(_)
        ));
    }

    // When the first `smudge` attempt succeeds, the fetch callback is never
    // called.
    #[test]
    fn fetch_not_invoked_when_object_already_present() {
        let (_t, store) = fixture();
        let content = b"already here";
        let pointer_text = clean_into(&store, content);
        let mut out = Vec::new();
        let mut calls = 0;
        smudge_with_fetch(
            &store,
            &mut { &pointer_text[..] },
            &mut out,
            "",
            &[],
            |_p: &Pointer| {
                calls += 1;
                Ok(())
            },
        )
        .unwrap();
        assert_eq!(
            calls, 0,
            "fetch must not be called when store has the object"
        );
        assert_eq!(out, content);
    }

    // Cleans "abc" through an upper-casing `tr` extension, then smudges the
    // resulting pointer through the inverse `tr` command and expects "abc"
    // back. Requires a `tr` executable on PATH.
    #[test]
    fn single_extension_round_trips() {
        let (_t, store) = fixture();
        let clean_exts = vec![crate::CleanExtension {
            name: "upper".into(),
            priority: 0,
            command: "tr a-z A-Z".into(),
        }];
        let smudge_exts = vec![SmudgeExtension {
            name: "upper".into(),
            priority: 0,
            command: "tr A-Z a-z".into(),
        }];

        let mut pointer_buf = Vec::new();
        crate::clean(
            &store,
            &mut &b"abc"[..],
            &mut pointer_buf,
            "foo.txt",
            &clean_exts,
        )
        .unwrap();

        let mut out = Vec::new();
        let outcome = smudge(
            &store,
            &mut pointer_buf.as_slice(),
            &mut out,
            "foo.txt",
            &smudge_exts,
        )
        .unwrap();
        assert!(matches!(outcome, SmudgeOutcome::Resolved(_)));
        assert_eq!(out, b"abc");
    }

    // Despite the name, this asserts `ObjectMissing`, not
    // `ExtensionNotConfigured`: the pointer names an extension ("foo") while
    // no extensions are configured, but the store is empty and `smudge`
    // checks for the object before extensions are looked up.
    #[test]
    fn extension_not_configured_errors() {
        let (_t, store) = fixture();
        let oid_hex = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393";
        let ext_oid = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff";
        let pointer_text = format!(
            "version {VERSION_LATEST}\n\
             ext-0-foo sha256:{ext_oid}\n\
             oid sha256:{oid_hex}\n\
             size 12345\n",
        );
        let mut out = Vec::new();
        let err = smudge(&store, &mut pointer_text.as_bytes(), &mut out, "x", &[]).unwrap_err();
        assert!(matches!(err, SmudgeError::ObjectMissing(_)));
    }
}