1use std::fs;
4use std::io::{self, Read, Write};
5use std::path::Path;
6use std::process::{Command, Stdio};
7
8use git_lfs_pointer::{Oid, Pointer};
9use git_lfs_store::Store;
10use sha2::{Digest, Sha256};
11use tempfile::NamedTempFile;
12
13use crate::FetchError;
14use crate::detect_pointer;
15
/// Size in bytes (64 KiB) of the scratch buffer used by the hashing copy loops in this file.
const COPY_BUFFER: usize = 64 * 1024;
17
/// A locally registered extension that can take part in a smudge chain.
#[derive(Debug, Clone)]
pub struct SmudgeExtension {
    /// Name matched against the extension names recorded in a pointer.
    pub name: String,
    /// Extension priority. NOTE(review): not read anywhere in this file; presumably used by
    /// whatever builds the extension list — confirm against the caller.
    pub priority: u8,
    /// Command line to run. It is split on whitespace and `%f` is replaced by the path being
    /// smudged; a blank command is rejected with `SmudgeError::ExtensionMissingCommand`.
    pub command: String,
}
35
/// What a successful smudge did with its input.
#[derive(Debug)]
pub enum SmudgeOutcome {
    /// The input was not recognised as a pointer and was copied to the output unchanged.
    Passthrough,
    /// The input was a pointer. For an empty pointer nothing is written; otherwise the
    /// referenced object's content has been written to the output.
    Resolved(Pointer),
}
51
/// Errors produced while smudging a pointer back into file content.
#[derive(Debug, thiserror::Error)]
pub enum SmudgeError {
    /// An underlying I/O operation failed.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// The store does not hold an object with the pointer's OID and size.
    #[error("object {} (size {}) is not present in the local store", .0.oid, .0.size)]
    ObjectMissing(Pointer),
    /// The fetch callback given to `smudge_with_fetch` returned an error.
    #[error("fetch failed: {0}")]
    FetchFailed(FetchError),
    /// The pointer names an extension that is absent from the supplied extension list.
    #[error("extension {name:?} is not configured")]
    ExtensionNotConfigured { name: String },
    /// The extension is registered but its command is blank.
    #[error("extension {name:?} has no smudge command configured")]
    ExtensionMissingCommand { name: String },
    /// The extension's process could not be started.
    #[error("failed to spawn extension {name:?}: {source}")]
    ExtensionSpawnFailed {
        name: String,
        #[source]
        source: io::Error,
    },
    /// The extension's process exited unsuccessfully; `status` is whatever
    /// `ExitStatus::code` reported.
    #[error("extension {name:?} exited with status {status:?}")]
    ExtensionFailed { name: String, status: Option<i32> },
    /// Hashed content did not match the OID recorded in the pointer; `stage` says which step
    /// of the smudge produced the mismatching bytes.
    #[error("OID mismatch for {stage}: expected {expected}, got {actual}")]
    OidMismatch {
        stage: String,
        expected: Oid,
        actual: Oid,
    },
}
97
98pub fn smudge<R: Read, W: Write>(
111 store: &Store,
112 input: &mut R,
113 output: &mut W,
114 path: &str,
115 extensions: &[SmudgeExtension],
116) -> Result<SmudgeOutcome, SmudgeError> {
117 let (head, maybe_pointer) = detect_pointer(input)?;
118
119 let Some(pointer) = maybe_pointer else {
120 output.write_all(&head)?;
122 io::copy(input, output)?;
123 return Ok(SmudgeOutcome::Passthrough);
124 };
125
126 if pointer.is_empty() {
127 return Ok(SmudgeOutcome::Resolved(pointer));
128 }
129
130 if !store.contains_with_size(pointer.oid, pointer.size) {
134 return Err(SmudgeError::ObjectMissing(pointer));
135 }
136
137 smudge_object_to(store, &pointer, output, path, extensions, None)?;
138 Ok(SmudgeOutcome::Resolved(pointer))
139}
140
141pub fn smudge_with_fetch<R, W, F>(
151 store: &Store,
152 input: &mut R,
153 output: &mut W,
154 path: &str,
155 extensions: &[SmudgeExtension],
156 mut fetch: F,
157) -> Result<SmudgeOutcome, SmudgeError>
158where
159 R: Read,
160 W: Write,
161 F: FnMut(&Pointer) -> Result<(), FetchError>,
162{
163 match smudge(store, input, output, path, extensions) {
164 Err(SmudgeError::ObjectMissing(pointer)) => {
165 fetch(&pointer).map_err(SmudgeError::FetchFailed)?;
166 if !store.contains_with_size(pointer.oid, pointer.size) {
167 return Err(SmudgeError::ObjectMissing(pointer));
168 }
169 smudge_object_to(store, &pointer, output, path, extensions, None)?;
170 Ok(SmudgeOutcome::Resolved(pointer))
171 }
172 other => other,
173 }
174}
175
176pub fn smudge_object_to<W: Write>(
189 store: &Store,
190 pointer: &Pointer,
191 output: &mut W,
192 path: &str,
193 extensions: &[SmudgeExtension],
194 spawn_cwd: Option<&Path>,
195) -> Result<(), SmudgeError> {
196 if pointer.extensions.is_empty() {
197 let mut file = store.open(pointer.oid)?;
198 io::copy(&mut file, output)?;
199 return Ok(());
200 }
201 apply_smudge_chain(store, pointer, output, path, extensions, spawn_cwd)
202}
203
204fn apply_smudge_chain<W: Write>(
205 store: &Store,
206 pointer: &Pointer,
207 output: &mut W,
208 path: &str,
209 extensions: &[SmudgeExtension],
210 spawn_cwd: Option<&Path>,
211) -> Result<(), SmudgeError> {
212 let mut chain: Vec<(&SmudgeExtension, Oid)> = Vec::with_capacity(pointer.extensions.len());
216 for ptr_ext in &pointer.extensions {
217 let registered = extensions
218 .iter()
219 .find(|e| e.name == ptr_ext.name)
220 .ok_or_else(|| SmudgeError::ExtensionNotConfigured {
221 name: ptr_ext.name.clone(),
222 })?;
223 if registered.command.trim().is_empty() {
224 return Err(SmudgeError::ExtensionMissingCommand {
225 name: registered.name.clone(),
226 });
227 }
228 chain.push((registered, ptr_ext.oid));
229 }
230 chain.reverse();
231
232 let tmp_dir = store.tmp_dir();
233 fs::create_dir_all(&tmp_dir)?;
234
235 let mut current_tmp = NamedTempFile::new_in(&tmp_dir)?;
240 let mut store_file = store.open(pointer.oid)?;
241 let initial_oid = hash_and_write(&mut store_file, current_tmp.as_file_mut())?;
242 if initial_oid != pointer.oid {
243 return Err(SmudgeError::OidMismatch {
244 stage: format!("stored object {}", pointer.oid),
245 expected: pointer.oid,
246 actual: initial_oid,
247 });
248 }
249
250 for (i, (ext, expected_out_oid)) in chain.iter().enumerate() {
251 let cmd_str = ext.command.replace("%f", path);
252 let mut parts = cmd_str.split_whitespace();
253 let prog = parts
254 .next()
255 .ok_or_else(|| SmudgeError::ExtensionMissingCommand {
256 name: ext.name.clone(),
257 })?;
258 let args: Vec<&str> = parts.collect();
259
260 let stdin_file = std::fs::File::open(current_tmp.path())?;
261 let mut command = Command::new(prog);
262 command
263 .args(&args)
264 .stdin(stdin_file)
265 .stdout(Stdio::piped())
266 .stderr(Stdio::inherit());
267 if let Some(dir) = spawn_cwd {
268 command.current_dir(dir);
269 }
270 let mut child = command
271 .spawn()
272 .map_err(|e| SmudgeError::ExtensionSpawnFailed {
273 name: ext.name.clone(),
274 source: e,
275 })?;
276 let mut stdout = child.stdout.take().expect("piped stdout");
277
278 let is_last = i + 1 == chain.len();
279 if is_last {
280 let actual_oid = hash_and_copy(&mut stdout, output)?;
281 let status = child.wait()?;
282 if !status.success() {
283 return Err(SmudgeError::ExtensionFailed {
284 name: ext.name.clone(),
285 status: status.code(),
286 });
287 }
288 if actual_oid != *expected_out_oid {
289 return Err(SmudgeError::OidMismatch {
290 stage: format!("smudge output of extension {:?}", ext.name),
291 expected: *expected_out_oid,
292 actual: actual_oid,
293 });
294 }
295 return Ok(());
296 }
297
298 let mut next_tmp = NamedTempFile::new_in(&tmp_dir)?;
299 let actual_oid = hash_and_write(&mut stdout, next_tmp.as_file_mut())?;
300 let status = child.wait()?;
301 if !status.success() {
302 return Err(SmudgeError::ExtensionFailed {
303 name: ext.name.clone(),
304 status: status.code(),
305 });
306 }
307 if actual_oid != *expected_out_oid {
308 return Err(SmudgeError::OidMismatch {
309 stage: format!("smudge output of extension {:?}", ext.name),
310 expected: *expected_out_oid,
311 actual: actual_oid,
312 });
313 }
314 current_tmp = next_tmp;
315 }
316 unreachable!("smudge chain exited without writing output")
317}
318
319fn hash_and_write<R: Read>(src: &mut R, dst: &mut std::fs::File) -> io::Result<Oid> {
320 let mut hasher = Sha256::new();
321 let mut buf = vec![0u8; COPY_BUFFER];
322 loop {
323 let n = src.read(&mut buf)?;
324 if n == 0 {
325 break;
326 }
327 hasher.update(&buf[..n]);
328 dst.write_all(&buf[..n])?;
329 }
330 dst.flush()?;
331 let bytes: [u8; 32] = hasher.finalize().into();
332 Ok(Oid::from_bytes(bytes))
333}
334
335fn hash_and_copy<R: Read, W: Write>(src: &mut R, dst: &mut W) -> io::Result<Oid> {
336 let mut hasher = Sha256::new();
337 let mut buf = vec![0u8; COPY_BUFFER];
338 loop {
339 let n = src.read(&mut buf)?;
340 if n == 0 {
341 break;
342 }
343 hasher.update(&buf[..n]);
344 dst.write_all(&buf[..n])?;
345 }
346 let bytes: [u8; 32] = hasher.finalize().into();
347 Ok(Oid::from_bytes(bytes))
348}
349
// Unit tests for `smudge`, `smudge_with_fetch` and the single-extension chain.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::clean;
    use git_lfs_pointer::VERSION_LATEST;
    use tempfile::TempDir;

    /// Creates a store rooted at `lfs/` inside a fresh temp directory. The `TempDir` is
    /// returned so the caller keeps the directory alive for the duration of the test.
    fn fixture() -> (TempDir, Store) {
        let tmp = TempDir::new().unwrap();
        let store = Store::new(tmp.path().join("lfs"));
        (tmp, store)
    }

    /// Smudges `input` with an empty path and no extensions; returns the result together
    /// with whatever was written to the output.
    fn run(store: &Store, input: &[u8]) -> (Result<SmudgeOutcome, SmudgeError>, Vec<u8>) {
        let mut out = Vec::new();
        let outcome = smudge(store, &mut { input }, &mut out, "", &[]);
        (outcome, out)
    }

    /// Runs `clean` over `content` (empty path, no extensions) and returns the bytes it
    /// wrote, which the tests below feed back into `smudge` as pointer text.
    fn clean_into(store: &Store, content: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        clean(store, &mut { content }, &mut out, "", &[]).unwrap();
        out
    }

    // A pointer produced by `clean` resolves back to the original bytes.
    #[test]
    fn pointer_resolves_from_store() {
        let (_t, store) = fixture();
        let content = b"smudge a\n";
        let pointer_text = clean_into(&store, content);

        let (outcome, out) = run(&store, &pointer_text);
        let p = match outcome.unwrap() {
            SmudgeOutcome::Resolved(p) => p,
            o => panic!("expected Resolved, got {o:?}"),
        };
        assert_eq!(p.size, content.len() as u64);
        assert_eq!(out, content);
    }

    // Empty input is treated as an empty pointer: resolved, with no output.
    #[test]
    fn empty_pointer_writes_nothing() {
        let (_t, store) = fixture();
        let (outcome, out) = run(&store, b"");
        match outcome.unwrap() {
            SmudgeOutcome::Resolved(p) => assert!(p.is_empty()),
            o => panic!("expected Resolved(empty), got {o:?}"),
        }
        assert!(out.is_empty());
    }

    // clean followed by smudge is the identity for empty, text, and binary content.
    #[test]
    fn clean_smudge_round_trip_preserves_bytes() {
        let (_t, store) = fixture();
        for content in [
            &b""[..],
            &b"hello"[..],
            &b"binary \x00\x01\xff data"[..],
            &(0..256u16).map(|i| i as u8).collect::<Vec<_>>(),
        ] {
            let pointer_text = clean_into(&store, content);
            let mut out = Vec::new();
            smudge(&store, &mut { &pointer_text[..] }, &mut out, "", &[]).unwrap();
            assert_eq!(out, content, "round-trip failed for {content:?}");
        }
    }

    // Short inputs that are not pointers are copied through untouched.
    #[test]
    fn invalid_pointer_passes_through_short() {
        let (_t, store) = fixture();
        for input in [&b"wat"[..], b"not a git-lfs file", b"version "] {
            let (outcome, out) = run(&store, input);
            assert!(matches!(outcome.unwrap(), SmudgeOutcome::Passthrough));
            assert_eq!(out, input);
        }
    }

    // A 2048-byte non-pointer input is passed through in full.
    #[test]
    fn long_non_pointer_passes_through() {
        let (_t, store) = fixture();
        let content: Vec<u8> = (0..2048u32).map(|i| (i ^ (i >> 3)) as u8).collect();
        let (outcome, out) = run(&store, &content);
        assert!(matches!(outcome.unwrap(), SmudgeOutcome::Passthrough));
        assert_eq!(out, content);
    }

    // A well-formed pointer whose object is absent yields ObjectMissing and no output.
    #[test]
    fn missing_object_errors() {
        let (_t, store) = fixture();
        let unknown_oid = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown_oid}\nsize 5\n");
        let (outcome, out) = run(&store, pointer_text.as_bytes());
        match outcome.unwrap_err() {
            SmudgeError::ObjectMissing(pointer) => {
                assert_eq!(pointer.oid.to_string(), unknown_oid);
                assert_eq!(pointer.size, 5);
            }
            e => panic!("expected ObjectMissing, got {e:?}"),
        }
        assert!(out.is_empty(), "no partial output on miss");
    }

    // The object exists, but the pointer claims a different size: reported as missing.
    #[test]
    fn size_mismatch_treated_as_missing() {
        let (_t, store) = fixture();
        let pointer_text = clean_into(&store, b"abc");
        let tampered = String::from_utf8(pointer_text)
            .unwrap()
            .replace("size 3", "size 99");
        let (outcome, _) = run(&store, tampered.as_bytes());
        match outcome.unwrap_err() {
            SmudgeError::ObjectMissing(p) => assert_eq!(p.size, 99),
            e => panic!("expected ObjectMissing, got {e:?}"),
        }
    }

    // After the object is deleted from the store, the fetch callback re-inserts it and the
    // content is then streamed to the output.
    #[test]
    fn fetch_populates_store_then_streams() {
        let (_t, store) = fixture();
        let content = b"to be fetched\n";
        let pointer_text = clean_into(&store, content);
        let parsed = git_lfs_pointer::Pointer::parse(&pointer_text).unwrap();
        // `clean` stored the object; remove it again to simulate a store that lacks it.
        std::fs::remove_file(store.object_path(parsed.oid)).unwrap();
        assert!(!store.contains(parsed.oid));

        let mut out = Vec::new();
        let store_ref = &store;
        let outcome = smudge_with_fetch(
            &store,
            &mut { &pointer_text[..] },
            &mut out,
            "",
            &[],
            |p: &Pointer| {
                store_ref.insert(&mut { &content[..] }).unwrap();
                assert_eq!(p.size, content.len() as u64);
                Ok(())
            },
        );
        assert!(matches!(outcome.unwrap(), SmudgeOutcome::Resolved(_)));
        assert_eq!(out, content);
    }

    // An error from the fetch callback is wrapped in FetchFailed, with nothing written.
    #[test]
    fn fetch_failure_surfaces_as_fetch_failed() {
        let (_t, store) = fixture();
        let unknown = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown}\nsize 5\n");
        let mut out = Vec::new();
        let outcome = smudge_with_fetch(
            &store,
            &mut { pointer_text.as_bytes() },
            &mut out,
            "",
            &[],
            |_p: &Pointer| Err("server is on fire".into()),
        );
        match outcome.unwrap_err() {
            SmudgeError::FetchFailed(e) => {
                assert!(e.to_string().contains("server is on fire"));
            }
            other => panic!("expected FetchFailed, got {other:?}"),
        }
        assert!(out.is_empty());
    }

    // A fetch callback that returns Ok without inserting the object still ends in
    // ObjectMissing.
    #[test]
    fn fetch_returning_ok_but_not_inserting_still_errors() {
        let (_t, store) = fixture();
        let unknown = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown}\nsize 5\n");
        let mut out = Vec::new();
        let outcome = smudge_with_fetch(
            &store,
            &mut { pointer_text.as_bytes() },
            &mut out,
            "",
            &[],
            |_p: &Pointer| Ok(()),
        );
        assert!(matches!(
            outcome.unwrap_err(),
            SmudgeError::ObjectMissing(_)
        ));
    }

    // When the store already has the object, the fetch callback is never called.
    #[test]
    fn fetch_not_invoked_when_object_already_present() {
        let (_t, store) = fixture();
        let content = b"already here";
        let pointer_text = clean_into(&store, content);
        let mut out = Vec::new();
        let mut calls = 0;
        smudge_with_fetch(
            &store,
            &mut { &pointer_text[..] },
            &mut out,
            "",
            &[],
            |_p: &Pointer| {
                calls += 1;
                Ok(())
            },
        )
        .unwrap();
        assert_eq!(
            calls, 0,
            "fetch must not be called when store has the object"
        );
        assert_eq!(out, content);
    }

    // Cleans "abc" through an upper-casing `tr` extension and smudges it back through the
    // lower-casing counterpart. Requires a `tr` executable on PATH.
    #[test]
    fn single_extension_round_trips() {
        let (_t, store) = fixture();
        let clean_exts = vec![crate::CleanExtension {
            name: "upper".into(),
            priority: 0,
            command: "tr a-z A-Z".into(),
        }];
        let smudge_exts = vec![SmudgeExtension {
            name: "upper".into(),
            priority: 0,
            command: "tr A-Z a-z".into(),
        }];

        let mut pointer_buf = Vec::new();
        crate::clean(
            &store,
            &mut &b"abc"[..],
            &mut pointer_buf,
            "foo.txt",
            &clean_exts,
        )
        .unwrap();

        let mut out = Vec::new();
        let outcome = smudge(
            &store,
            &mut pointer_buf.as_slice(),
            &mut out,
            "foo.txt",
            &smudge_exts,
        )
        .unwrap();
        assert!(matches!(outcome, SmudgeOutcome::Resolved(_)));
        assert_eq!(out, b"abc");
    }

    // The pointer names extension "foo" and no extensions are registered, but the store is
    // empty too. `smudge` checks for the object before it looks at extensions, so the error
    // observed here is ObjectMissing rather than ExtensionNotConfigured.
    #[test]
    fn extension_not_configured_errors() {
        let (_t, store) = fixture();
        let oid_hex = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393";
        let ext_oid = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff";
        let pointer_text = format!(
            "version {VERSION_LATEST}\n\
 ext-0-foo sha256:{ext_oid}\n\
 oid sha256:{oid_hex}\n\
 size 12345\n",
        );
        let mut out = Vec::new();
        let err = smudge(&store, &mut pointer_text.as_bytes(), &mut out, "x", &[]).unwrap_err();
        assert!(matches!(err, SmudgeError::ObjectMissing(_)));
    }
}