Skip to main content

git_lfs_filter/
smudge.rs

1//! The smudge filter: pointer-on-stdin → content-on-stdout.
2
3use std::io::{self, Read, Write};
4
5use git_lfs_pointer::Pointer;
6use git_lfs_store::Store;
7
8use crate::FetchError;
9use crate::detect_pointer;
10
/// Result of running the [`smudge`] filter on a piece of input.
///
/// Both variants are success outcomes: the filter returns them inside `Ok`,
/// while failures are reported separately through [`SmudgeError`].
#[derive(Debug)]
pub enum SmudgeOutcome {
    /// Input wasn't a pointer (or was malformed) and was emitted to the
    /// output stream verbatim. This matches upstream's "smudge with invalid
    /// pointer" behavior — git wraps everything through the filter, and
    /// non-LFS content has to come out unchanged.
    Passthrough,
    /// Input was a pointer; its content was streamed from the store to the
    /// output (or it was the empty pointer, which writes nothing).
    ///
    /// Carries the parsed [`Pointer`] so the caller can inspect what was
    /// resolved.
    Resolved(Pointer),
}
23
/// Failure modes of [`smudge`] and [`smudge_with_fetch`].
#[derive(Debug, thiserror::Error)]
pub enum SmudgeError {
    /// An I/O error raised while reading the input, opening or reading the
    /// stored object, or writing the output. Displayed transparently, i.e.
    /// with the underlying error's own message.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// The pointer references an object that isn't in the local store.
    /// [`smudge_with_fetch`] handles this by invoking the caller's fetch
    /// closure; bare [`smudge`] surfaces it for the caller to react to.
    ///
    /// Also returned when the store's copy does not have the size recorded
    /// in the pointer. The offending [`Pointer`] is carried as the payload.
    #[error("object {} (size {}) is not present in the local store", .0.oid, .0.size)]
    ObjectMissing(Pointer),
    /// The fetch closure passed to [`smudge_with_fetch`] failed to produce
    /// the missing object.
    #[error("fetch failed: {0}")]
    FetchFailed(FetchError),
    /// Pointer extensions aren't supported yet. Returned for any non-empty
    /// pointer that lists at least one extension.
    #[error("pointer extensions are not yet supported")]
    ExtensionsUnsupported,
}
41
42/// Apply the smudge filter to `input`, writing the working-tree content
43/// (or pass-through bytes) to `output`.
44///
45/// 1. If `input` parses as a pointer, look the OID up in the store and
46///    stream the bytes out. Empty pointer → write nothing.
47/// 2. If `input` doesn't parse as a pointer, pass it through verbatim
48///    (head buffer + remaining stream).
49pub fn smudge<R: Read, W: Write>(
50    store: &Store,
51    input: &mut R,
52    output: &mut W,
53) -> Result<SmudgeOutcome, SmudgeError> {
54    let (head, maybe_pointer) = detect_pointer(input)?;
55
56    let Some(pointer) = maybe_pointer else {
57        // Not a pointer: pass bytes through unchanged.
58        output.write_all(&head)?;
59        io::copy(input, output)?;
60        return Ok(SmudgeOutcome::Passthrough);
61    };
62
63    if pointer.is_empty() {
64        return Ok(SmudgeOutcome::Resolved(pointer));
65    }
66
67    if !pointer.extensions.is_empty() {
68        return Err(SmudgeError::ExtensionsUnsupported);
69    }
70
71    // Treat any size mismatch as "missing": same OID + different size means
72    // a corrupt or partial local copy, and the recovery path is the same
73    // as a real miss — re-download.
74    if !store.contains_with_size(pointer.oid, pointer.size) {
75        return Err(SmudgeError::ObjectMissing(pointer));
76    }
77
78    let mut file = store.open(pointer.oid)?;
79    io::copy(&mut file, output)?;
80    Ok(SmudgeOutcome::Resolved(pointer))
81}
82
83/// Like [`smudge`], but on a missing-object miss invokes `fetch` to populate
84/// the store, then streams the freshly-fetched bytes to `output`.
85///
86/// `fetch` receives the [`Pointer`] of the missing object — the caller is
87/// expected to download exactly that OID into the local store. After a
88/// successful return, this function re-checks the store and streams the
89/// content; if the store *still* doesn't have the object, an
90/// [`SmudgeError::ObjectMissing`] is surfaced (i.e. the fetch lied).
91///
92/// All other [`SmudgeError`] variants from the inner `smudge` call are
93/// propagated unchanged.
94pub fn smudge_with_fetch<R, W, F>(
95    store: &Store,
96    input: &mut R,
97    output: &mut W,
98    mut fetch: F,
99) -> Result<SmudgeOutcome, SmudgeError>
100where
101    R: Read,
102    W: Write,
103    F: FnMut(&Pointer) -> Result<(), FetchError>,
104{
105    match smudge(store, input, output) {
106        Err(SmudgeError::ObjectMissing(pointer)) => {
107            fetch(&pointer).map_err(SmudgeError::FetchFailed)?;
108            if !store.contains_with_size(pointer.oid, pointer.size) {
109                return Err(SmudgeError::ObjectMissing(pointer));
110            }
111            let mut file = store.open(pointer.oid)?;
112            io::copy(&mut file, output)?;
113            Ok(SmudgeOutcome::Resolved(pointer))
114        }
115        other => other,
116    }
117}
118
/// Unit tests for [`smudge`] and [`smudge_with_fetch`], run against a
/// throwaway on-disk [`Store`].
#[cfg(test)]
mod tests {
    use super::*;
    use crate::clean;
    use git_lfs_pointer::VERSION_LATEST;
    use tempfile::TempDir;

    /// Create an empty store rooted in a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the store so the caller keeps it
    /// alive for the duration of the test.
    fn fixture() -> (TempDir, Store) {
        let tmp = TempDir::new().unwrap();
        let store = Store::new(tmp.path().join("lfs"));
        (tmp, store)
    }

    /// Run [`smudge`] over `input` and return its result together with
    /// everything it wrote to the output.
    fn run(store: &Store, input: &[u8]) -> (Result<SmudgeOutcome, SmudgeError>, Vec<u8>) {
        let mut out = Vec::new();
        let outcome = smudge(store, &mut { input }, &mut out);
        (outcome, out)
    }

    /// Insert content via the clean filter and return the resulting pointer text.
    fn clean_into(store: &Store, content: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        clean(store, &mut { content }, &mut out, "", &[]).unwrap();
        out
    }

    // ---------- Resolved ----------

    #[test]
    fn pointer_resolves_from_store() {
        let (_t, store) = fixture();
        let content = b"smudge a\n";
        let pointer_text = clean_into(&store, content);

        let (outcome, out) = run(&store, &pointer_text);
        let p = match outcome.unwrap() {
            SmudgeOutcome::Resolved(p) => p,
            o => panic!("expected Resolved, got {o:?}"),
        };
        assert_eq!(p.size, content.len() as u64);
        assert_eq!(out, content);
    }

    #[test]
    fn empty_pointer_writes_nothing() {
        let (_t, store) = fixture();
        let (outcome, out) = run(&store, b"");
        match outcome.unwrap() {
            SmudgeOutcome::Resolved(p) => assert!(p.is_empty()),
            o => panic!("expected Resolved(empty), got {o:?}"),
        }
        assert!(out.is_empty());
    }

    #[test]
    fn clean_smudge_round_trip_preserves_bytes() {
        let (_t, store) = fixture();
        for content in [
            &b""[..],
            &b"hello"[..],
            &b"binary \x00\x01\xff data"[..],
            &(0..256u16).map(|i| i as u8).collect::<Vec<_>>(),
        ] {
            let pointer_text = clean_into(&store, content);
            let mut out = Vec::new();
            smudge(&store, &mut { &pointer_text[..] }, &mut out).unwrap();
            assert_eq!(out, content, "round-trip failed for {content:?}");
        }
    }

    // ---------- Passthrough ----------

    #[test]
    fn invalid_pointer_passes_through_short() {
        let (_t, store) = fixture();
        for input in [&b"wat"[..], b"not a git-lfs file", b"version "] {
            let (outcome, out) = run(&store, input);
            assert!(matches!(outcome.unwrap(), SmudgeOutcome::Passthrough));
            assert_eq!(out, input);
        }
    }

    #[test]
    fn long_non_pointer_passes_through() {
        // > MAX_POINTER_SIZE bytes — exercises the head buffer + io::copy path.
        let (_t, store) = fixture();
        let content: Vec<u8> = (0..2048u32).map(|i| (i ^ (i >> 3)) as u8).collect();
        let (outcome, out) = run(&store, &content);
        assert!(matches!(outcome.unwrap(), SmudgeOutcome::Passthrough));
        assert_eq!(out, content);
    }

    // ---------- Errors ----------

    #[test]
    fn missing_object_errors() {
        let (_t, store) = fixture();
        let unknown_oid = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown_oid}\nsize 5\n");
        let (outcome, out) = run(&store, pointer_text.as_bytes());
        match outcome.unwrap_err() {
            SmudgeError::ObjectMissing(pointer) => {
                assert_eq!(pointer.oid.to_string(), unknown_oid);
                assert_eq!(pointer.size, 5);
            }
            e => panic!("expected ObjectMissing, got {e:?}"),
        }
        assert!(out.is_empty(), "no partial output on miss");
    }

    #[test]
    fn size_mismatch_treated_as_missing() {
        let (_t, store) = fixture();
        let pointer_text = clean_into(&store, b"abc"); // size = 3
        // Replace "size 3" with "size 99" — parses fine, but won't match the
        // 3-byte object on disk.
        let tampered = String::from_utf8(pointer_text)
            .unwrap()
            .replace("size 3", "size 99");
        let (outcome, out) = run(&store, tampered.as_bytes());
        match outcome.unwrap_err() {
            SmudgeError::ObjectMissing(p) => assert_eq!(p.size, 99),
            e => panic!("expected ObjectMissing, got {e:?}"),
        }
        // The mismatched on-disk copy must not leak into the output.
        assert!(out.is_empty(), "no partial output on size mismatch");
    }

    // ---------- smudge_with_fetch ----------

    #[test]
    fn fetch_populates_store_then_streams() {
        let (_t, store) = fixture();
        let content = b"to be fetched\n";
        // The clean filter is the easiest way to obtain valid pointer text,
        // but it also inserts the object into the store as a side effect.
        let pointer_text = clean_into(&store, content);
        // Wipe the just-inserted object to simulate a true miss; the fetch
        // closure below is then the one that actually populates the store.
        let parsed = Pointer::parse(&pointer_text).unwrap();
        std::fs::remove_file(store.object_path(parsed.oid)).unwrap();
        assert!(!store.contains(parsed.oid));

        let mut out = Vec::new();
        let outcome = smudge_with_fetch(
            &store,
            &mut { &pointer_text[..] },
            &mut out,
            |p: &Pointer| {
                // "Download" by inserting the bytes synchronously.
                store.insert(&mut { &content[..] }).unwrap();
                assert_eq!(p.size, content.len() as u64);
                Ok(())
            },
        );
        assert!(matches!(outcome.unwrap(), SmudgeOutcome::Resolved(_)));
        assert_eq!(out, content);
    }

    #[test]
    fn fetch_failure_surfaces_as_fetch_failed() {
        let (_t, store) = fixture();
        let unknown = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown}\nsize 5\n");
        let mut out = Vec::new();
        let outcome = smudge_with_fetch(
            &store,
            &mut { pointer_text.as_bytes() },
            &mut out,
            |_p: &Pointer| Err("server is on fire".into()),
        );
        match outcome.unwrap_err() {
            SmudgeError::FetchFailed(e) => {
                assert!(e.to_string().contains("server is on fire"));
            }
            other => panic!("expected FetchFailed, got {other:?}"),
        }
        assert!(out.is_empty());
    }

    #[test]
    fn fetch_returning_ok_but_not_inserting_still_errors() {
        // Closure lies — claims success but didn't populate the store.
        let (_t, store) = fixture();
        let unknown = "0000000000000000000000000000000000000000000000000000000000000001";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{unknown}\nsize 5\n");
        let mut out = Vec::new();
        let outcome = smudge_with_fetch(
            &store,
            &mut { pointer_text.as_bytes() },
            &mut out,
            |_p: &Pointer| Ok(()),
        );
        assert!(matches!(
            outcome.unwrap_err(),
            SmudgeError::ObjectMissing(_)
        ));
        assert!(out.is_empty(), "no output when the fetch produced nothing");
    }

    #[test]
    fn fetch_not_invoked_when_object_already_present() {
        let (_t, store) = fixture();
        let content = b"already here";
        let pointer_text = clean_into(&store, content);
        let mut out = Vec::new();
        let mut calls = 0;
        smudge_with_fetch(
            &store,
            &mut { &pointer_text[..] },
            &mut out,
            |_p: &Pointer| {
                calls += 1;
                Ok(())
            },
        )
        .unwrap();
        assert_eq!(
            calls, 0,
            "fetch must not be called when store has the object"
        );
        assert_eq!(out, content);
    }

    #[test]
    fn extensions_are_not_yet_supported() {
        let (_t, store) = fixture();
        let oid_hex = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393";
        let ext_oid = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff";
        let pointer_text = format!(
            "version {VERSION_LATEST}\n\
             ext-0-foo sha256:{ext_oid}\n\
             oid sha256:{oid_hex}\n\
             size 12345\n",
        );
        let (outcome, out) = run(&store, pointer_text.as_bytes());
        assert!(matches!(
            outcome.unwrap_err(),
            SmudgeError::ExtensionsUnsupported
        ));
        assert!(out.is_empty(), "no output for an unsupported pointer");
    }
}