1use std::io::{Read, Write};
4
5use git_lfs_pointer::Pointer;
6use git_lfs_store::{Store, StoreError};
7
8use crate::detect_pointer;
9
10#[derive(Debug)]
12pub enum CleanOutcome {
13 Passthrough(Pointer),
17 Stored(Pointer),
21}
22
23impl CleanOutcome {
24 pub fn pointer(&self) -> &Pointer {
27 match self {
28 Self::Passthrough(p) | Self::Stored(p) => p,
29 }
30 }
31
32 pub fn was_passthrough(&self) -> bool {
34 matches!(self, Self::Passthrough(_))
35 }
36}
37
38pub fn clean<R: Read, W: Write>(
49 store: &Store,
50 input: &mut R,
51 output: &mut W,
52) -> Result<CleanOutcome, StoreError> {
53 let (head, maybe_pointer) = detect_pointer(input)?;
54
55 if let Some(pointer) = maybe_pointer {
56 output.write_all(&head)?;
57 return Ok(CleanOutcome::Passthrough(pointer));
58 }
59
60 let mut combined = head.as_slice().chain(input);
61 let (oid, size) = store.insert(&mut combined)?;
62 let pointer = Pointer::new(oid, size);
63 output.write_all(pointer.encode().as_bytes())?;
64 Ok(CleanOutcome::Stored(pointer))
65}
66
#[cfg(test)]
mod tests {
    use super::*;
    use git_lfs_pointer::{Oid, VERSION_LATEST};
    use tempfile::TempDir;

    /// Creates a store rooted at `lfs` inside a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the store so the caller keeps it
    /// alive for the duration of the test.
    fn fixture() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let root = dir.path().join("lfs");
        (dir, Store::new(root))
    }

    /// Feeds `input` through [`clean`] and returns the outcome together with
    /// every byte that was written to the output.
    fn run(store: &Store, input: &[u8]) -> (CleanOutcome, Vec<u8>) {
        let mut reader = input;
        let mut written = Vec::new();
        let outcome = clean(store, &mut reader, &mut written).unwrap();
        (outcome, written)
    }

    /// Unwraps a `Stored` outcome, panicking on anything else.
    #[track_caller]
    fn expect_stored(outcome: CleanOutcome) -> Pointer {
        match outcome {
            CleanOutcome::Stored(p) => p,
            o => panic!("expected Stored, got {o:?}"),
        }
    }

    /// Borrows the pointer of a `Passthrough` outcome, panicking on anything else.
    #[track_caller]
    fn expect_passthrough(outcome: &CleanOutcome) -> &Pointer {
        match outcome {
            CleanOutcome::Passthrough(p) => p,
            o => panic!("expected Passthrough, got {o:?}"),
        }
    }

    /// Plain content is stored and the output is the new pointer's encoding.
    #[test]
    fn small_content_is_hashed_and_stored() {
        let (_tmp, store) = fixture();
        let (outcome, written) = run(&store, b"hello world!");
        let pointer = expect_stored(outcome);
        assert_eq!(pointer.size, 12);
        assert!(store.contains(pointer.oid));
        assert_eq!(written, pointer.encode().as_bytes());
    }

    /// The oid for `abc` equals the well-known SHA-256 digest of that string.
    #[test]
    fn known_sha256_for_abc() {
        let (_tmp, store) = fixture();
        let (outcome, _) = run(&store, b"abc");
        let expected = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
            .parse::<Oid>()
            .unwrap();
        assert_eq!(outcome.pointer().oid, expected);
    }

    /// Pointer-looking text followed by extra prose is stored as content.
    #[test]
    fn pseudo_pointer_with_extra_text_is_hashed() {
        let input = b"version https://git-lfs.github.com/spec/v1\n\
            oid sha256:7cd8be1d2cd0dd22cd9d229bb6b5785009a05e8b39d405615d882caac56562b5\n\
            size 1024\n\
            \n\
            This is my test pointer.\n";
        let (_tmp, store) = fixture();
        let (outcome, written) = run(&store, input);
        let pointer = expect_stored(outcome);
        assert_eq!(pointer.size, input.len() as u64);
        assert!(store.contains(pointer.oid));
        assert_eq!(written, pointer.encode().as_bytes());
    }

    /// A pointer header followed by 2000 filler bytes is stored as content.
    #[test]
    fn oversized_pointer_shaped_input_is_hashed() {
        let mut input = b"version https://git-lfs.github.com/spec/v1\n\
            oid sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n\
            size 5\n"
            .to_vec();
        let padded_len = input.len() + 2000;
        input.resize(padded_len, b'x');

        let (_tmp, store) = fixture();
        let (outcome, _) = run(&store, &input);
        let pointer = expect_stored(outcome);
        assert_eq!(pointer.size, input.len() as u64);
        assert!(store.contains(pointer.oid));
    }

    /// One MiB of generated bytes is stored with the correct size.
    #[test]
    fn streaming_megabyte_input_works() {
        let (_tmp, store) = fixture();
        let payload: Vec<u8> = (0..1_048_576u32)
            .map(|i| (i ^ (i >> 5)) as u8)
            .collect();
        let (outcome, _) = run(&store, &payload);
        let pointer = outcome.pointer();
        assert_eq!(pointer.size, payload.len() as u64);
        assert!(store.contains(pointer.oid));
    }

    /// A canonical pointer is echoed byte-for-byte and no `objects` directory
    /// appears under the store root.
    #[test]
    fn canonical_pointer_passes_through_verbatim() {
        let (_tmp, store) = fixture();
        let oid_hex = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393";
        let pointer_text = format!("version {VERSION_LATEST}\noid sha256:{oid_hex}\nsize 12345\n");
        let (outcome, written) = run(&store, pointer_text.as_bytes());
        assert!(expect_passthrough(&outcome).canonical);
        assert_eq!(written, pointer_text.as_bytes(), "output must be input verbatim");
        assert!(!store.root().join("objects").exists());
    }

    /// A CRLF-terminated pointer is flagged non-canonical yet still echoed
    /// byte-for-byte.
    #[test]
    fn non_canonical_pointer_passes_through_verbatim() {
        let (_tmp, store) = fixture();
        let oid_hex = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393";
        let crlf = format!("version {VERSION_LATEST}\r\noid sha256:{oid_hex}\r\nsize 12345\r\n");
        let (outcome, written) = run(&store, crlf.as_bytes());
        assert!(!expect_passthrough(&outcome).canonical);
        assert_eq!(written, crlf.as_bytes());
    }

    /// Empty input yields the empty pointer as a pass-through and writes nothing.
    #[test]
    fn empty_input_is_passthrough_empty_pointer() {
        let (_tmp, store) = fixture();
        let (outcome, written) = run(&store, b"");
        assert_eq!(expect_passthrough(&outcome), &Pointer::empty());
        assert!(written.is_empty(), "empty pointer encodes to empty bytes");
    }

    /// Cleaning the output of a previous clean passes it through unchanged.
    #[test]
    fn passthrough_is_idempotent() {
        let (_tmp, store) = fixture();
        let (_, first_pass) = run(&store, b"some content here");
        let (second_outcome, second_pass) = run(&store, &first_pass);
        assert!(matches!(second_outcome, CleanOutcome::Passthrough(_)));
        assert_eq!(first_pass, second_pass);
    }
}