objects/delta/
delta_encoder.rs1use std::collections::HashMap;
16
17const MIN_MATCH_LENGTH_LARGE: usize = 16;
19const MIN_MATCH_LENGTH_SMALL: usize = 8;
21
22#[derive(Debug)]
24pub struct DeltaEncoder;
25
26impl DeltaEncoder {
27 pub fn new() -> Self {
29 Self
30 }
31
32 pub fn encode(base: &[u8], target: &[u8]) -> Vec<u8> {
34 if base.is_empty() {
35 return Self::encode_insert(target);
36 }
37
38 let index = Self::build_index(base);
39 Self::encode_with_index(&index, base, target)
40 }
41
42 pub fn encode_with_index(
44 index: &HashMap<[u8; 4], Vec<usize>>,
45 base: &[u8],
46 target: &[u8],
47 ) -> Vec<u8> {
48 if base.is_empty() {
49 return Self::encode_insert(target);
50 }
51
52 let min_match = Self::min_match_for(target.len());
53 let mut delta = Vec::new();
54 let mut pos = 0;
55
56 while pos < target.len() {
57 if let Some((offset, length)) =
58 Self::find_best_match(index, base, target, pos, min_match)
59 {
60 Self::emit_copy(&mut delta, offset, length);
61 pos += length;
62 } else {
63 let start = pos;
64 while pos < target.len() && pos - start < 127 {
65 if Self::find_best_match(index, base, target, pos, min_match).is_some() {
66 break;
67 }
68 pos += 1;
69 }
70
71 let len = pos - start;
72 delta.push(len as u8 - 1);
73 delta.extend_from_slice(&target[start..pos]);
74 }
75 }
76
77 delta
78 }
79
80 pub fn estimate_delta_size(base: &[u8], target: &[u8]) -> usize {
82 if base.is_empty() {
83 return target.len() + target.len().div_ceil(128);
84 }
85
86 let index = Self::build_index(base);
87 Self::estimate_delta_size_with_index(&index, base, target)
88 }
89
90 pub fn estimate_delta_size_with_index(
92 index: &HashMap<[u8; 4], Vec<usize>>,
93 base: &[u8],
94 target: &[u8],
95 ) -> usize {
96 if base.is_empty() {
97 return target.len() + target.len().div_ceil(128);
98 }
99
100 let min_match = Self::min_match_for(target.len());
101 let mut size = 0usize;
102 let mut pos = 0;
103
104 while pos < target.len() {
105 if let Some((offset, length)) =
106 Self::find_best_match(index, base, target, pos, min_match)
107 {
108 size += Self::copy_instruction_size(offset, length);
109 pos += length;
110 } else {
111 let start = pos;
112 while pos < target.len() && pos - start < 127 {
113 if Self::find_best_match(index, base, target, pos, min_match).is_some() {
114 break;
115 }
116 pos += 1;
117 }
118 size += 1 + (pos - start);
119 }
120 }
121
122 size
123 }
124
125 pub fn build_index(base: &[u8]) -> HashMap<[u8; 4], Vec<usize>> {
127 let mut index: HashMap<[u8; 4], Vec<usize>> = HashMap::new();
128
129 for i in 0..base.len().saturating_sub(4) {
130 let key = [base[i], base[i + 1], base[i + 2], base[i + 3]];
131 index.entry(key).or_default().push(i);
132 }
133
134 index
135 }
136
137 fn emit_copy(delta: &mut Vec<u8>, offset: usize, length: usize) {
145 let mut cmd: u8 = 0x80;
146 let offset = offset as u32;
147 let length = length as u32;
148
149 cmd |= 0x01; if offset & 0xFF00 != 0 {
154 cmd |= 0x02;
155 }
156 if offset & 0xFF_0000 != 0 {
157 cmd |= 0x04;
158 }
159 if offset & 0xFF00_0000 != 0 {
160 cmd |= 0x08;
161 }
162
163 if length != 0x10000 {
166 if length & 0xFF != 0 {
167 cmd |= 0x10;
168 }
169 if length & 0xFF00 != 0 {
170 cmd |= 0x20;
171 }
172 if length & 0xFF_0000 != 0 {
173 cmd |= 0x40;
174 }
175 }
176
177 delta.push(cmd);
178
179 delta.push(offset as u8); if offset & 0xFF00 != 0 {
182 delta.push((offset >> 8) as u8);
183 }
184 if offset & 0xFF_0000 != 0 {
185 delta.push((offset >> 16) as u8);
186 }
187 if offset & 0xFF00_0000 != 0 {
188 delta.push((offset >> 24) as u8);
189 }
190
191 if length != 0x10000 {
193 if length & 0xFF != 0 {
194 delta.push(length as u8);
195 }
196 if length & 0xFF00 != 0 {
197 delta.push((length >> 8) as u8);
198 }
199 if length & 0xFF_0000 != 0 {
200 delta.push((length >> 16) as u8);
201 }
202 }
203 }
204
205 fn copy_instruction_size(offset: usize, length: usize) -> usize {
207 let offset = offset as u32;
208 let length = length as u32;
209 let mut n = 1 + 1; if offset & 0xFF00 != 0 {
213 n += 1;
214 }
215 if offset & 0xFF_0000 != 0 {
216 n += 1;
217 }
218 if offset & 0xFF00_0000 != 0 {
219 n += 1;
220 }
221
222 if length != 0x10000 {
224 if length & 0xFF != 0 {
225 n += 1;
226 }
227 if length & 0xFF00 != 0 {
228 n += 1;
229 }
230 if length & 0xFF_0000 != 0 {
231 n += 1;
232 }
233 }
234
235 n
236 }
237
238 fn min_match_for(target_len: usize) -> usize {
240 if target_len < 1024 {
241 MIN_MATCH_LENGTH_SMALL
242 } else {
243 MIN_MATCH_LENGTH_LARGE
244 }
245 }
246
247 fn encode_insert(data: &[u8]) -> Vec<u8> {
248 let mut delta = Vec::new();
249 for chunk in data.chunks(128) {
250 delta.push((chunk.len() - 1) as u8);
251 delta.extend_from_slice(chunk);
252 }
253 delta
254 }
255
256 fn find_best_match(
257 index: &HashMap<[u8; 4], Vec<usize>>,
258 base: &[u8],
259 target: &[u8],
260 pos: usize,
261 min_match: usize,
262 ) -> Option<(usize, usize)> {
263 if pos + 4 > target.len() {
264 return None;
265 }
266
267 let key = [
268 target[pos],
269 target[pos + 1],
270 target[pos + 2],
271 target[pos + 3],
272 ];
273 let offsets = index.get(&key)?;
274
275 let mut best_offset = 0;
276 let mut best_length = 0;
277
278 for &offset in offsets {
279 let length = Self::match_length(base, offset, target, pos);
280 if length > best_length {
281 best_length = length;
282 best_offset = offset;
283 }
284 }
285
286 if best_length >= min_match {
287 Some((best_offset, best_length))
288 } else {
289 None
290 }
291 }
292
293 fn match_length(base: &[u8], base_pos: usize, target: &[u8], target_pos: usize) -> usize {
294 let max_len = (base.len() - base_pos).min(target.len() - target_pos);
295 let mut len = 0;
296 while len < max_len && base[base_pos + len] == target[target_pos + len] {
297 len += 1;
298 }
299 len
300 }
301}
302
303impl Default for DeltaEncoder {
304 fn default() -> Self {
305 Self::new()
306 }
307}