1use std::collections::{HashMap, VecDeque};
5
6use super::{
7 ObjectType, PackObjectId, PackObjectRecord, PackStats, append_container_checksum,
8 compress_pack_payload, encode_tagged_entry, pack_container_spec, pack_index::PackIndex,
9 write_container_header,
10};
11use crate::{
12 delta::DeltaEncoder,
13 object::ContentHash,
14 store::{Result, compression::CompressionConfig},
15};
16
17const MIN_DELTA_SIZE: usize = 64;
18const MAX_DELTA_CHAIN_DEPTH: usize = 50;
20const WINDOW_SIZE: usize = 10;
22
23type GroupedPackMap = HashMap<ObjectType, Vec<PackObjectRecord>>;
24
25pub struct PackBuilder {
27 objects: Vec<PackObjectRecord>,
28 compression: CompressionConfig,
29}
30
31struct WindowEntry {
33 hash: ContentHash,
34 data: Vec<u8>,
35 index: HashMap<[u8; 4], Vec<usize>>,
36 chain_depth: usize,
37}
38
39impl PackBuilder {
40 pub fn new(compression: CompressionConfig) -> Self {
42 Self {
43 objects: Vec::new(),
44 compression,
45 }
46 }
47
48 pub fn add(&mut self, hash: ContentHash, obj_type: ObjectType, data: Vec<u8>) {
50 self.add_id(PackObjectId::Hash(hash), obj_type, data);
51 }
52
53 pub fn add_id(&mut self, id: PackObjectId, obj_type: ObjectType, data: Vec<u8>) {
54 self.objects.push(PackObjectRecord {
55 id,
56 obj_type,
57 data,
58 delta_base: None,
59 path_hint: None,
60 });
61 }
62
63 pub fn add_with_path(
69 &mut self,
70 hash: ContentHash,
71 obj_type: ObjectType,
72 data: Vec<u8>,
73 path: Option<String>,
74 ) {
75 self.add_with_path_id(PackObjectId::Hash(hash), obj_type, data, path);
76 }
77
78 pub fn add_with_path_id(
79 &mut self,
80 id: PackObjectId,
81 obj_type: ObjectType,
82 data: Vec<u8>,
83 path: Option<String>,
84 ) {
85 self.objects.push(PackObjectRecord {
86 id,
87 obj_type,
88 data,
89 delta_base: None,
90 path_hint: path,
91 });
92 }
93
94 pub fn build(self) -> Result<(Vec<u8>, Vec<u8>, PackStats)> {
98 let mut pack_data = Vec::new();
99 let mut index = PackIndex::new();
100
101 write_container_header(
102 &mut pack_data,
103 pack_container_spec(),
104 self.objects.len() as u64,
105 );
106
107 let mut total_uncompressed = 0u64;
108 let mut total_compressed = 0u64;
109 let mut delta_count = 0u64;
110
111 let object_count = self.objects.len() as u64;
112 let grouped = Self::group_by_type(self.objects);
113
114 for (obj_type, mut objects) in grouped {
115 if objects.len() < 2
116 || obj_type == ObjectType::State
117 || self.compression.max_delta_size == 0
118 {
119 for record in objects {
122 let offset = pack_data.len() as u64;
123 index.add(record.id, offset);
124
125 total_uncompressed += record.data.len() as u64;
126 let compressed = compress_pack_payload(&record.data, &self.compression)?;
127 total_compressed += compressed.len() as u64;
128
129 Self::write_entry(&mut pack_data, &record, obj_type, &compressed)?;
130 }
131 } else {
132 Self::sort_for_delta_window(&mut objects);
133 Self::encode_with_sliding_window(
134 &mut pack_data,
135 &mut index,
136 &mut total_uncompressed,
137 &mut total_compressed,
138 &mut delta_count,
139 obj_type,
140 objects,
141 &self.compression,
142 )?;
143 }
144 }
145
146 index.sort();
147
148 append_container_checksum(&mut pack_data);
149
150 let stats = PackStats {
151 object_count,
152 total_uncompressed,
153 total_compressed,
154 delta_count,
155 compression_ratio: total_compressed as f64 / total_uncompressed as f64,
156 };
157
158 Ok((pack_data, index.to_bytes(), stats))
159 }
160
161 fn sort_for_delta_window(objects: &mut [PackObjectRecord]) {
168 objects.sort_by(|a, b| {
169 let key_a = Self::sort_key(&a.path_hint);
170 let key_b = Self::sort_key(&b.path_hint);
171 key_a.cmp(&key_b).then(b.data.len().cmp(&a.data.len()))
172 });
173 }
174
175 fn sort_key(path: &Option<String>) -> (String, String) {
178 match path {
179 Some(p) => {
180 let filename = p.rsplit('/').next().unwrap_or(p);
181 if let Some(dot_pos) = filename.rfind('.') {
182 let ext = filename[dot_pos + 1..].to_string();
183 let stem = filename[..dot_pos].to_string();
184 (ext, stem)
185 } else {
186 (String::new(), filename.to_string())
187 }
188 }
189 None => ("\u{FFFF}".to_string(), String::new()),
190 }
191 }
192
193 #[allow(clippy::too_many_arguments)]
199 fn encode_with_sliding_window(
200 pack_data: &mut Vec<u8>,
201 index: &mut PackIndex,
202 total_uncompressed: &mut u64,
203 total_compressed: &mut u64,
204 delta_count: &mut u64,
205 obj_type: ObjectType,
206 objects: Vec<PackObjectRecord>,
207 compression: &CompressionConfig,
208 ) -> Result<()> {
209 let mut window: VecDeque<WindowEntry> = VecDeque::with_capacity(WINDOW_SIZE);
210
211 for record in objects {
212 let hash = match record.id {
213 PackObjectId::Hash(hash) => hash,
214 PackObjectId::ChangeId(_) => {
215 let offset = pack_data.len() as u64;
216 index.add(record.id, offset);
217 *total_uncompressed += record.data.len() as u64;
218 let compressed = compress_pack_payload(&record.data, compression)?;
219 *total_compressed += compressed.len() as u64;
220 Self::write_entry(pack_data, &record, obj_type, &compressed)?;
221 continue;
222 }
223 };
224 let data = record.data;
225 let offset = pack_data.len() as u64;
226 index.add(PackObjectId::Hash(hash), offset);
227 *total_uncompressed += data.len() as u64;
228
229 let mut best_base_idx: Option<usize> = None;
231 let mut best_delta_estimate = usize::MAX;
232
233 if data.len() >= MIN_DELTA_SIZE {
234 for (i, entry) in window.iter().enumerate() {
235 if entry.chain_depth >= MAX_DELTA_CHAIN_DEPTH {
237 continue;
238 }
239 if entry.data.len() < MIN_DELTA_SIZE {
241 continue;
242 }
243
244 let estimate = DeltaEncoder::estimate_delta_size_with_index(
245 &entry.index,
246 &entry.data,
247 &data,
248 );
249
250 if estimate < best_delta_estimate {
251 best_delta_estimate = estimate;
252 best_base_idx = Some(i);
253 }
254 }
255 }
256
257 let (final_data, entry_type, base_hash, chain_depth) =
259 if let Some(base_idx) = best_base_idx {
260 let base_entry = &window[base_idx];
261 let delta =
262 DeltaEncoder::encode_with_index(&base_entry.index, &base_entry.data, &data);
263 let delta_compressed = compress_pack_payload(&delta, compression)?;
264
265 if delta_compressed.len() < data.len() {
266 *delta_count += 1;
267 let bh = base_entry.hash;
268 let depth = base_entry.chain_depth + 1;
269 (delta_compressed, ObjectType::Delta, Some(bh), depth)
270 } else {
271 let compressed = compress_pack_payload(&data, compression)?;
272 (compressed, obj_type, None, 0)
273 }
274 } else {
275 let compressed = compress_pack_payload(&data, compression)?;
276 (compressed, obj_type, None, 0)
277 };
278
279 *total_compressed += final_data.len() as u64;
280
281 let record = PackObjectRecord {
282 id: PackObjectId::Hash(hash),
283 obj_type,
284 data: data.clone(),
285 delta_base: base_hash.map(PackObjectId::Hash),
286 path_hint: None,
287 };
288 Self::write_entry(pack_data, &record, entry_type, &final_data)?;
289
290 let entry_index = DeltaEncoder::build_index(&data);
292 if window.len() >= WINDOW_SIZE {
293 window.pop_front();
294 }
295 window.push_back(WindowEntry {
296 hash,
297 data,
298 index: entry_index,
299 chain_depth,
300 });
301 }
302
303 Ok(())
304 }
305
306 fn group_by_type(objects: Vec<PackObjectRecord>) -> GroupedPackMap {
307 let mut groups: GroupedPackMap = HashMap::new();
308
309 for record in objects {
310 groups.entry(record.obj_type).or_default().push(record);
311 }
312
313 groups
314 }
315
316 fn write_entry(
327 pack: &mut Vec<u8>,
328 record: &PackObjectRecord,
329 obj_type: ObjectType,
330 compressed: &[u8],
331 ) -> Result<()> {
332 encode_tagged_entry(pack, record, obj_type, compressed)
333 }
334}