1use anyhow::{Context, Result};
14use hexz_core::format::header::Header;
15use hexz_core::format::index::{IndexPage, MasterIndex};
16use hexz_ops::inspect::inspect_archive;
17use indicatif::HumanBytes;
18use std::collections::HashMap;
19use std::fs::File;
20use std::io::{Read, Seek, SeekFrom};
21use std::path::Path;
22
23use crate::ui::color::palette;
24
/// Aggregate per-archive block statistics produced by `scan`.
struct BlockSummary {
    /// Unique data-block hashes mapped to their logical length in bytes.
    /// Sparse blocks and all-zero hashes are excluded (see `scan`).
    data: HashMap<[u8; 32], u64>,
    /// Sum of `logical_len` over all parent-reference blocks.
    parent_ref_bytes: u64,
    /// Number of parent-reference blocks (blocks whose payload lives in a
    /// parent archive — a "thin" archive has at least one).
    parent_ref_blocks: usize,
}
37
38fn scan(path: &Path) -> Result<BlockSummary> {
39 let mut f = File::open(path)?;
40 let header = Header::read_from(&mut f)?;
41 let master = MasterIndex::read_from(&mut f, header.index_offset)?;
42
43 let mut data: HashMap<[u8; 32], u64> = HashMap::new();
44 let mut parent_ref_bytes = 0u64;
45 let mut parent_ref_blocks = 0usize;
46
47 for page_meta in &master.main_pages {
48 let _ = f.seek(SeekFrom::Start(page_meta.offset))?;
49 let mut buf = vec![0u8; page_meta.length as usize];
50 f.read_exact(&mut buf)?;
51 let page: IndexPage = bincode::deserialize(&buf)?;
52 for block in page.blocks {
53 if block.is_parent_ref() {
54 parent_ref_blocks += 1;
55 parent_ref_bytes += block.logical_len as u64;
56 } else if !block.is_sparse() && block.hash != [0u8; 32] {
57 let _ = data.entry(block.hash).or_insert(block.logical_len as u64);
60 }
61 }
62 }
63
64 Ok(BlockSummary {
65 data,
66 parent_ref_bytes,
67 parent_ref_blocks,
68 })
69}
70
/// XOR-delta checkpoint information parsed from archive B's JSON metadata
/// by `parse_checkpoint_delta`.
struct CheckpointDelta {
    /// Number of tensors whose `storage` field is `"xor_delta"`.
    xor_delta_count: usize,
    /// Sum of the `base_length` values of those XOR-delta tensors.
    xor_base_bytes: u64,
    /// True when the delta's parent archive is archive A of the comparison.
    parent_is_a: bool,
    /// File name of the parent (base) archive, taken from the first entry of
    /// B's parent paths; empty if none is recorded.
    parent_name: String,
}
82
83fn parse_checkpoint_delta(
88 meta_b: &str,
89 name_a: &str,
90 parent_paths_b: &[String],
91) -> Option<CheckpointDelta> {
92 let obj: serde_json::Value = serde_json::from_str(meta_b).ok()?;
93 let _ = obj.get("hexz_checkpoint")?; let tensors = obj.get("tensors")?.as_object()?;
95
96 let mut xor_delta_count = 0usize;
97 let mut xor_base_bytes = 0u64;
98
99 for (_, tensor) in tensors {
100 let storage = tensor
101 .get("storage")
102 .and_then(|v| v.as_str())
103 .unwrap_or("raw");
104 let base_length = tensor
105 .get("base_length")
106 .and_then(serde_json::Value::as_u64)
107 .unwrap_or(0);
108
109 if storage == "xor_delta" {
110 xor_delta_count += 1;
111 xor_base_bytes += base_length;
112 }
113 }
114
115 if xor_delta_count == 0 {
116 return None;
117 }
118
119 let parent_name = parent_paths_b
120 .first()
121 .and_then(|p| Path::new(p).file_name())
122 .map(|f| f.to_string_lossy().into_owned())
123 .unwrap_or_default();
124 let parent_is_a = parent_name == name_a;
125
126 Some(CheckpointDelta {
127 xor_delta_count,
128 xor_base_bytes,
129 parent_is_a,
130 parent_name,
131 })
132}
133
134pub fn run(a: &Path, b: &Path) -> Result<()> {
136 let info_a = inspect_archive(a).with_context(|| format!("Failed to read {}", a.display()))?;
137 let info_b = inspect_archive(b).with_context(|| format!("Failed to read {}", b.display()))?;
138
139 let summary_a =
140 scan(a).with_context(|| format!("Failed to read blocks from {}", a.display()))?;
141 let summary_b =
142 scan(b).with_context(|| format!("Failed to read blocks from {}", b.display()))?;
143
144 let shared_data_blocks: usize = summary_b
147 .data
148 .keys()
149 .filter(|h| summary_a.data.contains_key(*h))
150 .count();
151 let shared_data_bytes: u64 = summary_b
152 .data
153 .iter()
154 .filter(|(h, _)| summary_a.data.contains_key(*h))
155 .map(|(_, &len)| len)
156 .sum();
157
158 let shared_blocks = shared_data_blocks + summary_b.parent_ref_blocks;
159 let shared_bytes = shared_data_bytes + summary_b.parent_ref_bytes;
160
161 let new_b_blocks: usize = summary_b
162 .data
163 .keys()
164 .filter(|h| !summary_a.data.contains_key(*h))
165 .count();
166 let new_b_bytes: u64 = summary_b
167 .data
168 .iter()
169 .filter(|(h, _)| !summary_a.data.contains_key(*h))
170 .map(|(_, &len)| len)
171 .sum();
172
173 let only_a_blocks: usize = summary_a
174 .data
175 .keys()
176 .filter(|h| !summary_b.data.contains_key(*h))
177 .count();
178 let only_a_bytes: u64 = summary_a
179 .data
180 .iter()
181 .filter(|(h, _)| !summary_b.data.contains_key(*h))
182 .map(|(_, &len)| len)
183 .sum();
184
185 let name_a_str = a
187 .file_name()
188 .unwrap_or(a.as_os_str())
189 .to_string_lossy()
190 .into_owned();
191 let name_b_str = b
192 .file_name()
193 .unwrap_or(b.as_os_str())
194 .to_string_lossy()
195 .into_owned();
196
197 let cp_delta = info_b
198 .metadata
199 .as_deref()
200 .and_then(|m| parse_checkpoint_delta(m, &name_a_str, &info_b.parent_paths));
201
202 let p = palette();
204
205 let max_name = name_a_str.len().max(name_b_str.len());
206
207 let name_a_col = format!("{name_a_str:<max_name$}");
210 let name_b_col = format!("{name_b_str:<max_name$}");
211 let size_a_col = format!("{:>10}", HumanBytes(info_a.file_size));
212 let size_b_col = format!("{:>10}", HumanBytes(info_b.file_size));
213 let blk_a_col = format!("{:>6}", summary_a.data.len() + summary_a.parent_ref_blocks);
214 let blk_b_col = format!("{:>6}", summary_b.data.len() + summary_b.parent_ref_blocks);
215
216 let lbl_w = "Only in ".len() + max_name + 1;
218 let shared_lbl = format!("{:<lbl_w$}", "Shared:");
219 let new_b_lbl = format!("{:<lbl_w$}", format!("New in {name_b_str}:"));
220 let only_a_lbl = format!("{:<lbl_w$}", format!("Only in {name_a_str}:"));
221 let shared_size_col = format!("{:>10}", HumanBytes(shared_bytes));
222 let new_b_size_col = format!("{:>10}", HumanBytes(new_b_bytes));
223 let only_a_size_col = format!("{:>10}", HumanBytes(only_a_bytes));
224 let shared_blk_col = format!("{shared_blocks:>6}");
225 let new_b_blk_col = format!("{new_b_blocks:>6}");
226 let only_a_blk_col = format!("{only_a_blocks:>6}");
227
228 let total_b_bytes = (shared_bytes + new_b_bytes).max(1);
229 let pct = |n: u64| n as f64 / total_b_bytes as f64 * 100.0;
230
231 let is_thin_b = summary_b.parent_ref_blocks > 0;
232 let is_xor_delta = cp_delta.is_some();
233
234 let thin_note = if is_thin_b {
235 format!(
236 " {}({} via parent refs){}",
237 p.gray, summary_b.parent_ref_blocks, p.reset
238 )
239 } else {
240 String::new()
241 };
242
243 let (saved_label, saved_bytes) = if is_xor_delta {
247 (
248 "Delta saving:",
249 info_a.file_size.saturating_sub(info_b.file_size),
250 )
251 } else {
252 ("Storage saved:", shared_bytes)
253 };
254 let saved_lbl = format!("{saved_label:<lbl_w$}");
255 let saved_size_col = format!("{:>10}", HumanBytes(saved_bytes));
256
257 let b_delta_tag = if is_xor_delta {
259 format!(" {}(XOR delta checkpoint){}", p.dim, p.reset)
260 } else {
261 String::new()
262 };
263
264 println!();
266 println!(
267 " {}{}{} {}{}{} {} blocks",
268 p.bold, name_a_col, p.reset, p.green, size_a_col, p.reset, blk_a_col
269 );
270 println!(
271 " {}{}{} {}{}{} {} blocks{}",
272 p.bold, name_b_col, p.reset, p.green, size_b_col, p.reset, blk_b_col, b_delta_tag
273 );
274 println!();
275
276 if is_xor_delta {
277 } else {
282 println!(
284 " {}{}{} {}{}{} {} blocks {}({:.0}%){}{}",
285 p.cyan,
286 shared_lbl,
287 p.reset,
288 p.green,
289 shared_size_col,
290 p.reset,
291 shared_blk_col,
292 p.bold,
293 pct(shared_bytes),
294 p.reset,
295 thin_note,
296 );
297 println!(
298 " {}{}{} {}{}{} {} blocks",
299 p.cyan, new_b_lbl, p.reset, p.yellow, new_b_size_col, p.reset, new_b_blk_col,
300 );
301 if !is_thin_b {
302 println!(
303 " {}{}{} {}{}{} {} blocks",
304 p.cyan, only_a_lbl, p.reset, p.dim, only_a_size_col, p.reset, only_a_blk_col,
305 );
306 }
307 println!();
308 }
309
310 if let Some(ref d) = cp_delta {
312 let base_name = if d.parent_is_a {
313 &name_a_str
314 } else {
315 &d.parent_name
316 };
317 let base_tag = if d.parent_is_a {
318 format!("{}{}{}", p.yellow, base_name, p.reset)
319 } else {
320 format!(
322 "{}{}{} {}(not {}){}",
323 p.yellow, base_name, p.reset, p.gray, name_a_str, p.reset
324 )
325 };
326
327 let compression_ratio = d.xor_base_bytes as f64 / info_b.file_size as f64;
328
329 println!(
330 " {}Checkpoint delta{} ({} tensors use XOR delta off {})",
331 p.bold, p.reset, d.xor_delta_count, base_tag,
332 );
333 println!(
334 " {}{}{} base → {}{}{} on disk {}({:.1}× compression){}",
335 p.green,
336 HumanBytes(d.xor_base_bytes),
337 p.reset,
338 p.yellow,
339 HumanBytes(info_b.file_size),
340 p.reset,
341 p.dim,
342 compression_ratio,
343 p.reset,
344 );
345 println!(
346 " {}{}{} {}required for reconstruction{}",
347 p.yellow, base_name, p.reset, p.dim, p.reset,
348 );
349 println!();
350 }
351
352 println!(
353 " {}{}{} {}{}{}",
354 p.cyan, saved_lbl, p.reset, p.green, saved_size_col, p.reset
355 );
356 println!();
357
358 Ok(())
359}