hexz_cli/cmd/data/
diff.rs1use anyhow::{Context, Result};
13use hexz_core::format::header::Header;
14use hexz_core::format::index::{IndexPage, MasterIndex};
15use hexz_ops::inspect::inspect_snapshot;
16use indicatif::HumanBytes;
17use std::collections::HashSet;
18use std::fs::File;
19use std::io::{Read, Seek, SeekFrom};
20use std::path::{Path, PathBuf};
21
/// Per-snapshot tallies gathered while walking a snapshot's block index.
struct BlockSummary {
    /// Hashes of the indexed blocks that are neither parent references nor
    /// sparse and whose hash is not all zeroes.
    hashes: HashSet<[u8; 32]>,
    /// Number of index entries flagged as parent references.
    parent_ref_blocks: usize,
    /// Sum of `logical_len` over the parent-reference entries.
    parent_ref_bytes: u64,
    /// Number of data blocks whose hash is absent from the other snapshot.
    /// Left at zero by `scan`; filled in by `run`.
    unique_blocks: usize,
    /// Sum of `logical_len` over the blocks counted in `unique_blocks`.
    /// Left at zero by `scan`; filled in by `run`.
    unique_bytes: u64,
}
34
35fn scan(path: &Path) -> Result<BlockSummary> {
36 let mut f = File::open(path)?;
37 let header = Header::read_from(&mut f)?;
38 let master = MasterIndex::read_from(&mut f, header.index_offset)?;
39
40 let mut hashes = HashSet::new();
41 let mut parent_ref_bytes = 0u64;
42 let mut parent_ref_blocks = 0usize;
43
44 for page_meta in &master.primary_pages {
45 f.seek(SeekFrom::Start(page_meta.offset))?;
46 let mut buf = vec![0u8; page_meta.length as usize];
47 f.read_exact(&mut buf)?;
48 let page: IndexPage = bincode::deserialize(&buf)?;
49 for block in page.blocks {
50 if block.is_parent_ref() {
51 parent_ref_blocks += 1;
52 parent_ref_bytes += block.logical_len as u64;
53 } else if !block.is_sparse() && block.hash != [0u8; 32] {
54 hashes.insert(block.hash);
55 }
56 }
57 }
58
59 Ok(BlockSummary {
60 hashes,
61 parent_ref_bytes,
62 parent_ref_blocks,
63 unique_bytes: 0,
64 unique_blocks: 0,
65 })
66}
67
68pub fn run(a: PathBuf, b: PathBuf) -> Result<()> {
70 let info_a = inspect_snapshot(&a).with_context(|| format!("Failed to read {}", a.display()))?;
71 let info_b = inspect_snapshot(&b).with_context(|| format!("Failed to read {}", b.display()))?;
72
73 let mut summary_a =
74 scan(&a).with_context(|| format!("Failed to read blocks from {}", a.display()))?;
75 let mut summary_b =
76 scan(&b).with_context(|| format!("Failed to read blocks from {}", b.display()))?;
77
78 let mut shared_blocks = summary_b.parent_ref_blocks;
81 let mut shared_bytes = summary_b.parent_ref_bytes;
82
83 {
85 let mut f = File::open(&b)?;
86 let header = Header::read_from(&mut f)?;
87 let master = MasterIndex::read_from(&mut f, header.index_offset)?;
88
89 for page_meta in &master.primary_pages {
90 f.seek(SeekFrom::Start(page_meta.offset))?;
91 let mut buf = vec![0u8; page_meta.length as usize];
92 f.read_exact(&mut buf)?;
93 let page: IndexPage = bincode::deserialize(&buf)?;
94 for block in page.blocks {
95 if block.is_parent_ref() || block.is_sparse() || block.hash == [0u8; 32] {
96 continue;
97 }
98 if summary_a.hashes.contains(&block.hash) {
99 shared_blocks += 1;
100 shared_bytes += block.logical_len as u64;
101 } else {
102 summary_b.unique_blocks += 1;
103 summary_b.unique_bytes += block.logical_len as u64;
104 }
105 }
106 }
107 }
108
109 {
111 let mut f = File::open(&a)?;
112 let header = Header::read_from(&mut f)?;
113 let master = MasterIndex::read_from(&mut f, header.index_offset)?;
114
115 for page_meta in &master.primary_pages {
116 f.seek(SeekFrom::Start(page_meta.offset))?;
117 let mut buf = vec![0u8; page_meta.length as usize];
118 f.read_exact(&mut buf)?;
119 let page: IndexPage = bincode::deserialize(&buf)?;
120 for block in page.blocks {
121 if block.is_parent_ref() || block.is_sparse() || block.hash == [0u8; 32] {
122 continue;
123 }
124 if !summary_b.hashes.contains(&block.hash) {
125 summary_a.unique_blocks += 1;
126 summary_a.unique_bytes += block.logical_len as u64;
127 }
128 }
129 }
130 }
131
132 let name_a = a.file_name().unwrap_or(a.as_os_str()).to_string_lossy();
134 let name_b = b.file_name().unwrap_or(b.as_os_str()).to_string_lossy();
135 let max_name = name_a.len().max(name_b.len());
136
137 let total_a_data_blocks = summary_a.hashes.len();
138 let total_b_data_blocks = summary_b.hashes.len() + summary_b.parent_ref_blocks;
139
140 println!();
141 println!(
142 " {:<width$} {:>10} {:>6} blocks",
143 name_a,
144 HumanBytes(info_a.file_size),
145 total_a_data_blocks,
146 width = max_name,
147 );
148 println!(
149 " {:<width$} {:>10} {:>6} blocks",
150 name_b,
151 HumanBytes(info_b.file_size),
152 total_b_data_blocks,
153 width = max_name,
154 );
155 println!();
156
157 let total_b_bytes = (shared_bytes + summary_b.unique_bytes).max(1);
158 let pct = |n: u64| n as f64 / total_b_bytes as f64 * 100.0;
159
160 let is_thin_b = summary_b.parent_ref_blocks > 0;
164 let thin_note = if is_thin_b {
165 format!(" ({} via parent refs)", summary_b.parent_ref_blocks)
166 } else {
167 String::new()
168 };
169
170 println!(
171 " Shared: {:>10} {:>6} blocks ({:.0}%){}",
172 HumanBytes(shared_bytes),
173 shared_blocks,
174 pct(shared_bytes),
175 thin_note,
176 );
177 println!(
178 " New in {:<width$} {:>10} {:>6} blocks",
179 format!("{}:", name_b),
180 HumanBytes(summary_b.unique_bytes),
181 summary_b.unique_blocks,
182 width = max_name + 1,
183 );
184 if !is_thin_b {
185 println!(
186 " Only in {:<width$} {:>10} {:>6} blocks",
187 format!("{}:", name_a),
188 HumanBytes(summary_a.unique_bytes),
189 summary_a.unique_blocks,
190 width = max_name + 1,
191 );
192 }
193 println!();
194 println!(" Storage saved: {}", HumanBytes(shared_bytes));
195 println!();
196
197 Ok(())
198}