1use hexz_common::Result;
4use hexz_core::format::header::{CompressionType, Header};
5use hexz_core::format::index::{IndexPage, MasterIndex};
6use std::collections::HashSet;
7use std::fs::File;
8use std::io::{Read, Seek, SeekFrom};
9use std::path::Path;
10
11#[derive(Debug, Default, serde::Serialize)]
13pub struct BlockStats {
14 pub data_blocks: usize,
16 pub data_bytes: u64,
18 pub parent_ref_blocks: usize,
20 pub parent_ref_bytes: u64,
22 pub zero_blocks: usize,
24 pub zero_bytes: u64,
26
27 pub min_block_size: u32,
29 pub max_block_size: u32,
31 pub avg_block_size: u32,
33
34 pub unique_blocks: usize,
36 pub dedup_blocks: usize,
38 pub dedup_bytes_saved: u64,
40
41 pub compressed_data_bytes: u64,
43}
44
/// Boolean feature summary of an archive, derived from its header.
///
/// Every flag defaults to `false`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
// Each flag is an independent on/off property taken straight from the header;
// they do not form a state machine, so an enum would not model them better.
#[allow(clippy::struct_excessive_bools)]
pub struct ArchiveFeatures {
    /// The header carries encryption information.
    pub encrypted: bool,
    /// The header's `has_main` feature flag.
    pub has_main: bool,
    /// The header's `has_auxiliary` feature flag.
    pub has_auxiliary: bool,
    /// The header's `variable_blocks` feature flag.
    pub variable_blocks: bool,
    /// The header records a signature offset.
    pub signature_present: bool,
    /// The header records a dictionary offset.
    pub dictionary_present: bool,
}
62
63pub struct ArchiveInfo {
65 pub version: u32,
67 pub block_size: u32,
69 pub compression: CompressionType,
71 pub parent_paths: Vec<String>,
73 pub features: ArchiveFeatures,
75 pub main_size: u64,
77 pub auxiliary_size: u64,
79 pub file_size: u64,
81 pub index_offset: u64,
83 pub main_pages: usize,
85 pub auxiliary_pages: usize,
87 pub metadata_offset: Option<u64>,
89 pub metadata_length: Option<u32>,
91 pub metadata: Option<String>,
93 pub block_stats: Option<BlockStats>,
95}
96
97impl std::fmt::Debug for ArchiveInfo {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 f.debug_struct("ArchiveInfo")
100 .field("version", &self.version)
101 .field("block_size", &self.block_size)
102 .field("compression", &self.compression)
103 .field("file_size", &self.file_size)
104 .finish_non_exhaustive()
105 }
106}
107
108impl ArchiveInfo {
109 pub const fn total_uncompressed(&self) -> u64 {
111 self.main_size + self.auxiliary_size
112 }
113
114 pub fn compression_ratio(&self) -> f64 {
116 if self.file_size > 0 {
117 self.total_uncompressed() as f64 / self.file_size as f64
118 } else {
119 0.0
120 }
121 }
122}
123
124pub fn inspect_archive(path: impl AsRef<Path>) -> Result<ArchiveInfo> {
126 let mut f = File::open(path.as_ref())?;
127 let file_size = f.metadata()?.len();
128
129 let header = Header::read_from(&mut f)?;
130 let master = MasterIndex::read_from(&mut f, header.index_offset)?;
131
132 let metadata = if let (Some(off), Some(len)) = (header.metadata_offset, header.metadata_length)
133 {
134 let mut buf = vec![0u8; len as usize];
135 _ = f.seek(SeekFrom::Start(off))?;
136 f.read_exact(&mut buf)?;
137 Some(String::from_utf8_lossy(&buf).to_string())
138 } else {
139 None
140 };
141
142 let mut stats = BlockStats {
143 min_block_size: u32::MAX,
144 ..Default::default()
145 };
146 let mut seen_offsets: HashSet<u64> = HashSet::new();
147
148 for page_meta in &master.main_pages {
149 _ = f.seek(SeekFrom::Start(page_meta.offset))?;
150 let mut page_bytes = vec![0u8; page_meta.length as usize];
151 f.read_exact(&mut page_bytes)?;
152
153 let page: IndexPage = bincode::deserialize(&page_bytes)?;
154
155 for block in page.blocks {
156 if block.is_parent_ref() {
157 stats.parent_ref_blocks += 1;
158 stats.parent_ref_bytes += block.logical_len as u64;
159 } else if block.is_sparse() {
160 stats.zero_blocks += 1;
161 stats.zero_bytes += block.logical_len as u64;
162 } else {
163 stats.data_blocks += 1;
164 stats.data_bytes += block.logical_len as u64;
165 stats.compressed_data_bytes += block.length as u64;
166
167 if block.logical_len < stats.min_block_size {
168 stats.min_block_size = block.logical_len;
169 }
170 if block.logical_len > stats.max_block_size {
171 stats.max_block_size = block.logical_len;
172 }
173
174 if seen_offsets.insert(block.offset) {
175 stats.unique_blocks += 1;
176 } else {
177 stats.dedup_blocks += 1;
178 stats.dedup_bytes_saved += block.logical_len as u64;
179 }
180 }
181 }
182 }
183
184 if stats.data_blocks > 0 {
185 stats.avg_block_size = (stats.data_bytes / stats.data_blocks as u64) as u32;
186 } else {
187 stats.min_block_size = 0;
188 }
189
190 Ok(ArchiveInfo {
191 version: header.version,
192 block_size: header.block_size,
193 compression: header.compression,
194 parent_paths: header.parent_paths,
195 features: ArchiveFeatures {
196 encrypted: header.encryption.is_some(),
197 has_main: header.features.has_main,
198 has_auxiliary: header.features.has_auxiliary,
199 variable_blocks: header.features.variable_blocks,
200 signature_present: header.signature_offset.is_some(),
201 dictionary_present: header.dictionary_offset.is_some(),
202 },
203 main_size: master.main_size,
204 auxiliary_size: master.auxiliary_size,
205 file_size,
206 index_offset: header.index_offset,
207 main_pages: master.main_pages.len(),
208 auxiliary_pages: master.auxiliary_pages.len(),
209 metadata_offset: header.metadata_offset,
210 metadata_length: header.metadata_length,
211 metadata,
212 block_stats: Some(stats),
213 })
214}