1use std::collections::VecDeque;
7use std::fs::File;
8use std::io::Read;
9use std::io::Seek;
10use std::io::SeekFrom;
11use std::path::PathBuf;
12use std::sync::Arc;
13
14use flatbuffers::root;
15use itertools::Itertools;
16use vortex::buffer::Alignment;
17use vortex::buffer::ByteBuffer;
18use vortex::error::VortexExpect;
19use vortex::error::VortexResult;
20use vortex::error::vortex_bail;
21use vortex::error::vortex_err;
22use vortex::file::EOF_SIZE;
23use vortex::file::Footer;
24use vortex::file::MAGIC_BYTES;
25use vortex::file::MAX_POSTSCRIPT_SIZE;
26use vortex::file::OpenOptionsSessionExt;
27use vortex::file::VERSION;
28use vortex::flatbuffers::footer as fb;
29use vortex::layout::LayoutRef;
30use vortex::session::VortexSession;
31
32#[derive(Debug, clap::Parser)]
34pub struct InspectArgs {
35 #[clap(subcommand)]
37 pub mode: Option<InspectMode>,
38
39 pub file: PathBuf,
41}
42
43#[derive(Debug, clap::Subcommand)]
45pub enum InspectMode {
46 Eof,
48
49 Postscript,
51
52 Footer,
54}
55
56pub async fn exec_inspect(session: &VortexSession, args: InspectArgs) -> anyhow::Result<()> {
62 let mut inspector = VortexInspector::new(session, args.file.clone())?;
63
64 println!("File: {}", args.file.display());
65 println!("Size: {} bytes", inspector.file_size);
66 println!();
67
68 let mode = args.mode.unwrap_or(InspectMode::Footer);
69
70 match mode {
71 InspectMode::Eof => {
72 let eof = inspector.read_eof()?;
73 eof.display();
74 }
75 InspectMode::Postscript => {
76 let eof = inspector.read_eof()?;
77 eof.display();
78
79 if !eof.valid_magic {
80 anyhow::bail!("Invalid magic bytes, cannot read postscript");
81 }
82
83 let postscript = inspector.read_postscript(eof.postscript_size)?;
84 postscript.display();
85 }
86 InspectMode::Footer => {
87 let eof = match inspector.read_eof() {
88 Ok(eof) => {
89 eof.display();
90 eof
91 }
92 Err(e) => {
93 eprintln!("Error reading EOF: {}", e);
94 return Err(e.into());
95 }
96 };
97
98 if !eof.valid_magic {
99 eprintln!("\nError: Invalid magic bytes, stopping here");
100 return Ok(());
101 }
102
103 match inspector.read_postscript(eof.postscript_size) {
104 Ok(ps) => {
105 ps.display();
106 }
107 Err(e) => {
108 eprintln!("\nError reading postscript: {}", e);
109 return Ok(());
110 }
111 };
112
113 match inspector.read_footer().await {
114 Ok(footer) => FooterSegments(footer).display(),
115 Err(e) => {
116 eprintln!("\nError reading footer segments: {}", e);
117 }
118 }
119 }
120 }
121
122 Ok(())
123}
124
125struct VortexInspector<'a> {
126 session: &'a VortexSession,
127 path: PathBuf,
128 file: File,
129 file_size: u64,
130}
131
132impl<'a> VortexInspector<'a> {
133 fn new(session: &'a VortexSession, path: PathBuf) -> VortexResult<Self> {
134 let mut file =
135 File::open(&path).map_err(|e| vortex_err!("Failed to open file {:?}: {}", path, e))?;
136
137 let file_size = file
138 .seek(SeekFrom::End(0))
139 .map_err(|e| vortex_err!("Failed to get file size: {}", e))?;
140
141 Ok(Self {
142 session,
143 path,
144 file,
145 file_size,
146 })
147 }
148
149 fn read_eof(&mut self) -> VortexResult<EofInfo> {
150 if self.file_size < EOF_SIZE as u64 {
151 vortex_bail!(
152 "File too small ({} bytes) to contain EOF marker (requires {} bytes)",
153 self.file_size,
154 EOF_SIZE
155 );
156 }
157
158 let mut eof_bytes = [0u8; EOF_SIZE];
159 self.file
160 .seek(SeekFrom::End(-(EOF_SIZE as i64)))
161 .map_err(|e| vortex_err!("Failed to seek to EOF: {}", e))?;
162 self.file
163 .read_exact(&mut eof_bytes)
164 .map_err(|e| vortex_err!("Failed to read EOF bytes: {}", e))?;
165
166 let version = u16::from_le_bytes([eof_bytes[0], eof_bytes[1]]);
167 let postscript_size = u16::from_le_bytes([eof_bytes[2], eof_bytes[3]]);
168 let magic_bytes = [eof_bytes[4], eof_bytes[5], eof_bytes[6], eof_bytes[7]];
169
170 Ok(EofInfo {
171 version,
172 postscript_size,
173 magic_bytes,
174 valid_magic: magic_bytes == MAGIC_BYTES,
175 })
176 }
177
178 fn read_postscript(&mut self, postscript_size: u16) -> VortexResult<PostscriptInfo> {
179 let postscript_offset = self.file_size - EOF_SIZE as u64 - postscript_size as u64;
180
181 let mut postscript_bytes = vec![0u8; postscript_size as usize];
182 self.file
183 .seek(SeekFrom::Start(postscript_offset))
184 .map_err(|e| vortex_err!("Failed to seek to postscript: {}", e))?;
185 self.file
186 .read_exact(&mut postscript_bytes)
187 .map_err(|e| vortex_err!("Failed to read postscript: {}", e))?;
188
189 let postscript_buffer = ByteBuffer::from(postscript_bytes);
190 let fb_postscript = root::<fb::Postscript>(&postscript_buffer)
191 .map_err(|e| vortex_err!("Failed to parse postscript flatbuffer: {}", e))?;
192
193 let dtype = fb_postscript.dtype().map(|s| SegmentInfo {
194 offset: s.offset(),
195 length: s.length(),
196 alignment: Alignment::from_exponent(s.alignment_exponent()),
197 });
198
199 let layout = fb_postscript
200 .layout()
201 .map(|s| SegmentInfo {
202 offset: s.offset(),
203 length: s.length(),
204 alignment: Alignment::from_exponent(s.alignment_exponent()),
205 })
206 .ok_or_else(|| vortex_err!("Postscript missing layout segment"))?;
207
208 let statistics = fb_postscript.statistics().map(|s| SegmentInfo {
209 offset: s.offset(),
210 length: s.length(),
211 alignment: Alignment::from_exponent(s.alignment_exponent()),
212 });
213
214 let footer = fb_postscript
215 .footer()
216 .map(|s| SegmentInfo {
217 offset: s.offset(),
218 length: s.length(),
219 alignment: Alignment::from_exponent(s.alignment_exponent()),
220 })
221 .ok_or_else(|| vortex_err!("Postscript missing footer segment"))?;
222
223 Ok(PostscriptInfo {
224 dtype,
225 layout,
226 statistics,
227 footer,
228 })
229 }
230
231 async fn read_footer(&mut self) -> VortexResult<Footer> {
232 Ok(self
233 .session
234 .open_options()
235 .open(self.path.as_path())
236 .await?
237 .footer()
238 .clone())
239 }
240}
241
/// Decoded contents of the EOF marker, as produced by `read_eof`.
#[derive(Debug)]
struct EofInfo {
    /// Version number stored in the marker.
    version: u16,
    /// Size in bytes of the postscript, as stored in the marker.
    postscript_size: u16,
    /// The four magic bytes exactly as read from the file.
    magic_bytes: [u8; 4],
    /// Whether `magic_bytes` matched the expected magic value.
    valid_magic: bool,
}
249
250#[derive(Debug, Clone)]
251struct SegmentInfo {
252 offset: u64,
253 length: u32,
254 alignment: Alignment,
255}
256
257#[derive(Debug)]
258struct PostscriptInfo {
259 pub dtype: Option<SegmentInfo>,
260 pub layout: SegmentInfo,
261 pub statistics: Option<SegmentInfo>,
262 pub footer: SegmentInfo,
263}
264
265#[derive(Debug)]
266struct FooterSegments(Footer);
267
268impl EofInfo {
269 fn display(&self) {
270 println!("=== EOF Marker ===");
271 println!("Version: {} (current: {})", self.version, VERSION);
272 println!("Postscript size: {} bytes", self.postscript_size);
273 println!(
274 "Magic bytes: {} ({})",
275 std::str::from_utf8(&self.magic_bytes).unwrap_or("<invalid utf8>"),
276 if self.valid_magic { "VALID" } else { "INVALID" }
277 );
278
279 if self.postscript_size > MAX_POSTSCRIPT_SIZE {
280 println!(
281 "WARNING: Postscript size exceeds maximum ({} > {})",
282 self.postscript_size, MAX_POSTSCRIPT_SIZE
283 );
284 }
285 }
286}
287
288impl SegmentInfo {
289 fn display(&self, name: &str) {
290 println!(
291 " {}: offset={}, length={}, alignment={}",
292 name, self.offset, self.length, self.alignment
293 );
294 }
295}
296
297impl PostscriptInfo {
298 fn display(&self) {
299 println!("\n=== Postscript ===");
300 if let Some(ref dtype) = self.dtype {
301 dtype.display("DType");
302 } else {
303 println!(" DType: <not embedded>");
304 }
305 self.layout.display("Layout");
306 if let Some(ref stats) = self.statistics {
307 stats.display("Statistics");
308 } else {
309 println!(" Statistics: <not present>");
310 }
311 self.footer.display("Footer");
312 }
313}
314
315impl FooterSegments {
316 fn display(&self) {
317 println!("\n=== Footer Segments ===");
318 println!("Total segments: {}", self.0.segment_map().len());
319 let total_size = self
320 .0
321 .segment_map()
322 .iter()
323 .map(|s| s.length as u64)
324 .sum::<u64>();
325 println!("Total data size: {} bytes", total_size);
326
327 println!("\nSegment details:\n");
328
329 let segment_map = self.0.segment_map().clone();
330 if segment_map.is_empty() {
331 println!("<no segments>");
332 return;
333 }
334
335 let mut segment_paths: Vec<Option<Vec<Arc<str>>>> = vec![None; segment_map.len()];
336 let root_layout = self.0.layout().clone();
337
338 let mut queue =
339 VecDeque::<(Vec<Arc<str>>, LayoutRef)>::from_iter([(Vec::new(), root_layout)]);
340 while !queue.is_empty() {
341 let (path, layout) = queue.pop_front().vortex_expect("queue is not empty");
342 for segment in layout.segment_ids() {
343 segment_paths[*segment as usize] = Some(path.clone());
344 }
345
346 for (child_layout, child_name) in layout
347 .children()
348 .vortex_expect("Failed to deserialize children")
349 .into_iter()
350 .zip(layout.child_names())
351 {
352 let child_path = path.iter().cloned().chain([child_name]).collect();
353 queue.push_back((child_path, child_layout));
354 }
355 }
356
357 let max_offset = segment_map.last().vortex_expect("non-empty").offset;
359 let max_length = segment_map
360 .iter()
361 .map(|s| s.length)
362 .max()
363 .vortex_expect("non-empty");
364 let max_alignment = segment_map
365 .iter()
366 .map(|s| s.alignment)
367 .max()
368 .vortex_expect("non-empty");
369
370 let offset_width = max_offset.to_string().len();
372 let end_width = (max_offset + max_length as u64).to_string().len();
373 let length_width = max_length.to_string().len().max(6);
374 let alignment_width = max_alignment.to_string().len().max(5);
375 let index_width = segment_paths.len().to_string().len();
376
377 println!(
379 "{:>index_w$} {:>offset_w$}..{:<end_w$} {:>length_w$} {:>align_w$} Path",
380 "#",
381 "Start",
382 "End",
383 "Length",
384 "Align",
385 index_w = index_width,
386 offset_w = offset_width,
387 end_w = end_width,
388 length_w = length_width,
389 align_w = alignment_width,
390 );
391
392 for (i, name) in segment_paths.iter().enumerate() {
393 let segment = &segment_map[i];
394 let end_offset = segment.offset + segment.length as u64;
395
396 print!(
397 "{:>index_w$} {:>offset_w$}..{:<end_w$} ",
398 i,
399 segment.offset,
400 end_offset,
401 index_w = index_width,
402 offset_w = offset_width,
403 end_w = end_width,
404 );
405 print!(
406 "{:>length_w$} {:>align_w$} ",
407 segment.length,
408 *segment.alignment,
409 length_w = length_width,
410 align_w = alignment_width,
411 );
412 println!(
413 "{}",
414 match name.as_ref() {
415 Some(path) => format!("{}", path.iter().format(".")),
416 None => "<missing>".to_string(),
417 }
418 );
419 }
420 }
421}