1use std::collections::VecDeque;
7use std::fs::File;
8use std::io::Read;
9use std::io::Seek;
10use std::io::SeekFrom;
11use std::path::Path;
12use std::path::PathBuf;
13use std::sync::Arc;
14
15use flatbuffers::root;
16use itertools::Itertools;
17use serde::Serialize;
18use vortex::buffer::Alignment;
19use vortex::buffer::ByteBuffer;
20use vortex::error::VortexExpect;
21use vortex::error::VortexResult;
22use vortex::error::vortex_bail;
23use vortex::error::vortex_err;
24use vortex::file::EOF_SIZE;
25use vortex::file::Footer;
26use vortex::file::MAGIC_BYTES;
27use vortex::file::MAX_POSTSCRIPT_SIZE;
28use vortex::file::OpenOptionsSessionExt;
29use vortex::file::VERSION;
30use vortex::flatbuffers::footer as fb;
31use vortex::layout::LayoutRef;
32use vortex::session::VortexSession;
33
34#[derive(Debug, clap::Parser)]
36pub struct InspectArgs {
37 #[clap(subcommand)]
39 pub mode: Option<InspectMode>,
40
41 pub file: PathBuf,
43
44 #[arg(long, global = true)]
46 pub json: bool,
47}
48
49#[derive(Debug, clap::Subcommand)]
51pub enum InspectMode {
52 Eof,
54
55 Postscript,
57
58 Footer,
60}
61
62#[derive(Serialize)]
64pub struct InspectOutput {
65 pub file_path: String,
67 pub file_size: u64,
69 pub eof: EofInfoJson,
71 #[serde(skip_serializing_if = "Option::is_none")]
73 pub postscript: Option<PostscriptInfoJson>,
74 #[serde(skip_serializing_if = "Option::is_none")]
76 pub footer: Option<FooterInfoJson>,
77}
78
79#[derive(Serialize)]
81pub struct EofInfoJson {
82 pub version: u16,
84 pub current_version: u16,
86 pub postscript_size: u16,
88 pub magic_bytes: String,
90 pub valid_magic: bool,
92}
93
94#[derive(Serialize)]
96pub struct SegmentInfoJson {
97 pub offset: u64,
99 pub length: u32,
101 pub alignment: usize,
103}
104
105#[derive(Serialize)]
107pub struct PostscriptInfoJson {
108 pub dtype: Option<SegmentInfoJson>,
110 pub layout: SegmentInfoJson,
112 pub statistics: Option<SegmentInfoJson>,
114 pub footer: SegmentInfoJson,
116}
117
118#[derive(Serialize)]
120pub struct FooterInfoJson {
121 pub total_segments: usize,
123 pub total_data_size: u64,
125 pub segments: Vec<FooterSegmentJson>,
127}
128
129#[derive(Serialize)]
131pub struct FooterSegmentJson {
132 pub index: usize,
134 pub offset: u64,
136 pub end_offset: u64,
138 pub length: u32,
140 pub alignment: usize,
142 pub path: Option<String>,
144}
145
146pub async fn exec_inspect(session: &VortexSession, args: InspectArgs) -> anyhow::Result<()> {
152 let mut inspector = VortexInspector::new(session, args.file.clone())?;
153
154 let mode = args.mode.unwrap_or(InspectMode::Footer);
155
156 if args.json {
157 exec_inspect_json(&mut inspector, &args.file, mode).await
158 } else {
159 exec_inspect_text(&mut inspector, &args.file, mode).await
160 }
161}
162
163async fn exec_inspect_json(
164 inspector: &mut VortexInspector<'_>,
165 file_path: &Path,
166 mode: InspectMode,
167) -> anyhow::Result<()> {
168 let eof = inspector.read_eof()?;
169 let eof_json = EofInfoJson {
170 version: eof.version,
171 current_version: VERSION,
172 postscript_size: eof.postscript_size,
173 magic_bytes: std::str::from_utf8(&eof.magic_bytes)
174 .unwrap_or("<invalid utf8>")
175 .to_string(),
176 valid_magic: eof.valid_magic,
177 };
178
179 let postscript_json =
180 if matches!(mode, InspectMode::Postscript | InspectMode::Footer) && eof.valid_magic {
181 inspector
182 .read_postscript(eof.postscript_size)
183 .ok()
184 .map(|ps| PostscriptInfoJson {
185 dtype: ps.dtype.map(|s| SegmentInfoJson {
186 offset: s.offset,
187 length: s.length,
188 alignment: *s.alignment,
189 }),
190 layout: SegmentInfoJson {
191 offset: ps.layout.offset,
192 length: ps.layout.length,
193 alignment: *ps.layout.alignment,
194 },
195 statistics: ps.statistics.map(|s| SegmentInfoJson {
196 offset: s.offset,
197 length: s.length,
198 alignment: *s.alignment,
199 }),
200 footer: SegmentInfoJson {
201 offset: ps.footer.offset,
202 length: ps.footer.length,
203 alignment: *ps.footer.alignment,
204 },
205 })
206 } else {
207 None
208 };
209
210 let footer_json =
211 if matches!(mode, InspectMode::Footer) && eof.valid_magic && postscript_json.is_some() {
212 inspector.read_footer().await.ok().map(|footer| {
213 let segment_map = footer.segment_map().clone();
214 let root_layout = footer.layout().clone();
215
216 let mut segment_paths: Vec<Option<Vec<Arc<str>>>> = vec![None; segment_map.len()];
217 let mut queue =
218 VecDeque::<(Vec<Arc<str>>, LayoutRef)>::from_iter([(Vec::new(), root_layout)]);
219 while !queue.is_empty() {
220 let (path, layout) = queue.pop_front().vortex_expect("queue is not empty");
221 for segment in layout.segment_ids() {
222 segment_paths[*segment as usize] = Some(path.clone());
223 }
224 if let Ok(children) = layout.children() {
225 for (child_layout, child_name) in
226 children.into_iter().zip(layout.child_names())
227 {
228 let child_path = path.iter().cloned().chain([child_name]).collect();
229 queue.push_back((child_path, child_layout));
230 }
231 }
232 }
233
234 let segments: Vec<FooterSegmentJson> = segment_map
235 .iter()
236 .enumerate()
237 .map(|(i, segment)| FooterSegmentJson {
238 index: i,
239 offset: segment.offset,
240 end_offset: segment.offset + segment.length as u64,
241 length: segment.length,
242 alignment: *segment.alignment,
243 path: segment_paths[i]
244 .as_ref()
245 .map(|p| p.iter().map(|s| s.as_ref()).collect::<Vec<_>>().join(".")),
246 })
247 .collect();
248
249 FooterInfoJson {
250 total_segments: segment_map.len(),
251 total_data_size: segment_map.iter().map(|s| s.length as u64).sum(),
252 segments,
253 }
254 })
255 } else {
256 None
257 };
258
259 let output = InspectOutput {
260 file_path: file_path.display().to_string(),
261 file_size: inspector.file_size,
262 eof: eof_json,
263 postscript: postscript_json,
264 footer: footer_json,
265 };
266
267 let json_output = serde_json::to_string_pretty(&output)?;
268 println!("{json_output}");
269
270 Ok(())
271}
272
273async fn exec_inspect_text(
274 inspector: &mut VortexInspector<'_>,
275 file_path: &Path,
276 mode: InspectMode,
277) -> anyhow::Result<()> {
278 println!("File: {}", file_path.display());
279 println!("Size: {} bytes", inspector.file_size);
280 println!();
281
282 match mode {
283 InspectMode::Eof => {
284 let eof = inspector.read_eof()?;
285 eof.display();
286 }
287 InspectMode::Postscript => {
288 let eof = inspector.read_eof()?;
289 eof.display();
290
291 if !eof.valid_magic {
292 anyhow::bail!("Invalid magic bytes, cannot read postscript");
293 }
294
295 let postscript = inspector.read_postscript(eof.postscript_size)?;
296 postscript.display();
297 }
298 InspectMode::Footer => {
299 let eof = match inspector.read_eof() {
300 Ok(eof) => {
301 eof.display();
302 eof
303 }
304 Err(e) => {
305 eprintln!("Error reading EOF: {}", e);
306 return Err(e.into());
307 }
308 };
309
310 if !eof.valid_magic {
311 eprintln!("\nError: Invalid magic bytes, stopping here");
312 return Ok(());
313 }
314
315 match inspector.read_postscript(eof.postscript_size) {
316 Ok(ps) => {
317 ps.display();
318 }
319 Err(e) => {
320 eprintln!("\nError reading postscript: {}", e);
321 return Ok(());
322 }
323 };
324
325 match inspector.read_footer().await {
326 Ok(footer) => FooterSegments(footer).display(),
327 Err(e) => {
328 eprintln!("\nError reading footer segments: {}", e);
329 }
330 }
331 }
332 }
333
334 Ok(())
335}
336
337struct VortexInspector<'a> {
338 session: &'a VortexSession,
339 path: PathBuf,
340 file: File,
341 file_size: u64,
342}
343
344impl<'a> VortexInspector<'a> {
345 fn new(session: &'a VortexSession, path: PathBuf) -> VortexResult<Self> {
346 let mut file =
347 File::open(&path).map_err(|e| vortex_err!("Failed to open file {:?}: {}", path, e))?;
348
349 let file_size = file
350 .seek(SeekFrom::End(0))
351 .map_err(|e| vortex_err!("Failed to get file size: {}", e))?;
352
353 Ok(Self {
354 session,
355 path,
356 file,
357 file_size,
358 })
359 }
360
361 fn read_eof(&mut self) -> VortexResult<EofInfo> {
362 if self.file_size < EOF_SIZE as u64 {
363 vortex_bail!(
364 "File too small ({} bytes) to contain EOF marker (requires {} bytes)",
365 self.file_size,
366 EOF_SIZE
367 );
368 }
369
370 let mut eof_bytes = [0u8; EOF_SIZE];
371 self.file
372 .seek(SeekFrom::End(-(EOF_SIZE as i64)))
373 .map_err(|e| vortex_err!("Failed to seek to EOF: {}", e))?;
374 self.file
375 .read_exact(&mut eof_bytes)
376 .map_err(|e| vortex_err!("Failed to read EOF bytes: {}", e))?;
377
378 let version = u16::from_le_bytes([eof_bytes[0], eof_bytes[1]]);
379 let postscript_size = u16::from_le_bytes([eof_bytes[2], eof_bytes[3]]);
380 let magic_bytes = [eof_bytes[4], eof_bytes[5], eof_bytes[6], eof_bytes[7]];
381
382 Ok(EofInfo {
383 version,
384 postscript_size,
385 magic_bytes,
386 valid_magic: magic_bytes == MAGIC_BYTES,
387 })
388 }
389
390 fn read_postscript(&mut self, postscript_size: u16) -> VortexResult<PostscriptInfo> {
391 let postscript_offset = self.file_size - EOF_SIZE as u64 - postscript_size as u64;
392
393 let mut postscript_bytes = vec![0u8; postscript_size as usize];
394 self.file
395 .seek(SeekFrom::Start(postscript_offset))
396 .map_err(|e| vortex_err!("Failed to seek to postscript: {}", e))?;
397 self.file
398 .read_exact(&mut postscript_bytes)
399 .map_err(|e| vortex_err!("Failed to read postscript: {}", e))?;
400
401 let postscript_buffer = ByteBuffer::from(postscript_bytes);
402 let fb_postscript = root::<fb::Postscript>(&postscript_buffer)
403 .map_err(|e| vortex_err!("Failed to parse postscript flatbuffer: {}", e))?;
404
405 let dtype = fb_postscript.dtype().map(|s| SegmentInfo {
406 offset: s.offset(),
407 length: s.length(),
408 alignment: Alignment::from_exponent(s.alignment_exponent()),
409 });
410
411 let layout = fb_postscript
412 .layout()
413 .map(|s| SegmentInfo {
414 offset: s.offset(),
415 length: s.length(),
416 alignment: Alignment::from_exponent(s.alignment_exponent()),
417 })
418 .ok_or_else(|| vortex_err!("Postscript missing layout segment"))?;
419
420 let statistics = fb_postscript.statistics().map(|s| SegmentInfo {
421 offset: s.offset(),
422 length: s.length(),
423 alignment: Alignment::from_exponent(s.alignment_exponent()),
424 });
425
426 let footer = fb_postscript
427 .footer()
428 .map(|s| SegmentInfo {
429 offset: s.offset(),
430 length: s.length(),
431 alignment: Alignment::from_exponent(s.alignment_exponent()),
432 })
433 .ok_or_else(|| vortex_err!("Postscript missing footer segment"))?;
434
435 Ok(PostscriptInfo {
436 dtype,
437 layout,
438 statistics,
439 footer,
440 })
441 }
442
443 async fn read_footer(&mut self) -> VortexResult<Footer> {
444 Ok(self
445 .session
446 .open_options()
447 .open_path(self.path.as_path())
448 .await?
449 .footer()
450 .clone())
451 }
452}
453
/// Values decoded from the EOF marker at the very end of the file.
#[derive(Debug)]
struct EofInfo {
    /// Version number stored in the first two bytes of the marker.
    version: u16,
    /// Postscript size in bytes, stored in the next two bytes of the marker.
    postscript_size: u16,
    /// The four magic bytes that close the marker.
    magic_bytes: [u8; 4],
    /// Whether `magic_bytes` equals the expected `MAGIC_BYTES`.
    valid_magic: bool,
}
461
462#[derive(Debug, Clone)]
463struct SegmentInfo {
464 offset: u64,
465 length: u32,
466 alignment: Alignment,
467}
468
469#[derive(Debug)]
470struct PostscriptInfo {
471 pub dtype: Option<SegmentInfo>,
472 pub layout: SegmentInfo,
473 pub statistics: Option<SegmentInfo>,
474 pub footer: SegmentInfo,
475}
476
477#[derive(Debug)]
478struct FooterSegments(Footer);
479
480impl EofInfo {
481 fn display(&self) {
482 println!("=== EOF Marker ===");
483 println!("Version: {} (current: {})", self.version, VERSION);
484 println!("Postscript size: {} bytes", self.postscript_size);
485 println!(
486 "Magic bytes: {} ({})",
487 std::str::from_utf8(&self.magic_bytes).unwrap_or("<invalid utf8>"),
488 if self.valid_magic { "VALID" } else { "INVALID" }
489 );
490
491 if self.postscript_size > MAX_POSTSCRIPT_SIZE {
492 println!(
493 "WARNING: Postscript size exceeds maximum ({} > {})",
494 self.postscript_size, MAX_POSTSCRIPT_SIZE
495 );
496 }
497 }
498}
499
500impl SegmentInfo {
501 fn display(&self, name: &str) {
502 println!(
503 " {}: offset={}, length={}, alignment={}",
504 name, self.offset, self.length, self.alignment
505 );
506 }
507}
508
509impl PostscriptInfo {
510 fn display(&self) {
511 println!("\n=== Postscript ===");
512 if let Some(ref dtype) = self.dtype {
513 dtype.display("DType");
514 } else {
515 println!(" DType: <not embedded>");
516 }
517 self.layout.display("Layout");
518 if let Some(ref stats) = self.statistics {
519 stats.display("Statistics");
520 } else {
521 println!(" Statistics: <not present>");
522 }
523 self.footer.display("Footer");
524 }
525}
526
527impl FooterSegments {
528 fn display(&self) {
529 println!("\n=== Footer Segments ===");
530 println!("Total segments: {}", self.0.segment_map().len());
531 let total_size = self
532 .0
533 .segment_map()
534 .iter()
535 .map(|s| s.length as u64)
536 .sum::<u64>();
537 println!("Total data size: {} bytes", total_size);
538
539 println!("\nSegment details:\n");
540
541 let segment_map = self.0.segment_map().clone();
542 if segment_map.is_empty() {
543 println!("<no segments>");
544 return;
545 }
546
547 let mut segment_paths: Vec<Option<Vec<Arc<str>>>> = vec![None; segment_map.len()];
548 let root_layout = self.0.layout().clone();
549
550 let mut queue =
551 VecDeque::<(Vec<Arc<str>>, LayoutRef)>::from_iter([(Vec::new(), root_layout)]);
552 while !queue.is_empty() {
553 let (path, layout) = queue.pop_front().vortex_expect("queue is not empty");
554 for segment in layout.segment_ids() {
555 segment_paths[*segment as usize] = Some(path.clone());
556 }
557
558 for (child_layout, child_name) in layout
559 .children()
560 .vortex_expect("Failed to deserialize children")
561 .into_iter()
562 .zip(layout.child_names())
563 {
564 let child_path = path.iter().cloned().chain([child_name]).collect();
565 queue.push_back((child_path, child_layout));
566 }
567 }
568
569 let max_offset = segment_map.last().vortex_expect("non-empty").offset;
571 let max_length = segment_map
572 .iter()
573 .map(|s| s.length)
574 .max()
575 .vortex_expect("non-empty");
576 let max_alignment = segment_map
577 .iter()
578 .map(|s| s.alignment)
579 .max()
580 .vortex_expect("non-empty");
581
582 let offset_width = max_offset.to_string().len();
584 let end_width = (max_offset + max_length as u64).to_string().len();
585 let length_width = max_length.to_string().len().max(6);
586 let alignment_width = max_alignment.to_string().len().max(5);
587 let index_width = segment_paths.len().to_string().len();
588
589 println!(
591 "{:>index_w$} {:>offset_w$}..{:<end_w$} {:>length_w$} {:>align_w$} Path",
592 "#",
593 "Start",
594 "End",
595 "Length",
596 "Align",
597 index_w = index_width,
598 offset_w = offset_width,
599 end_w = end_width,
600 length_w = length_width,
601 align_w = alignment_width,
602 );
603
604 for (i, name) in segment_paths.iter().enumerate() {
605 let segment = &segment_map[i];
606 let end_offset = segment.offset + segment.length as u64;
607
608 print!(
609 "{:>index_w$} {:>offset_w$}..{:<end_w$} ",
610 i,
611 segment.offset,
612 end_offset,
613 index_w = index_width,
614 offset_w = offset_width,
615 end_w = end_width,
616 );
617 print!(
618 "{:>length_w$} {:>align_w$} ",
619 segment.length,
620 *segment.alignment,
621 length_w = length_width,
622 align_w = alignment_width,
623 );
624 println!(
625 "{}",
626 match name.as_ref() {
627 Some(path) => format!("{}", path.iter().format(".")),
628 None => "<missing>".to_string(),
629 }
630 );
631 }
632 }
633}