Skip to main content

vortex_tui/
inspect.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Inspect Vortex file metadata and structure.
5
6use std::collections::VecDeque;
7use std::fs::File;
8use std::io::Read;
9use std::io::Seek;
10use std::io::SeekFrom;
11use std::path::Path;
12use std::path::PathBuf;
13use std::sync::Arc;
14
15use flatbuffers::root;
16use itertools::Itertools;
17use serde::Serialize;
18use vortex::buffer::Alignment;
19use vortex::buffer::ByteBuffer;
20use vortex::error::VortexExpect;
21use vortex::error::VortexResult;
22use vortex::error::vortex_bail;
23use vortex::error::vortex_err;
24use vortex::file::EOF_SIZE;
25use vortex::file::Footer;
26use vortex::file::MAGIC_BYTES;
27use vortex::file::MAX_POSTSCRIPT_SIZE;
28use vortex::file::OpenOptionsSessionExt;
29use vortex::file::VERSION;
30use vortex::flatbuffers::footer as fb;
31use vortex::layout::LayoutRef;
32use vortex::session::VortexSession;
33
/// Command-line arguments for the inspect command.
// NOTE: with clap's derive API the `///` comments on this struct and its fields double
// as the CLI help text, so rewording them changes the `--help` output.
#[derive(Debug, clap::Parser)]
pub struct InspectArgs {
    /// What to inspect.
    // When no subcommand is given, `exec_inspect` falls back to `InspectMode::Footer`.
    #[clap(subcommand)]
    pub mode: Option<InspectMode>,

    /// Path to the Vortex file to inspect.
    pub file: PathBuf,

    /// Output as JSON
    // Selects `exec_inspect_json` instead of `exec_inspect_text`; declared `global`
    // so clap also accepts the flag after the subcommand.
    #[arg(long, global = true)]
    pub json: bool,
}
48
/// What component of the Vortex file to inspect.
// NOTE: the `///` comments on the variants are picked up by clap as subcommand help.
// The modes are cumulative in the text output: `Postscript` also prints the EOF marker,
// and `Footer` prints the EOF marker, the postscript and the footer segments.
#[derive(Debug, clap::Subcommand)]
pub enum InspectMode {
    /// Read and display the EOF marker (8 bytes at end of file).
    Eof,

    /// Read and display the postscript
    Postscript,

    /// Read and display all footer segments
    Footer,
}
61
/// JSON output structure for inspect command.
///
/// This is the top-level document that `exec_inspect_json` pretty-prints to stdout.
#[derive(Serialize)]
pub struct InspectOutput {
    /// Path to the inspected file, rendered with `Path::display`.
    pub file_path: String,
    /// Size of the file in bytes.
    pub file_size: u64,
    /// EOF marker information; always present.
    pub eof: EofInfoJson,
    /// Postscript information (if available).
    ///
    /// Omitted from the JSON entirely when the postscript was not requested or could
    /// not be read.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub postscript: Option<PostscriptInfoJson>,
    /// Footer information (if available).
    ///
    /// Omitted from the JSON entirely when the footer was not requested or could not
    /// be read.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub footer: Option<FooterInfoJson>,
}
78
/// EOF marker information for JSON output.
///
/// Mirrors the internal `EofInfo`, plus the version this build supports.
#[derive(Serialize)]
pub struct EofInfoJson {
    /// File format version as stored in the file's EOF marker.
    pub version: u16,
    /// Current supported version (the `VERSION` constant of this build).
    pub current_version: u16,
    /// Postscript size in bytes, as stored in the EOF marker.
    pub postscript_size: u16,
    /// Magic bytes as string, or `"<invalid utf8>"` when they are not valid UTF-8.
    pub magic_bytes: String,
    /// Whether magic bytes are valid, i.e. equal to `MAGIC_BYTES`.
    pub valid_magic: bool,
}
93
/// Segment information for JSON output.
///
/// Describes one of the segments referenced by the postscript.
#[derive(Serialize)]
pub struct SegmentInfoJson {
    /// Offset of the segment in the file, in bytes.
    pub offset: u64,
    /// Length of the segment in bytes.
    pub length: u32,
    /// Alignment requirement, as the plain integer obtained by dereferencing `Alignment`.
    pub alignment: usize,
}
104
/// Postscript information for JSON output.
///
/// `layout` and `footer` are mandatory: reading the postscript fails if either is
/// missing. `dtype` and `statistics` are optional and carry no `skip_serializing_if`,
/// so they stay in the output even when absent.
#[derive(Serialize)]
pub struct PostscriptInfoJson {
    /// DType segment info, if the postscript references one.
    pub dtype: Option<SegmentInfoJson>,
    /// Layout segment info.
    pub layout: SegmentInfoJson,
    /// Statistics segment info, if the postscript references one.
    pub statistics: Option<SegmentInfoJson>,
    /// Footer segment info.
    pub footer: SegmentInfoJson,
}
117
/// Footer information for JSON output.
///
/// Summarises the footer's segment map.
#[derive(Serialize)]
pub struct FooterInfoJson {
    /// Total number of segments in the segment map.
    pub total_segments: usize,
    /// Total data size in bytes: the sum of all segment lengths.
    pub total_data_size: u64,
    /// Individual segment details, in segment-map order.
    pub segments: Vec<FooterSegmentJson>,
}
128
/// Footer segment information for JSON output.
#[derive(Serialize)]
pub struct FooterSegmentJson {
    /// Segment index (position within the segment map).
    pub index: usize,
    /// Start offset in file, in bytes.
    pub offset: u64,
    /// End offset in file, computed as `offset + length`.
    pub end_offset: u64,
    /// Length in bytes.
    pub length: u32,
    /// Alignment requirement.
    pub alignment: usize,
    /// Path in layout tree: the child names from the root layout down to the layout
    /// that references this segment, joined with `.`. `None` when no layout
    /// references the segment.
    pub path: Option<String>,
}
145
146/// Inspect Vortex file footer and metadata.
147///
148/// # Errors
149///
150/// Returns an error if the file cannot be opened or its metadata cannot be read.
151pub async fn exec_inspect(session: &VortexSession, args: InspectArgs) -> anyhow::Result<()> {
152    let mut inspector = VortexInspector::new(session, args.file.clone())?;
153
154    let mode = args.mode.unwrap_or(InspectMode::Footer);
155
156    if args.json {
157        exec_inspect_json(&mut inspector, &args.file, mode).await
158    } else {
159        exec_inspect_text(&mut inspector, &args.file, mode).await
160    }
161}
162
163async fn exec_inspect_json(
164    inspector: &mut VortexInspector<'_>,
165    file_path: &Path,
166    mode: InspectMode,
167) -> anyhow::Result<()> {
168    let eof = inspector.read_eof()?;
169    let eof_json = EofInfoJson {
170        version: eof.version,
171        current_version: VERSION,
172        postscript_size: eof.postscript_size,
173        magic_bytes: std::str::from_utf8(&eof.magic_bytes)
174            .unwrap_or("<invalid utf8>")
175            .to_string(),
176        valid_magic: eof.valid_magic,
177    };
178
179    let postscript_json =
180        if matches!(mode, InspectMode::Postscript | InspectMode::Footer) && eof.valid_magic {
181            inspector
182                .read_postscript(eof.postscript_size)
183                .ok()
184                .map(|ps| PostscriptInfoJson {
185                    dtype: ps.dtype.map(|s| SegmentInfoJson {
186                        offset: s.offset,
187                        length: s.length,
188                        alignment: *s.alignment,
189                    }),
190                    layout: SegmentInfoJson {
191                        offset: ps.layout.offset,
192                        length: ps.layout.length,
193                        alignment: *ps.layout.alignment,
194                    },
195                    statistics: ps.statistics.map(|s| SegmentInfoJson {
196                        offset: s.offset,
197                        length: s.length,
198                        alignment: *s.alignment,
199                    }),
200                    footer: SegmentInfoJson {
201                        offset: ps.footer.offset,
202                        length: ps.footer.length,
203                        alignment: *ps.footer.alignment,
204                    },
205                })
206        } else {
207            None
208        };
209
210    let footer_json =
211        if matches!(mode, InspectMode::Footer) && eof.valid_magic && postscript_json.is_some() {
212            inspector.read_footer().await.ok().map(|footer| {
213                let segment_map = footer.segment_map().clone();
214                let root_layout = footer.layout().clone();
215
216                let mut segment_paths: Vec<Option<Vec<Arc<str>>>> = vec![None; segment_map.len()];
217                let mut queue =
218                    VecDeque::<(Vec<Arc<str>>, LayoutRef)>::from_iter([(Vec::new(), root_layout)]);
219                while !queue.is_empty() {
220                    let (path, layout) = queue.pop_front().vortex_expect("queue is not empty");
221                    for segment in layout.segment_ids() {
222                        segment_paths[*segment as usize] = Some(path.clone());
223                    }
224                    if let Ok(children) = layout.children() {
225                        for (child_layout, child_name) in
226                            children.into_iter().zip(layout.child_names())
227                        {
228                            let child_path = path.iter().cloned().chain([child_name]).collect();
229                            queue.push_back((child_path, child_layout));
230                        }
231                    }
232                }
233
234                let segments: Vec<FooterSegmentJson> = segment_map
235                    .iter()
236                    .enumerate()
237                    .map(|(i, segment)| FooterSegmentJson {
238                        index: i,
239                        offset: segment.offset,
240                        end_offset: segment.offset + segment.length as u64,
241                        length: segment.length,
242                        alignment: *segment.alignment,
243                        path: segment_paths[i]
244                            .as_ref()
245                            .map(|p| p.iter().map(|s| s.as_ref()).collect::<Vec<_>>().join(".")),
246                    })
247                    .collect();
248
249                FooterInfoJson {
250                    total_segments: segment_map.len(),
251                    total_data_size: segment_map.iter().map(|s| s.length as u64).sum(),
252                    segments,
253                }
254            })
255        } else {
256            None
257        };
258
259    let output = InspectOutput {
260        file_path: file_path.display().to_string(),
261        file_size: inspector.file_size,
262        eof: eof_json,
263        postscript: postscript_json,
264        footer: footer_json,
265    };
266
267    let json_output = serde_json::to_string_pretty(&output)?;
268    println!("{json_output}");
269
270    Ok(())
271}
272
273async fn exec_inspect_text(
274    inspector: &mut VortexInspector<'_>,
275    file_path: &Path,
276    mode: InspectMode,
277) -> anyhow::Result<()> {
278    println!("File: {}", file_path.display());
279    println!("Size: {} bytes", inspector.file_size);
280    println!();
281
282    match mode {
283        InspectMode::Eof => {
284            let eof = inspector.read_eof()?;
285            eof.display();
286        }
287        InspectMode::Postscript => {
288            let eof = inspector.read_eof()?;
289            eof.display();
290
291            if !eof.valid_magic {
292                anyhow::bail!("Invalid magic bytes, cannot read postscript");
293            }
294
295            let postscript = inspector.read_postscript(eof.postscript_size)?;
296            postscript.display();
297        }
298        InspectMode::Footer => {
299            let eof = match inspector.read_eof() {
300                Ok(eof) => {
301                    eof.display();
302                    eof
303                }
304                Err(e) => {
305                    eprintln!("Error reading EOF: {}", e);
306                    return Err(e.into());
307                }
308            };
309
310            if !eof.valid_magic {
311                eprintln!("\nError: Invalid magic bytes, stopping here");
312                return Ok(());
313            }
314
315            match inspector.read_postscript(eof.postscript_size) {
316                Ok(ps) => {
317                    ps.display();
318                }
319                Err(e) => {
320                    eprintln!("\nError reading postscript: {}", e);
321                    return Ok(());
322                }
323            };
324
325            match inspector.read_footer().await {
326                Ok(footer) => FooterSegments(footer).display(),
327                Err(e) => {
328                    eprintln!("\nError reading footer segments: {}", e);
329                }
330            }
331        }
332    }
333
334    Ok(())
335}
336
/// Reads the trailing metadata of a Vortex file (EOF marker, postscript, footer).
struct VortexInspector<'a> {
    /// Session used by `read_footer` to open the file.
    session: &'a VortexSession,
    /// Path of the inspected file; `read_footer` re-opens the file from this path.
    path: PathBuf,
    /// Open handle used for the raw EOF-marker and postscript reads.
    file: File,
    /// Total file size in bytes, determined once in `new` by seeking to the end.
    file_size: u64,
}
343
344impl<'a> VortexInspector<'a> {
345    fn new(session: &'a VortexSession, path: PathBuf) -> VortexResult<Self> {
346        let mut file =
347            File::open(&path).map_err(|e| vortex_err!("Failed to open file {:?}: {}", path, e))?;
348
349        let file_size = file
350            .seek(SeekFrom::End(0))
351            .map_err(|e| vortex_err!("Failed to get file size: {}", e))?;
352
353        Ok(Self {
354            session,
355            path,
356            file,
357            file_size,
358        })
359    }
360
361    fn read_eof(&mut self) -> VortexResult<EofInfo> {
362        if self.file_size < EOF_SIZE as u64 {
363            vortex_bail!(
364                "File too small ({} bytes) to contain EOF marker (requires {} bytes)",
365                self.file_size,
366                EOF_SIZE
367            );
368        }
369
370        let mut eof_bytes = [0u8; EOF_SIZE];
371        self.file
372            .seek(SeekFrom::End(-(EOF_SIZE as i64)))
373            .map_err(|e| vortex_err!("Failed to seek to EOF: {}", e))?;
374        self.file
375            .read_exact(&mut eof_bytes)
376            .map_err(|e| vortex_err!("Failed to read EOF bytes: {}", e))?;
377
378        let version = u16::from_le_bytes([eof_bytes[0], eof_bytes[1]]);
379        let postscript_size = u16::from_le_bytes([eof_bytes[2], eof_bytes[3]]);
380        let magic_bytes = [eof_bytes[4], eof_bytes[5], eof_bytes[6], eof_bytes[7]];
381
382        Ok(EofInfo {
383            version,
384            postscript_size,
385            magic_bytes,
386            valid_magic: magic_bytes == MAGIC_BYTES,
387        })
388    }
389
390    fn read_postscript(&mut self, postscript_size: u16) -> VortexResult<PostscriptInfo> {
391        let postscript_offset = self.file_size - EOF_SIZE as u64 - postscript_size as u64;
392
393        let mut postscript_bytes = vec![0u8; postscript_size as usize];
394        self.file
395            .seek(SeekFrom::Start(postscript_offset))
396            .map_err(|e| vortex_err!("Failed to seek to postscript: {}", e))?;
397        self.file
398            .read_exact(&mut postscript_bytes)
399            .map_err(|e| vortex_err!("Failed to read postscript: {}", e))?;
400
401        let postscript_buffer = ByteBuffer::from(postscript_bytes);
402        let fb_postscript = root::<fb::Postscript>(&postscript_buffer)
403            .map_err(|e| vortex_err!("Failed to parse postscript flatbuffer: {}", e))?;
404
405        let dtype = fb_postscript.dtype().map(|s| SegmentInfo {
406            offset: s.offset(),
407            length: s.length(),
408            alignment: Alignment::from_exponent(s.alignment_exponent()),
409        });
410
411        let layout = fb_postscript
412            .layout()
413            .map(|s| SegmentInfo {
414                offset: s.offset(),
415                length: s.length(),
416                alignment: Alignment::from_exponent(s.alignment_exponent()),
417            })
418            .ok_or_else(|| vortex_err!("Postscript missing layout segment"))?;
419
420        let statistics = fb_postscript.statistics().map(|s| SegmentInfo {
421            offset: s.offset(),
422            length: s.length(),
423            alignment: Alignment::from_exponent(s.alignment_exponent()),
424        });
425
426        let footer = fb_postscript
427            .footer()
428            .map(|s| SegmentInfo {
429                offset: s.offset(),
430                length: s.length(),
431                alignment: Alignment::from_exponent(s.alignment_exponent()),
432            })
433            .ok_or_else(|| vortex_err!("Postscript missing footer segment"))?;
434
435        Ok(PostscriptInfo {
436            dtype,
437            layout,
438            statistics,
439            footer,
440        })
441    }
442
443    async fn read_footer(&mut self) -> VortexResult<Footer> {
444        Ok(self
445            .session
446            .open_options()
447            .open_path(self.path.as_path())
448            .await?
449            .footer()
450            .clone())
451    }
452}
453
/// Decoded contents of the EOF marker, as produced by `VortexInspector::read_eof`.
#[derive(Debug)]
struct EofInfo {
    /// Format version: bytes 0-1 of the marker, little endian.
    version: u16,
    /// Postscript size in bytes: bytes 2-3 of the marker, little endian.
    postscript_size: u16,
    /// Raw magic bytes: bytes 4-7 of the marker.
    magic_bytes: [u8; 4],
    /// Whether `magic_bytes` equals `MAGIC_BYTES`.
    valid_magic: bool,
}
461
/// Location of one segment referenced by the postscript.
#[derive(Debug, Clone)]
struct SegmentInfo {
    /// Offset of the segment in the file.
    offset: u64,
    /// Length of the segment in bytes.
    length: u32,
    /// Alignment of the segment, built from the flatbuffer's alignment exponent.
    alignment: Alignment,
}
468
/// Parsed postscript, as produced by `VortexInspector::read_postscript`.
#[derive(Debug)]
struct PostscriptInfo {
    /// DType segment; `None` when the postscript does not reference one.
    pub dtype: Option<SegmentInfo>,
    /// Layout segment (mandatory).
    pub layout: SegmentInfo,
    /// Statistics segment; `None` when the postscript does not reference one.
    pub statistics: Option<SegmentInfo>,
    /// Footer segment (mandatory).
    pub footer: SegmentInfo,
}
476
/// Newtype around a [`Footer`] that provides the text rendering of its segment map.
#[derive(Debug)]
struct FooterSegments(Footer);
479
480impl EofInfo {
481    fn display(&self) {
482        println!("=== EOF Marker ===");
483        println!("Version: {} (current: {})", self.version, VERSION);
484        println!("Postscript size: {} bytes", self.postscript_size);
485        println!(
486            "Magic bytes: {} ({})",
487            std::str::from_utf8(&self.magic_bytes).unwrap_or("<invalid utf8>"),
488            if self.valid_magic { "VALID" } else { "INVALID" }
489        );
490
491        if self.postscript_size > MAX_POSTSCRIPT_SIZE {
492            println!(
493                "WARNING: Postscript size exceeds maximum ({} > {})",
494                self.postscript_size, MAX_POSTSCRIPT_SIZE
495            );
496        }
497    }
498}
499
500impl SegmentInfo {
501    fn display(&self, name: &str) {
502        println!(
503            "  {}: offset={}, length={}, alignment={}",
504            name, self.offset, self.length, self.alignment
505        );
506    }
507}
508
509impl PostscriptInfo {
510    fn display(&self) {
511        println!("\n=== Postscript ===");
512        if let Some(ref dtype) = self.dtype {
513            dtype.display("DType");
514        } else {
515            println!("  DType: <not embedded>");
516        }
517        self.layout.display("Layout");
518        if let Some(ref stats) = self.statistics {
519            stats.display("Statistics");
520        } else {
521            println!("  Statistics: <not present>");
522        }
523        self.footer.display("Footer");
524    }
525}
526
527impl FooterSegments {
528    fn display(&self) {
529        println!("\n=== Footer Segments ===");
530        println!("Total segments: {}", self.0.segment_map().len());
531        let total_size = self
532            .0
533            .segment_map()
534            .iter()
535            .map(|s| s.length as u64)
536            .sum::<u64>();
537        println!("Total data size: {} bytes", total_size);
538
539        println!("\nSegment details:\n");
540
541        let segment_map = self.0.segment_map().clone();
542        if segment_map.is_empty() {
543            println!("<no segments>");
544            return;
545        }
546
547        let mut segment_paths: Vec<Option<Vec<Arc<str>>>> = vec![None; segment_map.len()];
548        let root_layout = self.0.layout().clone();
549
550        let mut queue =
551            VecDeque::<(Vec<Arc<str>>, LayoutRef)>::from_iter([(Vec::new(), root_layout)]);
552        while !queue.is_empty() {
553            let (path, layout) = queue.pop_front().vortex_expect("queue is not empty");
554            for segment in layout.segment_ids() {
555                segment_paths[*segment as usize] = Some(path.clone());
556            }
557
558            for (child_layout, child_name) in layout
559                .children()
560                .vortex_expect("Failed to deserialize children")
561                .into_iter()
562                .zip(layout.child_names())
563            {
564                let child_path = path.iter().cloned().chain([child_name]).collect();
565                queue.push_back((child_path, child_layout));
566            }
567        }
568
569        // Find the largest values for formatting
570        let max_offset = segment_map.last().vortex_expect("non-empty").offset;
571        let max_length = segment_map
572            .iter()
573            .map(|s| s.length)
574            .max()
575            .vortex_expect("non-empty");
576        let max_alignment = segment_map
577            .iter()
578            .map(|s| s.alignment)
579            .max()
580            .vortex_expect("non-empty");
581
582        // Calculate all widths
583        let offset_width = max_offset.to_string().len();
584        let end_width = (max_offset + max_length as u64).to_string().len();
585        let length_width = max_length.to_string().len().max(6);
586        let alignment_width = max_alignment.to_string().len().max(5);
587        let index_width = segment_paths.len().to_string().len();
588
589        // Print header
590        println!(
591            "{:>index_w$}  {:>offset_w$}..{:<end_w$}  {:>length_w$}  {:>align_w$}  Path",
592            "#",
593            "Start",
594            "End",
595            "Length",
596            "Align",
597            index_w = index_width,
598            offset_w = offset_width,
599            end_w = end_width,
600            length_w = length_width,
601            align_w = alignment_width,
602        );
603
604        for (i, name) in segment_paths.iter().enumerate() {
605            let segment = &segment_map[i];
606            let end_offset = segment.offset + segment.length as u64;
607
608            print!(
609                "{:>index_w$}  {:>offset_w$}..{:<end_w$}  ",
610                i,
611                segment.offset,
612                end_offset,
613                index_w = index_width,
614                offset_w = offset_width,
615                end_w = end_width,
616            );
617            print!(
618                "{:>length_w$}  {:>align_w$}  ",
619                segment.length,
620                *segment.alignment,
621                length_w = length_width,
622                align_w = alignment_width,
623            );
624            println!(
625                "{}",
626                match name.as_ref() {
627                    Some(path) => format!("{}", path.iter().format(".")),
628                    None => "<missing>".to_string(),
629                }
630            );
631        }
632    }
633}