vortex_tui/
inspect.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Inspect Vortex file metadata and structure.
5
6use std::collections::VecDeque;
7use std::fs::File;
8use std::io::Read;
9use std::io::Seek;
10use std::io::SeekFrom;
11use std::path::PathBuf;
12use std::sync::Arc;
13
14use flatbuffers::root;
15use itertools::Itertools;
16use vortex::buffer::Alignment;
17use vortex::buffer::ByteBuffer;
18use vortex::error::VortexExpect;
19use vortex::error::VortexResult;
20use vortex::error::vortex_bail;
21use vortex::error::vortex_err;
22use vortex::file::EOF_SIZE;
23use vortex::file::Footer;
24use vortex::file::MAGIC_BYTES;
25use vortex::file::MAX_POSTSCRIPT_SIZE;
26use vortex::file::OpenOptionsSessionExt;
27use vortex::file::VERSION;
28use vortex::flatbuffers::footer as fb;
29use vortex::layout::LayoutRef;
30use vortex::session::VortexSession;
31
32/// Command-line arguments for the inspect command.
33#[derive(Debug, clap::Parser)]
34pub struct InspectArgs {
35    /// What to inspect.
36    #[clap(subcommand)]
37    pub mode: Option<InspectMode>,
38
39    /// Path to the Vortex file to inspect.
40    pub file: PathBuf,
41}
42
43/// What component of the Vortex file to inspect.
44#[derive(Debug, clap::Subcommand)]
45pub enum InspectMode {
46    /// Read and display the EOF marker (8 bytes at end of file).
47    Eof,
48
49    /// Read and display the postscript
50    Postscript,
51
52    /// Read and display all footer segments
53    Footer,
54}
55
56/// Inspect Vortex file footer and metadata.
57///
58/// # Errors
59///
60/// Returns an error if the file cannot be opened or its metadata cannot be read.
61pub async fn exec_inspect(session: &VortexSession, args: InspectArgs) -> anyhow::Result<()> {
62    let mut inspector = VortexInspector::new(session, args.file.clone())?;
63
64    println!("File: {}", args.file.display());
65    println!("Size: {} bytes", inspector.file_size);
66    println!();
67
68    let mode = args.mode.unwrap_or(InspectMode::Footer);
69
70    match mode {
71        InspectMode::Eof => {
72            let eof = inspector.read_eof()?;
73            eof.display();
74        }
75        InspectMode::Postscript => {
76            let eof = inspector.read_eof()?;
77            eof.display();
78
79            if !eof.valid_magic {
80                anyhow::bail!("Invalid magic bytes, cannot read postscript");
81            }
82
83            let postscript = inspector.read_postscript(eof.postscript_size)?;
84            postscript.display();
85        }
86        InspectMode::Footer => {
87            let eof = match inspector.read_eof() {
88                Ok(eof) => {
89                    eof.display();
90                    eof
91                }
92                Err(e) => {
93                    eprintln!("Error reading EOF: {}", e);
94                    return Err(e.into());
95                }
96            };
97
98            if !eof.valid_magic {
99                eprintln!("\nError: Invalid magic bytes, stopping here");
100                return Ok(());
101            }
102
103            match inspector.read_postscript(eof.postscript_size) {
104                Ok(ps) => {
105                    ps.display();
106                }
107                Err(e) => {
108                    eprintln!("\nError reading postscript: {}", e);
109                    return Ok(());
110                }
111            };
112
113            match inspector.read_footer().await {
114                Ok(footer) => FooterSegments(footer).display(),
115                Err(e) => {
116                    eprintln!("\nError reading footer segments: {}", e);
117                }
118            }
119        }
120    }
121
122    Ok(())
123}
124
125struct VortexInspector<'a> {
126    session: &'a VortexSession,
127    path: PathBuf,
128    file: File,
129    file_size: u64,
130}
131
132impl<'a> VortexInspector<'a> {
133    fn new(session: &'a VortexSession, path: PathBuf) -> VortexResult<Self> {
134        let mut file =
135            File::open(&path).map_err(|e| vortex_err!("Failed to open file {:?}: {}", path, e))?;
136
137        let file_size = file
138            .seek(SeekFrom::End(0))
139            .map_err(|e| vortex_err!("Failed to get file size: {}", e))?;
140
141        Ok(Self {
142            session,
143            path,
144            file,
145            file_size,
146        })
147    }
148
149    fn read_eof(&mut self) -> VortexResult<EofInfo> {
150        if self.file_size < EOF_SIZE as u64 {
151            vortex_bail!(
152                "File too small ({} bytes) to contain EOF marker (requires {} bytes)",
153                self.file_size,
154                EOF_SIZE
155            );
156        }
157
158        let mut eof_bytes = [0u8; EOF_SIZE];
159        self.file
160            .seek(SeekFrom::End(-(EOF_SIZE as i64)))
161            .map_err(|e| vortex_err!("Failed to seek to EOF: {}", e))?;
162        self.file
163            .read_exact(&mut eof_bytes)
164            .map_err(|e| vortex_err!("Failed to read EOF bytes: {}", e))?;
165
166        let version = u16::from_le_bytes([eof_bytes[0], eof_bytes[1]]);
167        let postscript_size = u16::from_le_bytes([eof_bytes[2], eof_bytes[3]]);
168        let magic_bytes = [eof_bytes[4], eof_bytes[5], eof_bytes[6], eof_bytes[7]];
169
170        Ok(EofInfo {
171            version,
172            postscript_size,
173            magic_bytes,
174            valid_magic: magic_bytes == MAGIC_BYTES,
175        })
176    }
177
178    fn read_postscript(&mut self, postscript_size: u16) -> VortexResult<PostscriptInfo> {
179        let postscript_offset = self.file_size - EOF_SIZE as u64 - postscript_size as u64;
180
181        let mut postscript_bytes = vec![0u8; postscript_size as usize];
182        self.file
183            .seek(SeekFrom::Start(postscript_offset))
184            .map_err(|e| vortex_err!("Failed to seek to postscript: {}", e))?;
185        self.file
186            .read_exact(&mut postscript_bytes)
187            .map_err(|e| vortex_err!("Failed to read postscript: {}", e))?;
188
189        let postscript_buffer = ByteBuffer::from(postscript_bytes);
190        let fb_postscript = root::<fb::Postscript>(&postscript_buffer)
191            .map_err(|e| vortex_err!("Failed to parse postscript flatbuffer: {}", e))?;
192
193        let dtype = fb_postscript.dtype().map(|s| SegmentInfo {
194            offset: s.offset(),
195            length: s.length(),
196            alignment: Alignment::from_exponent(s.alignment_exponent()),
197        });
198
199        let layout = fb_postscript
200            .layout()
201            .map(|s| SegmentInfo {
202                offset: s.offset(),
203                length: s.length(),
204                alignment: Alignment::from_exponent(s.alignment_exponent()),
205            })
206            .ok_or_else(|| vortex_err!("Postscript missing layout segment"))?;
207
208        let statistics = fb_postscript.statistics().map(|s| SegmentInfo {
209            offset: s.offset(),
210            length: s.length(),
211            alignment: Alignment::from_exponent(s.alignment_exponent()),
212        });
213
214        let footer = fb_postscript
215            .footer()
216            .map(|s| SegmentInfo {
217                offset: s.offset(),
218                length: s.length(),
219                alignment: Alignment::from_exponent(s.alignment_exponent()),
220            })
221            .ok_or_else(|| vortex_err!("Postscript missing footer segment"))?;
222
223        Ok(PostscriptInfo {
224            dtype,
225            layout,
226            statistics,
227            footer,
228        })
229    }
230
231    async fn read_footer(&mut self) -> VortexResult<Footer> {
232        Ok(self
233            .session
234            .open_options()
235            .open(self.path.as_path())
236            .await?
237            .footer()
238            .clone())
239    }
240}
241
/// Decoded contents of the EOF marker read from the end of a file.
#[derive(Debug)]
struct EofInfo {
    /// Version number decoded from the first two marker bytes (little-endian).
    version: u16,
    /// Postscript size in bytes, decoded from marker bytes 2..4 (little-endian).
    postscript_size: u16,
    /// The four raw magic bytes taken from marker bytes 4..8.
    magic_bytes: [u8; 4],
    /// `true` when `magic_bytes` equals the expected `MAGIC_BYTES`.
    valid_magic: bool,
}
249
250#[derive(Debug, Clone)]
251struct SegmentInfo {
252    offset: u64,
253    length: u32,
254    alignment: Alignment,
255}
256
257#[derive(Debug)]
258struct PostscriptInfo {
259    pub dtype: Option<SegmentInfo>,
260    pub layout: SegmentInfo,
261    pub statistics: Option<SegmentInfo>,
262    pub footer: SegmentInfo,
263}
264
265#[derive(Debug)]
266struct FooterSegments(Footer);
267
268impl EofInfo {
269    fn display(&self) {
270        println!("=== EOF Marker ===");
271        println!("Version: {} (current: {})", self.version, VERSION);
272        println!("Postscript size: {} bytes", self.postscript_size);
273        println!(
274            "Magic bytes: {} ({})",
275            std::str::from_utf8(&self.magic_bytes).unwrap_or("<invalid utf8>"),
276            if self.valid_magic { "VALID" } else { "INVALID" }
277        );
278
279        if self.postscript_size > MAX_POSTSCRIPT_SIZE {
280            println!(
281                "WARNING: Postscript size exceeds maximum ({} > {})",
282                self.postscript_size, MAX_POSTSCRIPT_SIZE
283            );
284        }
285    }
286}
287
288impl SegmentInfo {
289    fn display(&self, name: &str) {
290        println!(
291            "  {}: offset={}, length={}, alignment={}",
292            name, self.offset, self.length, self.alignment
293        );
294    }
295}
296
297impl PostscriptInfo {
298    fn display(&self) {
299        println!("\n=== Postscript ===");
300        if let Some(ref dtype) = self.dtype {
301            dtype.display("DType");
302        } else {
303            println!("  DType: <not embedded>");
304        }
305        self.layout.display("Layout");
306        if let Some(ref stats) = self.statistics {
307            stats.display("Statistics");
308        } else {
309            println!("  Statistics: <not present>");
310        }
311        self.footer.display("Footer");
312    }
313}
314
315impl FooterSegments {
316    fn display(&self) {
317        println!("\n=== Footer Segments ===");
318        println!("Total segments: {}", self.0.segment_map().len());
319        let total_size = self
320            .0
321            .segment_map()
322            .iter()
323            .map(|s| s.length as u64)
324            .sum::<u64>();
325        println!("Total data size: {} bytes", total_size);
326
327        println!("\nSegment details:\n");
328
329        let segment_map = self.0.segment_map().clone();
330        if segment_map.is_empty() {
331            println!("<no segments>");
332            return;
333        }
334
335        let mut segment_paths: Vec<Option<Vec<Arc<str>>>> = vec![None; segment_map.len()];
336        let root_layout = self.0.layout().clone();
337
338        let mut queue =
339            VecDeque::<(Vec<Arc<str>>, LayoutRef)>::from_iter([(Vec::new(), root_layout)]);
340        while !queue.is_empty() {
341            let (path, layout) = queue.pop_front().vortex_expect("queue is not empty");
342            for segment in layout.segment_ids() {
343                segment_paths[*segment as usize] = Some(path.clone());
344            }
345
346            for (child_layout, child_name) in layout
347                .children()
348                .vortex_expect("Failed to deserialize children")
349                .into_iter()
350                .zip(layout.child_names())
351            {
352                let child_path = path.iter().cloned().chain([child_name]).collect();
353                queue.push_back((child_path, child_layout));
354            }
355        }
356
357        // Find the largest values for formatting
358        let max_offset = segment_map.last().vortex_expect("non-empty").offset;
359        let max_length = segment_map
360            .iter()
361            .map(|s| s.length)
362            .max()
363            .vortex_expect("non-empty");
364        let max_alignment = segment_map
365            .iter()
366            .map(|s| s.alignment)
367            .max()
368            .vortex_expect("non-empty");
369
370        // Calculate all widths
371        let offset_width = max_offset.to_string().len();
372        let end_width = (max_offset + max_length as u64).to_string().len();
373        let length_width = max_length.to_string().len().max(6);
374        let alignment_width = max_alignment.to_string().len().max(5);
375        let index_width = segment_paths.len().to_string().len();
376
377        // Print header
378        println!(
379            "{:>index_w$}  {:>offset_w$}..{:<end_w$}  {:>length_w$}  {:>align_w$}  Path",
380            "#",
381            "Start",
382            "End",
383            "Length",
384            "Align",
385            index_w = index_width,
386            offset_w = offset_width,
387            end_w = end_width,
388            length_w = length_width,
389            align_w = alignment_width,
390        );
391
392        for (i, name) in segment_paths.iter().enumerate() {
393            let segment = &segment_map[i];
394            let end_offset = segment.offset + segment.length as u64;
395
396            print!(
397                "{:>index_w$}  {:>offset_w$}..{:<end_w$}  ",
398                i,
399                segment.offset,
400                end_offset,
401                index_w = index_width,
402                offset_w = offset_width,
403                end_w = end_width,
404            );
405            print!(
406                "{:>length_w$}  {:>align_w$}  ",
407                segment.length,
408                *segment.alignment,
409                length_w = length_width,
410                align_w = alignment_width,
411            );
412            println!(
413                "{}",
414                match name.as_ref() {
415                    Some(path) => format!("{}", path.iter().format(".")),
416                    None => "<missing>".to_string(),
417                }
418            );
419        }
420    }
421}