use std::fs::File;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{Context, Result};
use clap::Parser;
use orc_rust::reader::metadata::read_metadata;
use orc_rust::stripe::Stripe;
// Command-line arguments for the file-information command handled by `run`.
//
// NOTE: plain `//` comments are used inside this block on purpose. With clap's
// derive API, `///` doc comments on the struct or its fields are picked up as
// help text, so adding them would change the generated `--help` output.
#[derive(Debug, Parser)]
#[command(about = "Display file metadata, schema, and basic information")]
pub struct Args {
// Paths of the files to inspect; at least one must be supplied. `run`
// processes them in the order given.
#[arg(required = true)]
files: Vec<PathBuf>,
// Short/long boolean flag. When set, `print_file_info` additionally prints a
// per-stripe "Stripe layout" section for each file.
#[arg(short, long)]
verbose: bool,
// Long-only boolean flag. When set, `run` prints just `<path>: <rows>` for
// each file and skips the full report (so `verbose` has no effect).
#[arg(long)]
row_count_only: bool,
}
pub fn run(args: Args) -> Result<()> {
if args.row_count_only {
for path in &args.files {
let mut file =
File::open(path).with_context(|| format!("failed to open {:?}", path.display()))?;
let metadata = read_metadata(&mut file)?;
println!("{}: {}", path.display(), metadata.number_of_rows());
}
return Ok(());
}
for (idx, path) in args.files.iter().enumerate() {
if idx > 0 {
println!("\n---\n");
}
print_file_info(path, args.verbose)?;
}
Ok(())
}
fn print_file_info(path: &PathBuf, verbose: bool) -> Result<()> {
let mut file =
File::open(path).with_context(|| format!("failed to open {:?}", path.display()))?;
let metadata = Arc::new(read_metadata(&mut file)?);
println!("File: {}", path.display());
println!("Format version: {}", metadata.file_format_version());
println!(
"Compression: {}",
metadata
.compression()
.map(|c| c.to_string())
.unwrap_or_else(|| "None".to_string())
);
if let Some(stride) = metadata.row_index_stride() {
println!("Row index stride: {stride}");
} else {
println!("Row index stride: None");
}
println!("Rows: {}", metadata.number_of_rows());
println!("Stripes: {}", metadata.stripe_metadatas().len());
println!();
println!("Schema:\n{}", metadata.root_data_type());
if verbose {
println!("\nStripe layout:");
for (idx, stripe_meta) in metadata.stripe_metadatas().iter().enumerate() {
let stripe = Stripe::new(&mut file, &metadata, metadata.root_data_type(), stripe_meta)?;
println!("Stripe {idx}:");
println!(" offset: {}", stripe_meta.offset());
println!(" index length: {}", stripe_meta.index_length());
println!(" data length: {}", stripe_meta.data_length());
println!(" footer length: {}", stripe_meta.footer_length());
println!(" rows: {}", stripe.number_of_rows());
println!(
" writer timezone: {}",
stripe
.writer_tz()
.map(|tz| tz.to_string())
.unwrap_or_else(|| "None".to_string())
);
}
}
Ok(())
}