use clap::{Parser, Subcommand, ValueEnum};
use std::path::PathBuf;
/// Command-line arguments for the `omniparse` binary.
///
/// An invocation is either a list of input files plus optional flags, or a
/// subcommand: `args_conflicts_with_subcommands` rejects mixing the two, and
/// `subcommand_negates_reqs` waives the `files` requirement when a subcommand
/// is given.
// The explicit `about` / `long_about` attributes below are what the top-level
// help displays; the doc comment above is for rustdoc readers.
#[derive(Parser, Debug)]
#[command(name = "omniparse")]
#[command(version)]
#[command(about = "A Rust toolkit for detecting and extracting metadata, text, and content from various file formats")]
#[command(long_about = "Omniparse - Extract text and metadata from 35+ file formats\n\n\
Supported formats:\n\
Text: TXT, JSON, CSV, XML, HTML, CSS, RTF\n\
Documents: PDF, DOCX, DOC, XLSX, XLS, PPTX, PPT, ODT, ODS, ODP\n\
Images: JPEG, PNG, TIFF\n\
Archives: ZIP, TAR\n\n\
Examples:\n\
omniparse document.pdf\n\
omniparse --format json webpage.html\n\
omniparse --metadata-only spreadsheet.xlsx\n\
omniparse --parallel *.pdf *.docx\n\
omniparse models download # pre-fetch ML OCR models\n\
omniparse models verify # check sha256 of cached models")]
#[command(args_conflicts_with_subcommands = true, subcommand_negates_reqs = true)]
pub struct Cli {
    /// Optional subcommand; `None` when files are passed directly.
    #[command(subcommand)]
    pub command: Option<Command>,

    /// Files to process
    #[arg(required = true)]
    pub files: Vec<PathBuf>,

    /// Output format
    #[arg(short, long, value_enum, default_value = "text")]
    pub format: OutputFormat,

    /// Extract metadata only
    #[arg(short, long)]
    pub metadata_only: bool,

    /// Detect the file format only
    #[arg(short, long)]
    pub detect_only: bool,

    // NOTE(review): whether this is a file or a directory, and what happens
    // when it is omitted, is decided by the caller — confirm and tighten the
    // help text accordingly.
    /// Output path
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    /// Verbose output
    #[arg(short, long)]
    pub verbose: bool,

    /// Process files in parallel
    #[arg(short, long)]
    pub parallel: bool,
}
/// Subcommands of `omniparse`.
#[derive(Subcommand, Debug)]
pub enum Command {
    /// Manage ML OCR models (download, verify, path, list)
    Models {
        /// Action to perform on the models.
        #[command(subcommand)]
        action: ModelsAction,
    },
}
/// Actions of the `omniparse models` subcommand.
#[derive(Subcommand, Debug)]
pub enum ModelsAction {
    /// Pre-fetch ML OCR models
    Download {
        // NOTE(review): presumably re-downloads models that are already
        // cached — confirm against the handler and tighten the help text.
        /// Force the download
        #[arg(long)]
        force: bool,
    },
    /// Check sha256 of cached models
    Verify,
    // NOTE(review): presumably prints the model cache directory — confirm.
    /// Show the models path
    Path,
    /// List models
    List,
}
/// Output formats selectable with `--format` / `-f`.
///
/// Fieldless, so it is cheap to copy and compare; `Copy`, `PartialEq` and `Eq`
/// are derived in addition to what clap requires (`Clone`, `ValueEnum`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, ValueEnum)]
pub enum OutputFormat {
    Text,
    Json,
    Yaml,
}