use std::path::{Path, PathBuf};
use clap::{Parser, Subcommand};
use crate::verify::Check;
use super::format::filename_compression_format;
#[derive(Parser, Debug)]
#[command(version)]
pub struct Args {
#[command(subcommand)]
pub command: Command,
#[clap(long, short)]
pub quiet: bool,
#[clap(long, default_value = "off")]
pub log_level: super::logging::Level,
#[clap(long)]
pub log_file: Option<PathBuf>,
#[clap(long)]
pub log_json: bool,
}
// Subcommands available at the top level. Each tuple variant carries the
// argument struct parsed for that subcommand.
#[derive(Debug, Subcommand)]
pub enum Command {
    Export(ExportCommand),
    Import(ImportCommand),
    List(ListCommand),
    Get(GetCommand),
    Extract(ExtractCommand),
    Verify(VerifyCommand),
    // Trailing underscore because `Self` is a reserved keyword in Rust.
    Self_(SelfCommand),
    // Kept out of the generated help listing via `hide`.
    #[command(hide = true)]
    DumpHelp,
}
#[derive(Parser, Debug)]
pub struct ExportCommand {
#[clap(long, default_value = "-")]
pub input: Vec<PathBuf>,
#[clap(long, default_value = "auto")]
pub compression: CompressionFormat,
#[clap(long, default_value = "-")]
pub output: PathBuf,
#[clap(long, default_value = "json-seq")]
pub format: SerializationFormat,
#[clap(long)]
pub no_block: bool,
#[clap(long)]
pub extract: bool,
}
#[derive(Parser, Debug)]
pub struct ImportCommand {
#[clap(long, default_value = "-")]
pub input: Vec<PathBuf>,
#[clap(long, default_value = "json-seq")]
pub format: SerializationFormat,
#[clap(long, default_value = "-")]
pub output: PathBuf,
#[clap(long, default_value = "auto")]
pub compression: CompressionFormat,
#[clap(long, default_value = "high")]
pub compression_level: CompressionLevel,
}
#[derive(Parser, Debug)]
pub struct ListCommand {
#[clap(long, default_value = "-")]
pub input: Vec<PathBuf>,
#[clap(long, default_value = "auto")]
pub compression: CompressionFormat,
#[clap(long, default_value = "-")]
pub output: PathBuf,
#[clap(long, default_value = "json-seq")]
pub format: ListSerializationFormat,
#[clap(
long,
value_delimiter = ',',
default_value = ":position,WARC-Record-ID,WARC-Type,Content-Type,WARC-Target-URI"
)]
pub field: Vec<String>,
}
// Arguments of the `Get` subcommand, which consists solely of a nested
// subcommand.
#[derive(Debug, Parser)]
pub struct GetCommand {
    // The nested subcommand to run.
    #[command(subcommand)]
    pub subcommand: GetSubcommand,
}
// Nested subcommands of `Get`; each variant carries its own argument struct.
#[derive(Debug, Subcommand)]
pub enum GetSubcommand {
    Export(GetExportSubcommand),
    Extract(GetExtractSubcommand),
}
#[derive(Parser, Debug)]
pub struct GetExportSubcommand {
#[clap(long, default_value = "-")]
pub input: PathBuf,
#[clap(long, default_value = "auto")]
pub compression: CompressionFormat,
#[clap(long, required = true)]
pub position: u64,
#[clap(long, required = true)]
pub id: String,
#[clap(long, default_value = "-")]
pub output: PathBuf,
#[clap(long, default_value = "json-seq")]
pub format: SerializationFormat,
#[clap(long)]
pub no_block: bool,
#[clap(long)]
pub extract: bool,
}
#[derive(Parser, Debug)]
pub struct GetExtractSubcommand {
#[clap(long, default_value = "-")]
pub input: PathBuf,
#[clap(long, default_value = "auto")]
pub compression: CompressionFormat,
#[clap(long, required = true)]
pub position: u64,
#[clap(long, required = true)]
pub id: String,
#[clap(long, default_value = "-")]
pub output: PathBuf,
}
#[derive(Parser, Debug)]
pub struct ExtractCommand {
#[clap(long, default_value = "-")]
pub input: Vec<PathBuf>,
#[clap(long, default_value = "auto")]
pub compression: CompressionFormat,
#[clap(long, default_value = "./")]
pub output: PathBuf,
#[clap(long)]
pub continue_on_error: bool,
#[clap(long)]
pub include: Vec<String>,
#[clap(long)]
pub include_pattern: Vec<String>,
#[clap(long)]
pub exclude: Vec<String>,
#[clap(long)]
pub exclude_pattern: Vec<String>,
}
#[derive(Parser, Debug)]
pub struct VerifyCommand {
#[clap(long, default_value = "-")]
pub input: Vec<PathBuf>,
#[clap(long, default_value = "auto")]
pub compression: CompressionFormat,
#[clap(long, default_value = "-")]
pub output: PathBuf,
#[clap(long, default_value = "json-seq")]
pub format: ListSerializationFormat,
#[clap(long, value_delimiter = ',')]
pub exclude_check: Vec<VerifyCheck>,
#[clap(long)]
pub database: Option<PathBuf>,
}
// Check names accepted on the command line (see
// `VerifyCommand::exclude_check`). Each variant maps one-to-one onto a
// `crate::verify::Check` variant through the `From` impl in this file.
//
// Variant order is significant for clap's listing of possible values, so it
// is left untouched.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum VerifyCheck {
    MandatoryFields,
    KnownRecordType,
    ContentType,
    ConcurrentTo,
    BlockDigest,
    PayloadDigest,
    IpAddress,
    RefersTo,
    RefersToTargetUri,
    RefersToDate,
    TargetUri,
    Truncated,
    WarcinfoId,
    Filename,
    Profile,
    Segment,
    RecordAtTimeCompression,
}
impl From<VerifyCheck> for Check {
fn from(value: VerifyCheck) -> Self {
match value {
VerifyCheck::MandatoryFields => Self::MandatoryFields,
VerifyCheck::KnownRecordType => Self::KnownRecordType,
VerifyCheck::ContentType => Self::ContentType,
VerifyCheck::ConcurrentTo => Self::ConcurrentTo,
VerifyCheck::BlockDigest => Self::BlockDigest,
VerifyCheck::PayloadDigest => Self::PayloadDigest,
VerifyCheck::IpAddress => Self::IpAddress,
VerifyCheck::RefersTo => Self::RefersTo,
VerifyCheck::RefersToTargetUri => Self::RefersToTargetUri,
VerifyCheck::RefersToDate => Self::RefersToDate,
VerifyCheck::TargetUri => Self::TargetUri,
VerifyCheck::WarcinfoId => Self::WarcinfoId,
VerifyCheck::Truncated => Self::Truncated,
VerifyCheck::Filename => Self::Filename,
VerifyCheck::Profile => Self::Profile,
VerifyCheck::Segment => Self::Segment,
VerifyCheck::RecordAtTimeCompression => Self::RecordAtTimeCompression,
}
}
}
// Compression choices accepted on the command line. `Auto` has no direct
// library counterpart; `try_into_native` resolves it from the file name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum CompressionFormat {
    Auto,
    None,
    Gzip,
    // Only compiled in when the `zstd` cargo feature is enabled.
    #[cfg(feature = "zstd")]
    Zstandard,
}
impl CompressionFormat {
    /// Resolves this command-line choice to a concrete
    /// [`crate::compress::Format`].
    ///
    /// `Auto` defers to `filename_compression_format(path)`. Every other
    /// variant goes through the `TryFrom` conversion and does not look at
    /// `path`.
    ///
    /// # Errors
    ///
    /// Fails with "unsupported compression or file format" when the
    /// file-name lookup yields nothing or the conversion is rejected.
    pub fn try_into_native(&self, path: &Path) -> anyhow::Result<crate::compress::Format> {
        // Both failure paths report the same message, so build it in one place.
        let unsupported = || anyhow::anyhow!("unsupported compression or file format");

        match *self {
            Self::Auto => filename_compression_format(path).ok_or_else(unsupported),
            explicit => explicit.try_into().map_err(|_| unsupported()),
        }
    }
}
impl TryFrom<CompressionFormat> for crate::compress::Format {
type Error = ();
fn try_from(value: CompressionFormat) -> Result<Self, Self::Error> {
match value {
CompressionFormat::Auto => Err(()),
CompressionFormat::None => Ok(Self::Identity),
CompressionFormat::Gzip => Ok(Self::Gzip),
#[cfg(feature = "zstd")]
CompressionFormat::Zstandard => Ok(Self::Zstandard),
}
}
}
// Compression level names accepted on the command line; converted into
// `crate::compress::Level` through the `From` impl in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum CompressionLevel {
    Balanced,
    High,
    Low,
}
/// Maps each command-line compression level onto the same-named library level.
impl From<CompressionLevel> for crate::compress::Level {
    fn from(level: CompressionLevel) -> Self {
        match level {
            CompressionLevel::Balanced => crate::compress::Level::Balanced,
            CompressionLevel::High => crate::compress::Level::High,
            CompressionLevel::Low => crate::compress::Level::Low,
        }
    }
}
// Serialization formats accepted by the commands that use
// `SerializationFormat` for `--format`; converted into
// `crate::dataseq::SeqFormat` through the `From` impl in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum SerializationFormat {
    JsonSeq,
    Jsonl,
    CborSeq,
}
/// Maps each command-line serialization format onto its library counterpart.
impl From<SerializationFormat> for crate::dataseq::SeqFormat {
    fn from(format: SerializationFormat) -> Self {
        match format {
            SerializationFormat::JsonSeq => crate::dataseq::SeqFormat::JsonSeq,
            // The library spells this variant with a capital `L`.
            SerializationFormat::Jsonl => crate::dataseq::SeqFormat::JsonL,
            SerializationFormat::CborSeq => crate::dataseq::SeqFormat::CborSeq,
        }
    }
}
// Serialization formats accepted by the commands that use
// `ListSerializationFormat` for `--format`: the same three names as
// `SerializationFormat`, plus `Csv`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum ListSerializationFormat {
    JsonSeq,
    Jsonl,
    CborSeq,
    Csv,
}
/// Maps each command-line list serialization format onto its library
/// counterpart.
impl From<ListSerializationFormat> for crate::dataseq::SeqFormat {
    fn from(format: ListSerializationFormat) -> Self {
        match format {
            ListSerializationFormat::JsonSeq => crate::dataseq::SeqFormat::JsonSeq,
            // The library spells this variant with a capital `L`.
            ListSerializationFormat::Jsonl => crate::dataseq::SeqFormat::JsonL,
            ListSerializationFormat::CborSeq => crate::dataseq::SeqFormat::CborSeq,
            ListSerializationFormat::Csv => crate::dataseq::SeqFormat::Csv,
        }
    }
}
// Arguments of the `Self_` subcommand, which consists solely of a nested
// subcommand.
#[derive(Debug, Parser)]
pub struct SelfCommand {
    // The nested subcommand to run.
    #[command(subcommand)]
    pub command: SelfSubcommand,
}
// Nested subcommands of `Self_`. Both variants take the same single
// `--quiet` flag (long form only; no short alias is declared).
#[derive(Debug, Subcommand)]
pub enum SelfSubcommand {
    Install {
        // `--quiet` boolean flag.
        #[arg(long)]
        quiet: bool,
    },
    Uninstall {
        // `--quiet` boolean flag.
        #[arg(long)]
        quiet: bool,
    },
}