// znippy-cli 0.9.2
//
// CLI for Znippy, a parallel chunked compression system.
// znippy-cli/src/main.rs

use anyhow::Result;
use clap::{Parser, Subcommand};
use std::path::PathBuf;

use znippy_common::{VerifyReport, list_archive_contents, verify_archive_integrity};
use znippy_common::plugin::PluginRegistry;
use znippy_common::plugins::wasm_loader::WasmPlugin;
use znippy_compress::compress_dir;
use znippy_decompress::{decompress_archive, decompress_archive_filtered};

pub mod handlers;

// Root of the command line: clap derives the argument parser from this type.
// (Plain `//` comments are used on purpose so the generated help text, which
// is set explicitly below, is not affected.)
#[derive(Parser)]
#[command(
    name = "znippy",
    about = "Znippy: fast archive format with per-file compression",
    long_about = None
)]
struct Cli {
    // The subcommand chosen by the user; `run` dispatches on it.
    #[command(subcommand)]
    command: Commands,
}

// Every subcommand of the CLI. The `///` comments below are picked up by
// clap's derive macro and shown as `--help` text, so they are user-facing.
#[derive(Subcommand)]
enum Commands {
    /// Compress a directory into a .znippy archive
    Compress {
        /// Directory to compress.
        #[arg(short, long)]
        input: PathBuf,

        /// Path of the `.znippy` archive to write.
        #[arg(short, long)]
        output: PathBuf,

        // NOTE(review): forwarded unchanged to `compress_dir`; its exact
        // meaning is not visible from this file, so no help text is emitted
        // for it yet — add a `///` description once confirmed.
        #[arg(long)]
        no_skip: bool,

        /// Package handler to use: a name or alias (`rust`/`cargo`, `python`, `maven`).
        /// One archive carries one package type.
        #[arg(long, default_value = "rust")]
        format: String,

        /// Where the metadata index is written: `arrow-ipc` (default — inline in
        /// the .znippy container) or `iceberg` (a real Iceberg table in
        /// --warehouse; blobs stay in the .znippy sidecar). The iceberg backend
        /// requires the CLI to be built with `--features iceberg`.
        #[arg(long, default_value = "arrow-ipc")]
        meta_format: String,

        /// Warehouse directory for `--meta-format iceberg`. Required for, and
        /// only used by, the iceberg backend.
        #[arg(long)]
        warehouse: Option<PathBuf>,

        /// Path to a .wasm plugin for metadata extraction (overrides --format).
        #[arg(long)]
        plugin: Option<PathBuf>,

        /// DenseUnion type_id for the WASM plugin given via --plugin.
        #[arg(long, default_value_t = 1)]
        plugin_type_id: i8,
    },

    /// Decompress a .znippy archive
    Decompress {
        /// The `.znippy` archive to decompress.
        #[arg(short, long)]
        input: PathBuf,

        /// Destination for the extracted files.
        #[arg(short, long)]
        output: PathBuf,

        /// Selective extract: only files of this package type (a handler
        /// name/alias, e.g. `maven`/`rust`/`python`). Omit to extract all types.
        #[arg(long = "type")]
        pkg_type: Option<String>,

        /// Selective extract: only files from this repo. Omit to extract all repos.
        #[arg(long)]
        repo: Option<String>,
    },

    /// List contents of a .znippy archive
    List {
        /// The `.znippy` archive to list.
        #[arg(short, long)]
        input: PathBuf,
    },

    /// Random-access read of one file by its relative path (O(log n)/O(key) via
    /// the lookup sub-index + trie). Writes to --output, or stdout if omitted.
    Get {
        /// The `.znippy` archive to read from.
        #[arg(short, long)]
        input: PathBuf,

        /// Relative path of the file inside the archive.
        #[arg(short, long)]
        path: String,

        /// Destination file. When omitted, the bytes are written to stdout.
        #[arg(short, long)]
        output: Option<PathBuf>,
    },

    /// Verify archive integrity (checksum)
    Verify {
        /// The `.znippy` archive to verify.
        #[arg(short, long)]
        input: PathBuf,
    },

    /// Seal a dynamic, iceberg-backed archive into a static, immutable native
    /// `.znippy` (inline Arrow-IPC sub-indexes + lookup + trie + footer).
    ///
    /// Reads the archive metadata from the skade-iceberg `--warehouse` and
    /// writes it as the v0.7 inline container, REUSING the blob bytes already in
    /// the `--input` `.znippy` sidecar (no recompress, content-addressed). The
    /// sealed artifact opens with the ordinary reader — the warehouse is no
    /// longer needed to read it. Requires `--features iceberg`.
    Seal {
        /// The `.znippy` blob sidecar written when the archive was compressed
        /// with `--meta-format iceberg` (pure blobs, no footer).
        #[arg(short, long)]
        input: PathBuf,

        /// The skade-iceberg warehouse holding the archive metadata tables.
        #[arg(long)]
        warehouse: PathBuf,

        /// Iceberg namespace of the archive (its file stem). Defaults to the
        /// `--input` file stem, matching how `compress` derives it.
        #[arg(long)]
        namespace: Option<String>,

        /// Destination for the sealed native `.znippy`.
        #[arg(short, long)]
        output: PathBuf,
    },

    /// List the available package handlers (the compiled-in register).
    Handlers,

    /// Run a handler-specific subcommand, e.g. `znippy run rust coords foo.crate`.
    Run {
        /// Handler name/alias to dispatch to.
        format: String,
        /// Subcommand advertised by the handler's meta().
        cmd: String,
        // NOTE(review): plain positional list — with clap's defaults, values
        // that start with `-` presumably have to be passed after `--`; confirm
        // whether pass-through of flag-like arguments is wanted here.
        /// Arguments passed to the subcommand.
        args: Vec<String>,
    },
}

/// Translate `--meta-format` / `--warehouse` into an optional metadata-sink factory.
///
/// * `arrow-ipc` → `Ok(None)`; the caller passes that straight to
///   `compress_dir`, which then uses its built-in inline sink.
/// * `iceberg` → with the `iceberg` cargo feature enabled, a factory that
///   creates a `znippy_iceberg::IcebergSink` over `warehouse`, using the file
///   stem of `output` as the namespace (falling back to `"znippy"` when the
///   path has no stem).
///
/// # Errors
/// Fails when `meta_format` is neither of the two values above, when
/// `iceberg` is requested without a `warehouse`, or when `iceberg` is
/// requested but the CLI was built without the `iceberg` feature.
fn build_meta_sink(
    meta_format: &str,
    warehouse: Option<PathBuf>,
    output: &std::path::Path,
) -> Result<Option<znippy_common::MetaSinkFactory>> {
    // Default backend: nothing to construct.
    if meta_format == "arrow-ipc" {
        return Ok(None);
    }

    // Anything other than `iceberg` from here on is a user error.
    if meta_format != "iceberg" {
        let other = meta_format;
        anyhow::bail!("unknown --meta-format '{other}' (expected arrow-ipc|iceberg)");
    }

    // Exactly one of the two blocks below survives conditional compilation
    // and becomes the function's tail expression.
    #[cfg(feature = "iceberg")]
    {
        let wh = match warehouse {
            Some(dir) => dir,
            None => {
                anyhow::bail!("--warehouse <DIR> is required for --meta-format iceberg")
            }
        };
        let namespace = match output.file_stem() {
            Some(stem) => stem.to_string_lossy().into_owned(),
            None => String::from("znippy"),
        };
        println!(
            "🧊 Metadata β†’ Iceberg table (namespace `{namespace}`) in {}",
            wh.display()
        );
        // The factory ignores both of its arguments and hands back an
        // Iceberg-backed sink for the warehouse/namespace captured above.
        Ok(Some(Box::new(move |_file, _off| {
            Box::new(znippy_iceberg::IcebergSink::new(wh, namespace))
                as Box<dyn znippy_common::ArchiveMetaSink>
        })))
    }
    #[cfg(not(feature = "iceberg"))]
    {
        // Consume the otherwise-unused parameters in this configuration.
        let _ = (warehouse, output);
        anyhow::bail!(
            "iceberg metadata backend not compiled in; rebuild znippy-cli with `--features iceberg`"
        )
    }
}

/// Parse the process arguments and execute the selected subcommand.
///
/// Initialises `env_logger`, parses the `Cli` definition with clap and
/// dispatches on the chosen `Commands` variant. Human-readable reports are
/// printed to stdout; `get` without `--output` writes the raw file bytes to
/// stdout instead (its status line goes to stderr when `--output` is given).
///
/// # Errors
/// Propagates every error from handler lookup, WASM plugin loading,
/// metadata-sink construction, the archive operations and file/stdout I/O.
/// `verify` additionally fails when its report counts at least one corrupt
/// file, so the failure is visible to whoever inspects the returned result.
/// `seal` fails when the CLI was built without the `iceberg` feature.
pub fn run() -> Result<()> {
    env_logger::init();
    let cli = Cli::parse();

    match cli.command {
        Commands::Compress {
            input,
            output,
            no_skip,
            format,
            meta_format,
            warehouse,
            plugin,
            plugin_type_id,
        } => {
            // A `--plugin` path takes precedence over the `--format` handler.
            let registry = match plugin {
                Some(wasm_path) => {
                    // NOTE(review): `to_string_lossy` replaces non-UTF-8 path
                    // bytes, so such a path would not reach the loader intact.
                    let wp = WasmPlugin::load(&wasm_path.to_string_lossy(), "wasm-plugin", plugin_type_id)?;
                    PluginRegistry::with_plugin(Box::new(wp))
                }
                None => {
                    let handler = handlers::find_handler(&format)?;
                    println!("πŸ”Œ Handler: {} (type_id {})", handler.meta().name, handler.type_id());
                    PluginRegistry::with_plugin(handler)
                }
            };
            let sink_factory = build_meta_sink(&meta_format, warehouse, &output)?;
            let report = compress_dir(&input, &output, no_skip, Some(&registry), None, sink_factory)?;
            println!("\nβœ… Komprimering klar:");
            println!("πŸ“ Totalt antal filer:         {}", report.total_files);
            println!("πŸ“ Totalt antal chunks:         {}", report.chunks);

            println!("πŸ“‚ Totalt antal kataloger:     {}", report.total_dirs);
            println!("πŸ“¦ Filer komprimerade:         {}", report.compressed_files);
            println!(
                "πŸ“„ Filer ej komprimerade:      {}",
                report.uncompressed_files
            );
            println!("πŸ“₯ Totalt inlΓ€sta bytes:       {}", report.total_bytes_in);
            println!("πŸ“€ Totalt skrivna bytes:       {}", report.total_bytes_out);
            println!("πŸ“‰ Bytes som komprimerades:    {}", report.compressed_bytes);
            println!(
                "πŸ“ƒ Bytes ej komprimerade:      {}",
                report.uncompressed_bytes
            );
            println!(
                "πŸ“Š Komprimeringsgrad:          {:.2}%",
                report.compression_ratio
            );
        }

        Commands::Decompress { input, output, pkg_type, repo } => {
            // Resolve the optional `--type` name/alias to the handler's type id;
            // an unknown name aborts before anything is extracted.
            let filter = znippy_common::IndexFilter {
                pkg_type: match &pkg_type {
                    Some(name) => Some(handlers::find_handler(name)?.type_id()),
                    None => None,
                },
                // Cloned because `repo` is still needed for the status line below.
                repo: repo.clone(),
            };
            let report: VerifyReport = if filter.is_empty() {
                decompress_archive(&input, &output)?
            } else {
                println!(
                    "πŸ”Ž Selective extract: type={} repo={}",
                    pkg_type.as_deref().unwrap_or("*"),
                    repo.as_deref().unwrap_or("*"),
                );
                decompress_archive_filtered(&input, &output, &filter)?
            };
            println!("\nβœ… Dekomprimering och verifiering klar:");
            println!("πŸ“ Totala filer:       {}", report.total_files);
            println!("πŸ” Verifierade filer:  {}", report.verified_files);
            println!("πŸ“₯  chunks:    {}", report.chunks);
            println!("❌ Korrupta filer:     {}", report.corrupt_files);
            println!("πŸ“₯ Totala bytes:       {}", report.total_bytes);
            println!("πŸ“€ Verifierade bytes:  {}", report.verified_bytes);
            println!("⚠️  Korrupta bytes:    {}", report.corrupt_bytes);
            // NOTE(review): a non-zero corrupt count is only reported here and
            // does not fail the command — confirm that best-effort extraction
            // is the intended behaviour.
        }

        Commands::List { input } => {
            list_archive_contents(&input)?;
        }

        Commands::Get { input, path, output } => {
            let data = znippy_common::get_file(&input, &path)?;
            match output {
                Some(dest) => {
                    std::fs::write(&dest, &data)?;
                    // Status goes to stderr so it never mixes with payload bytes.
                    eprintln!("πŸ“€ {} ({} bytes) β†’ {}", path, data.len(), dest.display());
                }
                None => {
                    use std::io::Write;
                    // Lock once and flush explicitly: stdout is buffered, and a
                    // flush that only happens at process exit cannot report a
                    // write failure (closed pipe, full disk) to the caller.
                    let stdout = std::io::stdout();
                    let mut out = stdout.lock();
                    out.write_all(&data)?;
                    out.flush()?;
                }
            }
        }

        Commands::Verify { input } => {
            let report: VerifyReport = verify_archive_integrity(&input)?;
            println!("\nπŸ” Verifiering klar:");
            println!("πŸ“ Totala filer:       {}", report.total_files);
            println!("πŸ” Verifierade filer:  {}", report.verified_files);
            println!("❌ Korrupta filer:     {}", report.corrupt_files);
            println!("πŸ“₯ Totala bytes:       {}", report.total_bytes);
            println!("πŸ“€ Verifierade bytes:  {}", report.verified_bytes);
            println!("⚠️  Korrupta bytes:    {}", report.corrupt_bytes);
            // Fail only after the full report has been printed, so the user
            // still sees the numbers while callers get an error result.
            if report.corrupt_files != 0 {
                anyhow::bail!(
                    "integrity check failed for {}: {} corrupt file(s)",
                    input.display(),
                    report.corrupt_files
                );
            }
        }

        Commands::Seal { input, warehouse, namespace, output } => {
            #[cfg(feature = "iceberg")]
            {
                // Default namespace: file stem of `--input`, else "znippy".
                let ns = namespace.unwrap_or_else(|| {
                    input
                        .file_stem()
                        .map(|s| s.to_string_lossy().to_string())
                        .unwrap_or_else(|| "znippy".to_string())
                });
                println!(
                    "πŸ§Šβ†’πŸ“¦ Sealing iceberg archive (namespace `{ns}`) in {} β†’ {}",
                    warehouse.display(),
                    output.display()
                );
                let report = znippy_iceberg::seal(&input, &warehouse, &ns, &output)?;
                println!("\nβœ… Sealed (static native .znippy):");
                println!("πŸ“ Filer:                      {}", report.files);
                println!("🧱 Chunk-rader:                {}", report.rows);
                println!(
                    "πŸ“€ Blob-bytes Γ₯teranvΓ€nda:     {} (ingen omkomprimering)",
                    report.blob_bytes_copied
                );
                println!("πŸ“¦ Sealad total storlek:       {}", report.sealed_total_bytes);
                // Everything in the sealed file that is not copied blob data.
                println!(
                    "πŸ“Š Metadata-svans + footer:    {} bytes",
                    report.sealed_total_bytes - report.blob_bytes_copied
                );
            }
            #[cfg(not(feature = "iceberg"))]
            {
                // Consume the otherwise-unused bindings in this configuration.
                let _ = (input, warehouse, namespace, output);
                anyhow::bail!(
                    "iceberg backend not compiled in; rebuild znippy-cli with `--features iceberg`"
                );
            }
        }

        Commands::Handlers => {
            handlers::print_catalog();
        }

        Commands::Run { format, cmd, args } => {
            let handler = handlers::find_handler(&format)?;
            handler.run_command(&cmd, &args)?;
        }
    }

    Ok(())
}