parx-cli 0.1.0

CLI tool for building and inspecting PARX sidecar files
/*
 * Copyright 2026 PARX Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
//! PARX CLI - Build and inspect PARX sidecar files.

mod commands;

use clap::{Parser, Subcommand};
use commands::build::{BuildOptions, CompressionOption};
use commands::bundle::PageIndexPolicy;
use tracing_subscriber::EnvFilter;

// Root of the command-line interface. `Cli::parse()` in `main` fills this in
// from the process arguments.
//
// NOTE: plain `//` comments are used here on purpose -- clap derive treats
// `///` doc comments on this type as help text, which would compete with the
// explicit `about` string below.
#[derive(Parser)]
#[command(
    name = "parx",
    about = "Build and inspect PARX sidecar files for Parquet",
    version
)]
struct Cli {
    // The subcommand chosen by the user (build / inspect / verify / bundle).
    #[command(subcommand)]
    command: Commands,
}

// Top-level subcommands of the `parx` binary. Each variant is matched in
// `main` and forwarded to the corresponding function under `commands::`.
//
// NOTE: the `///` comments below are consumed by clap derive as the help text
// shown to users, so treat them as user-facing strings rather than internal
// documentation. Maintainer notes belong in `//` comments like this one.
#[derive(Subcommand)]
enum Commands {
    /// Build a .parx sidecar file from a Parquet file
    Build {
        /// Path to the Parquet file (local path or s3://bucket/path)
        #[arg(value_name = "PARQUET_FILE")]
        input: String,

        // Optional; `main` passes this on as `Option<&str>` via `as_deref()`.
        /// Output path for the .parx file (defaults to <input>.parx)
        #[arg(short, long)]
        output: Option<String>,

        // Kept as a raw string here; `main` parses it into `CompressionOption`
        // and reports a parse failure as an error.
        /// Compression: none, auto, zstd, lz4, gzip (default: none)
        #[arg(long, default_value = "none")]
        compress: String,
    },
    /// Inspect a .parx file and show its manifest
    Inspect {
        /// Path to the .parx file
        #[arg(value_name = "PARX_FILE")]
        input: String,
    },
    /// Verify checksums in a .parx file
    Verify {
        /// Path to the .parx file
        #[arg(value_name = "PARX_FILE")]
        input: String,

        // Boolean flag; forwarded unchanged to `commands::verify::run`.
        /// Also verify against the source Parquet file
        #[arg(long)]
        with_source: bool,
    },
    /// Manage PARX bundles (multiple files in one)
    Bundle {
        // Nested subcommand set; see `BundleAction`.
        #[command(subcommand)]
        action: BundleAction,
    },
}

// Subcommands available under `parx bundle`. Each variant is matched in
// `main` and forwarded to a function in `commands::bundle`.
//
// NOTE: the `///` comments below are consumed by clap derive as the help text
// shown to users, so treat them as user-facing strings rather than internal
// documentation.
#[derive(Subcommand)]
enum BundleAction {
    /// Build a bundle from Parquet files in a directory
    Build {
        /// Directory containing Parquet files
        #[arg(value_name = "DIRECTORY")]
        input: String,

        // Optional; `main` passes this on as `Option<&str>` via `as_deref()`.
        /// Output path for the bundle (defaults to `<directory>`/_parx_bundle.parx)
        #[arg(short, long)]
        output: Option<String>,

        // The three options below are packed into a `PageIndexPolicy` in `main`
        // (`enabled`, `max_per_file_bytes`, `max_total_bytes` respectively).
        /// Include Parquet page indexes in bundle entries (policy-controlled).
        #[arg(long, default_value_t = false)]
        include_page_indexes: bool,

        // Default: 262_144 bytes = 256 KiB.
        /// Maximum page-index bytes per file when `--include-page-indexes` is set.
        #[arg(long, default_value_t = 262_144)]
        max_page_index_bytes_per_file: usize,

        // Default: 16_777_216 bytes = 16 MiB.
        /// Maximum total page-index bytes across the whole bundle.
        #[arg(long, default_value_t = 16_777_216)]
        max_total_page_index_bytes: usize,
    },

    /// Inspect a bundle file
    Inspect {
        /// Path to the bundle file
        #[arg(value_name = "BUNDLE_FILE")]
        input: String,
    },

    /// Verify bundle integrity
    Verify {
        /// Path to the bundle file
        #[arg(value_name = "BUNDLE_FILE")]
        input: String,

        // Boolean flag; forwarded unchanged to `commands::bundle::verify`.
        /// Also verify source files exist and match sizes
        #[arg(long)]
        with_sources: bool,
    },

    /// Extract individual .parx files from a bundle
    Extract {
        /// Path to the bundle file
        #[arg(value_name = "BUNDLE_FILE")]
        input: String,

        // Unlike the `Build` variants, the output location is a plain `String`
        // here, i.e. it has no default and must be supplied.
        /// Output directory for extracted .parx files
        #[arg(short, long)]
        output: String,
    },
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Initialize tracing
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .init();

    let cli = Cli::parse();

    match cli.command {
        Commands::Build {
            input,
            output,
            compress,
        } => {
            let compression: CompressionOption =
                compress.parse().map_err(|e: String| anyhow::anyhow!(e))?;

            let options = BuildOptions { compression };

            commands::build::run_with_options(&input, output.as_deref(), options).await?;
        }
        Commands::Inspect { input } => {
            commands::inspect::run(&input).await?;
        }
        Commands::Verify { input, with_source } => {
            commands::verify::run(&input, with_source).await?;
        }
        Commands::Bundle { action } => match action {
            BundleAction::Build {
                input,
                output,
                include_page_indexes,
                max_page_index_bytes_per_file,
                max_total_page_index_bytes,
            } => {
                let policy = PageIndexPolicy {
                    enabled: include_page_indexes,
                    max_per_file_bytes: max_page_index_bytes_per_file,
                    max_total_bytes: max_total_page_index_bytes,
                };
                commands::bundle::build(&input, output.as_deref(), policy).await?;
            }
            BundleAction::Inspect { input } => {
                commands::bundle::inspect(&input).await?;
            }
            BundleAction::Verify {
                input,
                with_sources,
            } => {
                commands::bundle::verify(&input, with_sources).await?;
            }
            BundleAction::Extract { input, output } => {
                commands::bundle::extract(&input, &output).await?;
            }
        },
    }

    Ok(())
}