nanocov 0.1.0

Rust Coverage Calculator and QC Plot Generation Tool
Documentation
// src/cli/mod.rs
// CLI argument parsing module for nanocov
// Contains the Cli struct and related logic

use clap::{ArgGroup, Parser};
use std::path::PathBuf;

// Command-line arguments for nanocov, parsed through clap's derive API.
//
// NOTE for maintainers: clap's derive turns the `///` comments on the fields
// below into the user-visible `--help` text, so editing them changes program
// output. Use plain `//` comments (like this one) for internal notes.
#[derive(Parser, Debug, Clone)]
#[command(name = "nanocov")]
#[command(about = "Calculates per-base coverage from a BAM file", long_about = None)]
// The required "mode" group ties `input` and `batch_tsv` together: parsing
// fails unless the user selects a mode through one of these two arguments.
#[command(group(
    ArgGroup::new("mode")
        .required(true)
        .args(["input", "batch_tsv"])
))]
pub struct Cli {
    // Single-file mode: one BAM given with -i/--input.
    /// Input BAM file
    #[arg(short, long)]
    pub input: Option<PathBuf>,

    // Batch mode: a TSV listing the inputs, given with --batch-tsv.
    /// TSV with at least two columns: BAM path, BED path, and optional prefix
    #[arg(long = "batch-tsv")]
    pub batch_tsv: Option<PathBuf>,

    /// BED file with regions to include (chrom, start, end)
    #[arg(short = 'b', long = "bed")]
    pub bed: Option<PathBuf>,

    // NOTE(review): the help text calls this an alias for --async-tasks, but it
    // is stored in its own field; nothing in this struct links the two, so the
    // reconciliation presumably happens in the caller — confirm.
    /// Number of worker threads (alias for --async-tasks)
    #[arg(short = 't', long = "threads")]
    pub threads: Option<usize>,

    // Declared as a plain `PathBuf` with no default value, unlike the
    // `Option`-typed arguments around it.
    /// Output directory for generated files (TSV, plots, statistics)
    #[arg(short = 'o', long = "output-dir")]
    pub output_dir: PathBuf,

    // See `Cli::output_prefix` for how the fallback is resolved when unset.
    /// Output filename prefix (defaults to input file stem)
    #[arg(long = "prefix")]
    pub prefix: Option<String>,

    // See `Cli::batch_output_path` for the default location when unset.
    /// Output path for consolidated batch statistics (default: <output-dir>/batch.statistics.tsv)
    #[arg(long = "batch-output")]
    pub batch_output: Option<PathBuf>,

    /// Chunk size for parallel processing (default: 100,000)
    #[arg(short = 'c', long = "chunk-size", default_value_t = 100_000)]
    pub chunk_size: usize,

    /// Use SVG output format for plots instead of PNG
    #[arg(long = "svg")]
    pub svg_output: bool,

    /// Fixed plot bin size in base pairs (overrides adaptive binning)
    #[arg(long = "plot-bin-size")]
    pub plot_bin_size: Option<u32>,

    // Stored as a free-form string; the names listed in the help text are not
    // validated by anything visible in this struct.
    /// Color theme for plots [latte, frappe, nord, gruvbox]
    #[arg(long = "theme")]
    pub theme: Option<String>,

    /// Show regions with zero coverage in plots
    #[arg(long = "show-zeros")]
    pub show_zero_regions: bool,

    // Both plot kinds are opt-in flags; `Cli::wants_any_plot` combines them.
    /// Generate overview plot (disabled by default)
    #[arg(long = "overview-plot")]
    pub overview_plot: bool,

    /// Generate per-chromosome plots (disabled by default)
    #[arg(long = "per-chromosome-plot")]
    pub per_chromosome_plot: bool,

    /// Include non-canonical chromosomes in coverage statistics (default: canonical + MT/EBV)
    #[arg(long = "non-canonical")]
    pub include_non_canonical: bool,

    /// Invert BED regions (analyze the complement within each chromosome)
    #[arg(long = "invert-regions")]
    pub invert_regions: bool,

    // Negative flag: `Cli::use_log_scale` returns the inverse of this field.
    /// Use linear scale for coverage plots (log scale is default)
    #[arg(long = "linear")]
    pub linear_scale: bool,

    // Negative flag: `Cli::use_concurrent_plots` returns the inverse of this field.
    /// Generate plots sequentially (concurrent by default)
    #[arg(long = "sequential-plots")]
    pub sequential_plots: bool,

    // The defaults quoted in the help text for this and the next option are not
    // set here (both are `Option` without `default_value`); they are presumably
    // applied by the consumer of these fields — confirm.
    /// Number of async tasks for parallel processing (default: number of CPU cores)
    #[arg(long = "async-tasks")]
    pub async_tasks: Option<usize>,

    /// Buffer size for async I/O operations in KB (default: 64KB)
    #[arg(long = "io-buffer-size")]
    pub io_buffer_size_kb: Option<usize>,

    /// Enable memory-mapped file I/O for faster access
    #[arg(long = "mmap")]
    pub use_mmap: bool,

    /// Use adaptive chunk sizing based on data density
    #[arg(long = "adaptive-chunks")]
    pub adaptive_chunks: bool,
}

impl Cli {
    /// Returns the prefix used when naming output files.
    ///
    /// An explicitly supplied `prefix` always wins. Otherwise the file stem of
    /// `input` is used when an input path is set, has a stem, and that stem is
    /// valid UTF-8. If none of that applies the literal `"coverage"` is returned.
    pub fn output_prefix(&self) -> String {
        match &self.prefix {
            Some(explicit) => explicit.clone(),
            None => self
                .input
                .as_deref()
                .and_then(|bam| bam.file_stem())
                .and_then(|stem| stem.to_str())
                .map_or_else(|| "coverage".to_string(), str::to_owned),
        }
    }

    /// Returns `<output_dir>/<output_prefix>.tsv`, the path of the coverage table.
    pub fn coverage_output_path(&self) -> PathBuf {
        let file_name = format!("{}.tsv", self.output_prefix());
        self.output_dir.join(file_name)
    }

    /// Returns an owned copy of the configured output directory.
    pub fn output_dir(&self) -> PathBuf {
        self.output_dir.to_path_buf()
    }

    /// Returns where the consolidated batch statistics are written.
    ///
    /// This is `batch_output` when it was supplied, and
    /// `<output_dir>/batch.statistics.tsv` otherwise.
    pub fn batch_output_path(&self) -> PathBuf {
        self.batch_output
            .clone()
            .unwrap_or_else(|| self.output_dir.join("batch.statistics.tsv"))
    }

    /// Reports whether the overview plot was requested.
    pub fn wants_overview_plot(&self) -> bool {
        self.overview_plot
    }

    /// Reports whether per-chromosome plots were requested.
    pub fn wants_per_chromosome_plot(&self) -> bool {
        self.per_chromosome_plot
    }

    /// Reports whether at least one kind of plot was requested.
    pub fn wants_any_plot(&self) -> bool {
        self.overview_plot || self.per_chromosome_plot
    }

    /// Reports whether plots may be generated concurrently, i.e. the
    /// `sequential_plots` flag is not set.
    pub fn use_concurrent_plots(&self) -> bool {
        !self.sequential_plots
    }

    /// Reports whether coverage plots use a log scale, i.e. the
    /// `linear_scale` flag is not set.
    pub fn use_log_scale(&self) -> bool {
        !self.linear_scale
    }

    /// Borrows the input BAM path.
    ///
    /// # Panics
    ///
    /// Panics if `input` is `None`; callers must only use this once an input
    /// path has been set.
    pub fn input_path(&self) -> &std::path::Path {
        self.input
            .as_ref()
            .map(PathBuf::as_path)
            .expect("input BAM path must be set before processing")
    }
}