// fastars 0.1.0 - Docs.rs

//! Command-line interface definitions.
//!
//! This module provides the CLI argument parsing using clap's derive macros.
//! It defines all command-line options for QC, trimming, and filtering,
//! with fastp-compatible option names where applicable.

use clap::{Parser, ValueEnum};
use std::path::PathBuf;

use crate::correction::CorrectionConfig;
use crate::filter::FilterConfig;
use crate::merge::MergeConfig;
use crate::trim::{AdapterConfig, GlobalTrimConfig, LengthConfig, Mode, QualityTrimConfig, TailConfig};
use crate::umi::{UmiConfig, UmiLocation};

// ============================================================================
// Input format detection
// ============================================================================

/// Format for stdin input.
///
/// Chosen on the command line through `--stdin_format`; the default is
/// [`StdinFormat::Auto`].
// The per-variant doc comments below are picked up by clap's `ValueEnum`
// derive as help text for each possible value, so treat them as user-facing
// strings when editing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum)]
pub enum StdinFormat {
    /// Auto-detect format from magic bytes (default)
    #[default]
    Auto,
    /// Force gzip compressed input
    Gzip,
    /// Force plain text input
    Plain,
}

// ============================================================================
// Split configuration
// ============================================================================

/// Strategy used to divide the output across several files.
///
/// Each variant carries the numeric parameter for that strategy.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SplitMode {
    /// Split into a given number of files; the payload is that file count.
    ByFile(usize),
    /// Split every given number of lines; the payload is the line count
    /// (one FASTQ read occupies 4 lines).
    ByLines(usize),
}

/// Settings that control how output files are split.
#[derive(Clone, Debug)]
pub struct SplitConfig {
    /// Which splitting strategy to apply, together with its numeric parameter.
    pub mode: SplitMode,
    /// How many digits the split-file suffix should have.
    pub prefix_digits: usize,
}

// ============================================================================
// Mode enum (clap-compatible)
// ============================================================================

/// Processing mode for short vs long reads.
///
/// This is the command-line facing counterpart of `crate::trim::Mode`. It adds
/// an `Auto` variant, which has no direct `Mode` equivalent: the `From<ReadMode>`
/// conversion panics on `Auto`, so it must be resolved to `Short` or `Long` first.
// The per-variant doc comments below are picked up by clap's `ValueEnum`
// derive as help text for each possible value, so treat them as user-facing
// strings when editing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum)]
pub enum ReadMode {
    /// Auto-detect mode based on read length.
    #[default]
    Auto,
    /// Short read mode (Illumina-style).
    Short,
    /// Long read mode (PacBio/ONT-style).
    Long,
}

/// Converts an already-resolved CLI read mode into the trimming [`Mode`].
///
/// # Panics
///
/// Panics when given [`ReadMode::Auto`]: callers are expected to have replaced
/// `Auto` with a concrete `Short`/`Long` choice before converting.
impl From<ReadMode> for Mode {
    fn from(mode: ReadMode) -> Self {
        match mode {
            ReadMode::Long => Self::Long,
            ReadMode::Short => Self::Short,
            // `Auto` has no `Mode` counterpart; reaching this arm is a caller bug.
            ReadMode::Auto => unreachable!("Auto mode should be resolved before conversion"),
        }
    }
}

// ============================================================================
// Main CLI struct
// ============================================================================

/// Ultra-fast QC and trimming for short and long reads.
///
/// Fastars provides efficient FASTQ processing with quality control metrics,
/// adapter trimming, quality trimming, and filtering.
// Maintenance notes:
// - The `///` comments on the fields below are consumed by clap's derive macro
//   as the per-option help text, so they are user-facing strings.
// - clap's built-in help/version flags are disabled and re-declared as
//   long-only options because this struct assigns `-h` to `--html` and `-V`
//   to `--verbose`.
// - Cross-option constraints (file existence, paired-end consistency, numeric
//   ranges not expressed as a `value_parser`) are checked in `Cli::validate`.
// NOTE(review): the `author` string below looks like a template placeholder —
// confirm and replace with the real maintainer details.
#[derive(Parser, Debug)]
#[command(name = "fastars")]
#[command(version)]
#[command(author = "Your Name <your.email@example.com>")]
#[command(about = "Ultra-fast QC and trimming for short and long reads")]
#[command(long_about = None)]
#[command(disable_help_flag = true)]
#[command(disable_version_flag = true)]
pub struct Cli {
    /// Print help information
    // Long-only: `-h` is taken by `--html`.
    #[arg(long = "help", action = clap::ArgAction::Help)]
    help: Option<bool>,

    /// Print version information
    // Long-only: `-V` is taken by `--verbose`.
    #[arg(long = "version", action = clap::ArgAction::Version)]
    version: Option<bool>,
    // ========================================================================
    // Input/Output
    // ========================================================================
    /// Read 1 input file (required)
    #[arg(short = 'i', long = "in1", value_name = "FILE")]
    pub in1: PathBuf,

    /// Read 2 input file (for paired-end)
    #[arg(short = 'I', long = "in2", value_name = "FILE")]
    pub in2: Option<PathBuf>,

    /// Input is interleaved paired-end data (alternating R1/R2 in single file)
    // `validate` rejects this together with `--in2`.
    #[arg(long = "interleaved_in")]
    pub interleaved_in: bool,

    /// Read 1 output file
    #[arg(short = 'o', long = "out1", value_name = "FILE")]
    pub out1: Option<PathBuf>,

    /// Read 2 output file (for paired-end)
    #[arg(short = 'O', long = "out2", value_name = "FILE")]
    pub out2: Option<PathBuf>,

    /// Stream output to stdout (interleaved for PE)
    // `validate` rejects this together with `--out1`/`--out2`.
    #[arg(long)]
    pub stdout: bool,

    /// Input format when reading from stdin (auto/gzip/plain)
    #[arg(long = "stdin_format", default_value = "auto", value_enum)]
    pub stdin_format: StdinFormat,

    /// Input quality encoding is Phred64 (ASCII 64-126) instead of Phred33 (ASCII 33-126)
    #[arg(short = '6', long = "phred64")]
    pub phred64: bool,

    /// Fix MGI sequencer read IDs to Illumina-compatible format
    #[arg(long = "fix_mgi_id")]
    pub fix_mgi_id: bool,

    /// Failed reads output file
    // `validate` requires `--out1` or `--stdout` when this is set.
    #[arg(long = "failed_out", value_name = "FILE")]
    pub failed_out: Option<PathBuf>,

    /// Unpaired read 1 output file (for paired-end when one read fails)
    #[arg(long = "unpaired1_out", value_name = "FILE")]
    pub unpaired1_out: Option<PathBuf>,

    /// Unpaired read 2 output file (for paired-end when one read fails)
    #[arg(long = "unpaired2_out", value_name = "FILE")]
    pub unpaired2_out: Option<PathBuf>,

    // ========================================================================
    // Mode selection
    // ========================================================================
    /// Processing mode (auto, short, or long reads)
    // `ReadMode::Auto` must be resolved to Short/Long before it is converted
    // into `trim::Mode`; the `From` impl panics on `Auto`.
    #[arg(long, default_value = "auto", value_enum)]
    pub mode: ReadMode,

    /// Number of reads to sample for auto mode detection
    #[arg(long = "mode_detect_sample", default_value_t = 100)]
    pub mode_detect_sample: usize,

    /// Read length threshold for short/long mode detection (bp)
    #[arg(long = "mode_detect_threshold", default_value_t = 500)]
    pub mode_detect_threshold: usize,

    // ========================================================================
    // Threading
    // ========================================================================
    /// Number of worker threads (0 = auto-detect)
    #[arg(short = 'w', long = "thread", default_value_t = 0)]
    pub threads: usize,
    // ========================================================================
    // Global (fixed-position) trimming
    // ========================================================================
    /// Trim N bases from the front of read 1
    #[arg(short = 'f', long = "trim_front1", default_value_t = 0)]
    pub trim_front1: usize,

    /// Trim N bases from the tail of read 1
    #[arg(short = 't', long = "trim_tail1", default_value_t = 0)]
    pub trim_tail1: usize,

    /// Trim N bases from the front of read 2
    #[arg(short = 'F', long = "trim_front2", default_value_t = 0)]
    pub trim_front2: usize,

    /// Trim N bases from the tail of read 2
    #[arg(short = 'T', long = "trim_tail2", default_value_t = 0)]
    pub trim_tail2: usize,


    // ========================================================================
    // Quality trimming
    // ========================================================================
    /// Enable quality trimming from the 5' (front) end
    #[arg(short = '5', long = "cut_front")]
    pub cut_front: bool,

    /// Quality threshold for front trimming
    #[arg(long = "cut_front_quality", default_value_t = 15)]
    pub cut_front_quality: u8,

    /// Window size for cut_front (default: uses --cut_window_size)
    #[arg(long = "cut_front_window_size")]
    pub cut_front_window_size: Option<usize>,

    /// Quality threshold for cut_front (default: uses --cut_mean_quality)
    #[arg(long = "cut_front_mean_quality")]
    pub cut_front_mean_quality: Option<u8>,

    /// Enable quality trimming from the 3' (tail) end
    #[arg(short = '3', long = "cut_tail")]
    pub cut_tail: bool,

    /// Quality threshold for tail trimming
    #[arg(long = "cut_tail_quality", default_value_t = 15)]
    pub cut_tail_quality: u8,

    /// Sliding window size for quality trimming
    // `validate` rejects 0 for this option.
    #[arg(long = "cut_window_size", default_value_t = 4)]
    pub cut_window_size: usize,

    /// Mean quality threshold for sliding window
    #[arg(long = "cut_mean_quality", default_value_t = 15)]
    pub cut_mean_quality: u8,

    /// Quality trimming: scan from 5' to 3', trim when quality drops
    #[arg(long = "cut_right")]
    pub cut_right: bool,

    /// Window size for cut_right (default: uses --cut_window_size)
    #[arg(long = "cut_right_window_size")]
    pub cut_right_window_size: Option<usize>,

    /// Quality threshold for cut_right (default: uses --cut_mean_quality)
    #[arg(long = "cut_right_mean_quality")]
    pub cut_right_mean_quality: Option<u8>,

    /// Window size for cut_tail (default: uses --cut_window_size)
    #[arg(long = "cut_tail_window_size")]
    pub cut_tail_window_size: Option<usize>,

    /// Quality threshold for cut_tail (default: uses --cut_mean_quality)
    #[arg(long = "cut_tail_mean_quality")]
    pub cut_tail_mean_quality: Option<u8>,

    // ========================================================================
    // Quality filtering
    // ========================================================================
    /// Disable quality filtering
    #[arg(short = 'Q', long = "disable_quality_filtering")]
    pub disable_quality_filtering: bool,

    /// Minimum Phred quality for a base to be qualified
    // `validate` rejects values above 93.
    #[arg(short = 'q', long = "qualified_quality_phred", default_value_t = 15)]
    pub qualified_quality_phred: u8,

    /// Maximum percentage of unqualified bases (0-100)
    #[arg(short = 'u', long = "unqualified_percent_limit", default_value_t = 40)]
    pub unqualified_percent_limit: u8,

    /// Average quality required for a read to pass
    #[arg(short = 'e', long = "average_qual", default_value_t = 0)]
    pub average_qual: u8,

    // ========================================================================
    // Adapter trimming
    // ========================================================================
    /// Adapter sequence for read 1
    #[arg(short = 'a', long = "adapter_sequence", value_name = "SEQ")]
    pub adapter_sequence: Option<String>,

    /// Adapter sequence for read 2
    #[arg(short = 'A', long = "adapter_sequence_r2", value_name = "SEQ")]
    pub adapter_sequence_r2: Option<String>,

    /// Enable adapter auto-detection for paired-end reads
    #[arg(long = "detect_adapter_for_pe")]
    pub detect_adapter_for_pe: bool,

    /// Disable adapter trimming
    #[arg(long = "disable_adapter_trimming")]
    pub disable_adapter_trimming: bool,

    /// 5' adapter sequence for long read mode (fastplong compatible)
    #[arg(short = 's', long = "start_adapter", value_name = "SEQ")]
    pub start_adapter: Option<String>,

    /// 3' adapter sequence for long read mode (fastplong compatible)
    #[arg(short = 'E', long = "end_adapter", value_name = "SEQ")]
    pub end_adapter: Option<String>,

    /// Adapter distance threshold for long reads (ratio: 0.0-1.0, default: 0.25)
    #[arg(short = 'd', long = "distance_threshold", default_value_t = 0.25)]
    pub distance_threshold: f64,

    /// Extend trimming N bases past adapter position (long mode only, default: 10)
    #[arg(long = "trimming_extension", default_value_t = 10)]
    pub trimming_extension: usize,

    // ========================================================================
    // Length filtering
    // ========================================================================
    /// Minimum read length required (reads shorter will be discarded)
    #[arg(short = 'l', long = "length_required", default_value_t = 15)]
    pub length_required: usize,

    /// Maximum read length allowed (reads longer will be discarded, 0 = no limit)
    #[arg(long = "length_limit", default_value_t = 0)]
    pub length_limit: usize,

    /// Disable all length filtering (keep reads regardless of length)
    #[arg(short = 'L', long = "disable_length_filtering")]
    pub disable_length_filtering: bool,

    /// Maximum read length for R1. Reads will be truncated to this length.
    #[arg(long = "max_len1", value_name = "LEN")]
    pub max_len1: Option<usize>,

    /// Maximum read length for R2. Reads will be truncated to this length.
    #[arg(long = "max_len2", value_name = "LEN")]
    pub max_len2: Option<usize>,

    // ========================================================================
    // N filtering
    // ========================================================================
    /// Maximum N base count allowed (reads with more Ns will be discarded)
    #[arg(short = 'n', long = "n_base_limit", default_value_t = 5)]
    pub n_base_limit: usize,

    /// Maximum N content as percentage (long mode only, default: 10%)
    // No clap default is declared here; the "default: 10%" in the help text
    // must be applied wherever this `Option` is consumed.
    #[arg(long = "n_percent_limit", value_name = "PERCENT")]
    pub n_percent_limit: Option<f64>,

    // ========================================================================
    // Index barcode filtering
    // ========================================================================
    /// Filter reads by index 1 barcode (from Illumina header)
    #[arg(long = "filter_by_index1", value_name = "BARCODE")]
    pub filter_by_index1: Option<String>,

    /// Filter reads by index 2 barcode (from Illumina header)
    #[arg(long = "filter_by_index2", value_name = "BARCODE")]
    pub filter_by_index2: Option<String>,

    /// Maximum mismatches allowed when filtering by index barcode
    #[arg(long = "filter_by_index_threshold", default_value_t = 0)]
    pub filter_by_index_threshold: usize,

    // ========================================================================
    // Poly-X tail trimming
    // ========================================================================
    /// Enable poly-G tail trimming (NextSeq/NovaSeq artifacts)
    #[arg(short = 'g', long = "trim_poly_g")]
    pub trim_poly_g: bool,

    /// Minimum length of poly-G tail
    #[arg(long = "poly_g_min_len", default_value_t = 10)]
    pub poly_g_min_len: usize,

    /// Enable poly-X (any homopolymer) tail trimming
    #[arg(short = 'x', long = "trim_poly_x")]
    pub trim_poly_x: bool,

    /// Minimum length of poly-X tail
    #[arg(long = "poly_x_min_len", default_value_t = 10)]
    pub poly_x_min_len: usize,

    /// Disable poly-G tail trimming (overrides --trim_poly_g)
    #[arg(short = 'G', long = "disable_trim_poly_g")]
    pub disable_trim_poly_g: bool,

    // ========================================================================
    // Low complexity filtering
    // ========================================================================
    /// Enable low complexity filter
    #[arg(short = 'y', long = "low_complexity_filter")]
    pub low_complexity_filter: bool,

    /// Complexity threshold (0-100, reads below this are discarded)
    #[arg(short = 'Y', long = "complexity_threshold", default_value_t = 30)]
    pub complexity_threshold: u8,

    // ========================================================================
    // Overrepresentation analysis
    // ========================================================================
    /// Enable overrepresented sequence analysis (enabled by default)
    // `ArgAction::Set` makes this a value-taking option rather than a switch,
    // which is how a default-true setting can be turned off from the command line.
    #[arg(short = 'p', long = "overrepresentation_analysis", default_value_t = true, action = clap::ArgAction::Set)]
    pub overrepresentation_analysis: bool,

    /// One in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower
    #[arg(short = 'P', long = "overrepresentation_sampling", default_value_t = 20)]
    pub overrepresentation_sampling: u32,

    // ========================================================================
    // Deduplication
    // ========================================================================
    /// Enable read deduplication
    #[arg(short = 'D', long = "dedup")]
    pub dedup: bool,

    /// Deduplication accuracy level (1-6, higher = more memory, more accurate)
    // Range is enforced by clap itself through the `value_parser` range.
    #[arg(long = "dup_calc_accuracy", default_value_t = 3, value_parser = clap::value_parser!(u8).range(1..=6))]
    pub dup_calc_accuracy: u8,

    /// Disable duplication rate evaluation (saves memory and CPU, just skips stats)
    #[arg(long = "dont_eval_duplication")]
    pub dont_eval_duplication: bool,

    // ========================================================================
    // Output formats
    // ========================================================================
    /// JSON report output file
    #[arg(short = 'j', long = "json", value_name = "FILE")]
    pub json: Option<PathBuf>,

    /// HTML report output file
    // Uses `-h`, which is why the built-in help flag is disabled on the struct.
    #[arg(short = 'h', long = "html", value_name = "FILE")]
    pub html: Option<PathBuf>,

    /// Report title
    #[arg(short = 'R', long = "report_title", default_value = "fastars report")]
    pub report_title: String,

    // ========================================================================
    // Compression
    // ========================================================================
    /// Compression level for gzip output (1-9, higher = smaller file, slower)
    // The 1-9 range is enforced in `validate`, not by clap.
    #[arg(short = 'z', long = "compression", default_value_t = 4)]
    pub compression: i32,

    /// Do not overwrite existing output files
    #[arg(long = "dont_overwrite")]
    pub dont_overwrite: bool,

    // ========================================================================
    // Output splitting
    // ========================================================================
    // `--split` and `--split_by_lines` are mutually exclusive (checked in `validate`).
    /// Split output into N files
    #[arg(long = "split", value_name = "N")]
    pub split: Option<usize>,

    /// Split output by number of lines (4 lines = 1 read)
    // `validate` requires a non-zero multiple of 4.
    #[arg(long = "split_by_lines", value_name = "LINES")]
    pub split_by_lines: Option<usize>,

    /// Number of digits in split file suffix (default: 4)
    #[arg(long = "split_prefix_digits", default_value_t = 4)]
    pub split_prefix_digits: usize,

    // ========================================================================
    // UMI (Unique Molecular Identifier) processing
    // ========================================================================
    /// Enable UMI (Unique Molecular Identifier) processing
    #[arg(short = 'U', long = "umi")]
    pub umi: bool,

    /// UMI location: read1, read2, index, or per_index
    // Kept as a free-form string; `validate` checks it with
    // `UmiLocation::from_str`, and only when `--umi` is set.
    #[arg(long = "umi_loc", value_name = "LOC", default_value = "read1")]
    pub umi_loc: String,

    /// UMI length (required if --umi is enabled)
    // The default of 0 acts as "not provided"; `validate` rejects it when `--umi` is set.
    #[arg(long = "umi_len", value_name = "LEN", default_value_t = 0)]
    pub umi_len: usize,

    /// Prefix added before UMI in read name (default: empty)
    #[arg(long = "umi_prefix", value_name = "PREFIX", default_value = "")]
    pub umi_prefix: String,

    /// Skip first N bases before UMI (default: 0)
    #[arg(long = "umi_skip", value_name = "N", default_value_t = 0)]
    pub umi_skip: usize,

    /// Separator between read name and UMI (default: ":")
    #[arg(long = "umi_separator", value_name = "SEP", default_value = ":")]
    pub umi_separator: String,

    // ========================================================================
    // Paired-end read merging
    // ========================================================================
    /// Enable paired-end read merging (merge overlapping PE reads into single reads)
    // `validate` requires `--in2` when this is set.
    #[arg(short = 'm', long = "merge")]
    pub merge: bool,

    /// Output file for merged reads (when --merge is enabled)
    #[arg(long = "merged_out", value_name = "FILE")]
    pub merged_out: Option<PathBuf>,

    /// Output file for unmerged read 1 (when --merge is enabled)
    #[arg(long = "out_unmerged1", value_name = "FILE")]
    pub out_unmerged1: Option<PathBuf>,

    /// Output file for unmerged read 2 (when --merge is enabled)
    #[arg(long = "out_unmerged2", value_name = "FILE")]
    pub out_unmerged2: Option<PathBuf>,

    /// Minimum overlap length required for merging (default: 30)
    #[arg(long = "merge_min_overlap", default_value_t = 30)]
    pub merge_min_overlap: usize,

    /// Maximum mismatch ratio in overlap region (0.0-1.0, default: 0.1)
    #[arg(long = "merge_max_mismatch_ratio", default_value_t = 0.1)]
    pub merge_max_mismatch_ratio: f64,

    /// Correct mismatches in overlap using quality scores (default: true)
    // `ArgAction::Set`: value-taking option rather than a switch, so the
    // default-true behaviour can be turned off from the command line.
    #[arg(long = "merge_correct_mismatches", default_value_t = true, action = clap::ArgAction::Set)]
    pub merge_correct_mismatches: bool,


    // ========================================================================
    // Base correction (overlap-based)
    // ========================================================================
    /// Enable overlap-based base correction for paired-end reads
    #[arg(short = 'c', long = "correction")]
    pub correction: bool,

    /// Minimum overlap length required for base correction (default: 30)
    #[arg(long = "overlap_len_require", default_value_t = 30)]
    pub overlap_len_require: usize,

    /// Maximum number of mismatches allowed in overlap region for correction (default: 5)
    #[arg(long = "overlap_diff_limit", default_value_t = 5)]
    pub overlap_diff_limit: usize,

    /// Maximum percentage of mismatches allowed in overlap region for correction (default: 5.0%)
    #[arg(long = "overlap_diff_percent_limit", default_value_t = 5.0)]
    pub overlap_diff_percent_limit: f64,

    /// FASTA file containing adapter sequences
    #[arg(long = "adapter_fasta", value_name = "PATH")]
    pub adapter_fasta: Option<PathBuf>,

    /// Allow gaps during overlap trimming detection (for paired-end merging/correction)
    #[arg(long = "allow_gap_overlap_trimming")]
    pub allow_gap_overlap_trimming: bool,

    /// Output file for only the overlapped region of merged reads
    #[arg(long = "overlapped_out", value_name = "PATH")]
    pub overlapped_out: Option<PathBuf>,

    // ========================================================================
    // Verbosity
    // ========================================================================
    /// Verbose output
    // Uses `-V`, which is why the built-in version flag is disabled on the struct.
    #[arg(short = 'V', long = "verbose")]
    pub verbose: bool,

    /// Number of reads to process (0 = all reads)
    #[arg(long = "reads_to_process", default_value_t = 0)]
    pub reads_to_process: usize,
    // ========================================================================
    // Quality masking and read breaking (long read mode only)
    // ========================================================================
    /// Enable quality masking mode (mask low quality regions with N instead of trimming)
    #[arg(short = 'N', long = "mask")]
    pub mask: bool,

    /// Window size for quality masking
    #[arg(long = "mask_window_size", default_value_t = 50)]
    pub mask_window_size: usize,

    /// Mean quality threshold for masking
    #[arg(long = "mask_mean_quality", default_value_t = 10)]
    pub mask_mean_quality: u8,

    /// Enable read breaking mode (split reads at low quality regions)
    #[arg(short = 'b', long = "break_reads")]
    pub break_reads: bool,

    /// Window size for read breaking
    #[arg(long = "break_window_size", default_value_t = 100)]
    pub break_window_size: usize,

    /// Mean quality threshold for breaking
    #[arg(long = "break_mean_quality", default_value_t = 10)]
    pub break_mean_quality: u8,
}

// ============================================================================
// Validation and Configuration Building
// ============================================================================

impl Cli {
    /// Validate CLI arguments and return an error message if invalid.
    ///
    /// Checks run in a fixed order and the first violation is returned, so the
    /// reported message is deterministic for a given argument set. The checks
    /// cover, in order: input source and file existence, interleaved and
    /// paired-end consistency, stdout/output conflicts, numeric ranges
    /// (compression, Phred threshold, window sizes), split options, UMI
    /// options, merge options, and finally the `--dont_overwrite` guard.
    ///
    /// # Errors
    ///
    /// Returns `Err` carrying a human-readable description of the first
    /// problem found. Returns `Ok(())` when every check passes.
    pub fn validate(&self) -> Result<(), String> {
        // Input source: stdin supports single-end only; a regular path must exist.
        if self.is_stdin() {
            if self.in2.is_some() {
                return Err(
                    "Paired-end mode is not supported when reading from stdin".to_string()
                );
            }
        } else if !self.in1.exists() {
            return Err(format!(
                "Input file does not exist: {}",
                self.in1.display()
            ));
        }

        // Interleaved input carries both mates in one file, so it excludes --in2
        // but still follows the paired-end output rule.
        if self.interleaved_in {
            if self.in2.is_some() {
                return Err("--interleaved_in requires only -i (cannot use with --in2)".to_string());
            }
            if self.out1.is_some() && self.out2.is_none() {
                return Err(
                    "Interleaved mode (paired-end): --out2 is required when --out1 is specified"
                        .to_string(),
                );
            }
        }

        // Paired-end consistency.
        if let Some(ref in2) = self.in2 {
            if !in2.exists() {
                return Err(format!(
                    "Read 2 input file does not exist: {}",
                    in2.display()
                ));
            }

            // With --in2, --out2 must accompany --out1.
            if self.out1.is_some() && self.out2.is_none() {
                return Err(
                    "Paired-end mode: --out2 is required when --out1 and --in2 are specified"
                        .to_string(),
                );
            }
        } else if !self.interleaved_in {
            // True single-end mode: paired-end-only options are rejected.
            if self.out2.is_some() {
                return Err("--out2 specified but --in2 is not provided".to_string());
            }
            if self.detect_adapter_for_pe {
                return Err(
                    "--detect_adapter_for_pe requires paired-end input (--in2 or --interleaved_in)".to_string()
                );
            }
            if self.unpaired1_out.is_some() || self.unpaired2_out.is_some() {
                return Err(
                    "--unpaired1_out and --unpaired2_out require paired-end input (--in2 or --interleaved_in)"
                        .to_string(),
                );
            }
        }

        // --stdout and explicit output files are mutually exclusive.
        if self.stdout && (self.out1.is_some() || self.out2.is_some()) {
            return Err("--stdout cannot be used with --out1 or --out2".to_string());
        }

        // --failed_out needs a primary output destination (--out1 or --stdout).
        if self.failed_out.is_some() && self.out1.is_none() && !self.stdout {
            return Err("--failed_out requires --out1 to be specified".to_string());
        }

        // Compression level.
        if !(1..=9).contains(&self.compression) {
            return Err(format!(
                "Compression level must be between 1 and 9, got: {}",
                self.compression
            ));
        }

        // Phred threshold upper bound.
        if self.qualified_quality_phred > 93 {
            return Err(format!(
                "Quality Phred score cannot exceed 93, got: {}",
                self.qualified_quality_phred
            ));
        }

        // Sliding-window size must be positive.
        if self.cut_window_size == 0 {
            return Err("Window size must be greater than 0".to_string());
        }
        // The per-operation overrides replace --cut_window_size when given, so
        // they must satisfy the same constraint; otherwise a zero window would
        // slip past the check above.
        let window_overrides = [
            ("--cut_front_window_size", self.cut_front_window_size),
            ("--cut_right_window_size", self.cut_right_window_size),
            ("--cut_tail_window_size", self.cut_tail_window_size),
        ];
        for (flag, size) in window_overrides {
            if size == Some(0) {
                return Err(format!("{} must be greater than 0", flag));
            }
        }

        // Split options.
        if self.split.is_some() && self.split_by_lines.is_some() {
            return Err("Cannot use both --split and --split_by_lines at the same time".to_string());
        }
        if self.split == Some(0) {
            return Err("--split value must be greater than 0".to_string());
        }
        if let Some(lines) = self.split_by_lines {
            if lines == 0 {
                return Err("--split_by_lines value must be greater than 0".to_string());
            }
            if lines % 4 != 0 {
                return Err("--split_by_lines value must be a multiple of 4 (1 FASTQ record = 4 lines)".to_string());
            }
        }
        if self.split_prefix_digits == 0 {
            return Err("--split_prefix_digits must be greater than 0".to_string());
        }

        // UMI options are only checked when UMI processing is enabled.
        if self.umi {
            if self.umi_len == 0 {
                return Err("--umi_len is required when --umi is enabled".to_string());
            }
            if UmiLocation::from_str(&self.umi_loc).is_none() {
                return Err(format!(
                    "Invalid UMI location '{}'. Valid options: read1, read2, index, per_index",
                    self.umi_loc
                ));
            }
        }

        // Merge options.
        if self.merge {
            // NOTE(review): interleaved input is treated as paired-end by the
            // checks above but is still rejected here — confirm whether merging
            // interleaved input is intentionally unsupported.
            if self.in2.is_none() {
                return Err("--merge requires paired-end input (--in2)".to_string());
            }
            if self.merge_min_overlap == 0 {
                return Err("--merge_min_overlap must be greater than 0".to_string());
            }
            // A range `contains` test is used instead of two comparisons so that
            // NaN (which compares false against everything) is rejected too.
            if !(0.0..=1.0).contains(&self.merge_max_mismatch_ratio) {
                return Err(format!(
                    "--merge_max_mismatch_ratio must be between 0.0 and 1.0, got: {}",
                    self.merge_max_mismatch_ratio
                ));
            }
        } else {
            // Merge-specific outputs make no sense without --merge.
            if self.merged_out.is_some() {
                return Err("--merged_out requires --merge to be enabled".to_string());
            }
            if self.out_unmerged1.is_some() || self.out_unmerged2.is_some() {
                return Err(
                    "--out_unmerged1 and --out_unmerged2 require --merge to be enabled".to_string()
                );
            }
        }

        // --dont_overwrite: refuse to run if any requested output already exists.
        // Entries are checked in this order; the label is the start of the message.
        if self.dont_overwrite {
            let guarded_outputs = [
                (self.out1.as_ref(), "Output file"),
                (self.out2.as_ref(), "Output file"),
                (self.json.as_ref(), "JSON report file"),
                (self.html.as_ref(), "HTML report file"),
                (self.failed_out.as_ref(), "Failed reads file"),
                (self.unpaired1_out.as_ref(), "Unpaired R1 file"),
                (self.unpaired2_out.as_ref(), "Unpaired R2 file"),
                (self.merged_out.as_ref(), "Merged output file"),
                (self.out_unmerged1.as_ref(), "Unmerged R1 file"),
                (self.out_unmerged2.as_ref(), "Unmerged R2 file"),
                // Previously missing from the guard, so it could be clobbered.
                (self.overlapped_out.as_ref(), "Overlapped output file"),
            ];
            for (target, label) in guarded_outputs {
                if let Some(path) = target {
                    if path.exists() {
                        return Err(format!(
                            "{} already exists (--dont_overwrite): {}",
                            label,
                            path.display()
                        ));
                    }
                }
            }
        }

        Ok(())
    }

    /// Reject options that do not belong to the explicitly selected read mode.
    ///
    /// An option counts as "used" when its flag is set, its optional value is
    /// present, or its value differs from the constant it is compared against
    /// below. With `ReadMode::Auto` nothing is checked, because the effective
    /// mode has not been determined yet.
    ///
    /// # Errors
    ///
    /// Returns a message listing every offending option, the current mode, and
    /// the `--mode` value under which those options are accepted.
    pub fn validate_mode_options(&self) -> Result<(), String> {
        // Names of all checks whose condition fired, in declaration order.
        fn triggered(checks: &[(bool, &'static str)]) -> Vec<&'static str> {
            checks
                .iter()
                .filter(|check| check.0)
                .map(|check| check.1)
                .collect()
        }

        match self.mode {
            // Mode not resolved yet: nothing can be validated.
            ReadMode::Auto => Ok(()),
            ReadMode::Long => {
                // Short-read-only options.
                let offending = triggered(&[
                    (self.dedup, "--dedup"),
                    (self.dup_calc_accuracy != 3, "--dup_calc_accuracy"),
                    (self.filter_by_index1.is_some(), "--filter_by_index1"),
                    (self.filter_by_index2.is_some(), "--filter_by_index2"),
                    (
                        self.filter_by_index_threshold != 0,
                        "--filter_by_index_threshold",
                    ),
                    (self.fix_mgi_id, "--fix_mgi_id"),
                    (
                        self.allow_gap_overlap_trimming,
                        "--allow_gap_overlap_trimming",
                    ),
                    (self.overlapped_out.is_some(), "--overlapped_out"),
                    (
                        self.overlap_diff_percent_limit != 5.0,
                        "--overlap_diff_percent_limit",
                    ),
                ]);

                if offending.is_empty() {
                    return Ok(());
                }
                Err(format!(
                    "The following options are only valid for short read mode: {}\n\
                     Current mode: long\n\
                     Use --mode short to process with these options.",
                    offending.join(", ")
                ))
            }
            ReadMode::Short => {
                // Long-read-only options.
                let offending = triggered(&[
                    (self.start_adapter.is_some(), "--start_adapter"),
                    (self.end_adapter.is_some(), "--end_adapter"),
                    (self.distance_threshold != 0.25, "--distance_threshold"),
                    (self.trimming_extension != 10, "--trimming_extension"),
                    (self.n_percent_limit.is_some(), "--n_percent_limit"),
                    (self.mask, "--mask"),
                    (self.mask_window_size != 50, "--mask_window_size"),
                    (self.mask_mean_quality != 10, "--mask_mean_quality"),
                    (self.break_reads, "--break_reads"),
                    (self.break_window_size != 100, "--break_window_size"),
                    (self.break_mean_quality != 10, "--break_mean_quality"),
                ]);

                if offending.is_empty() {
                    return Ok(());
                }
                Err(format!(
                    "The following options are only valid for long read mode: {}\n\
                     Current mode: short\n\
                     Use --mode long to process with these options.",
                    offending.join(", ")
                ))
            }
        }
    }

    /// Report whether the run is paired-end: either `interleaved_in` is set
    /// or a second input (`in2`) was provided.
    #[inline]
    pub fn is_paired_end(&self) -> bool {
        if self.interleaved_in {
            return true;
        }
        self.in2.is_some()
    }

    /// Check if input is from stdin.
    ///
    /// The input path designates stdin when it is exactly `-` or spells
    /// `stdin` in any letter case.
    #[inline]
    pub fn is_stdin(&self) -> bool {
        let name = self.in1.to_string_lossy();
        // ASCII case-insensitive comparison avoids allocating a lowercased
        // copy; no non-ASCII character lowercases to a letter of "stdin".
        name == "-" || name.eq_ignore_ascii_case("stdin")
    }

    /// Check if output is to stdout.
    #[inline]
    pub fn is_stdout(&self) -> bool {
        if let Some(ref out1) = self.out1 {
            let path_str = out1.to_string_lossy();
            path_str == "-" || path_str.to_lowercase() == "stdout"
        } else {
            false
        }
    }

    /// Resolve the worker thread count.
    ///
    /// A configured value of `0` means "auto" and is replaced by
    /// `num_cpus::get()`; any other value is returned unchanged.
    pub fn get_threads(&self) -> usize {
        match self.threads {
            0 => num_cpus::get(),
            explicit => explicit,
        }
    }

    /// Assemble the fixed-position trimming settings from the
    /// `trim_front1`/`trim_tail1`/`trim_front2`/`trim_tail2` arguments.
    pub fn build_global_trim_config(&self) -> GlobalTrimConfig {
        // Read 1 settings first, then read 2, matching the argument order.
        let read1 = GlobalTrimConfig::new()
            .with_trim_front1(self.trim_front1)
            .with_trim_tail1(self.trim_tail1);

        read1
            .with_trim_front2(self.trim_front2)
            .with_trim_tail2(self.trim_tail2)
    }

    /// Assemble the sliding-window quality trimming settings.
    ///
    /// Starts from the short- or long-read preset selected by `mode`, then
    /// applies the shared window size / threshold and the per-direction
    /// overrides that were supplied. When neither `cut_front` nor `cut_tail`
    /// is requested, tail trimming is switched on by default; otherwise the
    /// two flags are used exactly as given.
    pub fn build_quality_trim_config(&self) -> QualityTrimConfig {
        let preset = match Mode::from(self.mode) {
            Mode::Short => QualityTrimConfig::short_read(),
            Mode::Long => QualityTrimConfig::long_read(),
        };

        // Tail trimming is on when requested, or as the fallback when front
        // trimming was not requested either.
        let front = self.cut_front;
        let tail = self.cut_tail || !self.cut_front;

        let config = preset
            .with_window_size(self.cut_window_size)
            .with_threshold(self.cut_mean_quality)
            .with_cut_front(front)
            .with_cut_tail(tail)
            .with_cut_right(self.cut_right);

        // Per-direction overrides: cut_right, then cut_tail, then cut_front.
        let config = match self.cut_right_window_size {
            Some(size) => config.with_right_window_size(size),
            None => config,
        };
        let config = match self.cut_right_mean_quality {
            Some(quality) => config.with_right_threshold(quality),
            None => config,
        };

        let config = match self.cut_tail_window_size {
            Some(size) => config.with_tail_window_size(size),
            None => config,
        };
        let config = match self.cut_tail_mean_quality {
            Some(quality) => config.with_tail_threshold(quality),
            None => config,
        };

        let config = match self.cut_front_window_size {
            Some(size) => config.with_front_window_size(size),
            None => config,
        };
        match self.cut_front_mean_quality {
            Some(quality) => config.with_front_threshold(quality),
            None => config,
        }
    }

    /// Build adapter trimming configuration from CLI arguments.
    pub fn build_adapter_config(&self) -> AdapterConfig {
        if self.disable_adapter_trimming {
            return AdapterConfig::disabled();
        }

        let mut config = AdapterConfig::new();

        // Set custom adapters if provided
        if let Some(ref adapter) = self.adapter_sequence {
            config = config.with_adapter_r1(adapter.as_bytes().to_vec());
        }
        if let Some(ref adapter) = self.adapter_sequence_r2 {
            config = config.with_adapter_r2(adapter.as_bytes().to_vec());
        }

        // Load adapters from FASTA file if provided
        if let Some(ref fasta_path) = self.adapter_fasta {
            match crate::trim::adapter::parse_adapter_fasta(fasta_path) {
                Ok(adapters) => {
                    config = config.with_adapter_list(adapters);
                }
                Err(e) => {
                    eprintln!("Warning: Failed to parse adapter FASTA file '{}': {}",
                              fasta_path.display(), e);
                }
            }
        }

        // Set long read mode adapters if provided
        if let Some(ref adapter) = self.start_adapter {
            config = config.with_start_adapter(adapter.as_bytes().to_vec());
        }
        if let Some(ref adapter) = self.end_adapter {
            config = config.with_end_adapter(adapter.as_bytes().to_vec());
        }

        config
            .with_auto_detect(self.detect_adapter_for_pe)
            .with_distance_threshold(self.distance_threshold)
            .with_trimming_extension(self.trimming_extension)
    }

    /// Assemble the read-length filtering and truncation settings.
    ///
    /// With `disable_length_filtering` set, the result only carries a minimum
    /// length of 0 and none of the other settings. Otherwise the minimum is
    /// `length_required`, a maximum is set only when `length_limit` is
    /// greater than 0, and the per-read truncation lengths are applied when
    /// present.
    pub fn build_length_config(&self) -> LengthConfig {
        let base = LengthConfig::new();

        if self.disable_length_filtering {
            return base.with_min_length(0);
        }

        let config = base.with_min_length(self.length_required);

        // A limit of 0 means no maximum length.
        let config = if self.length_limit > 0 {
            config.with_max_length(self.length_limit)
        } else {
            config
        };

        // Optional truncation lengths for R1 and R2.
        let config = match self.max_len1 {
            Some(limit) => config.with_max_len_r1(limit),
            None => config,
        };
        match self.max_len2 {
            Some(limit) => config.with_max_len_r2(limit),
            None => config,
        }
    }

    /// Assemble the poly-tail trimming settings, if any are requested.
    ///
    /// Poly-G trimming is active only when `trim_poly_g` is set and
    /// `disable_trim_poly_g` is not. Poly-X takes precedence: when
    /// `trim_poly_x` is set the poly-X configuration (with `poly_x_min_len`)
    /// is returned regardless of the poly-G state. Returns `None` when
    /// neither kind of trimming is active.
    pub fn build_tail_config(&self) -> Option<TailConfig> {
        let poly_g_active = self.trim_poly_g && !self.disable_trim_poly_g;

        match (self.trim_poly_x, poly_g_active) {
            (true, _) => Some(TailConfig::poly_x().with_min_length(self.poly_x_min_len)),
            (false, true) => Some(TailConfig::poly_g().with_min_length(self.poly_g_min_len)),
            (false, false) => None,
        }
    }

    /// Assemble the output splitting settings.
    ///
    /// `split` (by file count) wins over `split_by_lines` when both are
    /// present. Returns `None` when neither is set.
    pub fn build_split_config(&self) -> Option<SplitConfig> {
        let by_file = self.split.map(SplitMode::ByFile);
        let by_lines = self.split_by_lines.map(SplitMode::ByLines);

        by_file.or(by_lines).map(|mode| SplitConfig {
            mode,
            prefix_digits: self.split_prefix_digits,
        })
    }

    /// Build filter configuration from CLI arguments.
    ///
    /// Matches fastp's filtering logic:
    /// - qualified_quality_phred (-q): minimum quality for a base to be "qualified"
    /// - unqualified_percent_limit (-u): max % of unqualified bases
    /// - n_base_limit (-n): max number of N bases (absolute count)
    /// - average_qual (-e): optional average quality filter
    pub fn build_filter_config(&self) -> FilterConfig {
        let mode: Mode = self.mode.into();
        let base = match mode {
            Mode::Short => FilterConfig::short_read(),
            Mode::Long => FilterConfig::long_read(),
        };

        let mut config = base.with_length_config(self.build_length_config());

        // Apply fastp-compatible quality filtering settings
        config = config
            .with_qualified_quality(self.qualified_quality_phred)
            .with_unqualified_percent_limit(self.unqualified_percent_limit as f64)
            .with_n_base_limit(self.n_base_limit);

        // N percent limit (long mode only)
        if let Some(percent) = self.n_percent_limit {
            config = config.with_n_percent_limit(percent);
        }

        // Quality filtering toggle
        if self.disable_quality_filtering {
            config = config.without_quality_filter();
        } else {
            config = config.with_quality_filter();
        }

        // Average quality filter (optional, fastp's -e)
        if self.average_qual > 0 {
            config = config.with_min_avg_quality(self.average_qual);
        }

        // Complexity filtering
        if self.low_complexity_filter {
            let threshold = self.complexity_threshold as f64 / 100.0;
            config = config.with_complexity_threshold(threshold);
        } else {
            config = config.without_complexity_filter();
        }

        // Index barcode filtering
        if let Some(ref index1) = self.filter_by_index1 {
            config = config.with_index1_filter(index1.clone(), self.filter_by_index_threshold);
        }
        if let Some(ref index2) = self.filter_by_index2 {
            config = config.with_index2_filter(index2.clone(), self.filter_by_index_threshold);
        }

        config
    }

    /// Assemble the UMI processing settings.
    ///
    /// Returns `UmiConfig::disabled()` unless `umi` is set and `umi_len` is
    /// non-zero. A `umi_loc` value that `UmiLocation::from_str` does not
    /// accept falls back to `UmiLocation::Read1`.
    pub fn build_umi_config(&self) -> UmiConfig {
        let active = self.umi && self.umi_len != 0;
        if !active {
            return UmiConfig::disabled();
        }

        let location = UmiLocation::from_str(&self.umi_loc).unwrap_or(UmiLocation::Read1);

        let config = UmiConfig::new()
            .enabled()
            .with_location(location)
            .with_length(self.umi_len);

        config
            .with_prefix(&self.umi_prefix)
            .with_skip(self.umi_skip)
            .with_separator(&self.umi_separator)
    }

    /// Assemble the paired-end read merging settings.
    ///
    /// Returns `MergeConfig::disabled()` when `merge` is not set; otherwise an
    /// enabled configuration carrying the minimum overlap, the maximum
    /// mismatch ratio and the mismatch-correction flag.
    pub fn build_merge_config(&self) -> MergeConfig {
        if self.merge {
            MergeConfig::new()
                .with_enabled(true)
                .with_min_overlap(self.merge_min_overlap)
                .with_max_mismatch_ratio(self.merge_max_mismatch_ratio)
                .with_correct_mismatches(self.merge_correct_mismatches)
        } else {
            MergeConfig::disabled()
        }
    }

    /// Assemble the overlap-based base correction settings.
    ///
    /// When `correction` is not set, a plain `CorrectionConfig::new()` is
    /// returned. Otherwise the configuration is enabled and filled with the
    /// overlap length requirement, the two difference limits, the gap-overlap
    /// flag and the optional overlapped-output path.
    pub fn build_correction_config(&self) -> CorrectionConfig {
        let config = CorrectionConfig::new();

        if self.correction {
            config
                .enabled()
                .with_min_overlap(self.overlap_len_require)
                .with_diff_limit(self.overlap_diff_limit)
                .with_diff_percent_limit(self.overlap_diff_percent_limit)
                .with_allow_gap_overlap(self.allow_gap_overlap_trimming)
                .with_overlapped_out(self.overlapped_out.clone())
        } else {
            config
        }
    }

    /// Assemble the deduplication settings.
    ///
    /// Returns `None` when `dedup` is not set; otherwise a `DedupConfig` built
    /// from `dup_calc_accuracy`.
    pub fn build_dedup_config(&self) -> Option<crate::filter::DedupConfig> {
        self.dedup
            .then(|| crate::filter::DedupConfig::new(self.dup_calc_accuracy))
    }
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    //! Unit tests for CLI parsing, validation, and the `build_*_config`
    //! helpers. Tests that call `validate()` create real files in a temporary
    //! directory; the others only parse argument lists.

    use super::*;
    use std::fs::File;
    use tempfile::tempdir;

    /// Create an empty file called `name` inside `dir` and return its path.
    fn create_test_file(dir: &std::path::Path, name: &str) -> PathBuf {
        let path = dir.join(name);
        File::create(&path).unwrap();
        path
    }

    /// Only `-i` given: optional paths stay unset and the mode is `Auto`.
    #[test]
    fn test_cli_parse_minimal() {
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq"]).unwrap();
        assert_eq!(args.in1, PathBuf::from("test.fq"));
        assert!(args.in2.is_none());
        assert!(args.out1.is_none());
        assert_eq!(args.mode, ReadMode::Auto); // Default is now Auto
    }

    /// `-i/-I/-o/-O` populate both inputs and outputs and imply paired-end.
    #[test]
    fn test_cli_parse_paired_end() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "r1.fq",
            "-I",
            "r2.fq",
            "-o",
            "out1.fq",
            "-O",
            "out2.fq",
        ])
        .unwrap();
        assert_eq!(args.in1, PathBuf::from("r1.fq"));
        assert_eq!(args.in2, Some(PathBuf::from("r2.fq")));
        assert_eq!(args.out1, Some(PathBuf::from("out1.fq")));
        assert_eq!(args.out2, Some(PathBuf::from("out2.fq")));
        assert!(args.is_paired_end());
    }

    /// `--mode long` selects `ReadMode::Long`.
    #[test]
    fn test_cli_parse_long_mode() {
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq", "--mode", "long"]).unwrap();
        assert_eq!(args.mode, ReadMode::Long);
    }

    /// `-w` sets the thread count.
    #[test]
    fn test_cli_parse_threads() {
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq", "-w", "8"]).unwrap();
        assert_eq!(args.threads, 8);
    }

    /// Quality-cutting flags and their numeric values are parsed as given.
    #[test]
    fn test_cli_parse_quality_options() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "--cut_front",
            "--cut_front_quality",
            "20",
            "--cut_tail",
            "--cut_tail_quality",
            "18",
            "--cut_window_size",
            "5",
        ])
        .unwrap();
        assert!(args.cut_front);
        assert_eq!(args.cut_front_quality, 20);
        assert!(args.cut_tail);
        assert_eq!(args.cut_tail_quality, 18);
        assert_eq!(args.cut_window_size, 5);
    }

    /// `-a`/`-A` set the R1/R2 adapter sequences; the PE detection flag parses.
    #[test]
    fn test_cli_parse_adapter_options() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "-a",
            "AGATCGGAAGAG",
            "-A",
            "AGATCGGAAGAG",
            "--detect_adapter_for_pe",
        ])
        .unwrap();
        assert_eq!(args.adapter_sequence, Some("AGATCGGAAGAG".to_string()));
        assert_eq!(args.adapter_sequence_r2, Some("AGATCGGAAGAG".to_string()));
        assert!(args.detect_adapter_for_pe);
    }

    /// `--disable_adapter_trimming` sets the corresponding flag.
    #[test]
    fn test_cli_parse_disable_adapter() {
        let args =
            Cli::try_parse_from(["fastars", "-i", "test.fq", "--disable_adapter_trimming"])
                .unwrap();
        assert!(args.disable_adapter_trimming);
    }

    /// `-l` and `--length_limit` set the minimum and maximum read length.
    #[test]
    fn test_cli_parse_length_options() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "-l",
            "50",
            "--length_limit",
            "300",
        ])
        .unwrap();
        assert_eq!(args.length_required, 50);
        assert_eq!(args.length_limit, 300);
    }

    /// Poly-G and poly-X flags can be combined; the poly-G minimum length parses.
    #[test]
    fn test_cli_parse_poly_options() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "--trim_poly_g",
            "--poly_g_min_len",
            "8",
            "--trim_poly_x",
        ])
        .unwrap();
        assert!(args.trim_poly_g);
        assert_eq!(args.poly_g_min_len, 8);
        assert!(args.trim_poly_x);
    }

    /// `-y` enables the complexity filter and `-Y` sets its threshold.
    #[test]
    fn test_cli_parse_complexity_filter() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "-y",
            "-Y",
            "40",
        ])
        .unwrap();
        assert!(args.low_complexity_filter);
        assert_eq!(args.complexity_threshold, 40);
    }

    /// `-j`, `-h` and `-R` set the JSON path, HTML path and report title.
    #[test]
    fn test_cli_parse_report_options() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "-j",
            "report.json",
            "-h",
            "report.html",
            "-R",
            "My Report",
        ])
        .unwrap();
        assert_eq!(args.json, Some(PathBuf::from("report.json")));
        assert_eq!(args.html, Some(PathBuf::from("report.html")));
        assert_eq!(args.report_title, "My Report");
    }

    /// `-z` sets the compression level.
    #[test]
    fn test_cli_parse_compression() {
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq", "-z", "6"]).unwrap();
        assert_eq!(args.compression, 6);
    }

    /// Validation fails with a "does not exist" message for a missing input.
    #[test]
    fn test_cli_validate_missing_input() {
        let args = Cli::try_parse_from(["fastars", "-i", "nonexistent.fq"]).unwrap();
        let result = args.validate();
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("does not exist"));
    }

    /// Validation succeeds when the only argument is an existing input file.
    #[test]
    fn test_cli_validate_valid_input() {
        let dir = tempdir().unwrap();
        let input = create_test_file(dir.path(), "input.fq");
        let args = Cli::try_parse_from(["fastars", "-i", input.to_str().unwrap()]).unwrap();
        assert!(args.validate().is_ok());
    }

    /// Paired-end input with `-o` but no `-O` is rejected.
    #[test]
    fn test_cli_validate_paired_end_missing_out2() {
        let dir = tempdir().unwrap();
        let in1 = create_test_file(dir.path(), "r1.fq");
        let in2 = create_test_file(dir.path(), "r2.fq");
        let out1 = dir.path().join("out1.fq");

        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            in1.to_str().unwrap(),
            "-I",
            in2.to_str().unwrap(),
            "-o",
            out1.to_str().unwrap(),
        ])
        .unwrap();

        let result = args.validate();
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("--out2 is required"));
    }

    /// `-O` without a second input is rejected.
    #[test]
    fn test_cli_validate_out2_without_in2() {
        let dir = tempdir().unwrap();
        let in1 = create_test_file(dir.path(), "r1.fq");
        let out2 = dir.path().join("out2.fq");

        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            in1.to_str().unwrap(),
            "-O",
            out2.to_str().unwrap(),
        ])
        .unwrap();

        let result = args.validate();
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("--out2 specified but --in2 is not"));
    }

    /// Compression levels 0 and 10 both fail validation.
    #[test]
    fn test_cli_validate_compression_range() {
        let dir = tempdir().unwrap();
        let input = create_test_file(dir.path(), "input.fq");

        let args =
            Cli::try_parse_from(["fastars", "-i", input.to_str().unwrap(), "-z", "0"]).unwrap();
        let result = args.validate();
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("Compression level"));

        let args =
            Cli::try_parse_from(["fastars", "-i", input.to_str().unwrap(), "-z", "10"]).unwrap();
        let result = args.validate();
        assert!(result.is_err());
    }

    /// With `--dont_overwrite`, an already existing output file is an error.
    #[test]
    fn test_cli_validate_dont_overwrite() {
        let dir = tempdir().unwrap();
        let input = create_test_file(dir.path(), "input.fq");
        let output = create_test_file(dir.path(), "output.fq");

        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            input.to_str().unwrap(),
            "-o",
            output.to_str().unwrap(),
            "--dont_overwrite",
        ])
        .unwrap();

        let result = args.validate();
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("already exists"));
    }

    /// `-w 0` resolves to a positive, auto-detected thread count.
    #[test]
    fn test_get_threads_auto() {
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq", "-w", "0"]).unwrap();
        let threads = args.get_threads();
        assert!(threads > 0);
    }

    /// An explicit thread count is returned unchanged.
    #[test]
    fn test_get_threads_explicit() {
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq", "-w", "4"]).unwrap();
        assert_eq!(args.get_threads(), 4);
    }

    /// Covers the three front/tail combinations of the quality trim builder:
    /// front only, neither (tail by default), and both.
    #[test]
    fn test_build_quality_trim_config() {
        // Test with explicit cut_front only (need to specify mode since Auto is default)
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "--mode",
            "short",
            "--cut_front",
            "--cut_window_size",
            "8",
            "--cut_mean_quality",
            "20",
        ])
        .unwrap();

        let config = args.build_quality_trim_config();
        assert!(config.cut_front);
        assert!(!config.cut_tail); // Not specified, so false
        assert_eq!(config.window_size, 8);
        assert_eq!(config.threshold, 20);

        // Test default (neither specified)
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq", "--mode", "short"]).unwrap();
        let config = args.build_quality_trim_config();
        assert!(!config.cut_front);
        assert!(config.cut_tail); // Default is tail trimming

        // Test with both specified
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "--mode",
            "short",
            "--cut_front",
            "--cut_tail",
        ])
        .unwrap();
        let config = args.build_quality_trim_config();
        assert!(config.cut_front);
        assert!(config.cut_tail);
    }

    /// A disabled adapter configuration carries no R1/R2 adapter.
    #[test]
    fn test_build_adapter_config_disabled() {
        let args =
            Cli::try_parse_from(["fastars", "-i", "test.fq", "--disable_adapter_trimming"])
                .unwrap();

        let config = args.build_adapter_config();
        assert!(config.adapter_r1.is_none());
        assert!(config.adapter_r2.is_none());
    }

    /// Custom `-a`/`-A` sequences end up as bytes in the adapter configuration.
    #[test]
    fn test_build_adapter_config_custom() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "-a",
            "ACGT",
            "-A",
            "TGCA",
        ])
        .unwrap();

        let config = args.build_adapter_config();
        assert_eq!(config.adapter_r1.as_deref(), Some(b"ACGT".as_slice()));
        assert_eq!(config.adapter_r2.as_deref(), Some(b"TGCA".as_slice()));
    }

    /// Minimum and maximum lengths are carried into the length configuration.
    #[test]
    fn test_build_length_config() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "-l",
            "50",
            "--length_limit",
            "250",
        ])
        .unwrap();

        let config = args.build_length_config();
        assert_eq!(config.min_length, 50);
        assert_eq!(config.max_length, Some(250));
    }

    /// Without any poly-tail option no tail configuration is produced.
    #[test]
    fn test_build_tail_config_none() {
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq"]).unwrap();
        assert!(args.build_tail_config().is_none());
    }

    /// `--trim_poly_g` alone yields a G-only tail configuration with the
    /// requested minimum length.
    #[test]
    fn test_build_tail_config_poly_g() {
        let args =
            Cli::try_parse_from(["fastars", "-i", "test.fq", "--trim_poly_g", "--poly_g_min_len", "8"])
                .unwrap();

        let config = args.build_tail_config().unwrap();
        assert!(config.enabled_bases[2]); // G enabled
        assert!(!config.enabled_bases[0]); // A disabled
        assert_eq!(config.min_length, 8);
    }

    /// Length, average-quality and complexity options reach the filter
    /// configuration; the complexity percentage is converted to a fraction.
    #[test]
    fn test_build_filter_config() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "--mode",
            "short",
            "-l",
            "30",
            "--average_qual",  // Use long form since -e is now end_adapter
            "20",
            "-y",
            "-Y",
            "40",
        ])
        .unwrap();

        let config = args.build_filter_config();
        assert_eq!(config.length_config.min_length, 30);
        assert_eq!(config.min_avg_quality, Some(20));
        assert_eq!(config.low_complexity_threshold, Some(0.4));
    }

    /// With quality filtering disabled (and no `--average_qual`), no average
    /// quality threshold is set.
    #[test]
    fn test_build_filter_config_disabled_quality() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "--mode",
            "short",
            "--disable_quality_filtering",
        ])
        .unwrap();

        let config = args.build_filter_config();
        assert!(config.min_avg_quality.is_none());
    }

    /// `ReadMode::Short`/`Long` convert to the matching `Mode` variants.
    #[test]
    fn test_read_mode_conversion() {
        assert_eq!(Mode::from(ReadMode::Short), Mode::Short);
        assert_eq!(Mode::from(ReadMode::Long), Mode::Long);
    }

    /// `-h` is accepted as the HTML report option with a path value.
    #[test]
    fn test_cli_help_not_conflict() {
        // -h is used for HTML, not help. Verify this works.
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq", "-h", "report.html"]).unwrap();
        assert_eq!(args.html, Some(PathBuf::from("report.html")));
    }

    /// `--version` makes `try_parse_from` return `Err` rather than parsed args.
    #[test]
    fn test_cli_version() {
        // Just verify --version doesn't conflict with other options
        let result = Cli::try_parse_from(["fastars", "--version"]);
        // Should be an error that displays version
        assert!(result.is_err());
    }

    /// All UMI options are parsed into their fields verbatim.
    #[test]
    fn test_cli_parse_umi_options() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "-U",
            "--umi_loc",
            "read1",
            "--umi_len",
            "8",
            "--umi_prefix",
            "UMI",
            "--umi_skip",
            "2",
            "--umi_separator",
            "_",
        ])
        .unwrap();
        assert!(args.umi);
        assert_eq!(args.umi_loc, "read1");
        assert_eq!(args.umi_len, 8);
        assert_eq!(args.umi_prefix, "UMI");
        assert_eq!(args.umi_skip, 2);
        assert_eq!(args.umi_separator, "_");
    }

    /// Enabling UMI processing without `--umi_len` fails validation.
    #[test]
    fn test_cli_validate_umi_no_length() {
        let dir = tempdir().unwrap();
        let input = create_test_file(dir.path(), "input.fq");

        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            input.to_str().unwrap(),
            "-U",
        ])
        .unwrap();
        let result = args.validate();
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("--umi_len is required"));
    }

    /// An unrecognised `--umi_loc` value fails validation.
    #[test]
    fn test_cli_validate_umi_invalid_location() {
        let dir = tempdir().unwrap();
        let input = create_test_file(dir.path(), "input.fq");

        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            input.to_str().unwrap(),
            "-U",
            "--umi_len",
            "8",
            "--umi_loc",
            "invalid",
        ])
        .unwrap();
        let result = args.validate();
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("Invalid UMI location"));
    }

    /// The UMI builder carries location, length, prefix, skip and separator
    /// into an enabled configuration.
    #[test]
    fn test_build_umi_config() {
        let args = Cli::try_parse_from([
            "fastars",
            "-i",
            "test.fq",
            "-U",
            "--umi_loc",
            "read2",
            "--umi_len",
            "10",
            "--umi_prefix",
            "UMI",
            "--umi_skip",
            "3",
            "--umi_separator",
            "_",
        ])
        .unwrap();

        let config = args.build_umi_config();
        assert!(config.is_enabled());
        assert_eq!(config.location, UmiLocation::Read2);
        assert_eq!(config.length, 10);
        assert_eq!(config.prefix, "UMI");
        assert_eq!(config.skip, 3);
        assert_eq!(config.separator, "_");
    }

    /// Without `-U` the UMI configuration is disabled.
    #[test]
    fn test_build_umi_config_disabled() {
        let args = Cli::try_parse_from(["fastars", "-i", "test.fq"]).unwrap();

        let config = args.build_umi_config();
        assert!(!config.is_enabled());
    }
}