// fastars 0.1.0
//
// Ultra-fast QC and trimming for short and long reads
// Documentation
//! Fastars CLI entry point.
//!
//! This binary provides the command-line interface for fastars,
//! delegating to the library for all functionality.

use anyhow::{Context, Result};
use clap::Parser;
use std::fs::File;
use std::io::BufWriter;
use std::process::ExitCode;
use std::time::Instant;

use fastars::cli::{Cli, ReadMode};
use fastars::pipeline::{detect_mode, ModeDetectionConfig, PipelineConfig, PipelineExecutor};
use fastars::report::{
    write_full_json_report, write_html_report_filtering, JsonConfig, JsonReport, ReadStats,
};
use fastars::trim::{TailConfig, TrimConfig};

/// Process exit statuses returned by the `fastars` binary.
///
/// The values are part of the CLI contract and are pinned by the unit test
/// at the bottom of this file.
mod exit_codes {
    /// The run finished without error.
    pub const SUCCESS: u8 = 0;

    /// Any failure that does not fall into a more specific category.
    pub const ERROR: u8 = 1;

    /// The command-line arguments were rejected.
    pub const INVALID_ARGS: u8 = 2;

    /// A problem with an input file.
    pub const INPUT_ERROR: u8 = 3;

    /// A problem with an output file.
    // `main` never returns this code in the code visible here, hence the
    // lint suppression; it is only referenced from the unit test.
    #[allow(dead_code)]
    pub const OUTPUT_ERROR: u8 = 4;
}

/// Binary entry point.
///
/// Initializes logging (default filter `warn`, overridable through the
/// environment), runs [`run`], and converts its outcome into a process exit
/// code. On failure the full error chain is printed to stderr, outermost
/// message first.
///
/// Exit-code selection is message based:
/// * [`exit_codes::INPUT_ERROR`] when *any* error in the chain mentions
///   "does not exist";
/// * [`exit_codes::INVALID_ARGS`] when the outermost message contains
///   "Invalid" or "required";
/// * [`exit_codes::ERROR`] otherwise.
fn main() -> ExitCode {
    // Initialize logger
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("warn")).init();

    // Run the main logic and handle errors
    match run() {
        Ok(()) => ExitCode::from(exit_codes::SUCCESS),
        Err(e) => {
            // Render the outermost message once; it is used for both the
            // report and the classification below.
            let top_level = e.to_string();

            // Print error chain
            eprintln!("Error: {}", top_level);
            for cause in e.chain().skip(1) {
                eprintln!("  Caused by: {}", cause);
            }

            // Every error leaving `run` is wrapped in a context message, so a
            // missing-input root cause is only visible further down the chain.
            // Looking at the outermost message alone would make INPUT_ERROR
            // practically unreachable.
            // NOTE(review): relies on the library phrasing missing-file errors
            // with "does not exist" — confirm against `Cli::validate`.
            let missing_input = e
                .chain()
                .any(|cause| cause.to_string().contains("does not exist"));

            // Determine exit code based on error type
            let exit_code = if missing_input {
                exit_codes::INPUT_ERROR
            } else if top_level.contains("Invalid") || top_level.contains("required") {
                exit_codes::INVALID_ARGS
            } else {
                exit_codes::ERROR
            };

            ExitCode::from(exit_code)
        }
    }
}

fn run() -> Result<()> {
    let start_time = Instant::now();

    // Parse CLI arguments
    let mut args = Cli::parse();

    // Validate arguments
    args.validate()
        .map_err(|e| anyhow::anyhow!("{}", e))
        .context("Invalid arguments")?;

    // Detect or use specified mode
    let detected_mode = match args.mode {
        ReadMode::Auto => {
            let config = ModeDetectionConfig {
                sample_size: args.mode_detect_sample,
                threshold_bp: args.mode_detect_threshold,
            };
            let (mode, median_len) = detect_mode(&args.in1, &config)
                .context("Failed to detect read mode")?;
            eprintln!(
                "Auto-detected mode: {:?} (median read length: {}bp)",
                mode, median_len
            );
            mode
        }
        ReadMode::Short => ReadMode::Short,
        ReadMode::Long => ReadMode::Long,
    };

    // Update args with the resolved mode
    args.mode = detected_mode;

    // Validate mode-specific options
    if let Err(e) = args.validate_mode_options() {
        return Err(anyhow::anyhow!("Mode validation failed: {}", e));
    }

    // Print startup info
    if args.verbose {
        print_startup_info(&args);
    }

    // Build pipeline configuration
    let mut config = match detected_mode {
        ReadMode::Auto => unreachable!("Auto mode should be resolved by now"),
        ReadMode::Short => PipelineConfig::short_read(),
        ReadMode::Long => PipelineConfig::long_read(),
    };

    // Set threads
    config = config.with_threads(args.get_threads());

    // Add input files
    if args.interleaved_in {
        // Interleaved mode: single input file treated as paired-end
        config = config.with_paired_input(args.in1.clone(), args.in1.clone());
        config = config.with_interleaved_in(true);
    } else if let Some(ref in2) = args.in2 {
        config = config.with_paired_input(args.in1.clone(), in2.clone());
    } else {
        config = config.with_input(args.in1.clone());
    }

    // Set output mode (stdout or file)
    if args.stdout {
        config = config.with_stdout();
    } else if let Some(ref out1) = args.out1 {
        // Use the output file path as the prefix
        config = config.with_output_prefix(out1.clone());
    }

    // Build trim configuration
    let quality_config = args.build_quality_trim_config();
    let adapter_config = args.build_adapter_config();
    let tail_config = args.build_tail_config();
    let length_config = args.build_length_config();

    let trim_config = TrimConfig::new()
        .with_quality(quality_config)
        .with_adapter(adapter_config)
        .with_length(length_config);

    let trim_config = if let Some(tc) = tail_config {
        trim_config.with_tail(tc)
    } else {
        trim_config.with_tail(TailConfig::new().with_min_length(usize::MAX))
    };

    config = config.with_trim_config(trim_config);

    // Build filter configuration
    let filter_config = args.build_filter_config();
    config = config.with_filter_config(filter_config);

    // Set split configuration if provided
    if let Some(split_config) = args.build_split_config() {
        config = config.with_split_config(split_config);
    }

    // Set reads_to_process if specified
    if args.reads_to_process > 0 {
        config = config.with_reads_to_process(args.reads_to_process);
    }

    // Set duplication evaluation flag
    config = config.with_eval_duplication(!args.dont_eval_duplication);

    // Set overrepresentation analysis settings
    config = config
        .with_overrepresentation_analysis(args.overrepresentation_analysis)
        .with_overrepresentation_sampling(args.overrepresentation_sampling);

    // Set phred64 conversion flag
    config.phred64 = args.phred64;

    // Set MGI ID conversion flag
    config.fix_mgi_id = args.fix_mgi_id;

    // Create and run pipeline
    let executor = PipelineExecutor::new(config);

    let result = executor.run().context("Pipeline execution failed")?;

    // Generate JSON report if requested
    if let Some(ref json_path) = args.json {
        let file = File::create(json_path)
            .with_context(|| format!("Failed to create JSON report: {}", json_path.display()))?;
        let mut writer = BufWriter::new(file);

        // Build complete JSON report with before/after and paired-end support
        let is_paired = args.in2.is_some();
        let command = std::env::args().collect::<Vec<_>>().join(" ");

        let mut report = JsonReport::from_qc_stats_pair(
            &result.qc_before,
            &result.qc_after,
            is_paired,
            command,
        );

        // Set filtering result from worker stats
        report.filtering_result.passed_filter_reads = result.qc_after.total_reads;
        report.filtering_result.low_quality_reads =
            result.qc_before.total_reads.saturating_sub(result.qc_after.total_reads);

        write_full_json_report(&report, &JsonConfig::default(), &mut writer)
            .context("Failed to write JSON report")?;
    }

    // Generate HTML report if requested
    if let Some(ref html_path) = args.html {
        let file = File::create(html_path)
            .with_context(|| format!("Failed to create HTML report: {}", html_path.display()))?;
        let mut writer = BufWriter::new(file);
        write_html_report_filtering(&result.qc_before, &result.qc_after, &mut writer)
            .context("Failed to write HTML report")?;
    }

    // Print summary to stderr
    let elapsed = start_time.elapsed();
    print_summary(&args, &result, elapsed);

    Ok(())
}

/// Print startup information.
fn print_startup_info(args: &Cli) {
    eprintln!("fastars v{}", env!("CARGO_PKG_VERSION"));
    eprintln!("----------------------------------------");
    eprintln!("Input:");
    eprintln!("  Read 1: {}", args.in1.display());
    if let Some(ref in2) = args.in2 {
        eprintln!("  Read 2: {}", in2.display());
    }
    eprintln!();
    eprintln!("Output:");
    if let Some(ref out1) = args.out1 {
        eprintln!("  Read 1: {}", out1.display());
    }
    if let Some(ref out2) = args.out2 {
        eprintln!("  Read 2: {}", out2.display());
    }
    if let Some(ref json) = args.json {
        eprintln!("  JSON report: {}", json.display());
    }
    if let Some(ref html) = args.html {
        eprintln!("  HTML report: {}", html.display());
    }
    eprintln!();
    eprintln!("Settings:");
    eprintln!("  Mode: {:?}", args.mode);
    eprintln!("  Threads: {}", args.get_threads());
    if !args.disable_adapter_trimming {
        eprintln!("  Adapter trimming: enabled");
        if let Some(ref adapter) = args.adapter_sequence {
            eprintln!("    R1 adapter: {}", adapter);
        }
        if let Some(ref adapter) = args.adapter_sequence_r2 {
            eprintln!("    R2 adapter: {}", adapter);
        }
    } else {
        eprintln!("  Adapter trimming: disabled");
    }
    if args.cut_front || args.cut_tail {
        eprintln!("  Quality trimming: enabled");
        eprintln!("    Window size: {}", args.cut_window_size);
        eprintln!("    Quality threshold: {}", args.cut_mean_quality);
    }
    if args.trim_poly_g {
        eprintln!(
            "  Poly-G trimming: enabled (min length: {})",
            args.poly_g_min_len
        );
    }
    if args.trim_poly_x {
        eprintln!(
            "  Poly-X trimming: enabled (min length: {})",
            args.poly_x_min_len
        );
    }
    eprintln!(
        "  Length filter: {} - {}",
        args.length_required,
        if args.length_limit > 0 {
            args.length_limit.to_string()
        } else {
            "unlimited".to_string()
        }
    );
    if args.low_complexity_filter {
        eprintln!(
            "  Complexity filter: enabled (threshold: {}%)",
            args.complexity_threshold
        );
    }
    eprintln!("----------------------------------------");
    eprintln!();
}

/// Print processing summary.
fn print_summary(
    args: &Cli,
    result: &fastars::pipeline::PipelineResult,
    elapsed: std::time::Duration,
) {
    eprintln!();
    eprintln!("========================================");
    eprintln!("fastars v{} - Summary", env!("CARGO_PKG_VERSION"));
    eprintln!("========================================");
    eprintln!();
    eprintln!("Input file(s):");
    eprintln!("  {}", args.in1.display());
    if let Some(ref in2) = args.in2 {
        eprintln!("  {}", in2.display());
    }
    eprintln!();

    // Print actual statistics
    eprintln!("Read statistics:");
    eprintln!("  Total reads (before): {}", result.qc_before.total_reads);
    eprintln!("  Total reads (after):  {}", result.qc_after.total_reads);
    eprintln!("  Reads filtered out:   {}", result.reads_filtered());
    eprintln!("  Pass rate:            {:.2}%", result.pass_rate());
    eprintln!();

    eprintln!("Base statistics:");
    eprintln!(
        "  Total bases (before): {}",
        result.worker_stats.bases_before
    );
    eprintln!(
        "  Total bases (after):  {}",
        result.worker_stats.bases_after
    );
    eprintln!(
        "  Base retention:       {:.2}%",
        result.worker_stats.base_retention_rate()
    );
    eprintln!();

    eprintln!("Processing time: {:.2}s", elapsed.as_secs_f64());
    eprintln!();

    if !result.output_files.is_empty() {
        eprintln!("Output files:");
        for path in &result.output_files {
            eprintln!("  {}", path.display());
        }
        eprintln!();
    }

    if let Some(ref json) = args.json {
        eprintln!("JSON report: {}", json.display());
    }
    if let Some(ref html) = args.html {
        eprintln!("HTML report: {}", html.display());
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the numeric value of every exit code so that an accidental
    /// renumbering is caught.
    #[test]
    fn test_exit_codes_defined() {
        let table: [(&str, u8, u8); 5] = [
            ("SUCCESS", exit_codes::SUCCESS, 0),
            ("ERROR", exit_codes::ERROR, 1),
            ("INVALID_ARGS", exit_codes::INVALID_ARGS, 2),
            ("INPUT_ERROR", exit_codes::INPUT_ERROR, 3),
            ("OUTPUT_ERROR", exit_codes::OUTPUT_ERROR, 4),
        ];

        for &(name, actual, wanted) in table.iter() {
            assert_eq!(actual, wanted, "unexpected value for exit_codes::{}", name);
        }
    }
}