nanocov 0.1.0

Rust Coverage Calculator and QC Plot Generation Tool
Documentation
use crate::cli::Cli;
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};

/// One sample row of the batch TSV, as produced by `parse_batch_tsv`.
#[derive(Debug, Clone)]
struct BatchEntry {
    /// Path taken from the first (BAM) column.
    bam: PathBuf,
    /// Path taken from the second (BED) column; `None` when that column is
    /// blank or holds a placeholder recognised by `parse_bed_column`.
    bed: Option<PathBuf>,
    /// Output prefix from the optional third column; `None` when the column
    /// is missing or blank.
    prefix: Option<String>,
}

/// Reads a tab-separated batch sheet and returns one [`BatchEntry`] per data line.
///
/// Columns are `BAM`, `BED` and an optional `PREFIX`. Blank lines and lines
/// whose first non-whitespace character is `#` are skipped. Every column is
/// trimmed of surrounding whitespace before use.
///
/// A UTF-8 byte-order mark at the very start of the file is ignored. Without
/// this it would become part of the first BAM path (or hide a leading `#`
/// comment), because `str::trim` does not treat U+FEFF as whitespace.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read, when a data line
/// has fewer than two tab-separated columns, or when the BAM column is empty.
fn parse_batch_tsv(path: &Path) -> Result<Vec<BatchEntry>, Box<dyn Error>> {
    let file = File::open(path)?;
    let reader = BufReader::new(file);

    let mut entries = Vec::new();
    for (idx, line) in reader.lines().enumerate() {
        // Line numbers in error messages are 1-based.
        let line_no = idx + 1;
        let line = line?;
        let mut raw = line.trim_end_matches('\r');
        if idx == 0 {
            // Sheets exported by spreadsheet tools often start with a BOM.
            raw = raw.strip_prefix('\u{feff}').unwrap_or(raw);
        }
        let trimmed = raw.trim();

        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        // Split the untrimmed line so that an empty leading column is still
        // seen as a column (and reported as a missing BAM path below).
        let parts: Vec<_> = raw.split('\t').collect();
        if parts.len() < 2 {
            return Err(format!(
                "Expected at least two columns (BAM, BED, optional PREFIX) on line {line_no} in {}",
                path.display()
            )
            .into());
        }

        let bam_col = parts[0].trim();
        let bed_col = parts[1].trim();
        let prefix_col = parts.get(2).map(|s| s.trim());

        if bam_col.is_empty() {
            return Err(format!("Missing BAM path on line {line_no} in {}", path.display()).into());
        }

        let bed = parse_bed_column(bed_col);
        entries.push(BatchEntry {
            bam: PathBuf::from(bam_col),
            bed,
            prefix: parse_prefix_column(prefix_col),
        });
    }

    Ok(entries)
}

/// Interprets the BED column of a batch row.
///
/// Returns `None` when the trimmed value is empty or is one of the
/// "no BED" placeholders (`none`, `na`, `null`, `-`, `n/a`, compared without
/// regard to ASCII case); otherwise returns the trimmed value as a path.
fn parse_bed_column(raw: &str) -> Option<PathBuf> {
    const PLACEHOLDERS: [&str; 5] = ["none", "na", "null", "-", "n/a"];

    let value = raw.trim();
    let is_placeholder = PLACEHOLDERS
        .iter()
        .any(|&token| value.eq_ignore_ascii_case(token));

    if value.is_empty() || is_placeholder {
        None
    } else {
        Some(PathBuf::from(value))
    }
}

/// Processes every sample listed in the batch TSV named by `cli.batch_tsv`.
///
/// For each row a clone of `cli` is made with `input`, `bed` and `prefix`
/// overridden: a row without a BED falls back to `cli.bed`, and a row without
/// a prefix uses the BAM file stem. Prefixes are passed through
/// `dedup_prefix` before use. The per-sample results are then handed to
/// `crate::io::statistics::write_batch_statistics` together with
/// `cli.batch_output_path()`.
///
/// # Errors
///
/// Returns an error when no batch TSV path is set, when the TSV cannot be
/// parsed or contains no samples, when processing any sample fails (the
/// remaining samples are not attempted), or when writing the batch
/// statistics fails.
pub async fn run_batch(cli: &Cli) -> Result<(), Box<dyn Error>> {
    let tsv_path = cli
        .batch_tsv
        .as_deref()
        .ok_or("batch TSV path was not provided")?;

    let samples = parse_batch_tsv(tsv_path)?;
    let total = samples.len();
    if total == 0 {
        return Err(format!("No samples found in batch TSV {}", tsv_path.display()).into());
    }

    let mut prefix_counts: HashMap<String, usize> = HashMap::new();
    let mut results = Vec::new();

    for (position, sample) in samples.iter().enumerate() {
        let bed_label = match &sample.bed {
            Some(bed) => bed.display().to_string(),
            None => "<none>".to_string(),
        };
        println!(
            "Processing batch sample {}/{}: {} (bed: {})",
            position + 1,
            total,
            sample.bam.display(),
            bed_label
        );

        // An explicit prefix wins; otherwise derive one from the BAM name.
        let requested = match &sample.prefix {
            Some(explicit) => explicit.clone(),
            None => default_prefix_for_bam(&sample.bam),
        };
        let unique_prefix = dedup_prefix(&requested, &mut prefix_counts);

        let mut per_sample = cli.clone();
        per_sample.input = Some(sample.bam.clone());
        // The row's BED takes precedence over the one given on the command line.
        per_sample.bed = match &sample.bed {
            Some(bed) => Some(bed.clone()),
            None => cli.bed.clone(),
        };
        per_sample.prefix = Some(unique_prefix.clone());

        let stats = crate::runner::process_single_sample(&per_sample).await?;
        results.push((stats, per_sample.bed.clone(), unique_prefix));
    }

    let batch_output_path = cli.batch_output_path();
    crate::io::statistics::write_batch_statistics(&results, &batch_output_path)?;

    println!(
        "Batch statistics written to {batch_output_path:?} ({} samples)",
        results.len()
    );

    Ok(())
}

/// Returns a prefix derived from `base` that has not been handed out before.
///
/// The first request for a given `base` returns it unchanged; later requests
/// return `"{base}_{n}"` with `n` starting at 2. Every returned name is
/// recorded in `seen`, and suffixes already taken are skipped, so a generated
/// name can never coincide with a prefix returned earlier (for example an
/// explicit `a_2` followed by two samples named `a`).
///
/// `seen` maps each known prefix to the highest counter used for it; callers
/// should pass the same map for every sample in a batch.
fn dedup_prefix(base: &str, seen: &mut HashMap<String, usize>) -> String {
    let uses = seen.entry(base.to_string()).or_insert(0);
    *uses += 1;
    if *uses == 1 {
        return base.to_string();
    }

    // `base` is already taken: probe suffixes until an unused name turns up.
    let mut suffix = *uses;
    loop {
        let candidate = format!("{base}_{suffix}");
        if !seen.contains_key(&candidate) {
            // Remember where probing stopped so the next request resumes after it.
            seen.insert(base.to_string(), suffix);
            // Reserve the generated name so it cannot be issued again.
            seen.insert(candidate.clone(), 1);
            return candidate;
        }
        suffix += 1;
    }
}

/// Derives an output prefix from a BAM path.
///
/// Uses the file stem (file name without its final extension). Falls back to
/// `"coverage"` when the path has no file name or the stem is not valid UTF-8.
fn default_prefix_for_bam(bam_path: &Path) -> String {
    let stem = bam_path.file_stem().and_then(|os| os.to_str());
    match stem {
        Some(name) => name.to_owned(),
        None => String::from("coverage"),
    }
}

/// Interprets the optional PREFIX column of a batch row.
///
/// Returns the value with surrounding whitespace removed, or `None` when the
/// column is absent or contains only whitespace.
fn parse_prefix_column(raw: Option<&str>) -> Option<String> {
    raw.map(str::trim)
        .filter(|text| !text.is_empty())
        .map(str::to_owned)
}