use crate::cli::Cli;
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
/// One sample row parsed from the batch TSV by `parse_batch_tsv`.
#[derive(Debug, Clone)]
struct BatchEntry {
/// BAM path taken from the first column; the parser rejects rows where it is empty.
bam: PathBuf,
/// BED path from the second column, or `None` when `parse_bed_column` treats the
/// value as empty/placeholder. `run_batch` then falls back to `cli.bed`.
bed: Option<PathBuf>,
/// Output prefix from the optional third column; `None` when the column is absent
/// or blank, in which case `run_batch` derives one via `default_prefix_for_bam`.
prefix: Option<String>,
}
/// Reads a tab-separated batch file and returns one [`BatchEntry`] per data row.
///
/// Rows that are blank or whose first non-whitespace character is `#` are skipped.
/// Every remaining row must contain at least two tab-separated columns
/// (BAM, BED); a third column supplies an optional output prefix. Any further
/// columns are ignored. Each column is whitespace-trimmed before use.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read, when a data row
/// has fewer than two columns, or when the BAM column is empty. Row-level
/// messages carry the 1-based line number and the file path.
fn parse_batch_tsv(path: &Path) -> Result<Vec<BatchEntry>, Box<dyn Error>> {
    let reader = BufReader::new(File::open(path)?);
    let mut entries = Vec::new();

    for (idx, line) in reader.lines().enumerate() {
        let line = line?;
        let line_no = idx + 1;

        // `lines()` already strips "\n" / "\r\n"; this drops any stray trailing '\r'.
        let raw = line.trim_end_matches('\r');
        let content = raw.trim();
        if content.is_empty() || content.starts_with('#') {
            continue;
        }

        // Columns are pulled lazily from the untrimmed row so that leading
        // whitespace cannot swallow an empty first column.
        let mut columns = raw.split('\t').map(str::trim);
        let (bam_col, bed_col) = match (columns.next(), columns.next()) {
            (Some(bam), Some(bed)) => (bam, bed),
            _ => {
                return Err(format!(
                    "Expected at least two columns (BAM, BED, optional PREFIX) on line {line_no} in {}",
                    path.display()
                )
                .into());
            }
        };
        let prefix_col = columns.next();

        if bam_col.is_empty() {
            return Err(format!("Missing BAM path on line {line_no} in {}", path.display()).into());
        }

        entries.push(BatchEntry {
            bam: PathBuf::from(bam_col),
            bed: parse_bed_column(bed_col),
            prefix: parse_prefix_column(prefix_col),
        });
    }

    Ok(entries)
}
/// Interprets the BED column of a batch row.
///
/// Returns `None` when the trimmed value is empty or is one of the
/// placeholders `none`, `na`, `null`, `-`, `n/a` (compared ASCII
/// case-insensitively). Otherwise returns the trimmed value as a path.
fn parse_bed_column(raw: &str) -> Option<PathBuf> {
    const PLACEHOLDERS: [&str; 5] = ["none", "na", "null", "-", "n/a"];

    let value = raw.trim();
    let is_placeholder = PLACEHOLDERS
        .iter()
        .any(|&marker| value.eq_ignore_ascii_case(marker));

    if value.is_empty() || is_placeholder {
        None
    } else {
        Some(PathBuf::from(value))
    }
}
/// Runs every sample listed in the batch TSV and writes combined statistics.
///
/// For each row a clone of `cli` is specialised with that row's BAM as
/// `input`, its BED (falling back to `cli.bed` when the row has none) and a
/// prefix obtained from `dedup_prefix`. The clone is passed to
/// `crate::runner::process_single_sample`; the collected results are handed
/// to `crate::io::statistics::write_batch_statistics` together with
/// `cli.batch_output_path()`.
///
/// # Errors
///
/// Fails when no batch TSV path is set on `cli`, when the TSV cannot be
/// parsed or contains no samples, when processing any sample fails (the
/// loop stops at the first failure), or when writing the statistics fails.
pub async fn run_batch(cli: &Cli) -> Result<(), Box<dyn Error>> {
    let tsv_path = cli
        .batch_tsv
        .as_deref()
        .ok_or("batch TSV path was not provided")?;

    let samples = parse_batch_tsv(tsv_path)?;
    if samples.is_empty() {
        return Err(format!("No samples found in batch TSV {}", tsv_path.display()).into());
    }

    let total = samples.len();
    let mut prefix_counts: HashMap<String, usize> = HashMap::new();
    let mut outputs = Vec::new();

    for (position, sample) in samples.iter().enumerate() {
        // The progress line reports the row's own BED column, not the
        // `cli.bed` fallback applied further down.
        let bed_label = match &sample.bed {
            Some(bed) => bed.display().to_string(),
            None => "<none>".to_string(),
        };
        println!(
            "Processing batch sample {}/{}: {} (bed: {})",
            position + 1,
            total,
            sample.bam.display(),
            bed_label
        );

        let requested_prefix = match &sample.prefix {
            Some(explicit) => explicit.clone(),
            None => default_prefix_for_bam(&sample.bam),
        };
        let prefix = dedup_prefix(&requested_prefix, &mut prefix_counts);

        let mut sample_cli = cli.clone();
        sample_cli.input = Some(sample.bam.clone());
        sample_cli.bed = sample.bed.clone().or_else(|| cli.bed.clone());
        sample_cli.prefix = Some(prefix.clone());

        let stats = crate::runner::process_single_sample(&sample_cli).await?;
        outputs.push((stats, sample_cli.bed.clone(), prefix));
    }

    let batch_output_path = cli.batch_output_path();
    crate::io::statistics::write_batch_statistics(&outputs, &batch_output_path)?;
    println!(
        "Batch statistics written to {batch_output_path:?} ({} samples)",
        outputs.len()
    );

    Ok(())
}
/// Returns a prefix based on `base` that has not been returned before for
/// the same `seen` map.
///
/// The first request for a given `base` returns it unchanged. Later requests
/// return `"{base}_{n}"` with `n` starting at 2 and increasing. Every name
/// handed out — including generated suffixed names — is recorded in `seen`,
/// so a later row whose own prefix happens to equal a generated name (for
/// example `a`, `a`, `a_2`) cannot receive a duplicate.
///
/// `seen` maps each name to the highest counter already consumed for it;
/// callers should treat its contents as opaque and reuse the same map for
/// the whole batch.
fn dedup_prefix(base: &str, seen: &mut HashMap<String, usize>) -> String {
    let mut counter = match seen.get(base).copied() {
        Some(used) => used,
        None => {
            // First sighting: the base name itself is still free.
            seen.insert(base.to_string(), 1);
            return base.to_string();
        }
    };

    loop {
        counter += 1;
        let candidate = format!("{base}_{counter}");
        // Skip suffixes that were already handed out or explicitly requested.
        if !seen.contains_key(&candidate) {
            // Remember where to resume for `base`, and reserve the new name
            // so it is never returned a second time.
            seen.insert(base.to_string(), counter);
            seen.insert(candidate.clone(), 1);
            return candidate;
        }
    }
}
/// Derives an output prefix from a BAM path.
///
/// Uses the file stem (file name without its final extension) when it
/// exists and is valid UTF-8; otherwise falls back to `"coverage"`.
fn default_prefix_for_bam(bam_path: &Path) -> String {
    match bam_path.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::from("coverage"),
    }
}
/// Interprets the optional prefix column of a batch row.
///
/// Returns the whitespace-trimmed value as an owned string, or `None` when
/// the column is absent or blank after trimming.
fn parse_prefix_column(raw: Option<&str>) -> Option<String> {
    raw.map(str::trim)
        .filter(|value| !value.is_empty())
        .map(str::to_owned)
}