use crate::tools::depth::{get_runner, BedRecord};
use crate::utils::{index_bam, need_update, prefix_until_dot, BINSIZE, CHAIN_DISTANCE};
use clap::Parser;
use log::info;
use perbase_lib::utils::{get_reader, get_writer};
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
const MIN_READ_SUPPORT: u32 = 5;
const MAX_READ_SUPPORT: u32 = 100;
#[derive(Parser, Debug)]
#[command(arg_required_else_help(true))]
pub struct RegionsArgs {
pub bam_files: Vec<String>,
#[clap(short, long, default_value_t = false)]
pub no_chr_only: bool,
#[clap(long, default_value_t = MIN_READ_SUPPORT)]
pub min_support: u32,
#[clap(long, default_value_t = MAX_READ_SUPPORT)]
pub max_support: u32,
}
pub fn regions(bam_files: &Vec<String>, chr_only: bool, min_support: u32, max_support: u32) {
let mut bed_files = Vec::new();
for bam_file in bam_files {
let bed_file = if bam_file.ends_with(".bam") {
regions_one(bam_file, BINSIZE)
} else {
bam_file.clone()
};
bed_files.push(bed_file);
}
process_bedfiles(bed_files, chr_only, min_support, max_support);
}
fn regions_one(bam_file: &str, bin_size: u32) -> String {
index_bam(bam_file);
let out_prefix = prefix_until_dot(bam_file);
let bed_path = format!("{out_prefix}.regions.bed.gz");
if need_update(&[bam_file], &[&bed_path], true) {
let runner = get_runner(bam_file, bin_size);
let rx = runner.process().unwrap(); let mut writer = get_writer(&Some(&bed_path), true, false, 0, 6).unwrap();
rx.into_iter()
.try_for_each(|d| -> Result<(), csv::Error> {
writer
.write_record([
d.chrom.as_str(),
d.start.to_string().as_str(),
d.end.to_string().as_str(),
d.depth.to_string().as_str(),
])
.expect("Failed to write BED record");
Ok(())
})
.expect("Failed to write BED record");
}
info!("Generated depth `{bed_path}` for `{bam_file}`");
bed_path
}
fn load_bed(bed: &str) -> Vec<BedRecord> {
let mut reader = get_reader(&Some(bed), false, true).expect("Failed to open BED file");
reader.deserialize().map(Result::unwrap).collect()
}
fn process_bedfiles(
bed_files: Vec<String>,
chr_only: bool,
min_support: u32,
max_support: u32,
) -> HashMap<String, i32> {
let child_bed = &bed_files[0];
let parent1_bed = &bed_files[1];
let parent2_bed = if bed_files.len() == 3 {
Some(bed_files[2].clone())
} else {
None
};
let child_records = load_bed(child_bed);
let parent1_records = load_bed(parent1_bed);
let parent2_records = parent2_bed.map(|bed| load_bed(&bed));
let mut regions: BTreeMap<String, Vec<(u32, u32, f64)>> = BTreeMap::new();
for (i, child_record) in child_records.iter().enumerate() {
let child_depth = child_record.depth;
let parent1_depth = parent1_records[i].depth;
let parent2_depth = if let Some(ref parent2_recs) = parent2_records {
parent2_recs[i].depth
} else {
0.0
};
let depth_ratio = child_depth / (parent1_depth + parent2_depth + 1.0);
regions
.entry(child_record.chrom.clone())
.or_default()
.push((child_record.start, child_record.end, depth_ratio));
}
let mut d = Vec::new();
let mut selected = Vec::new();
let support_range = min_support as f64..=max_support as f64;
for (chrom, data) in &mut regions {
if chr_only && !chrom.contains("Chr") && !chrom.contains("chr") {
continue;
}
data.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap());
let chrom_selected: Vec<_> = data
.iter()
.filter(|&&(_, _, depth)| support_range.contains(&depth))
.map(|&(start, end, depth)| (chrom.clone(), start, end, format!("{}", depth.round())))
.collect();
selected.extend_from_slice(&chrom_selected);
let regions_str = chrom_selected
.iter()
.map(|(chrom, start, end, depth)| {
format!("{}:{}-{}:{}", chrom, start, end, depth.clone())
})
.collect::<Vec<_>>()
.join(",");
d.push((chrom.clone(), regions_str));
}
let prefix = Path::new(child_bed).file_stem().unwrap().to_str().unwrap();
let pf = prefix_until_dot(prefix);
let poi_tsv = format!("{pf}.poi.tsv");
let mut kf_writer = csv::WriterBuilder::new()
.delimiter(b'\t')
.from_path(&poi_tsv)
.unwrap();
kf_writer.write_record(["Chrom", "Regions"]).unwrap();
for (chrom, regions_str) in d {
kf_writer.write_record(&[chrom, regions_str]).unwrap();
}
info!("Points of interests written to `{}`", poi_tsv);
selected.sort_by_key(|k| (k.0.clone(), k.1));
let mut merged = Vec::new();
for (i, s) in selected.iter().enumerate() {
if i == 0 {
merged.push(s.clone());
continue;
}
let prev = merged.last_mut().unwrap();
let cur = s;
if prev.0 == cur.0 && prev.2 + CHAIN_DISTANCE >= cur.1 {
prev.2 = prev.2.max(cur.2);
prev.3 = format!("{},{}", prev.3, cur.3);
} else {
merged.push(cur.clone());
}
}
let regions_file = format!("{pf}.regions.tsv");
let mut counter = HashMap::new();
let mut regions_writer = BufWriter::new(File::create(®ions_file).unwrap());
for (chrom, start, end, score) in &merged {
writeln!(regions_writer, "{chrom}:{start}-{end}\t{score}").unwrap();
*counter.entry(chrom[..2].to_string()).or_insert(0) += 1;
}
info!("Merged regions written to `{}`", regions_file);
info!("Region counts: {:?}", counter);
counter
}