use std::{collections::HashMap, mem};
use tokio::io::AsyncBufReadExt;
use tracing::{debug, error};
use crate::common::{contig::ContigName, coords::GenomeRegion};
/// Genome regions grouped by the contig they lie on.
///
/// Each contig name maps to the list of regions recorded for that contig.
/// `contains` binary-searches a contig's list by start position, so lookups
/// are only meaningful once each list is sorted by start; `read_regions` in
/// this file sorts (and merges) every list before handing the value out.
#[derive(Clone, Debug, Default)]
pub struct Regions(HashMap<ContigName, Vec<GenomeRegion>>);
impl Regions {
pub fn into_linear<F: for<'a> Fn(&'a ContigName) -> usize>(
mut self,
cmp: F,
) -> Vec<GenomeRegion> {
let mut keys: Vec<_> = self.0.keys().cloned().collect();
keys.sort_by_key(cmp);
keys.into_iter()
.flat_map(move |key| self.0.remove(&key).unwrap())
.collect()
}
pub fn contains(&self, region: &GenomeRegion) -> bool {
let Some(list) = self.0.get(region.contig()) else {
return false;
};
let ix = match list
.binary_search_by_key(®ion.start().position_0(), |r| r.start().position_0())
{
Ok(ix) => Some(ix),
Err(0) => None,
Err(ix) => Some(ix - 1),
};
ix.is_some_and(|ix| list[ix].contains(region))
}
}
/// A source from which an optional set of [`Regions`] can be loaded.
///
/// In this file it is implemented by the CLI argument groups
/// `CliRegionsArgs` and `CliTargetsArgs`.
pub(crate) trait RegionsDefinition {
/// Asynchronously loads the regions described by `self`.
///
/// Returns `Ok(None)` when nothing was specified; the implementations in
/// this file do so when no region string was supplied on the command line
/// or found in the file. I/O failures are reported as `Err`.
async fn read_regions(&self) -> anyhow::Result<Option<Regions>>;
}
// Command-line arguments selecting regions, either inline or from a file.
//
// NOTE: plain `//` comments are used on purpose here; clap's derive turns
// `///` doc comments into help text, which would change the CLI output.
//
// Unlike `CliTargetsArgs`, this struct carries no `#[group(multiple = false)]`
// attribute, so `-r` and `-R` may be given together; `read_regions` then
// combines the inline list with the file contents.
#[derive(clap::Args, Clone, Debug, Default)]
pub struct CliRegionsArgs {
// Region strings given directly on the command line (`-r` / `--regions`).
#[arg(short = 'r', long = "regions", value_name = "LIST")]
pub r_list: Vec<String>,
// Path of a file read line by line, one region string per non-empty line
// (`-R` / `--regions-file`).
#[arg(short = 'R', long = "regions-file", value_name = "FILE")]
pub r_file: Option<String>,
}
impl RegionsDefinition for CliRegionsArgs {
    /// Loads the regions named by `--regions` and/or `--regions-file`.
    ///
    /// All the work is delegated to the module-level `read_regions` helper.
    async fn read_regions(&self) -> anyhow::Result<Option<Regions>> {
        let Self { r_list, r_file } = self;
        let regions_file = r_file.as_ref();
        read_regions(r_list, regions_file).await
    }
}
// Command-line arguments selecting targets, either inline or from a file.
//
// NOTE: plain `//` comments are used on purpose here; clap's derive turns
// `///` doc comments into help text, which would change the CLI output.
//
// `#[group(multiple = false)]` asks clap to accept at most one argument of
// this group, so `-t` and `-T` are presumably mutually exclusive — confirm
// against the clap version in use.
#[derive(clap::Args, Clone, Debug, Default)]
#[group(multiple = false)]
pub struct CliTargetsArgs {
// Target region strings given directly on the command line (`-t` / `--targets`).
#[arg(short = 't', long = "targets", value_name = "LIST")]
pub t_list: Vec<String>,
// Path of a file read line by line, one region string per non-empty line
// (`-T` / `--targets-file`).
#[arg(short = 'T', long = "targets-file", value_name = "FILE")]
pub t_file: Option<String>,
}
impl RegionsDefinition for CliTargetsArgs {
    /// Loads the regions named by `--targets` or `--targets-file`.
    ///
    /// All the work is delegated to the module-level `read_regions` helper.
    async fn read_regions(&self) -> anyhow::Result<Option<Regions>> {
        let Self { t_list, t_file } = self;
        let targets_file = t_file.as_ref();
        read_regions(t_list, targets_file).await
    }
}
async fn read_regions(
list: &[String],
file: Option<&String>,
) -> Result<Option<Regions>, anyhow::Error> {
let mut result: Option<Regions> = None;
for reg in list {
add_region(result.get_or_insert_default(), reg);
}
if let Some(path) = file {
let fr = tokio::fs::File::open(path).await?;
let mut br = tokio::io::BufReader::new(fr).lines();
while let Some(reg) = br.next_line().await? {
if reg.is_empty() {
continue;
}
add_region(result.get_or_insert_default(), ®);
}
}
if let Some(ref mut regions) = result {
for list in regions.0.values_mut() {
list.sort_by(|a, b| a.partial_cmp(b).unwrap()); merge(list);
}
}
debug!("List: {result:?}");
Ok(result)
}
fn merge(list: &mut Vec<GenomeRegion>) {
let mut reduced_list = Vec::with_capacity(list.len());
let mut iter = list.iter().cloned();
if let Some(mut current) = iter.next() {
for region in iter {
if let Some(merged) = current.merge(®ion) {
current = merged;
} else {
reduced_list.push(current);
current = region;
}
}
reduced_list.push(current);
}
reduced_list.shrink_to_fit();
mem::swap(&mut reduced_list, list);
}
/// Parses `region` and files the result under its contig in `list`.
///
/// A string that cannot be parsed is reported through `error!` and dropped;
/// `list` is left unchanged in that case.
fn add_region(list: &mut Regions, region: &str) {
    let parsed = match GenomeRegion::parse(region.as_bytes()) {
        Ok(parsed) => parsed,
        Err(err) => {
            error!("Cannot parse region '{region}': {err}");
            return;
        }
    };
    let contig = parsed.contig().clone();
    list.0.entry(contig).or_default().push(parsed);
}