use anyhow::Result;
use itertools::multiunzip;
use rust_htslib::{bam, bam::ext::BamRecordExtensions};
use std::collections::HashMap;
pub fn merge_two_lists<T>(left: &[T], right: &[T]) -> Vec<T>
where
T: Ord,
T: Clone,
{
let mut x: Vec<T> = left.iter().chain(right.iter()).cloned().collect();
x.sort();
x
}
pub fn merge_two_lists_with_qual<T, U>(
left: &[T],
left_q: &[U],
right: &[T],
right_q: &[U],
) -> Vec<(T, U)>
where
T: Ord,
T: Clone,
U: Clone,
{
let l = left
.iter()
.zip(left_q.iter())
.map(|(a, b)| (a.clone(), b.clone()));
let r = right
.iter()
.zip(right_q.iter())
.map(|(a, b)| (a.clone(), b.clone()));
let mut x: Vec<(T, U)> = l.chain(r).collect();
x.sort_by_key(|(a, _b)| a.clone());
x
}
#[inline(always)]
pub fn is_sorted<T>(v: &[T]) -> bool
where
T: Ord,
{
v.windows(2).all(|w| w[0] <= w[1])
}
#[allow(clippy::all)]
fn liftover_closest(
positions: &[i64],
aligned_block_pairs: &Vec<([i64; 2], [i64; 2])>,
) -> Result<Vec<Option<i64>>> {
if positions.is_empty() {
return Ok(vec![]);
}
if aligned_block_pairs.is_empty() {
return Ok(positions.iter().map(|_x| None).collect());
}
if !is_sorted(positions) {
return Err(anyhow::anyhow!(
"Positions must be sorted before calling liftover! Array length: {}, first 5: {:?}, last 5: {:?}",
positions.len(),
&positions[..std::cmp::min(5, positions.len())],
&positions[positions.len().saturating_sub(5)..]
));
}
let mut starting_block = 0;
let ending_block = aligned_block_pairs.len();
let mut pos_mapping = HashMap::new();
for cur_pos in positions {
pos_mapping.insert(cur_pos, (-1, i64::MAX));
let mut current_block = 0;
for block_index in starting_block..ending_block {
let ([q_st, q_en], [r_st, r_en]) = &aligned_block_pairs[block_index];
let (best_r_pos, best_diff) = pos_mapping.get_mut(cur_pos).unwrap();
if cur_pos >= q_st && cur_pos <= q_en {
let dist_from_start = cur_pos - q_st;
*best_diff = 0;
*best_r_pos = r_st + dist_from_start;
break;
}
else if cur_pos < q_st {
let diff = (q_st - cur_pos).abs();
if diff < *best_diff {
*best_diff = diff;
*best_r_pos = *r_st;
}
}
else if cur_pos > q_en {
let diff = (cur_pos - q_en).abs();
if diff < *best_diff {
*best_diff = diff;
*best_r_pos = *r_en;
}
starting_block = current_block;
}
current_block += 1;
}
}
let mut rtn = vec![];
for q_pos in positions {
let (r_pos, diff) = pos_mapping.get(q_pos).unwrap();
if *r_pos == -1 && *diff == i64::MAX {
rtn.push(None);
} else {
rtn.push(Some(*r_pos));
}
}
assert_eq!(rtn.len(), positions.len());
Ok(rtn)
}
pub fn lift_reference_positions(
aligned_block_pairs: &Vec<([i64; 2], [i64; 2])>,
query_positions: &[i64],
) -> Result<Vec<Option<i64>>> {
liftover_closest(query_positions, aligned_block_pairs)
}
pub fn lift_query_positions(
aligned_block_pairs: &[([i64; 2], [i64; 2])],
reference_positions: &[i64],
) -> Result<Vec<Option<i64>>> {
let aligned_block_pairs = aligned_block_pairs.iter().map(|(q, r)| (*r, *q)).collect();
liftover_closest(reference_positions, &aligned_block_pairs)
}
fn lift_positions_with_sort_handling(
aligned_block_pairs: &Vec<([i64; 2], [i64; 2])>,
positions: &[i64],
lift_reference_to_query: bool,
) -> Result<Vec<Option<i64>>> {
if is_sorted(positions) {
if !lift_reference_to_query {
lift_reference_positions(aligned_block_pairs, positions)
} else {
lift_query_positions(aligned_block_pairs, positions)
}
} else {
let mut indices: Vec<usize> = (0..positions.len()).collect();
indices.sort_by_key(|&i| positions[i]);
let sorted_positions: Vec<i64> = indices.iter().map(|&i| positions[i]).collect();
let lifted_positions = if !lift_reference_to_query {
lift_reference_positions(aligned_block_pairs, &sorted_positions)?
} else {
lift_query_positions(aligned_block_pairs, &sorted_positions)?
};
let mut original_positions = vec![None; positions.len()];
for (sorted_idx, &original_idx) in indices.iter().enumerate() {
original_positions[original_idx] = lifted_positions[sorted_idx];
}
Ok(original_positions)
}
}
#[allow(clippy::type_complexity)]
fn lift_range(
aligned_block_pairs: &Vec<([i64; 2], [i64; 2])>,
starts: &[i64],
ends: &[i64],
lift_reference_to_query: bool,
) -> Result<(Vec<Option<i64>>, Vec<Option<i64>>, Vec<Option<i64>>)> {
assert_eq!(starts.len(), ends.len());
let ref_starts =
lift_positions_with_sort_handling(aligned_block_pairs, starts, lift_reference_to_query)?;
let ref_ends =
lift_positions_with_sort_handling(aligned_block_pairs, ends, lift_reference_to_query)?;
assert_eq!(ref_starts.len(), ref_ends.len());
let rtn = ref_starts
.into_iter()
.zip(ref_ends)
.map(|(start, end)| match (start, end) {
(Some(start), Some(end)) => {
if start == end {
(None, None, None)
} else {
(Some(start), Some(end), Some(end - start))
}
}
_ => (None, None, None),
})
.collect::<Vec<_>>();
Ok(multiunzip(rtn))
}
#[allow(clippy::type_complexity)]
pub fn lift_query_range(
record: &bam::Record,
starts: &[i64],
ends: &[i64],
) -> Result<(Vec<Option<i64>>, Vec<Option<i64>>, Vec<Option<i64>>)> {
let aligned_block_pairs: Vec<([i64; 2], [i64; 2])> = record.aligned_block_pairs().collect();
lift_range(&aligned_block_pairs, starts, ends, false)
}
fn liftover_exact(
aligned_block_pairs: &Vec<([i64; 2], [i64; 2])>,
positions: &[i64],
lift_reference_to_query: bool,
) -> Result<Vec<Option<i64>>> {
if !is_sorted(positions) {
return Err(anyhow::anyhow!(
"Positions must be sorted before calling liftover!"
));
}
let mut return_positions = vec![];
let mut cur_idx = 0;
for ([q_st, q_en], [r_st, r_en]) in aligned_block_pairs {
let (st, en) = if !lift_reference_to_query {
(q_st, q_en)
} else {
(r_st, r_en)
};
if cur_idx == positions.len() {
break;
}
let mut cur_pos = positions[cur_idx];
while cur_pos < *en {
if cur_pos >= *st {
let dist_from_start = cur_pos - st;
let rtn_pos = if !lift_reference_to_query {
r_st + dist_from_start
} else {
q_st + dist_from_start
};
return_positions.push(Some(rtn_pos));
} else {
return_positions.push(None);
}
cur_idx += 1;
if cur_idx == positions.len() {
break;
}
cur_pos = positions[cur_idx];
}
}
while positions.len() > return_positions.len() {
return_positions.push(None);
}
assert_eq!(positions.len(), return_positions.len());
Ok(return_positions)
}
pub fn lift_reference_positions_exact(
record: &bam::Record,
query_positions: &[i64],
) -> Result<Vec<Option<i64>>> {
if record.is_unmapped() {
Ok(query_positions.iter().map(|_x| None).collect())
} else {
let aligned_block_pairs: Vec<([i64; 2], [i64; 2])> = record.aligned_block_pairs().collect();
liftover_exact(&aligned_block_pairs, query_positions, false)
}
}
pub fn lift_query_positions_exact(
record: &bam::Record,
reference_positions: &[i64],
) -> Result<Vec<Option<i64>>> {
if record.is_unmapped() {
Ok(reference_positions.iter().map(|_x| None).collect())
} else {
let aligned_block_pairs: Vec<([i64; 2], [i64; 2])> = record.aligned_block_pairs().collect();
liftover_exact(&aligned_block_pairs, reference_positions, true)
}
}
#[allow(clippy::type_complexity)]
fn lift_range_exact(
record: &bam::Record,
starts: &[i64],
ends: &[i64],
lift_reference_to_query: bool,
) -> Result<(Vec<Option<i64>>, Vec<Option<i64>>, Vec<Option<i64>>)> {
assert_eq!(starts.len(), ends.len());
let (ref_starts, ref_ends) = if !lift_reference_to_query {
(
lift_reference_positions_exact(record, starts)?,
lift_reference_positions_exact(record, ends)?,
)
} else {
(
lift_query_positions_exact(record, starts)?,
lift_query_positions_exact(record, ends)?,
)
};
assert_eq!(ref_starts.len(), ref_ends.len());
let rtn = ref_starts
.into_iter()
.zip(ref_ends)
.map(|(start, end)| match (start, end) {
(Some(start), Some(end)) => (Some(start), Some(end), Some(end - start)),
_ => (None, None, None),
})
.collect::<Vec<_>>();
Ok(multiunzip(rtn))
}
#[allow(clippy::type_complexity)]
pub fn lift_query_range_exact(
record: &bam::Record,
starts: &[i64],
ends: &[i64],
) -> Result<(Vec<Option<i64>>, Vec<Option<i64>>, Vec<Option<i64>>)> {
lift_range_exact(record, starts, ends, false)
}
#[allow(clippy::type_complexity)]
pub fn lift_reference_range_exact(
record: &bam::Record,
starts: &[i64],
ends: &[i64],
) -> Result<(Vec<Option<i64>>, Vec<Option<i64>>, Vec<Option<i64>>)> {
lift_range_exact(record, starts, ends, true)
}