use std::ops::{Deref, DerefMut};
use anyhow::{Context as _, bail};
use lib_tsalign::a_star_aligner::{
alignment_result::alignment::Alignment,
template_switch_distance::{
AlignmentType, EqualCostRange, TemplateSwitchDirection, TemplateSwitchPrimary,
TemplateSwitchSecondary,
},
};
use crate::common::aligner::result::TwitcherAlignmentWithStatistics;
#[derive(Clone, Debug)]
pub struct ForwardAlignment(pub Alignment<AlignmentType>);
impl Deref for ForwardAlignment {
type Target = Alignment<AlignmentType>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for ForwardAlignment {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl From<Alignment<AlignmentType>> for ForwardAlignment {
fn from(value: Alignment<AlignmentType>) -> Self {
Self(value)
}
}
impl ForwardAlignment {
pub fn insert_ts_alignment(
&self,
ts_alignment: &TwitcherAlignmentWithStatistics,
) -> anyhow::Result<Alignment<AlignmentType>> {
let mut forward_flat_iter = self.0.iter_flat_cloned();
let mut result = Alignment::new();
let mut roff = ts_alignment.stats.reference_offset();
let mut qoff = ts_alignment.stats.query_offset();
while roff > 0 || qoff > 0 {
let Some(next) = forward_flat_iter.next() else {
bail!("Incomplete forward alignment");
};
roff -= usize::try_from(consumed_reference(1, &next, None)?)?;
qoff -= usize::try_from(consumed_query(1, &next, None)?)?;
result.push(next);
}
let mut ts = None;
let mut rlen = 0isize;
let mut qlen = 0isize;
for (n, ty) in ts_alignment.alignment.alignment.iter_compact_cloned() {
rlen += consumed_reference(n, &ty, ts)?;
qlen += consumed_query(n, &ty, ts)?;
match &ty {
AlignmentType::TemplateSwitchEntrance { primary, .. } => {
ts = Some(*primary);
}
AlignmentType::TemplateSwitchExit { .. } => {
ts.take();
}
_ => {}
}
result.push_n(n, ty);
}
while rlen > 0 || qlen > 0 {
let Some(next) = forward_flat_iter.next() else {
bail!("Incomplete forward alignment");
};
rlen -= consumed_reference(1, &next, None)?;
qlen -= consumed_query(1, &next, None)?;
}
for ty in forward_flat_iter {
result.push(ty);
}
Ok(result)
}
pub fn crop_to_ts_region(
&self,
ts_alignment: &TwitcherAlignmentWithStatistics,
) -> anyhow::Result<Alignment<AlignmentType>> {
let mut offset = ts_alignment.stats.reference_offset();
let mut forward_flat_iter = self.0.iter_flat_cloned();
let mut result = Alignment::new();
while offset > 0 {
let Some(next) = forward_flat_iter.next() else {
bail!("Incomplete forward alignment");
};
offset -= usize::try_from(consumed_reference(1, &next, None)?)?;
}
let mut ts = None;
let mut ref_length = 0isize;
for (n, ty) in ts_alignment.alignment.alignment.iter_compact_cloned() {
ref_length += match &ty {
AlignmentType::TemplateSwitchEntrance { primary, .. } => {
ts = Some(*primary);
consumed_reference(n, &ty, None)? }
AlignmentType::TemplateSwitchExit { .. } => {
let ts = ts.take();
consumed_reference(n, &ty, ts)?
}
ty => consumed_reference(n, ty, ts)?,
};
}
while ref_length > 0 {
let Some(next) = forward_flat_iter.next() else {
bail!("Incomplete forward alignment");
};
ref_length -= consumed_reference(1, &next, None)?;
result.push(next);
}
Ok(result)
}
}
pub fn consumed_reference(
n: usize,
ty: &AlignmentType,
ts: Option<TemplateSwitchPrimary>,
) -> anyhow::Result<isize> {
Ok(consumes(n, ty, ts)?.0)
}
pub fn consumed_query(
n: usize,
ty: &AlignmentType,
ts: Option<TemplateSwitchPrimary>,
) -> anyhow::Result<isize> {
Ok(consumes(n, ty, ts)?.1)
}
fn consumes(
n: usize,
ty: &AlignmentType,
ts: Option<TemplateSwitchPrimary>,
) -> anyhow::Result<(isize, isize)> {
let n = isize::try_from(n)?;
let (r, q) = if let Some(primary) = ts {
let (mut r, mut q) = match ty {
AlignmentType::PrimaryInsertion
| AlignmentType::PrimaryDeletion
| AlignmentType::PrimarySubstitution
| AlignmentType::PrimaryMatch
| AlignmentType::PrimaryFlankInsertion
| AlignmentType::PrimaryFlankDeletion
| AlignmentType::PrimaryFlankSubstitution
| AlignmentType::PrimaryFlankMatch
| AlignmentType::Root
| AlignmentType::PrimaryReentry
| AlignmentType::PrimaryShortcut { .. } => {
bail!("Primary alignments cannot happen inside a template switch")
}
AlignmentType::TemplateSwitchEntrance { .. } => {
bail!("A new template switch cannot start inside a template switch")
}
AlignmentType::SecondaryInsertion
| AlignmentType::SecondarySubstitution
| AlignmentType::SecondaryMatch => (n, 0),
AlignmentType::SecondaryDeletion | AlignmentType::SecondaryRoot => (0, 0),
AlignmentType::TemplateSwitchExit { anti_primary_gap } => (0, *anti_primary_gap),
};
if primary != TemplateSwitchPrimary::Reference {
std::mem::swap(&mut r, &mut q);
}
(r, q)
} else {
match ty {
AlignmentType::PrimaryInsertion | AlignmentType::PrimaryFlankInsertion => (0, n),
AlignmentType::PrimaryDeletion | AlignmentType::PrimaryFlankDeletion => (n, 0),
AlignmentType::PrimarySubstitution
| AlignmentType::PrimaryFlankSubstitution
| AlignmentType::PrimaryMatch
| AlignmentType::PrimaryFlankMatch => (n, n),
AlignmentType::TemplateSwitchEntrance { .. }
| AlignmentType::Root
| AlignmentType::PrimaryReentry => (0, 0),
AlignmentType::PrimaryShortcut {
delta_reference,
delta_query,
} => (*delta_reference, *delta_query),
AlignmentType::SecondaryInsertion
| AlignmentType::SecondaryDeletion
| AlignmentType::SecondarySubstitution
| AlignmentType::SecondaryMatch
| AlignmentType::SecondaryRoot
| AlignmentType::TemplateSwitchExit { .. } => {
bail!("Secondary alignments require a preceding template switch entrance")
}
}
};
Ok((r, q))
}
#[allow(clippy::too_many_lines)]
pub fn cigar_to_alignment<S: AsRef<str>>(cigar: &S) -> anyhow::Result<Alignment<AlignmentType>> {
let s = cigar.as_ref();
let mut alignment = Alignment::new();
let mut chars = s.char_indices().peekable();
let mut inside_ts = false;
while let Some((i, c)) = chars.peek().copied() {
match c {
'0'..='9' => {
let start = i;
while chars.peek().is_some_and(|(_, c)| c.is_ascii_digit()) {
chars.next();
}
let end = chars.peek().map_or(s.len(), |(i, _)| *i);
let amount: usize = s[start..end]
.parse()
.with_context(|| format!("Invalid repeat count at offset {start}"))?;
let (_, type_char) = chars.next().with_context(|| {
format!("Expected alignment type char after count at offset {end}")
})?;
let atype = parse_repeatable_char(type_char, inside_ts).with_context(|| {
format!("Unknown alignment type char '{type_char}' at offset {end}")
})?;
alignment.push_n(amount, atype);
}
'I' | 'D' | 'X' | '=' => {
chars.next();
let atype = parse_repeatable_char(c, inside_ts).unwrap();
alignment.push(atype);
}
'[' => {
chars.next();
let (_, next) = chars.next().context("Unexpected end after '['")?;
match next {
'T' => {
if inside_ts {
bail!("Nested TemplateSwitchEntrance is not allowed");
}
expect_char(&mut chars, 'S').context("Expected 'S' in '[TS...'")?;
let (_, p) = chars.next().context("Expected primary char")?;
let primary = parse_ts_primary(p)
.with_context(|| format!("Invalid TemplateSwitchPrimary '{p}'"))?;
let (_, s_char) = chars.next().context("Expected secondary char")?;
let secondary = parse_ts_secondary(s_char).with_context(|| {
format!("Invalid TemplateSwitchSecondary '{s_char}'")
})?;
let (_, d) = chars.next().context("Expected direction char")?;
let direction = parse_ts_direction(d)
.with_context(|| format!("Invalid TemplateSwitchDirection '{d}'"))?;
expect_char(&mut chars, ':').context("Expected ':' after direction")?;
let equal_cost_range = parse_equal_cost_range(&mut chars)
.context("Failed to parse EqualCostRange")?;
expect_char(&mut chars, ':')
.context("Expected ':' after EqualCostRange")?;
let first_offset = parse_isize(&mut chars, &[':', ']'])
.context("Failed to parse first_offset")?;
expect_char(&mut chars, ':')
.context("Expected trailing ':' after first_offset")?;
inside_ts = true;
alignment.push(AlignmentType::TemplateSwitchEntrance {
primary,
secondary,
direction,
equal_cost_range,
first_offset,
});
}
'P' => {
expect_char(&mut chars, 'S').context("Expected 'S' in '[PS...'")?;
expect_char(&mut chars, ':').context("Expected ':' in '[PS:...'")?;
expect_char(&mut chars, 'R')
.context("Expected 'R' before delta_reference")?;
let delta_reference = parse_isize(&mut chars, &['Q'])
.context("Failed to parse delta_reference")?;
expect_char(&mut chars, 'Q').context("Expected 'Q' before delta_query")?;
let delta_query = parse_isize(&mut chars, &[']'])
.context("Failed to parse delta_query")?;
expect_char(&mut chars, ']').context("Expected ']' to close '[PS...'")?;
alignment.push(AlignmentType::PrimaryShortcut {
delta_reference,
delta_query,
});
}
other => bail!("Unknown bracket expression starting with '[{other}'"),
}
}
':' => {
if !inside_ts {
bail!(
"TemplateSwitchExit at offset {i} without a preceding TemplateSwitchEntrance"
);
}
chars.next();
let anti_primary_gap =
parse_isize(&mut chars, &[']']).context("Failed to parse anti_primary_gap")?;
expect_char(&mut chars, ']').context("Expected ']' to close TemplateSwitchExit")?;
inside_ts = false;
alignment.push(AlignmentType::TemplateSwitchExit { anti_primary_gap });
}
other => bail!("Unexpected character '{other}' at offset {i}"),
}
}
if inside_ts {
bail!("Unexpected end of input: unclosed TemplateSwitchEntrance");
}
Ok(alignment)
}
fn parse_repeatable_char(c: char, inside_ts: bool) -> Option<AlignmentType> {
match (c, inside_ts) {
('I', false) => Some(AlignmentType::PrimaryInsertion),
('D', false) => Some(AlignmentType::PrimaryDeletion),
('X', false) => Some(AlignmentType::PrimarySubstitution),
('=', false) => Some(AlignmentType::PrimaryMatch),
('I', true) => Some(AlignmentType::SecondaryInsertion),
('D', true) => Some(AlignmentType::SecondaryDeletion),
('X', true) => Some(AlignmentType::SecondarySubstitution),
('=', true) => Some(AlignmentType::SecondaryMatch),
_ => None,
}
}
fn parse_ts_primary(c: char) -> Option<TemplateSwitchPrimary> {
match c {
'R' => Some(TemplateSwitchPrimary::Reference),
'Q' => Some(TemplateSwitchPrimary::Query),
_ => None,
}
}
fn parse_ts_secondary(c: char) -> Option<TemplateSwitchSecondary> {
match c {
'R' => Some(TemplateSwitchSecondary::Reference),
'Q' => Some(TemplateSwitchSecondary::Query),
_ => None,
}
}
fn parse_ts_direction(c: char) -> Option<TemplateSwitchDirection> {
match c {
'F' => Some(TemplateSwitchDirection::Forward),
'R' => Some(TemplateSwitchDirection::Reverse),
_ => None,
}
}
fn parse_isize(
chars: &mut std::iter::Peekable<impl Iterator<Item = (usize, char)>>,
terminators: &[char],
) -> anyhow::Result<isize> {
let mut buf = String::new();
if chars.peek().is_some_and(|(_, c)| *c == '-') {
buf.push('-');
chars.next();
}
while let Some((_, c)) = chars.peek() {
if terminators.contains(c) {
break;
}
buf.push(*c);
chars.next();
}
buf.parse::<isize>()
.with_context(|| format!("Invalid integer '{buf}'"))
}
fn expect_char(
chars: &mut std::iter::Peekable<impl Iterator<Item = (usize, char)>>,
expected: char,
) -> anyhow::Result<()> {
match chars.next() {
Some((_, c)) if c == expected => Ok(()),
Some((i, c)) => bail!("Expected '{expected}' at offset {i}, got '{c}'"),
None => bail!("Expected '{expected}' but got end of input"),
}
}
fn parse_equal_cost_range(
chars: &mut std::iter::Peekable<impl Iterator<Item = (usize, char)> + Clone>,
) -> anyhow::Result<EqualCostRange> {
expect_char(chars, '[')?;
let is_invalid = {
let mut tmp = chars.clone();
tmp.next().is_some_and(|(_, c)| c == '-') && tmp.next().is_some_and(|(_, c)| c == ']')
};
if is_invalid {
chars.next(); expect_char(chars, ']')?;
expect_char(chars, ':')?;
expect_char(chars, '[')?;
expect_char(chars, '-')?;
expect_char(chars, ']')?;
return Ok(EqualCostRange::new_invalid());
}
let min_start = parse_isize(chars, &[',']).context("min_start")?;
expect_char(chars, ',')?;
let max_start = parse_isize(chars, &[']']).context("max_start")?;
expect_char(chars, ']')?;
expect_char(chars, ':')?;
expect_char(chars, '[')?;
let min_end = parse_isize(chars, &[',']).context("min_end")?;
expect_char(chars, ',')?;
let max_end = parse_isize(chars, &[']']).context("max_end")?;
expect_char(chars, ']')?;
Ok(EqualCostRange {
min_start: min_start.try_into()?,
max_start: max_start.try_into()?,
min_end: min_end.try_into()?,
max_end: max_end.try_into()?,
})
}
#[cfg(test)]
mod tests {
use super::*;
fn round_trip(alignment: &Alignment<AlignmentType>) -> Alignment<AlignmentType> {
let cigar = alignment.cigar();
cigar_to_alignment(&cigar).unwrap()
}
fn ecr(min_start: i8, max_start: i8, min_end: i8, max_end: i8) -> EqualCostRange {
EqualCostRange {
min_start,
max_start,
min_end,
max_end,
}
}
#[test]
fn test_empty() {
let alignment = Alignment::new();
assert_eq!(round_trip(&alignment), Alignment::new());
}
#[test]
fn test_single_match() {
let mut a = Alignment::new();
a.push_n(1, AlignmentType::PrimaryMatch);
assert_eq!(round_trip(&a), a);
}
#[test]
fn test_primary_only() {
let mut a = Alignment::new();
a.push_n(5, AlignmentType::PrimaryMatch);
a.push_n(2, AlignmentType::PrimaryInsertion);
a.push_n(3, AlignmentType::PrimaryDeletion);
a.push_n(1, AlignmentType::PrimarySubstitution);
a.push_n(4, AlignmentType::PrimaryMatch);
assert_eq!(round_trip(&a), a);
}
#[test]
fn test_large_counts() {
let mut a = Alignment::new();
a.push_n(10_000, AlignmentType::PrimaryMatch);
a.push_n(9_999, AlignmentType::PrimaryDeletion);
assert_eq!(round_trip(&a), a);
}
fn make_ts_entrance(
primary: TemplateSwitchPrimary,
secondary: TemplateSwitchSecondary,
direction: TemplateSwitchDirection,
equal_cost_range: EqualCostRange,
first_offset: isize,
) -> AlignmentType {
AlignmentType::TemplateSwitchEntrance {
primary,
secondary,
direction,
equal_cost_range,
first_offset,
}
}
fn push_simple_ts(
a: &mut Alignment<AlignmentType>,
primary: TemplateSwitchPrimary,
secondary: TemplateSwitchSecondary,
direction: TemplateSwitchDirection,
equal_cost_range: EqualCostRange,
first_offset: isize,
anti_primary_gap: isize,
) {
a.push(make_ts_entrance(
primary,
secondary,
direction,
equal_cost_range,
first_offset,
));
a.push_n(5, AlignmentType::SecondaryMatch);
a.push(AlignmentType::TemplateSwitchExit { anti_primary_gap });
}
#[test]
fn test_ts_empty_body() {
let mut a = Alignment::new();
push_simple_ts(
&mut a,
TemplateSwitchPrimary::Reference,
TemplateSwitchSecondary::Query,
TemplateSwitchDirection::Forward,
ecr(0, 0, 0, 0),
0,
0,
);
assert_eq!(round_trip(&a), a);
}
#[test]
fn test_ts_with_secondary_ops() {
let mut a = Alignment::new();
a.push(make_ts_entrance(
TemplateSwitchPrimary::Query,
TemplateSwitchSecondary::Reference,
TemplateSwitchDirection::Reverse,
ecr(-1, 2, -3, 4),
-5,
));
a.push_n(3, AlignmentType::SecondaryMatch);
a.push_n(1, AlignmentType::SecondarySubstitution);
a.push_n(2, AlignmentType::SecondaryInsertion);
a.push_n(1, AlignmentType::SecondaryDeletion);
a.push(AlignmentType::TemplateSwitchExit {
anti_primary_gap: -3,
});
dbg!(a.cigar());
assert_eq!(round_trip(&a), a);
}
#[test]
fn test_ts_negative_offsets() {
let mut a = Alignment::new();
push_simple_ts(
&mut a,
TemplateSwitchPrimary::Reference,
TemplateSwitchSecondary::Reference,
TemplateSwitchDirection::Reverse,
ecr(-5, 0, -2, 0),
-10,
-7,
);
assert_eq!(round_trip(&a), a);
}
#[test]
fn test_ts_invalid_equal_cost_range() {
let mut a = Alignment::new();
push_simple_ts(
&mut a,
TemplateSwitchPrimary::Reference,
TemplateSwitchSecondary::Query,
TemplateSwitchDirection::Forward,
EqualCostRange::new_invalid(),
0,
0,
);
assert_eq!(round_trip(&a), a);
}
#[test]
fn test_primary_ts_primary() {
let mut a = Alignment::new();
a.push_n(10, AlignmentType::PrimaryMatch);
a.push_n(2, AlignmentType::PrimaryDeletion);
push_simple_ts(
&mut a,
TemplateSwitchPrimary::Reference,
TemplateSwitchSecondary::Query,
TemplateSwitchDirection::Forward,
ecr(-1, 1, -1, 1),
3,
2,
);
a.push_n(5, AlignmentType::PrimaryMatch);
assert_eq!(round_trip(&a), a);
}
#[test]
fn test_multiple_ts() {
let mut a = Alignment::new();
a.push_n(4, AlignmentType::PrimaryMatch);
push_simple_ts(
&mut a,
TemplateSwitchPrimary::Reference,
TemplateSwitchSecondary::Query,
TemplateSwitchDirection::Forward,
ecr(0, 1, -1, 0),
1,
0,
);
a.push_n(3, AlignmentType::PrimarySubstitution);
push_simple_ts(
&mut a,
TemplateSwitchPrimary::Query,
TemplateSwitchSecondary::Reference,
TemplateSwitchDirection::Reverse,
ecr(-2, 0, 0, 3),
-1,
4,
);
a.push_n(6, AlignmentType::PrimaryMatch);
assert_eq!(round_trip(&a), a);
}
#[test]
fn test_error_unclosed_ts() {
let cigar = "[TSRQF:[0,0]:[0,0]:0:";
assert!(cigar_to_alignment(&cigar).is_err());
}
#[test]
fn test_error_exit_without_entrance() {
let cigar = ":0]";
assert!(cigar_to_alignment(&cigar).is_err());
}
#[test]
fn test_error_nested_ts() {
let cigar = "[TSRQF:[0,0]:[0,0]:0:[TSRQF:[0,0]:[0,0]:0::0]:0]";
assert!(cigar_to_alignment(&cigar).is_err());
}
#[test]
fn test_error_invalid_char() {
assert!(cigar_to_alignment(&"5Z").is_err());
}
#[test]
fn test_error_truncated_ts_entrance() {
assert!(cigar_to_alignment(&"[TSR").is_err());
}
}