use std::{
fmt::{self, Display},
ops::{Index, IndexMut},
time::{Duration, Instant},
};
use anyhow::anyhow;
use bstr::ByteSlice;
use compact_genome::interface::alphabet::AlphabetCharacter;
use generic_a_star::cost::AStarCost;
use lib_tsalign::{
a_star_aligner::{
alignment_geometry::AlignmentRange,
alignment_result::alignment::Alignment,
configurable_a_star_align::alphabets::dna_alphabet_or_n::{
DnaAlphabetOrN, DnaCharacterOrN,
},
template_switch_distance::{
AlignmentType, EqualCostRange, TemplateSwitchDirection, TemplateSwitchPrimary,
TemplateSwitchSecondary,
},
},
config::TemplateSwitchConfig,
costs::U64Cost,
};
use serde::{Deserialize, Serialize};
use tracing::instrument;
use crate::common::{
ImmutableSequence,
aligner::result::{
AlignmentFailure, AlignmentWithCost, TwitcherAlignmentResult,
TwitcherAlignmentWithStatistics,
},
};
#[derive(Deserialize, Serialize, Default)]
pub struct FourPointAligner {
costs: TemplateSwitchConfig<DnaAlphabetOrN, U64Cost>,
no_ts: bool,
}
struct AlignerInstance<'a> {
start_time: Instant,
reference: ImmutableSequence,
query: ImmutableSequence,
ranges: AlignmentRange,
costs: &'a TemplateSwitchConfig<DnaAlphabetOrN, U64Cost>,
no_ts: bool,
pre_ts: Mat<Cell>,
ts_qrr: Mat<TsCell>,
ts_rqr: Mat<TsCell>,
post_ts: Mat<Cell>,
}
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FpaAlignmentStatistics {
pub duration: Duration,
pub ranges: AlignmentRange,
}
impl FourPointAligner {
pub const fn new(costs: TemplateSwitchConfig<DnaAlphabetOrN, U64Cost>, no_ts: bool) -> Self {
Self { costs, no_ts }
}
#[instrument(skip_all)]
pub fn align(
&self,
reference: ImmutableSequence,
query: ImmutableSequence,
ranges: AlignmentRange,
) -> TwitcherAlignmentResult {
let instance = AlignerInstance::new(reference, query, ranges, &self.costs, self.no_ts)?;
let res = instance.run()?;
Ok(res)
}
}
impl<'a> AlignerInstance<'a> {
fn new(
reference: ImmutableSequence,
query: ImmutableSequence,
ranges: AlignmentRange,
costs: &'a TemplateSwitchConfig<DnaAlphabetOrN, U64Cost>,
no_ts: bool,
) -> Result<Self, AlignmentFailure> {
let start_time = Instant::now();
// Forward alignment within the cluster range
let pre_ts: Mat<Cell> = Mat::try_new(
ranges.reference_range().len() + 1,
ranges.query_range().len() + 1,
)?;
// QRR: query primary, full reference secondary aligned in reverse complement
// TODO since we only consider matches, surely this can be done in linear space at least... :D
let ts_qrr: Mat<TsCell> = if no_ts {
Mat::empty()
} else {
Mat::try_new(reference.len() + 1, ranges.query_range().len() + 1)?
};
// RQR: reference primary (range), full query secondary aligned in reverse complement
let ts_rqr: Mat<TsCell> = if no_ts {
Mat::empty()
} else {
Mat::try_new(ranges.reference_range().len() + 1, query.len() + 1)?
};
// Forward alignment within the cluster range after the template switch
let post_ts: Mat<Cell> = if no_ts {
Mat::empty()
} else {
Mat::try_new(
ranges.reference_range().len() + 1,
ranges.query_range().len() + 1,
)?
};
Ok(Self {
start_time,
reference,
query,
ranges,
costs,
no_ts,
pre_ts,
ts_qrr,
ts_rqr,
post_ts,
})
}
fn run(mut self) -> Result<TwitcherAlignmentWithStatistics, AlignmentFailure> {
self.fill_pre_ts()?;
if !self.no_ts {
self.fill_ts_qrr()?;
self.fill_ts_rqr()?;
self.fill_post_ts()?;
}
let ts_alignment = self.retrace_path()?;
let statistics = FpaAlignmentStatistics {
duration: self.start_time.elapsed(),
ranges: self.ranges,
};
Ok(TwitcherAlignmentWithStatistics {
alignment: ts_alignment,
stats: statistics.into(),
})
}
fn fill_pre_ts(&mut self) -> anyhow::Result<()> {
for i in 0..self.pre_ts.m() {
for j in 0..self.pre_ts.n() {
let rc = (i > 0)
.then(|| get_char(&self.reference, self.ranges.reference_offset() + i - 1))
.transpose()?;
let qc = (j > 0)
.then(|| get_char(&self.query, self.ranges.query_offset() + j - 1))
.transpose()?;
let matches = rc == qc;
if let (Some(rc), Some(qc)) = (rc, qc) {
// Mis-/Match
let cost = self
.costs
.primary_edit_costs
.match_or_substitution_cost(rc, qc);
let score = self.pre_ts[(i - 1, j - 1)].score + cost;
if self.pre_ts[(i, j)].ty.is_none() || score < self.pre_ts[(i, j)].score {
self.pre_ts[(i, j)].score = self.pre_ts[(i - 1, j - 1)].score + cost;
self.pre_ts[(i, j)].ty = Some(if matches {
Align::Match
} else {
Align::Substitution
});
}
}
if let Some(rc) = rc {
let cost = self.costs.primary_edit_costs.gap_extend_cost(rc);
let score = self.pre_ts[(i - 1, j)].score + cost;
if self.pre_ts[(i, j)].ty.is_none() || score < self.pre_ts[(i, j)].score {
self.pre_ts[(i, j)].score = score;
self.pre_ts[(i, j)].ty = Some(Align::Deletion);
}
}
if let Some(qc) = qc {
let cost = self.costs.primary_edit_costs.gap_extend_cost(qc);
let score = self.pre_ts[(i, j - 1)].score + cost;
if self.pre_ts[(i, j)].ty.is_none() || score < self.pre_ts[(i, j)].score {
self.pre_ts[(i, j)].score = score;
self.pre_ts[(i, j)].ty = Some(Align::Insertion);
}
}
}
}
Ok(())
}
fn fill_ts_qrr(&mut self) -> anyhow::Result<()> {
for j in 1..self.ts_qrr.n() {
let mut best_score = U64Cost::from_usize(usize::MAX);
let mut best_i = 0;
for i in 0..self.pre_ts.m() {
if self.pre_ts[(i, j - 1)].score < best_score {
best_score = self.pre_ts[(i, j - 1)].score;
best_i = i;
}
}
for i in 1..self.ts_qrr.m() {
// Use indices directly as this matrix has the whole sequence
let rc = get_char(&self.reference, i - 1)?;
let rc_rc = rc.complement();
let qc = get_char(&self.query, self.ranges.query_offset() + j - 1)?;
let matches = rc_rc == qc;
let mm_cost = self
.costs
.secondary_reverse_edit_costs
.match_or_substitution_cost(rc_rc, qc);
if j > 1 && i < self.ts_qrr.m() - 1 {
// Mis-/Match: advance in query (j), move backward in reference (i+1 → i)
let cost = mm_cost;
let score = self.ts_qrr[(i + 1, j - 1)].cell.score + cost;
if self.ts_qrr[(i, j)].cell.ty.is_none()
|| score < self.ts_qrr[(i, j)].cell.score
{
self.ts_qrr[(i, j)].cell.ty = Some(if matches {
Align::Match
} else {
Align::Substitution
});
self.ts_qrr[(i, j)].cell.score = score;
self.ts_qrr[(i, j)].ts_len = self.ts_qrr[(i + 1, j - 1)].ts_len + 1;
}
}
{
// Jump from pre_ts
let cost = mm_cost + self.costs.base_cost.qrr;
let score = self.pre_ts[(best_i, j - 1)].score + cost;
if self.ts_qrr[(i, j)].cell.ty.is_none()
|| score < self.ts_qrr[(i, j)].cell.score
{
self.ts_qrr[(i, j)].cell.ty = Some(if matches {
Align::MatchJump(WhichMatrix::PreTs, best_i, j - 1)
} else {
Align::SubstitutionJump(WhichMatrix::PreTs, best_i, j - 1)
});
self.ts_qrr[(i, j)].cell.score = score;
self.ts_qrr[(i, j)].ts_len = 1;
}
}
}
}
Ok(())
}
fn fill_ts_rqr(&mut self) -> anyhow::Result<()> {
for i in 1..self.ts_rqr.m() {
let mut best_score = U64Cost::from_usize(usize::MAX);
let mut best_j = 0;
for j in 0..self.pre_ts.n() {
if self.pre_ts[(i - 1, j)].score < best_score {
best_score = self.pre_ts[(i - 1, j)].score;
best_j = j;
}
}
for j in 1..self.ts_rqr.n() {
// Use full query indices as this matrix covers the whole query sequence
let qc = get_char(&self.query, j - 1)?;
let qc_rc = qc.complement();
let rc = get_char(&self.reference, self.ranges.reference_offset() + i - 1)?;
let matches = qc_rc == rc;
let mm_cost = self
.costs
.secondary_reverse_edit_costs
.match_or_substitution_cost(qc_rc, rc);
if i > 1 && j < self.ts_rqr.n() - 1 {
// Mis-/Match: advance in reference (i), move backward in query (j+1 → j)
let cost = mm_cost;
let score = self.ts_rqr[(i - 1, j + 1)].cell.score + cost;
if self.ts_rqr[(i, j)].cell.ty.is_none()
|| score < self.ts_rqr[(i, j)].cell.score
{
self.ts_rqr[(i, j)].cell.ty = Some(if matches {
Align::Match
} else {
Align::Substitution
});
self.ts_rqr[(i, j)].cell.score = score;
self.ts_rqr[(i, j)].ts_len = self.ts_rqr[(i - 1, j + 1)].ts_len + 1;
}
}
{
// Jump from pre_ts
let cost = mm_cost + self.costs.base_cost.rqr;
let score = self.pre_ts[(i - 1, best_j)].score + cost;
if self.ts_rqr[(i, j)].cell.ty.is_none()
|| score < self.ts_rqr[(i, j)].cell.score
{
self.ts_rqr[(i, j)].cell.ty = Some(if matches {
Align::MatchJump(WhichMatrix::PreTs, i - 1, best_j)
} else {
Align::SubstitutionJump(WhichMatrix::PreTs, i - 1, best_j)
});
self.ts_rqr[(i, j)].cell.score = score;
self.ts_rqr[(i, j)].ts_len = 1;
}
}
}
}
Ok(())
}
fn fill_post_ts(&mut self) -> anyhow::Result<()> {
// For each reference row i, precompute the best ts_rqr column at row i.
// Tiebreak equal scores by ts_len (prefer longer inner TS).
let best_rqr_for_i: Vec<(usize, U64Cost)> = (0..self.post_ts.m())
.map(|k| {
let mut best_j = 0;
let mut best_score = U64Cost::from_usize(usize::MAX);
let mut best_ts_len: u16 = 0;
for j in 1..self.ts_rqr.n() {
let cell = &self.ts_rqr[(k, j)];
if cell.cell.ty.is_some()
&& (cell.cell.score < best_score
|| (cell.cell.score == best_score && cell.ts_len > best_ts_len))
{
best_score = cell.cell.score;
best_j = j;
best_ts_len = cell.ts_len;
}
}
(best_j, best_score)
})
.collect();
for j in 2..self.post_ts.n() {
let mut best_qrr_i = 0;
let mut best_qrr_score = U64Cost::from_usize(usize::MAX);
let mut best_qrr_ts_len: u16 = 0;
// Note we start iterating at 1; tiebreak equal scores by ts_len.
for i in 1..self.ts_qrr.m() {
let cell = &self.ts_qrr[(i, j - 1)];
if cell.cell.ty.is_some()
&& (cell.cell.score < best_qrr_score
|| (cell.cell.score == best_qrr_score && cell.ts_len > best_qrr_ts_len))
{
best_qrr_score = cell.cell.score;
best_qrr_i = i;
best_qrr_ts_len = cell.ts_len;
}
}
for i in 1..self.post_ts.m() {
let rc = get_char(&self.reference, self.ranges.reference_offset() + i - 1)?;
let qc = get_char(&self.query, self.ranges.query_offset() + j - 1)?;
let matches = rc == qc;
let mm_cost = self
.costs
.primary_edit_costs
.match_or_substitution_cost(rc, qc);
{
// Jump from ts_qrr; base cost already paid at entrance
let cost = mm_cost;
let score = self.ts_qrr[(best_qrr_i, j - 1)].cell.score + cost;
if self.post_ts[(i, j)].ty.is_none() || score < self.post_ts[(i, j)].score {
self.post_ts[(i, j)].ty = Some(if matches {
Align::MatchJump(WhichMatrix::TsQrr, best_qrr_i, j - 1)
} else {
Align::SubstitutionJump(WhichMatrix::TsQrr, best_qrr_i, j - 1)
});
self.post_ts[(i, j)].score = score;
}
}
if i >= 2 {
// Jump from ts_rqr; base cost already paid at entrance
let (best_rqr_j, _) = best_rqr_for_i[i - 1];
let score = self.ts_rqr[(i - 1, best_rqr_j)].cell.score + mm_cost;
if self.post_ts[(i, j)].ty.is_none() || score < self.post_ts[(i, j)].score {
self.post_ts[(i, j)].ty = Some(if matches {
Align::MatchJump(WhichMatrix::TsRqr, i - 1, best_rqr_j)
} else {
Align::SubstitutionJump(WhichMatrix::TsRqr, i - 1, best_rqr_j)
});
self.post_ts[(i, j)].score = score;
}
}
if j > 2 && i > 1 {
// Mis-/Match
let cost = mm_cost;
let score = self.post_ts[(i - 1, j - 1)].score + cost;
if self.post_ts[(i, j)].ty.is_none() || score < self.post_ts[(i, j)].score {
self.post_ts[(i, j)].score = self.post_ts[(i - 1, j - 1)].score + cost;
self.post_ts[(i, j)].ty = Some(if matches {
Align::Match
} else {
Align::Substitution
});
}
}
if i > 1 {
// Deletion
let cost = self.costs.primary_edit_costs.gap_extend_cost(rc);
let score = self.post_ts[(i - 1, j)].score + cost;
if self.post_ts[(i, j)].ty.is_none() || score < self.post_ts[(i, j)].score {
self.post_ts[(i, j)].score = score;
self.post_ts[(i, j)].ty = Some(Align::Deletion);
}
}
if j > 2 {
// Insertion
let cost = self.costs.primary_edit_costs.gap_extend_cost(qc);
let score = self.post_ts[(i, j - 1)].score + cost;
if self.post_ts[(i, j)].ty.is_none() || score < self.post_ts[(i, j)].score {
self.post_ts[(i, j)].score = score;
self.post_ts[(i, j)].ty = Some(Align::Insertion);
}
}
}
}
Ok(())
}
fn retrace_path(&self) -> anyhow::Result<AlignmentWithCost> {
use WhichMatrix::{PostTs, PreTs, TsQrr, TsRqr};
let best_result_pre_ts = self
.pre_ts
.fin()
.map_or(U64Cost::from_primitive(u64::MAX), |c| c.score);
// Tiebreak equal scores by ts_len (prefer longer inner TS).
let (best_ts_qrr_i, best_result_ts_qrr, best_ts_qrr_ts_len) = {
let mut best_i = 0;
let mut best_score: U64Cost = U64Cost::from_primitive(u64::MAX);
let mut best_ts_len: u16 = 0;
for i in 1..self.ts_qrr.m() {
let cell = &self.ts_qrr[(i, self.ts_qrr.n() - 1)];
if cell.cell.ty.is_some()
&& (cell.cell.score < best_score
|| (cell.cell.score == best_score && cell.ts_len > best_ts_len))
{
best_i = i;
best_score = cell.cell.score;
best_ts_len = cell.ts_len;
}
}
(best_i, best_score, best_ts_len)
};
// RQR is complete when the last reference_range row is reached (primary = reference).
// Tiebreak equal scores by ts_len (prefer longer inner TS).
let (best_ts_rqr_j, best_result_ts_rqr, best_ts_rqr_ts_len) = {
let mut best_j = 0;
let mut best_score: U64Cost = U64Cost::from_primitive(u64::MAX);
let mut best_ts_len: u16 = 0;
for j in 1..self.ts_rqr.n() {
let cell = &self.ts_rqr[(self.ts_rqr.m() - 1, j)];
if cell.cell.ty.is_some()
&& (cell.cell.score < best_score
|| (cell.cell.score == best_score && cell.ts_len > best_ts_len))
{
best_j = j;
best_score = cell.cell.score;
best_ts_len = cell.ts_len;
}
}
(best_j, best_score, best_ts_len)
};
let best_result_post_ts = self
.post_ts
.fin()
.filter(|c| c.ty.is_some())
.map_or(U64Cost::from_primitive(u64::MAX), |c| c.score);
// Among the two complete-TS candidates, pick the one with lower score; break ties by
// ts_len (longer inner TS wins). This collapses them into one slot so the stable sort
// can't let a shorter TS sneak ahead of a longer one with equal score.
let (best_complete_ts_score, best_complete_ts_mat, best_complete_ts_i, best_complete_ts_j) =
if best_result_ts_qrr < best_result_ts_rqr
|| (best_result_ts_qrr == best_result_ts_rqr
&& best_ts_qrr_ts_len >= best_ts_rqr_ts_len)
{
(
best_result_ts_qrr,
TsQrr,
best_ts_qrr_i,
self.ts_qrr.n() - 1,
)
} else {
(
best_result_ts_rqr,
TsRqr,
self.ts_rqr.m() - 1,
best_ts_rqr_j,
)
};
let (score, mut curr_mat, mut curr_i, mut curr_j) = {
let mut indices = [
// Prefer no-TS, then complete TS (longer inner), then partial TS (shorter inner)
(
best_result_pre_ts,
PreTs,
self.pre_ts.m() - 1,
self.pre_ts.n() - 1,
),
(
best_complete_ts_score,
best_complete_ts_mat,
best_complete_ts_i,
best_complete_ts_j,
),
(
best_result_post_ts,
PostTs,
self.post_ts.m() - 1,
self.post_ts.n() - 1,
),
];
indices.sort_by_key(|(res, ..)| *res);
indices[0]
};
let mut aln = Alignment::new();
while !(curr_mat == PreTs && curr_i == 0 && curr_j == 0) {
match curr_mat {
TsQrr => {
self.retrace_ts_qrr(&mut curr_mat, &mut curr_i, &mut curr_j, &mut aln)?;
}
TsRqr => {
self.retrace_ts_rqr(&mut curr_mat, &mut curr_i, &mut curr_j, &mut aln)?;
}
_ => {
self.retrace_pre_or_post_ts(&mut curr_mat, &mut curr_i, &mut curr_j, &mut aln)?;
}
}
}
let actual_aln = aln.reverse();
Ok(AlignmentWithCost::new(actual_aln, score))
}
fn retrace_pre_or_post_ts(
&self,
curr_mat: &mut WhichMatrix,
curr_i: &mut usize,
curr_j: &mut usize,
aln: &mut Alignment<AlignmentType>,
) -> anyhow::Result<()> {
use WhichMatrix::{PreTs, TsQrr, TsRqr};
let curr_mat_ptr = if *curr_mat == PreTs {
&self.pre_ts
} else {
&self.post_ts
};
let cell = &curr_mat_ptr[(*curr_i, *curr_j)];
match cell.ty {
Some(Align::Match) => {
aln.push(AlignmentType::PrimaryMatch);
*curr_i -= 1;
*curr_j -= 1;
}
Some(Align::Substitution) => {
aln.push(AlignmentType::PrimarySubstitution);
*curr_i -= 1;
*curr_j -= 1;
}
Some(Align::Insertion) => {
aln.push(AlignmentType::PrimaryInsertion);
*curr_j -= 1;
}
Some(Align::Deletion) => {
aln.push(AlignmentType::PrimaryDeletion);
*curr_i -= 1;
}
Some(
j @ (Align::MatchJump(WhichMatrix::TsQrr, new_i, new_j)
| Align::SubstitutionJump(WhichMatrix::TsQrr, new_i, new_j)),
) => {
// Jump from ts_qrr
if matches!(j, Align::MatchJump(..)) {
aln.push(AlignmentType::PrimaryMatch);
} else {
aln.push(AlignmentType::PrimarySubstitution);
}
let len = self.ts_qrr[(new_i, new_j)].ts_len;
let entrance_i = new_i + len as usize - 1;
let entrance_j = new_j - len as usize + 1;
let entrance_jump = &self.ts_qrr[(entrance_i, entrance_j)].cell;
let Some(
Align::MatchJump(_, mat1_entrance_i, ..)
| Align::SubstitutionJump(_, mat1_entrance_i, ..),
) = entrance_jump.ty
else {
anyhow::bail!(
"We should have found the entrance jump in ts_qrr at i={entrance_i}, j={entrance_j}",
)
};
let ref_diff = isize::try_from(*curr_i)? - isize::try_from(mat1_entrance_i)? - 1;
aln.push(AlignmentType::TemplateSwitchExit {
anti_primary_gap: ref_diff,
});
*curr_mat = TsQrr;
*curr_i = new_i;
*curr_j = new_j;
}
Some(
j @ (Align::MatchJump(WhichMatrix::TsRqr, new_i, new_j)
| Align::SubstitutionJump(WhichMatrix::TsRqr, new_i, new_j)),
) => {
// Jump from ts_rqr
if matches!(j, Align::MatchJump(..)) {
aln.push(AlignmentType::PrimaryMatch);
} else {
aln.push(AlignmentType::PrimarySubstitution);
}
let len = self.ts_rqr[(new_i, new_j)].ts_len;
let entrance_i = new_i - len as usize + 1;
let entrance_j = new_j + len as usize - 1;
let entrance_jump = &self.ts_rqr[(entrance_i, entrance_j)].cell;
let Some(
Align::MatchJump(_, _, mat1_entrance_j)
| Align::SubstitutionJump(_, _, mat1_entrance_j),
) = entrance_jump.ty
else {
anyhow::bail!(
"We should have found the entrance jump in ts_rqr at i={entrance_i}, j={entrance_j}",
)
};
// anti_primary_gap = SP4 - SP1 in query (the anti-primary sequence for RQR)
let query_diff = isize::try_from(*curr_j)? - isize::try_from(mat1_entrance_j)? - 1;
aln.push(AlignmentType::TemplateSwitchExit {
anti_primary_gap: query_diff,
});
*curr_mat = TsRqr;
*curr_i = new_i;
*curr_j = new_j;
}
Some(
Align::MatchJump(WhichMatrix::PreTs | WhichMatrix::PostTs, _, _)
| Align::SubstitutionJump(WhichMatrix::PreTs | WhichMatrix::PostTs, _, _),
)
| None => {
anyhow::bail!("Implementation error")
}
}
Ok(())
}
fn retrace_ts_qrr(
&self,
curr_mat: &mut WhichMatrix,
curr_i: &mut usize,
curr_j: &mut usize,
aln: &mut Alignment<AlignmentType>,
) -> anyhow::Result<()> {
use WhichMatrix::PreTs;
let rev_cell = &self.ts_qrr[(*curr_i, *curr_j)];
if aln.inner_mut().is_empty() {
// We are starting from ts_qrr, so the exit is at the end of the alignment.
let len = rev_cell.ts_len;
let entrance_i = *curr_i + len as usize - 1;
let entrance_j = *curr_j - len as usize + 1;
let entrance_jump = &self.ts_qrr[(entrance_i, entrance_j)].cell;
let Some(
Align::MatchJump(_, mat1_entrance_i, ..)
| Align::SubstitutionJump(_, mat1_entrance_i, ..),
) = entrance_jump.ty
else {
anyhow::bail!(
"We should have found the entrance jump in ts_qrr at i={entrance_i}, j={entrance_j}",
)
};
let ref_diff = isize::try_from(self.ranges.reference_range().len() + 1)?
- isize::try_from(mat1_entrance_i)?
- 1;
aln.push(AlignmentType::TemplateSwitchExit {
anti_primary_gap: ref_diff,
});
}
match rev_cell.cell.ty {
Some(Align::Match) => {
aln.push(AlignmentType::SecondaryMatch);
*curr_i += 1;
*curr_j -= 1;
}
Some(Align::Substitution) => {
aln.push(AlignmentType::SecondarySubstitution);
*curr_i += 1;
*curr_j -= 1;
}
Some(Align::Insertion) => {
aln.push(AlignmentType::SecondaryInsertion);
*curr_j -= 1;
}
Some(Align::Deletion) => {
aln.push(AlignmentType::SecondaryDeletion);
*curr_i += 1;
}
Some(
j @ (Align::MatchJump(WhichMatrix::PreTs, new_i, new_j)
| Align::SubstitutionJump(WhichMatrix::PreTs, new_i, new_j)),
) => {
if matches!(j, Align::MatchJump(..)) {
aln.push(AlignmentType::SecondaryMatch);
} else {
aln.push(AlignmentType::SecondarySubstitution);
}
// first_offset = secondary_start - primary_entrance, both absolute.
// curr_i is absolute (ts_qrr), new_i is cluster-relative (pre_ts).
let first_offset = isize::try_from(*curr_i)?
- isize::try_from(new_i)?
- isize::try_from(self.ranges.reference_offset())?;
aln.push(AlignmentType::TemplateSwitchEntrance {
first_offset,
equal_cost_range: EqualCostRange {
min_start: 0,
max_start: 0,
min_end: 0,
max_end: 0,
},
primary: TemplateSwitchPrimary::Query,
secondary: TemplateSwitchSecondary::Reference,
direction: TemplateSwitchDirection::Reverse,
});
*curr_mat = PreTs;
*curr_i = new_i;
*curr_j = new_j;
}
_ => anyhow::bail!("Implementation error"),
}
Ok(())
}
fn retrace_ts_rqr(
&self,
curr_mat: &mut WhichMatrix,
curr_i: &mut usize,
curr_j: &mut usize,
aln: &mut Alignment<AlignmentType>,
) -> anyhow::Result<()> {
use WhichMatrix::PreTs;
let rev_cell = &self.ts_rqr[(*curr_i, *curr_j)];
if aln.inner_mut().is_empty() {
// We are starting from ts_rqr, so the exit is at the end of the alignment.
let len = rev_cell.ts_len;
let entrance_i = *curr_i - len as usize + 1;
let entrance_j = *curr_j + len as usize - 1;
let entrance_jump = &self.ts_rqr[(entrance_i, entrance_j)].cell;
let Some(
Align::MatchJump(WhichMatrix::PreTs, _, mat1_entrance_j)
| Align::SubstitutionJump(WhichMatrix::PreTs, _, mat1_entrance_j),
) = entrance_jump.ty
else {
anyhow::bail!(
"We should have found the entrance jump in ts_rqr at i={entrance_i}, j={entrance_j}",
)
};
// anti_primary_gap = SP4 - SP1 in query (anti-primary for RQR)
let query_diff = isize::try_from(self.ranges.query_range().len() + 1)?
- isize::try_from(mat1_entrance_j)?
- 1;
aln.push(AlignmentType::TemplateSwitchExit {
anti_primary_gap: query_diff,
});
}
match rev_cell.cell.ty {
Some(Align::Match) => {
aln.push(AlignmentType::SecondaryMatch);
*curr_i -= 1;
*curr_j += 1;
}
Some(Align::Substitution) => {
aln.push(AlignmentType::SecondarySubstitution);
*curr_i -= 1;
*curr_j += 1;
}
Some(Align::Insertion) => {
aln.push(AlignmentType::SecondaryInsertion);
*curr_j += 1;
}
Some(Align::Deletion) => {
aln.push(AlignmentType::SecondaryDeletion);
*curr_i -= 1;
}
Some(
j @ (Align::MatchJump(WhichMatrix::PreTs, new_i, new_j)
| Align::SubstitutionJump(WhichMatrix::PreTs, new_i, new_j)),
) => {
if matches!(j, Align::MatchJump(..)) {
aln.push(AlignmentType::SecondaryMatch);
} else {
aln.push(AlignmentType::SecondarySubstitution);
}
// first_offset = secondary_abs - primary_entrance_abs.
// curr_j is full-query absolute (secondary); new_i is range-relative reference
// (primary entrance), converted to absolute by adding reference_offset.
let first_offset = isize::try_from(*curr_j)?
- isize::try_from(new_i)?
- isize::try_from(self.ranges.reference_offset())?;
aln.push(AlignmentType::TemplateSwitchEntrance {
first_offset,
equal_cost_range: EqualCostRange {
min_start: 0,
max_start: 0,
min_end: 0,
max_end: 0,
},
primary: TemplateSwitchPrimary::Reference,
secondary: TemplateSwitchSecondary::Query,
direction: TemplateSwitchDirection::Reverse,
});
*curr_mat = PreTs;
*curr_i = new_i;
*curr_j = new_j;
}
_ => anyhow::bail!("Implementation error"),
}
Ok(())
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WhichMatrix {
PreTs,
TsQrr,
TsRqr,
PostTs,
}
fn get_char(seq: &[u8], ix: usize) -> anyhow::Result<DnaCharacterOrN> {
let c = seq[ix];
DnaCharacterOrN::try_from(c)
.map_err(|()| anyhow!("Unknown character in query sequence: {}", &[c].as_bstr()))
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum Align {
Match,
Substitution,
Deletion,
Insertion,
MatchJump(WhichMatrix, usize, usize),
SubstitutionJump(WhichMatrix, usize, usize),
}
#[derive(Debug, PartialEq, Eq)]
struct Cell {
score: U64Cost,
ty: Option<Align>,
}
impl Display for Cell {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let op = match self.ty {
Some(Align::Match) => "Mat",
Some(Align::Substitution) => "Sub",
Some(Align::Deletion) => "Del",
Some(Align::Insertion) => "Ins",
Some(Align::MatchJump(..)) => "MaJ",
Some(Align::SubstitutionJump(..)) => "SuJ",
None => "???",
};
write!(f, "{:>3} ({op})", self.score.as_u64())
}
}
impl Default for Cell {
fn default() -> Self {
Self {
score: U64Cost::from_usize(0),
ty: None,
}
}
}
#[derive(Debug, Default, PartialEq, Eq)]
struct TsCell {
cell: Cell,
ts_len: u16,
}
impl Display for TsCell {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} (len={:>3})", self.cell, self.ts_len)
}
}
#[derive(Clone)]
struct Mat<T> {
data: Vec<T>,
n: usize,
}
impl<T: Default> Mat<T> {
pub fn try_new(m: usize, n: usize) -> Result<Self, AlignmentFailure> {
assert!(n > 0, "width must be nonzero");
let mut data = Vec::new();
data.try_reserve_exact(m * n)
.map_err(|_| AlignmentFailure::oom())?;
data.resize_with(m * n, T::default);
Ok(Self { data, n })
}
}
impl<T> Mat<T> {
pub fn empty() -> Self {
Self {
data: Vec::new(),
n: 1,
}
}
pub const fn n(&self) -> usize {
self.n
}
pub const fn m(&self) -> usize {
self.data.len() / self.n
}
/// i = row index (< m)
/// j = col index (< n)
#[inline]
fn index(&self, i: usize, j: usize) -> usize {
debug_assert!(
i < self.m(),
"row {i} out of bounds, matrix has {} rows and {} columns",
self.m(),
self.n
);
debug_assert!(
j < self.n,
"column {j} out of bounds, matrix has {} rows and {} columns",
self.m(),
self.n
);
i * self.n + j
}
fn fin(&self) -> Option<&T> {
self.data.last()
}
}
impl<T> Index<(usize, usize)> for Mat<T> {
type Output = T;
fn index(&self, (i, j): (usize, usize)) -> &Self::Output {
let idx = self.index(i, j);
&self.data[idx]
}
}
impl<T> IndexMut<(usize, usize)> for Mat<T> {
fn index_mut(&mut self, (i, j): (usize, usize)) -> &mut Self::Output {
let idx = self.index(i, j);
&mut self.data[idx]
}
}
impl<T: fmt::Display> fmt::Debug for Mat<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let height = self.m();
writeln!(f, "[")?;
for i in 0..height {
write!(f, " [")?;
for j in 0..self.n {
write!(f, "{}", self[(i, j)])?;
if j + 1 < self.n {
write!(f, ", ")?;
}
}
writeln!(f, "],")?;
}
writeln!(f, "]")?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use lib_tsalign::a_star_aligner::alignment_geometry::AlignmentCoordinates;
use super::*;
fn has_ts(geom: &str, alignment: &TwitcherAlignmentWithStatistics) -> bool {
assert_eq!(geom.len(), 3);
let lowercase = geom.to_lowercase();
let mut geom = lowercase.chars();
let p = match geom.next().unwrap() {
'r' => TemplateSwitchPrimary::Reference,
'q' => TemplateSwitchPrimary::Query,
_ => panic!(),
};
let s = match geom.next().unwrap() {
'r' => TemplateSwitchSecondary::Reference,
'q' => TemplateSwitchSecondary::Query,
_ => panic!(),
};
let d = match geom.next().unwrap() {
'r' => TemplateSwitchDirection::Reverse,
'f' => TemplateSwitchDirection::Forward,
_ => panic!(),
};
for (_, ty) in alignment.alignment.alignment.iter_compact() {
match ty {
AlignmentType::TemplateSwitchEntrance {
primary,
secondary,
direction,
..
} if *primary == p && *secondary == s && *direction == d => return true,
_ => {}
}
}
false
}
#[test]
fn fpa_basic() {
let r = b"ACGTA";
let q = b"AACGTTA";
let fpa = FourPointAligner::default();
let r = fpa.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_complete(r.len(), q.len()),
);
r.unwrap();
}
#[test]
fn fpa_basic_inv() {
let q = b"ACGTA";
let r = b"AACGTTA";
let fpa = FourPointAligner::default();
let r = fpa.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_complete(r.len(), q.len()),
);
r.unwrap();
}
#[test]
fn fpa_with_ranges() {
let r = b"TTTTTACGTATTTTT";
let q = b"TTTTTACGAAAAAAATTTTT";
let fpa = FourPointAligner::default();
let r = fpa.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_offset_limit(
AlignmentCoordinates::new(5, 5),
AlignmentCoordinates::new(10, 15),
),
);
r.unwrap();
}
#[test]
fn fpa_with_ranges_inv() {
let q = b"TTTTTACGTATTTTT";
let r = b"TTTTTACGAAAAAAATTTTT";
let fpa = FourPointAligner::default();
let r = fpa.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_offset_limit(
AlignmentCoordinates::new(5, 5),
AlignmentCoordinates::new(15, 10),
),
);
r.unwrap();
}
#[test]
fn fpa_with_cursed_ranges() {
let r = b"TTTTTACGTATTTTT";
let q = b"TTTTTACGAAAAAAATTTTT";
let fpa = FourPointAligner::default();
let r = fpa.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_offset_limit(
AlignmentCoordinates::new(0, 18),
AlignmentCoordinates::new(1, 20),
),
);
r.unwrap();
}
#[test]
fn fpa_with_cursed_ranges_inv() {
let q = b"TTTTTACGTATTTTT";
let r = b"TTTTTACGAAAAAAATTTTT";
let fpa = FourPointAligner::default();
let r = fpa.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_offset_limit(
AlignmentCoordinates::new(18, 0),
AlignmentCoordinates::new(20, 1),
),
);
r.unwrap();
}
#[test]
fn fpa_vcf() {
let r = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
let q = b"AAAAAAAAAAAAAAATTTTTTTTTTAAAAAAAAAAAAAA";
let fpa = FourPointAligner::default();
let r = fpa.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_offset_limit(
AlignmentCoordinates::new(15, 15),
AlignmentCoordinates::new(26, 25),
),
);
dbg!(&r);
r.unwrap();
}
#[test]
fn fpa_vcf_inv() {
let q = b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
let r = b"AAAAAAAAAAAAAAATTTTTTTTTTAAAAAAAAAAAAAA";
let fpa = FourPointAligner::default();
let r = fpa.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_offset_limit(
AlignmentCoordinates::new(15, 15),
AlignmentCoordinates::new(25, 26),
),
);
dbg!(&r);
r.unwrap();
}
#[test]
fn fpa_large() {
let r = b"TTTGTTTTCTTTTTAGGAAGCCATCATTCTTTAGAGGGCAATGACCAAACAGTACCAGCAGAAATTGAAGTACCAGCAGAAGGCTAAGAAGGTTAGGAGAAAAAGACATTTTGTATTTTACTGTTTTTTTCTTTTTTTTTTTTTAGACGAAGTCTTGCTCTGTCACCAGGCTAGAGTGCAGTGGCACGATCTCGGCTCACTGCAACCTCTGACTCCCTGGTTCAAGTGATTCTCCTGCCTCAGCCTCCTAAGTAGCTGGGATTATAGGCACAGACCACCACATCCAGCTATTTTTTGTATTTTTGGTAGAGACCAGGGTTTCACCATGTTGGCCAGGATGGTCTCAATCTTTTGACCTCCTGATCCACCCACCTCGGCCTCCCAACATGCTGGGATTACAGATGTGGGAGCTTGGCCACCTCCTCTTGGGAGAAATGCACTGATTCTGGTTGCCACGTGGATTTATTTTGGGAGTGATATTCATCTAACTTCATGGAAATAATACTAGATAGAGAATTCATCCGCTAACCTTTCTATCTGATGAGAGTTTTGGGCAAATCGAATACCAAGTTACCAGTTTTGTTTTTTTCTCTGATGCAAAAAAACAATTTGCCAGCCAGTGAAAAACTCTCACAGCTCTGGATGTGAGTTTAGGATACTGGATTTCTACATTCAATTTCTTACTACTTTTCTTGCACAGGGATCATGGCACAAGCTGCAGTTTCCACCCTGCCCATTGAAGATGAGGAGTCCATGGCAGATGAGGAGTCCGTTGAAGATGAGTCTGTTGAAGATGAGTCCACAGAGAACAGGATGGTGGTGACATTGCTCATATCAGCTCTTGAGTCCATGGTGAGACCTTCCGTTCTAACATTCTGTAATTGGGTAGTACTGGGGTGGTAGATAAGGTTGATTTGTTTTTGTAGAATTTATAATTTTATGATTTATAGTTCTAATGAGTAGATCTTTTTCTTGAATAGTAGTTATGGTCAAAACACTTCTGACCAAATGTGCCATGTTGTCCAGCCTGG";
let q = b"TTTGTTTTCTTTTTAGGAAGCCATCATTCTTTAGAGGGCAATGACCAAACAGTACCAGCAGAAATTGAAGTACCAGCAGAAGGCTAAGAAGGTTAGGAGAAAAAGACATTTTGTATTTTACTGTTATTTTCTTTTTTTTTTTTTAGACGAAGTCTTGCTCTGTCACCAGGCTAGAGTGCAGTGGCACGATCTCGGCTCACTGCAACCTCTGACTCCCTGGTTCAAGTGATTCTCCTGCCTTAGCCTCCTAAGTAGCTGGGATTATAGGCACAGACCACCACATCCAGCTATTTTTTGTATTTTTGGTAGAGACCAGGGTTTCACCATGTTGGCCAGGATGGTCTCAATCTTTTGACCTCCTGATCTACCCACCTCGGCCTCCCAACATGCTGGGATTACAGATGTGGGCGCTTGGCCACCTCCTCTTGGGAGAAATGCACTGATTCTGGTTGCCACGTGGATTTATTTTGGGAGTGATATTCATCTAACTTCATGGAAATAGTACTAGATAGAAAGTTAGCGGATGAATTCTCTATCTGATGAGAGTTTTGGGCAAATCGAATACCAAGTTACCAAGTTTTGTTTTTTTCTCTGATGCAAAAAAACAATTTGCCAGCCAGTGAAAAACTCTCACAGCTCTGGATGTGAGTTTAGGATACTGGATTTCTACCATTCAATTTCTTACTACTTTTCTTGCACAGGGATCATGGCACAAGCTGCAGTTTCCACCCTGCCCATTGAAGATGAGGAGTCCATGGAAGATGAGGAGTCCGTTGAAGATGAGTCTGTTGAAGATGAGTCCGCAGAGAACAGGATGGTGGTGACATTGCTCATATCAGCTCTTGAGTCCATGGTGAGACCTTCTGTTCTAACATTCTGTAATTGGGTAGTACTGGGTGGTAGATAAGGTTGATTTGTTTTTGTAGAATTTATAATTTTATGATTTATAGTTCTAATGAGTAGATCTTTTTCTTGAATAGTAGTTACGGTCAAACACTTCTGACCAAATGTGCCATGTTGTCCAGCCTGG";
let fpa = FourPointAligner::default();
let res = fpa
.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_complete(r.len(), q.len()),
)
.unwrap();
println!("{}", res.alignment.alignment.cigar());
assert!(has_ts("qrr", &res));
}
#[test]
fn fpa_large_inv() {
let q = b"TTTGTTTTCTTTTTAGGAAGCCATCATTCTTTAGAGGGCAATGACCAAACAGTACCAGCAGAAATTGAAGTACCAGCAGAAGGCTAAGAAGGTTAGGAGAAAAAGACATTTTGTATTTTACTGTTTTTTTCTTTTTTTTTTTTTAGACGAAGTCTTGCTCTGTCACCAGGCTAGAGTGCAGTGGCACGATCTCGGCTCACTGCAACCTCTGACTCCCTGGTTCAAGTGATTCTCCTGCCTCAGCCTCCTAAGTAGCTGGGATTATAGGCACAGACCACCACATCCAGCTATTTTTTGTATTTTTGGTAGAGACCAGGGTTTCACCATGTTGGCCAGGATGGTCTCAATCTTTTGACCTCCTGATCCACCCACCTCGGCCTCCCAACATGCTGGGATTACAGATGTGGGAGCTTGGCCACCTCCTCTTGGGAGAAATGCACTGATTCTGGTTGCCACGTGGATTTATTTTGGGAGTGATATTCATCTAACTTCATGGAAATAATACTAGATAGAGAATTCATCCGCTAACCTTTCTATCTGATGAGAGTTTTGGGCAAATCGAATACCAAGTTACCAGTTTTGTTTTTTTCTCTGATGCAAAAAAACAATTTGCCAGCCAGTGAAAAACTCTCACAGCTCTGGATGTGAGTTTAGGATACTGGATTTCTACATTCAATTTCTTACTACTTTTCTTGCACAGGGATCATGGCACAAGCTGCAGTTTCCACCCTGCCCATTGAAGATGAGGAGTCCATGGCAGATGAGGAGTCCGTTGAAGATGAGTCTGTTGAAGATGAGTCCACAGAGAACAGGATGGTGGTGACATTGCTCATATCAGCTCTTGAGTCCATGGTGAGACCTTCCGTTCTAACATTCTGTAATTGGGTAGTACTGGGGTGGTAGATAAGGTTGATTTGTTTTTGTAGAATTTATAATTTTATGATTTATAGTTCTAATGAGTAGATCTTTTTCTTGAATAGTAGTTATGGTCAAAACACTTCTGACCAAATGTGCCATGTTGTCCAGCCTGG";
let r = b"TTTGTTTTCTTTTTAGGAAGCCATCATTCTTTAGAGGGCAATGACCAAACAGTACCAGCAGAAATTGAAGTACCAGCAGAAGGCTAAGAAGGTTAGGAGAAAAAGACATTTTGTATTTTACTGTTATTTTCTTTTTTTTTTTTTAGACGAAGTCTTGCTCTGTCACCAGGCTAGAGTGCAGTGGCACGATCTCGGCTCACTGCAACCTCTGACTCCCTGGTTCAAGTGATTCTCCTGCCTTAGCCTCCTAAGTAGCTGGGATTATAGGCACAGACCACCACATCCAGCTATTTTTTGTATTTTTGGTAGAGACCAGGGTTTCACCATGTTGGCCAGGATGGTCTCAATCTTTTGACCTCCTGATCTACCCACCTCGGCCTCCCAACATGCTGGGATTACAGATGTGGGCGCTTGGCCACCTCCTCTTGGGAGAAATGCACTGATTCTGGTTGCCACGTGGATTTATTTTGGGAGTGATATTCATCTAACTTCATGGAAATAGTACTAGATAGAAAGTTAGCGGATGAATTCTCTATCTGATGAGAGTTTTGGGCAAATCGAATACCAAGTTACCAAGTTTTGTTTTTTTCTCTGATGCAAAAAAACAATTTGCCAGCCAGTGAAAAACTCTCACAGCTCTGGATGTGAGTTTAGGATACTGGATTTCTACCATTCAATTTCTTACTACTTTTCTTGCACAGGGATCATGGCACAAGCTGCAGTTTCCACCCTGCCCATTGAAGATGAGGAGTCCATGGAAGATGAGGAGTCCGTTGAAGATGAGTCTGTTGAAGATGAGTCCGCAGAGAACAGGATGGTGGTGACATTGCTCATATCAGCTCTTGAGTCCATGGTGAGACCTTCTGTTCTAACATTCTGTAATTGGGTAGTACTGGGTGGTAGATAAGGTTGATTTGTTTTTGTAGAATTTATAATTTTATGATTTATAGTTCTAATGAGTAGATCTTTTTCTTGAATAGTAGTTACGGTCAAACACTTCTGACCAAATGTGCCATGTTGTCCAGCCTGG";
let fpa = FourPointAligner::default();
let res = fpa
.align(
Arc::from(&r[..]),
Arc::from(&q[..]),
AlignmentRange::new_complete(r.len(), q.len()),
)
.unwrap();
println!("{}", res.alignment.alignment.cigar());
assert!(has_ts("rqr", &res));
}
}