use crate::*;
use imgt::*;
use mzcore::sequence::{
AnnotatedPeptide, AtMax, HasPeptidoform, Linear, Peptidoform, Region, SimpleLinear, UnAmbiguous,
};
use mzcv::CVIndex;
use std::collections::HashSet;
use itertools::Itertools;
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ConsecutiveAlignment<'lifetime, A> {
pub alignments: Vec<
Vec<(
Allele<'lifetime>,
Alignment<&'lifetime Peptidoform<UnAmbiguous>, Peptidoform<A>>,
)>,
>,
}
impl<'lifetime, A> ConsecutiveAlignment<'lifetime, A> {
pub fn main_alignment(
&self,
) -> Vec<&(
Allele<'lifetime>,
Alignment<&'lifetime Peptidoform<UnAmbiguous>, Peptidoform<A>>,
)> {
self.alignments.iter().filter_map(|a| a.first()).collect()
}
}
impl<A: AtMax<Linear>> ConsecutiveAlignment<'_, A> {
#[expect(clippy::missing_panics_doc)]
pub fn regions(&self) -> Vec<(Peptidoform<A>, Region)> {
let mut b_offset = 0;
self.alignments
.iter()
.filter_map(|a| {
a.first().map(|(allele, alignment)| {
let mut index_a = alignment.start_a;
let mut start_region_b = alignment.start_b;
let mut index_b = alignment.start_b;
let mut region = allele
.get_region(index_a)
.map_or(&Region::Framework(1), |(r, _)| r);
let mut ranges = Vec::new();
for step in &alignment.path {
let new_region = allele
.get_region(index_a + step.step_a as usize)
.map_or(&Region::Framework(1), |(r, _)| r);
if region != new_region {
ranges.push((b_offset + start_region_b, b_offset + index_b, region));
start_region_b = index_b + 1;
region = new_region;
}
index_a += step.step_a as usize;
index_b += step.step_b as usize;
}
b_offset += alignment.len_b() + alignment.start_b;
ranges
})
})
.flatten()
.chunk_by(|(_, _, r)| *r)
.into_iter()
.map(|(region, mut chunk)| {
let first = chunk.next().unwrap();
let last = chunk.last().unwrap_or(first);
(
self.alignments[0][0]
.1
.seq_b()
.cast_peptidoform()
.sub_peptide(first.0..=last.1),
region.clone(),
)
})
.collect()
}
}
#[expect(clippy::needless_pass_by_value)]
pub fn consecutive_align<'imgt, const STEPS: u16, A: HasPeptidoform<SimpleLinear> + Eq + Clone>(
sequence: A,
genes: &[(GeneType, AlignType)],
species: Option<
HashSet<Species, impl std::hash::BuildHasher + Clone + Send + Sync + Default + 'imgt>,
>,
chains: Option<
HashSet<ChainType, impl std::hash::BuildHasher + Clone + Send + Sync + Default + 'imgt>,
>,
allele: AlleleSelection,
scoring: AlignScoring<'_>,
return_number: usize,
imgt: &'imgt CVIndex<IMGT>,
) -> ConsecutiveAlignment<'imgt, SimpleLinear> {
assert!(genes.len() >= 2);
assert!(return_number != 0);
let mut output: Vec<
Vec<(
Allele<'imgt>,
Alignment<&'imgt Peptidoform<UnAmbiguous>, Peptidoform<SimpleLinear>>,
)>,
> = Vec::with_capacity(genes.len());
let mut prev = 0;
for gene in genes {
let (left_sequence, use_species, use_chains) =
output.last().and_then(|v| v.first()).map_or_else(
|| {
(
sequence.cast_peptidoform().clone(),
species.clone(),
chains.clone(),
)
},
|last| {
prev += last.1.start_b() + last.1.len_b();
(
sequence.cast_peptidoform().sub_peptide(prev..),
Some(std::iter::once(last.0.species).collect()),
Some(std::iter::once(last.0.gene.chain).collect()),
)
},
);
if left_sequence.is_empty() {
break;
}
output.push(
Selection {
species: use_species,
chains: use_chains,
allele,
genes: Some([gene.0].into()),
}
.germlines(imgt)
.map(|seq| {
let alignment = align::<
STEPS,
&'imgt Peptidoform<UnAmbiguous>,
Peptidoform<SimpleLinear>,
>(
seq.sequence, left_sequence.clone(), scoring, gene.1
);
(seq, alignment)
})
.k_largest_by(return_number, |a, b| a.1.cmp(&b.1))
.collect_vec(),
);
}
ConsecutiveAlignment { alignments: output }
}
#[cfg(feature = "rayon")]
#[expect(clippy::needless_pass_by_value)]
pub fn par_consecutive_align<
'imgt,
const STEPS: u16,
A: HasPeptidoform<SimpleLinear> + Send + Sync + Eq + Clone,
>(
sequence: A,
genes: &[(GeneType, AlignType)],
species: Option<
HashSet<Species, impl std::hash::BuildHasher + Clone + Send + Sync + Default + 'imgt>,
>,
chains: Option<
HashSet<ChainType, impl std::hash::BuildHasher + Clone + Send + Sync + Default + 'imgt>,
>,
allele: AlleleSelection,
scoring: AlignScoring<'_>,
return_number: usize,
imgt: &'imgt CVIndex<IMGT>,
) -> ConsecutiveAlignment<'imgt, SimpleLinear> {
use rayon::iter::ParallelIterator;
assert!(genes.len() >= 2);
assert!(return_number != 0);
let mut output: Vec<
Vec<(
Allele<'imgt>,
Alignment<&'imgt Peptidoform<UnAmbiguous>, Peptidoform<SimpleLinear>>,
)>,
> = Vec::with_capacity(genes.len());
let mut prev = 0;
for gene in genes {
let (left_sequence, use_species, use_chains) =
output.last().and_then(|v| v.first()).map_or_else(
|| {
(
sequence.cast_peptidoform().clone(),
species.clone(),
chains.clone(),
)
},
|last| {
prev += last.1.start_b() + last.1.len_b();
(
sequence.cast_peptidoform().sub_peptide(prev..),
Some(std::iter::once(last.0.species).collect()),
Some(std::iter::once(last.0.gene.chain).collect()),
)
},
);
if left_sequence.is_empty() {
break;
}
output.push(
Selection {
species: use_species,
chains: use_chains,
allele,
genes: Some([gene.0].into()),
}
.par_germlines(imgt)
.map(|seq| {
let alignment = align::<
STEPS,
&'imgt Peptidoform<UnAmbiguous>,
Peptidoform<SimpleLinear>,
>(
seq.sequence, left_sequence.clone(), scoring, gene.1
);
(seq, alignment)
})
.collect::<Vec<_>>()
.into_iter()
.k_largest_by(return_number, |a, b| a.1.cmp(&b.1))
.collect_vec(),
);
}
ConsecutiveAlignment { alignments: output }
}