use crate::shared::data_structures::{RangeArray1, RangeArray2};
use crate::shared::feature::Feature;
use crate::shared::utils::difference_as_i64;
use crate::shared::{DAlignment, Dna, FeaturesTrait, InferenceParameters, VJAlignment};
use crate::vdj::Sequence;
use itertools::iproduct;
/// Likelihoods for the position of the 3' end of the V gene, aggregated
/// over the number of V deletions (delV). Built from one `VJAlignment`.
pub struct AggregatedFeatureEndV {
    /// Index of the V gene (copied from `VJAlignment::index`).
    pub index: usize,
    /// Start of the V alignment (copied from `VJAlignment::start_gene`).
    pub start_gene: usize,
    /// Smallest stored V 3' end position (`likelihood.min`).
    pub start_v3: i64,
    /// Upper bound on the V 3' end position (`likelihood.max`; appears to be
    /// exclusive, judging by the `+ 1` in the constructor — TODO confirm).
    pub end_v3: i64,
    // P(delV) * P(errors) indexed by the V 3' end position; entries whose
    // likelihood is <= ip.min_likelihood are left at zero.
    likelihood: RangeArray1,
    // Posterior updates accumulated by `dirty_update`, consumed by
    // `disaggregate`; same index range as `likelihood`.
    dirty_likelihood: RangeArray1,
}
/// Likelihoods for the position of the 5' start of the J gene, aggregated
/// over the number of J deletions (delJ). Built from one `VJAlignment`.
pub struct AggregatedFeatureStartJ {
    /// Index of the J gene (copied from `VJAlignment::index`).
    pub index: usize,
    /// Start of the J alignment on the sequence (copied from
    /// `VJAlignment::start_seq`).
    pub start_seq: usize,
    /// Smallest stored J 5' start position (`likelihood.min`).
    pub start_j5: i64,
    /// Upper bound on the J 5' start position (`likelihood.max`; appears to
    /// be exclusive, judging by the range used in the constructor — TODO confirm).
    pub end_j5: i64,
    // P(delJ) * P(errors) indexed by the J 5' start position; entries whose
    // likelihood is <= ip.min_likelihood are left at zero.
    likelihood: RangeArray1,
    // Posterior updates accumulated by `dirty_update`, consumed by
    // `disaggregate`; same index range as `likelihood`.
    dirty_likelihood: RangeArray1,
}
/// Likelihoods for the span (start, end) left by a D gene after deletions on
/// both sides, aggregated over all alignments of one D gene and over the
/// (delD5, delD3) deletion pairs.
pub struct AggregatedFeatureSpanD {
    /// Index of the D gene (common to all aggregated alignments).
    pub index: usize,
    /// Smallest stored D 5' start position (`likelihood.min.0`).
    pub start_d5: i64,
    /// Upper bound on the D 5' start position (`likelihood.max.0`).
    pub end_d5: i64,
    /// Smallest stored D 3' end position (`likelihood.min.1`).
    pub start_d3: i64,
    /// Upper bound on the D 3' end position (`likelihood.max.1`).
    pub end_d3: i64,
    // Sum of P(delD5, delD3) * P(errors) over alignments, indexed by
    // (d_start, d_end); entries below the likelihood threshold stay zero.
    likelihood: RangeArray2,
    // Posterior updates accumulated by `dirty_update`, consumed by
    // `disaggregate`; same index range as `likelihood`.
    dirty_likelihood: RangeArray2,
}
impl AggregatedFeatureEndV {
    /// Build the aggregated feature for one V alignment, marginalizing the
    /// delV and error features over every possible number of V deletions.
    ///
    /// Returns `None` when every (delV, error) combination falls at or below
    /// `ip.min_likelihood`.
    pub fn new(
        v: &VJAlignment,
        feat: &impl FeaturesTrait,
        ip: &InferenceParameters,
    ) -> Option<AggregatedFeatureEndV> {
        let nb_delv = feat.delv().dim().0;
        // One slot per possible V 3' end: end_seq - (nb_delv - 1) ..= end_seq.
        let mut agg = RangeArray1::zeros((
            difference_as_i64(v.end_seq, nb_delv) + 1,
            v.end_seq as i64 + 1,
        ));
        let mut sum = 0.;
        for delv in 0..nb_delv {
            let v_end = difference_as_i64(v.end_seq, delv);
            let ll = feat.delv().likelihood((delv, v.index))
                * feat
                    .error()
                    .likelihood((v.nb_errors(delv), v.length_with_deletion(delv)));
            if ll > ip.min_likelihood {
                *agg.get_mut(v_end) = ll;
                sum += ll;
            }
        }
        if sum == 0. {
            return None;
        }
        Some(AggregatedFeatureEndV {
            start_v3: agg.min,
            end_v3: agg.max,
            dirty_likelihood: RangeArray1::zeros(agg.dim()),
            likelihood: agg,
            index: v.index,
            start_gene: v.start_gene,
        })
    }

    /// Iterate over the non-zero (v_end, likelihood) entries.
    pub fn iter(&self) -> impl Iterator<Item = (i64, &f64)> + '_ {
        self.likelihood.iter().filter(|entry| *entry.1 != 0.0)
    }

    /// Aggregated likelihood for a given V 3' end position.
    pub fn likelihood(&self, ev: i64) -> f64 {
        self.likelihood.get(ev)
    }

    /// Largest value stored in the aggregated likelihood array.
    pub fn max_likelihood(&self) -> f64 {
        self.likelihood.max_value()
    }

    /// Accumulate a posterior update for a given V 3' end position.
    pub fn dirty_update(&mut self, ev: i64, likelihood: f64) {
        *self.dirty_likelihood.get_mut(ev) += likelihood;
    }

    /// Push the accumulated updates back onto the underlying delV and error
    /// features, for the same (delV, error) combinations kept by `new`.
    pub fn disaggregate(
        &self,
        v: &VJAlignment,
        feat: &mut impl FeaturesTrait,
        ip: &InferenceParameters,
    ) {
        for delv in 0..feat.delv().dim().0 {
            let v_end = difference_as_i64(v.end_seq, delv);
            let errors = (v.nb_errors(delv), v.length_with_deletion(delv));
            let ll = feat.delv().likelihood((delv, v.index)) * feat.error().likelihood(errors);
            // Skip combinations that were never aggregated in the first place.
            if ll <= ip.min_likelihood {
                continue;
            }
            let proba = self.dirty_likelihood.get(v_end);
            if proba > 0. {
                feat.delv_mut().dirty_update((delv, v.index), proba);
                feat.error_mut().dirty_update(errors, proba);
            }
        }
    }
}
impl AggregatedFeatureStartJ {
    /// Build the aggregated feature for one J alignment, marginalizing the
    /// delJ and error features over every possible number of J deletions.
    ///
    /// Returns `None` when every (delJ, error) combination falls at or below
    /// `ip.min_likelihood`.
    pub fn new(
        j: &VJAlignment,
        feat: &impl FeaturesTrait,
        ip: &InferenceParameters,
    ) -> Option<AggregatedFeatureStartJ> {
        let nb_delj = feat.delj().dim().0;
        // One slot per possible J 5' start: start_seq ..= start_seq + nb_delj - 1.
        let mut agg = RangeArray1::zeros((
            j.start_seq as i64,
            (j.start_seq + nb_delj) as i64,
        ));
        let mut sum = 0.;
        for delj in 0..nb_delj {
            let j_start = (j.start_seq + delj) as i64;
            let ll = feat.delj().likelihood((delj, j.index))
                * feat
                    .error()
                    .likelihood((j.nb_errors(delj), j.length_with_deletion(delj)));
            if ll > ip.min_likelihood {
                *agg.get_mut(j_start) = ll;
                sum += ll;
            }
        }
        if sum == 0. {
            return None;
        }
        Some(AggregatedFeatureStartJ {
            start_j5: agg.min,
            end_j5: agg.max,
            dirty_likelihood: RangeArray1::zeros(agg.dim()),
            likelihood: agg,
            index: j.index,
            start_seq: j.start_seq,
        })
    }

    /// Iterate over the non-zero (j_start, likelihood) entries.
    pub fn iter(&self) -> impl Iterator<Item = (i64, &f64)> + '_ {
        self.likelihood.iter().filter(|entry| *entry.1 != 0.0)
    }

    /// Aggregated likelihood for a given J 5' start position.
    pub fn likelihood(&self, sj: i64) -> f64 {
        self.likelihood.get(sj)
    }

    /// Largest value stored in the aggregated likelihood array.
    pub fn max_likelihood(&self) -> f64 {
        self.likelihood.max_value()
    }

    /// Accumulate a posterior update for a given J 5' start position.
    pub fn dirty_update(&mut self, sj: i64, likelihood: f64) {
        *self.dirty_likelihood.get_mut(sj) += likelihood;
    }

    /// Push the accumulated updates back onto the underlying delJ and error
    /// features, for the same (delJ, error) combinations kept by `new`.
    pub fn disaggregate(
        &self,
        j: &VJAlignment,
        feat: &mut impl FeaturesTrait,
        ip: &InferenceParameters,
    ) {
        for delj in 0..feat.delj().dim().0 {
            let j_start = (j.start_seq + delj) as i64;
            let errors = (j.nb_errors(delj), j.length_with_deletion(delj));
            let ll = feat.delj().likelihood((delj, j.index)) * feat.error().likelihood(errors);
            // Skip combinations that were never aggregated in the first place.
            if ll <= ip.min_likelihood {
                continue;
            }
            let proba = self.dirty_likelihood.get(j_start);
            if proba > 0. {
                feat.delj_mut().dirty_update((delj, j.index), proba);
                feat.error_mut().dirty_update(errors, proba);
            }
        }
    }
}
impl AggregatedFeatureSpanD {
    /// Aggregate the D-gene features (delD5/delD3 deletions and alignment
    /// errors) over all alignments `ds` of one D gene, indexed by the
    /// (d_start, d_end) span left after deletions.
    ///
    /// All alignments must refer to the same gene (`DAlignment::index`);
    /// panics otherwise. Returns `None` when `ds` is empty or when every
    /// combination falls at or below `ip.min_likelihood`.
    ///
    /// Takes `&[DAlignment]` (a `&Vec<DAlignment>` coerces transparently),
    /// consistent with `disaggregate`.
    pub fn new(
        ds: &[DAlignment],
        feat: &impl FeaturesTrait,
        ip: &InferenceParameters,
    ) -> Option<AggregatedFeatureSpanD> {
        if ds.is_empty() {
            return None;
        }
        let mut total_likelihood = 0.;
        // d_start ranges over [min pos, max pos + nb_deld5);
        // d_end ranges over [min end - nb_deld3 + 1, max end + 1).
        let mut likelihoods = RangeArray2::zeros((
            (
                ds.iter().map(|x| x.pos).min().unwrap() as i64,
                ds.iter().map(|x| x.pos + x.len()).min().unwrap() as i64
                    - feat.deld().dim().1 as i64
                    + 1,
            ),
            (
                ds.iter().map(|x| x.pos).max().unwrap() as i64 + feat.deld().dim().0 as i64,
                ds.iter().map(|x| x.pos + x.len()).max().unwrap() as i64 + 1,
            ),
        ));
        let dindex = ds.first().unwrap().index;
        for d in ds {
            if d.index != dindex {
                panic!("AggregatedFeatureSpanD received different genes.");
            }
            for (deld5, deld3) in iproduct!(0..feat.deld().dim().0, 0..feat.deld().dim().1) {
                let d_start = (d.pos + deld5) as i64;
                // NOTE(review): `d.pos + d.len() - deld3` underflows in usize if
                // deld3 exceeds the alignment end — presumably ruled out by the
                // alignment construction; verify upstream.
                let d_end = (d.pos + d.len() - deld3) as i64;
                // Deletions from both sides must not cross each other.
                if d_start > d_end {
                    continue;
                }
                let ll_deld = feat.deld().likelihood((deld5, deld3, d.index));
                let ll_errord = feat.error().likelihood((
                    d.nb_errors(deld5, deld3),
                    d.length_with_deletion(deld5, deld3),
                ));
                let likelihood = ll_deld * ll_errord;
                if likelihood > ip.min_likelihood {
                    // Several alignments of the same gene can yield the same
                    // span, hence `+=` rather than plain assignment.
                    *likelihoods.get_mut((d_start, d_end)) += likelihood;
                    total_likelihood += likelihood;
                }
            }
        }
        if total_likelihood == 0. {
            return None;
        }
        Some(AggregatedFeatureSpanD {
            start_d5: likelihoods.min.0,
            end_d5: likelihoods.max.0,
            start_d3: likelihoods.min.1,
            end_d3: likelihoods.max.1,
            dirty_likelihood: RangeArray2::zeros(likelihoods.dim()),
            likelihood: likelihoods,
            index: dindex,
        })
    }

    /// Aggregated likelihood of a D segment starting at `sd` and ending at `ed`.
    pub fn likelihood(&self, sd: i64, ed: i64) -> f64 {
        self.likelihood.get((sd, ed))
    }

    /// Iterate over every (d_start, d_end, likelihood) entry (zeros included).
    pub fn iter(&self) -> impl Iterator<Item = (i64, i64, &f64)> + '_ {
        self.likelihood.iter()
    }

    /// Iterate over (d_start, likelihood) entries for a fixed D end position;
    /// empty iterator when `dend` is outside the stored range.
    pub fn iter_fixed_dend(&self, dend: i64) -> impl Iterator<Item = (i64, &f64)> + '_ {
        let iteropt = if (dend < self.likelihood.min.1) || (dend >= self.likelihood.max.1) {
            None
        } else {
            Some(self.likelihood.iter_fixed_2nd(dend))
        };
        iteropt.into_iter().flatten()
    }

    /// Largest value stored in the aggregated likelihood array.
    pub fn max_likelihood(&self) -> f64 {
        self.likelihood.max_value()
    }

    /// Accumulate a posterior update for the span (sd, ed).
    pub fn dirty_update(&mut self, sd: i64, ed: i64, likelihood: f64) {
        *self.dirty_likelihood.get_mut((sd, ed)) += likelihood;
    }

    /// Push the accumulated updates back onto the delD and error features,
    /// splitting each span's update among the contributing
    /// (alignment, deld5, deld3) combinations proportionally to their share
    /// of the aggregated likelihood.
    pub fn disaggregate(
        &self,
        ds: &[DAlignment],
        feat: &mut impl FeaturesTrait,
        ip: &InferenceParameters,
    ) {
        for d in ds.iter() {
            for (deld5, deld3) in iproduct!(0..feat.deld().dim().0, 0..feat.deld().dim().1) {
                let d_start = (d.pos + deld5) as i64;
                let d_end = (d.pos + d.len() - deld3) as i64;
                if d_start > d_end {
                    continue;
                }
                let nb_err = d.nb_errors(deld5, deld3);
                let likelihood = feat.deld().likelihood((deld5, deld3, d.index))
                    * feat
                        .error()
                        .likelihood((nb_err, d.length_with_deletion(deld5, deld3)));
                if likelihood > ip.min_likelihood {
                    let dirty_proba = self.dirty_likelihood.get((d_start, d_end));
                    if dirty_proba > 0. {
                        // Only perform the division when there is an update to
                        // distribute (avoids needless work and any 0/0 case).
                        let corrected_proba =
                            dirty_proba * likelihood / self.likelihood(d_start, d_end);
                        feat.deld_mut()
                            .dirty_update((deld5, deld3, d.index), corrected_proba);
                        feat.error_mut().dirty_update(
                            (nb_err, d.length_with_deletion(deld5, deld3)),
                            corrected_proba,
                        );
                    }
                }
            }
        }
    }
}
/// Insertion likelihoods of the VD junction, indexed by the pair
/// (end of V on the sequence, start of D on the sequence).
pub struct FeatureVD {
    // P(insVD) for each (ev, sd) pair above threshold, zero elsewhere.
    likelihood: RangeArray2,
    // Posterior updates accumulated by `dirty_update`, consumed by
    // `disaggregate`; same index range as `likelihood`.
    dirty_likelihood: RangeArray2,
}
impl FeatureVD {
    /// Pre-compute the VD-junction insertion likelihoods for every plausible
    /// (V 3' end, D 5' start) pair of the sequence.
    ///
    /// Returns `None` when the sequence has no aligned V or no aligned D gene.
    pub fn new(
        sequence: &Sequence,
        feat: &impl FeaturesTrait,
        ip: &InferenceParameters,
    ) -> Option<FeatureVD> {
        if sequence.v_genes.is_empty() || sequence.d_genes.is_empty() {
            return None;
        }
        // Range of possible V ends, extended by the number of V deletions,
        // and of possible D starts, extended by the number of D 5' deletions.
        let min_end_v = sequence.v_genes.iter().map(|x| x.end_seq).min().unwrap() as i64
            - feat.delv().dim().0 as i64
            + 1;
        let min_start_d = sequence.d_genes.iter().map(|x| x.pos).min().unwrap() as i64;
        let max_end_v = sequence.v_genes.iter().map(|x| x.end_seq).max().unwrap() as i64;
        let max_start_d = sequence.d_genes.iter().map(|x| x.pos).max().unwrap() as i64
            + feat.deld().dim().0 as i64
            - 1;
        let mut likelihoods =
            RangeArray2::zeros(((min_end_v, min_start_d), (max_end_v + 1, max_start_d + 1)));
        for ev in min_end_v..=max_end_v {
            for sd in min_start_d..=max_start_d {
                // Only physical configurations: D starts at/after the V end and
                // the insertion length stays below the model maximum.
                if sd >= ev && ((sd - ev) as usize) < feat.insvd().max_nb_insertions() {
                    // `ev - 1`: the insertion segment is evaluated together with
                    // the nucleotide just before it (presumably the first symbol
                    // of the insertion model — confirm against `insvd`).
                    let ins_vd_plus_first = sequence.get_subsequence(ev - 1, sd);
                    let likelihood = feat.insvd().likelihood(&ins_vd_plus_first);
                    if likelihood > ip.min_likelihood {
                        *likelihoods.get_mut((ev, sd)) = likelihood;
                    }
                }
            }
        }
        Some(FeatureVD {
            dirty_likelihood: RangeArray2::zeros(likelihoods.dim()),
            likelihood: likelihoods,
        })
    }

    /// Iterate over the non-zero (ev, sd, likelihood) entries.
    pub fn iter(&self) -> impl Iterator<Item = (i64, i64, &f64)> + '_ {
        self.likelihood.iter().filter(|&(_, _, &v)| v != 0.0)
    }

    /// Largest stored V 3' end position bound.
    pub fn max_ev(&self) -> i64 {
        self.likelihood.max.0
    }

    /// Largest value stored in the likelihood array.
    pub fn max_likelihood(&self) -> f64 {
        self.likelihood.max_value()
    }

    /// Smallest stored V 3' end position.
    pub fn min_ev(&self) -> i64 {
        self.likelihood.min.0
    }

    /// Largest stored D 5' start position bound.
    pub fn max_sd(&self) -> i64 {
        self.likelihood.max.1
    }

    /// Smallest stored D 5' start position.
    pub fn min_sd(&self) -> i64 {
        self.likelihood.min.1
    }

    /// Insertion likelihood for the pair (end of V `ev`, start of D `sd`).
    pub fn likelihood(&self, ev: i64, sd: i64) -> f64 {
        self.likelihood.get((ev, sd))
    }

    /// Accumulate a posterior update for the pair (ev, sd).
    pub fn dirty_update(&mut self, ev: i64, sd: i64, likelihood: f64) {
        *self.dirty_likelihood.get_mut((ev, sd)) += likelihood;
    }

    /// Push the accumulated updates back onto the VD insertion feature.
    ///
    /// (The stored likelihood is checked against the threshold once; the
    /// previous duplicate re-check of the same value was redundant.)
    pub fn disaggregate(
        &self,
        sequence: &Dna,
        feat: &mut impl FeaturesTrait,
        ip: &InferenceParameters,
    ) {
        for ev in self.likelihood.lower().0..self.likelihood.upper().0 {
            for sd in self.likelihood.lower().1..self.likelihood.upper().1 {
                if sd >= ev
                    && ((sd - ev) as usize) < feat.insvd().max_nb_insertions()
                    && self.likelihood(ev, sd) > ip.min_likelihood
                {
                    // Padded extraction: `ev - 1` can fall before the sequence.
                    let ins_vd_plus_first = sequence.extract_padded_subsequence(ev - 1, sd);
                    feat.insvd_mut()
                        .dirty_update(&ins_vd_plus_first, self.dirty_likelihood.get((ev, sd)));
                }
            }
        }
    }
}
/// Insertion likelihoods of the DJ junction, indexed by the pair
/// (end of D on the sequence, start of J on the sequence).
pub struct FeatureDJ {
    // P(insDJ) for each (ed, sj) pair above threshold, zero elsewhere.
    likelihood: RangeArray2,
    // Posterior updates accumulated by `dirty_update`, consumed by
    // `disaggregate`; same index range as `likelihood`.
    dirty_likelihood: RangeArray2,
}
impl FeatureDJ {
    /// Pre-compute the DJ-junction insertion likelihoods for every plausible
    /// (D 3' end, J 5' start) pair of the sequence.
    ///
    /// Returns `None` when the sequence has no aligned D or no aligned J gene.
    pub fn new(
        sequence: &Sequence,
        feat: &impl FeaturesTrait,
        ip: &InferenceParameters,
    ) -> Option<FeatureDJ> {
        if sequence.d_genes.is_empty() || sequence.j_genes.is_empty() {
            return None;
        }
        // Range of possible D ends, extended by the number of D 3' deletions.
        let min_end_d = sequence
            .d_genes
            .iter()
            .map(|x| x.pos + x.len())
            .min()
            .unwrap() as i64
            - feat.deld().dim().1 as i64
            + 1;
        let max_end_d = sequence
            .d_genes
            .iter()
            .map(|x| x.pos + x.len())
            .max()
            .unwrap() as i64;
        // Range of possible J starts, extended by the number of J deletions.
        let min_start_j = sequence.j_genes.iter().map(|x| x.start_seq).min().unwrap() as i64;
        let max_start_j = sequence.j_genes.iter().map(|x| x.start_seq).max().unwrap() as i64
            + feat.delj().dim().0 as i64
            - 1;
        let mut arr =
            RangeArray2::zeros(((min_end_d, min_start_j), (max_end_d + 1, max_start_j + 1)));
        for ed in min_end_d..=max_end_d {
            for sj in min_start_j..=max_start_j {
                // Only physical configurations: J starts at/after the D end and
                // the insertion length stays below the model maximum.
                if sj < ed || ((sj - ed) as usize) >= feat.insdj().max_nb_insertions() {
                    continue;
                }
                // Insertion segment plus one extra nucleotide on the J side,
                // reversed to match the orientation expected by `insdj`
                // (mirrors the reverse in `disaggregate`).
                let mut ins_dj_plus_last = sequence.get_subsequence(ed, sj + 1);
                ins_dj_plus_last.reverse();
                let ll = feat.insdj().likelihood(&ins_dj_plus_last);
                if ll > ip.min_likelihood {
                    *arr.get_mut((ed, sj)) = ll;
                }
            }
        }
        Some(FeatureDJ {
            dirty_likelihood: RangeArray2::zeros(arr.dim()),
            likelihood: arr,
        })
    }

    /// Insertion likelihood for the pair (end of D `ed`, start of J `sj`).
    pub fn likelihood(&self, ed: i64, sj: i64) -> f64 {
        self.likelihood.get((ed, sj))
    }

    /// Largest value stored in the likelihood array.
    pub fn max_likelihood(&self) -> f64 {
        self.likelihood.max_value()
    }

    /// Iterate over the non-zero (ed, sj, likelihood) entries.
    pub fn iter(&self) -> impl Iterator<Item = (i64, i64, &f64)> + '_ {
        self.likelihood.iter().filter(|&(_, _, &v)| v != 0.0)
    }

    /// Largest stored D 3' end position bound.
    pub fn max_ed(&self) -> i64 {
        self.likelihood.max.0
    }

    /// Smallest stored D 3' end position.
    pub fn min_ed(&self) -> i64 {
        self.likelihood.min.0
    }

    /// Largest stored J 5' start position bound.
    pub fn max_sj(&self) -> i64 {
        self.likelihood.max.1
    }

    /// Smallest stored J 5' start position.
    pub fn min_sj(&self) -> i64 {
        self.likelihood.min.1
    }

    /// Accumulate a posterior update for the pair (ed, sj).
    pub fn dirty_update(&mut self, ed: i64, sj: i64, likelihood: f64) {
        *self.dirty_likelihood.get_mut((ed, sj)) += likelihood;
    }

    /// Push the accumulated updates back onto the DJ insertion feature.
    pub fn disaggregate(
        &self,
        sequence: &Dna,
        feat: &mut impl FeaturesTrait,
        ip: &InferenceParameters,
    ) {
        for ed in self.likelihood.lower().0..self.likelihood.upper().0 {
            for sj in self.likelihood.lower().1..self.likelihood.upper().1 {
                if sj < ed || ((sj - ed) as usize) >= feat.insdj().max_nb_insertions() {
                    continue;
                }
                let update = self.dirty_likelihood.get((ed, sj));
                if update <= 0. {
                    continue;
                }
                // Padded extraction: `sj + 1` can fall past the sequence end.
                let mut ins_dj_plus_last = sequence.extract_padded_subsequence(ed, sj + 1);
                ins_dj_plus_last.reverse();
                if feat.insdj().likelihood(&ins_dj_plus_last) > ip.min_likelihood {
                    feat.insdj_mut().dirty_update(&ins_dj_plus_last, update);
                }
            }
        }
    }
}