extern crate fast_math;
#[macro_use]
pub mod utls;
use utls::*;
use fast_math::log2_raw;
type MtchNm = usize;
type Sd = ((AlgnPsPr, AlgnPsPr), AlgnScr, Strnd, (MtchNm, MtchNm)); type Sds = Vec<Sd>;
type PrcsPrb = f64;
type AlgnScrSchm = (AlgnScr, AlgnScr); type SdFrq = usize;
type EvlPr = (Prb, Prb);
type AlgnPrms = (SdFrq, AlgnScr, AlgnScr, Prb, AlgnScr, Prb, EvlPr, EvlPr, EvlPr); type Dst = Prb;
type MsmtchNm = MtchNm;
type MtchMsmtchNmPr = (MtchNm, MsmtchNm);
type PrAlgn = ((BSq, BSq), (AlgnPsPr, AlgnPsPr), AlgnScr, Strnd, (MtchMsmtchNmPr, MtchMsmtchNmPr));
type PrAlgns = Vec<PrAlgn>;
type FnlPrAlgn = ((BSq, BSq), (AlgnPsPr, AlgnPsPr), AlgnScr, Strnd, (BSqLn, BSqLn), (Prb, Prb));
type FnlPrAlgns = Vec<FnlPrAlgn>;
type DpSrc = u32;
const MN_ALGN_SCR: AlgnScr = std::f32::MIN;
const GP: BElm = '-' as BElm;
const MTCH_SCR: AlgnScr = MX_SCR;
const MSMTCH_SCR: AlgnScr = -MTCH_SCR;
const BS_MTCH_PRB: PrcsPrb = 0.25;
const BS_MSMTCH_PRB: PrcsPrb = 1. - BS_MTCH_PRB;
const DGNL: DpSrc = 0;
const VRTCL: DpSrc = DGNL + 1;
const HRZNTL: DpSrc = VRTCL + 1;
#[inline]
pub fn crast(id_pr: &(&Id, &Id), sq_pr: &(&BSq, &BSq), sfx_ars: &Vec<&SfxAr>, ptrn_hsh_mps: &Vec<&PtrnHshMp>, prb_dst_sqs: &Vec<&PrbDstSq>, algn_scr_schm: &AlgnScrSchm, algn_prms: &AlgnPrms, srchs_rvrs_strnd: bool, is_sm_glbl: bool, db_sz: usize, rf_sq_nm: usize, tmp_pr_algn_dr: &Path) {
let (sd_frq, ungp_algn_x_drp, gp_algn_x_drp, cntxt_mtch_prb, bs_rt, sd_e_vl, ref ungp_algn_e_vl_pr, ref gp_algn_e_vl_pr, ref fnl_gp_algn_e_vl_pr) = *algn_prms;
let mut sds = sd_srch(sq_pr, sfx_ars, ptrn_hsh_mps, prb_dst_sqs, sd_frq, cntxt_mtch_prb, bs_rt, sd_e_vl, srchs_rvrs_strnd);
if sds.len() > 0 {
sds = gt_extnd_sds(srchs_rvrs_strnd, ungp_algn_x_drp, cntxt_mtch_prb, bs_rt, ungp_algn_e_vl_pr, sq_pr, prb_dst_sqs, &mut sds);
if sds.len() > 1 {
gt_rdc_sds(cntxt_mtch_prb, bs_rt, sq_pr, prb_dst_sqs, &mut sds);
}
}
if sds.len() == 0 {
return;
} else {
let mut pr_algns = gt_extnd_algns(srchs_rvrs_strnd, gp_algn_x_drp, cntxt_mtch_prb, bs_rt, gp_algn_e_vl_pr, algn_scr_schm, sq_pr, prb_dst_sqs, &sds);
if pr_algns.len() > 1 {
gt_rdc_algns(&mut pr_algns);
}
if pr_algns.len() == 0 {
return;
} else {
let fnl_pr_algns = gt_fnl_pr_algns(sq_pr, prb_dst_sqs, algn_scr_schm, cntxt_mtch_prb, bs_rt, fnl_gp_algn_e_vl_pr, is_sm_glbl, srchs_rvrs_strnd, &pr_algns);
wrt_algns(&tmp_pr_algn_dr, &id_pr, &fnl_pr_algns, db_sz, rf_sq_nm);
}
}
}
#[inline]
fn sd_srch(sq_pr: &(&BSq, &BSq), sfx_ars: &Vec<&SfxAr>, ptrn_hsh_mps: &Vec<&PtrnHshMp>, prb_dst_sqs: &Vec<&PrbDstSq>, sd_frq: SdFrq, cntxt_mtch_prb: Prb, bs_rt: AlgnScr, sd_e_vl: Prb, srchs_rvrs_strnd: bool) -> Sds {
let mut sds = Vec::new();
let rf_sq = sq_pr.0;
let trgt_sq = sq_pr.1;
let rf_sq_ln = rf_sq.len();
let sq_ln = trgt_sq.len();
let trgt_sq_prb_dst_sq = if srchs_rvrs_strnd {prb_dst_sqs[2]} else {prb_dst_sqs[1]};
let strnds = if srchs_rvrs_strnd {vec![true, false]} else {vec![true]};
for strnd in strnds {
let sfx_ar = if strnd {sfx_ars[0]} else {sfx_ars[1]};
let ptrn_hsh_mp = if strnd {ptrn_hsh_mps[0]} else {ptrn_hsh_mps[1]};
let rf_sq_prb_dst_sq = if strnd {prb_dst_sqs[0]} else {prb_dst_sqs[1]};
let mut i = 0;
while i < sq_ln {
if is_msk_elm(trgt_sq[i]) {
i += 1;
continue;
}
let mut srch_rng = (0, 0);
for j in i + 1 .. sq_ln + 1 {
if is_msk_elm(trgt_sq[j - 1]) {
i = j;
break;
}
let ps_pr = (i, j - 1);
let sbsq = &trgt_sq[i .. j];
let sbsq_ln = sbsq.len();
let frq = if sbsq_ln <= MX_K_MR_SZ {
let sfx_ar_indx_pr = ptrn_hsh_mp.get(sbsq);
srch_rng = match sfx_ar_indx_pr {
Some(&sfx_ar_indx_pr) => sfx_ar_indx_pr,
None => (0, 0)
};
srch_rng.1 - srch_rng.0
} else {
srch_rng = gt_sfx_ar_indx_pr(sbsq, &rf_sq[..], sfx_ar, &srch_rng, sbsq_ln - 1, rf_sq_ln);
srch_rng.1 - srch_rng.0
};
if frq <= sd_frq && frq > 0 {
let invrt_sd = 1. / (sbsq_ln as PrcsPrb * cntxt_mtch_prb as PrcsPrb * (1. - cntxt_mtch_prb as PrcsPrb)).sqrt();
for i in srch_rng.0 .. srch_rng.1 {
let sfx_indx = sfx_ar[i];
let mtch_nm = (&rf_sq_prb_dst_sq[sfx_indx .. sfx_indx + sbsq_ln]).iter().zip((&trgt_sq_prb_dst_sq[ps_pr.0 .. ps_pr.0 + sbsq_ln]).iter()).fold(0, |acmltr, itr_pr| acmltr + if gt_jsd(itr_pr.0, itr_pr.1) < cntxt_mtch_prb {1} else {0});
let z = (mtch_nm as PrcsPrb - sbsq_ln as PrcsPrb * cntxt_mtch_prb as PrcsPrb) * invrt_sd;
let e_vl = ((sq_ln - sbsq_ln + 1) as PrcsPrb * (1. - gt_cdf(z))) as Prb;
if e_vl < sd_e_vl {
sds.push((((sfx_indx, sfx_indx + sbsq_ln - 1), ps_pr), gt_blnc_scr(&(sbsq_ln as AlgnScr * MTCH_SCR, mtch_nm as AlgnScr * MTCH_SCR + (sbsq_ln - mtch_nm) as AlgnScr * MSMTCH_SCR), bs_rt), strnd, (sbsq_ln, mtch_nm)));
}
}
i = j;
break;
} else if frq == 0 {
i += 1;
break;
}
if j == sq_ln {
i += 1;
}
}
}
}
sds
}
#[inline]
fn gt_extnd_sds(srchs_rvrs_strnd: bool, ungp_algn_x_drp: AlgnScr, cntxt_mtch_prb: Prb, bs_rt: AlgnScr, ungp_algn_e_vl_pr: &(Prb, Prb), sq_pr: &(&BSq, &BSq), prb_dst_sqs: &Vec<&PrbDstSq>, sds: &mut Sds) -> Sds {
let (rf_sq, trgt_sq) = *sq_pr;
let trgt_sq_prb_dst_sq = if srchs_rvrs_strnd {prb_dst_sqs[2]} else {prb_dst_sqs[1]};
let (rf_sq_ln, trgt_sq_ln) = (rf_sq.len() - 1, trgt_sq.len());
let mut pr_algns = Vec::new();
for sd in sds {
let rf_sq_prb_dst_sq = if sd.2 {prb_dst_sqs[0]} else {prb_dst_sqs[1]};
let mut mx_extnsn = *sd;
let mut is_extnsn_fnsh = false;
while !is_extnsn_fnsh {
if ((sd.0).0).0 > 0 && ((sd.0).1).0 > 0 {
let nxt_ps_pr = (if sd.2 {((sd.0).0).0 - 1} else {rf_sq_ln - 1 - (((sd.0).0).0 - 1)}, ((sd.0).1).0 - 1);
let nxt_bs_pr = (if sd.2 {rf_sq[nxt_ps_pr.0]} else {gt_cmplmnt_bs(rf_sq[nxt_ps_pr.0])}, trgt_sq[nxt_ps_pr.1]);
let algn_scr_pr = (if nxt_bs_pr.0 == nxt_bs_pr.1 {(sd.3).0 += 1; MTCH_SCR} else {MSMTCH_SCR}, gt_strct_scr(&rf_sq_prb_dst_sq[nxt_ps_pr.0], &trgt_sq_prb_dst_sq[nxt_ps_pr.1], cntxt_mtch_prb));
if algn_scr_pr.1 == MTCH_SCR {
(sd.3).1 += 1;
}
let algn_scr = gt_blnc_scr(&algn_scr_pr, bs_rt);
((sd.0).0).0 -= 1;
((sd.0).1).0 -= 1;
sd.1 += algn_scr;
if mx_extnsn.1 < sd.1 {
mx_extnsn = *sd;
}
if mx_extnsn.1 - sd.1 >= ungp_algn_x_drp {
is_extnsn_fnsh = true;
}
} else {
is_extnsn_fnsh = true;
}
}
*sd = mx_extnsn;
let mut mx_extnsn = *sd;
let mut is_extnsn_fnsh = false;
while !is_extnsn_fnsh {
if ((sd.0).0).1 < rf_sq_ln - 1 && ((sd.0).1).1 < trgt_sq_ln - 1 {
let nxt_ps_pr = (if sd.2 {((sd.0).0).1 + 1} else {rf_sq_ln - 1 - (((sd.0).0).1 + 1)}, ((sd.0).1).1 + 1);
let nxt_bs_pr = (if sd.2 {rf_sq[nxt_ps_pr.0]} else {gt_cmplmnt_bs(rf_sq[nxt_ps_pr.0])}, trgt_sq[nxt_ps_pr.1]);
let algn_scr_pr = (if nxt_bs_pr.0 == nxt_bs_pr.1 {(sd.3).0 += 1; MTCH_SCR} else {MSMTCH_SCR}, gt_strct_scr(&rf_sq_prb_dst_sq[nxt_ps_pr.0], &trgt_sq_prb_dst_sq[nxt_ps_pr.1], cntxt_mtch_prb));
if algn_scr_pr.1 == MTCH_SCR {
(sd.3).1 += 1;
}
let algn_scr = gt_blnc_scr(&algn_scr_pr, bs_rt);
((sd.0).0).1 += 1;
((sd.0).1).1 += 1;
sd.1 += algn_scr;
if mx_extnsn.1 < sd.1 {
mx_extnsn = *sd;
}
if mx_extnsn.1 - sd.1 >= ungp_algn_x_drp {
is_extnsn_fnsh = true;
}
} else {
is_extnsn_fnsh = true;
}
}
let sd_ln = (((mx_extnsn.0).0).1 - ((mx_extnsn.0).0).0 + 1) as PrcsPrb;
let mtch_nm = (mx_extnsn.3).0 as PrcsPrb;
let z = (mtch_nm - sd_ln * BS_MTCH_PRB) / (sd_ln * BS_MTCH_PRB * BS_MSMTCH_PRB).sqrt();
let e_vl_cf = trgt_sq_ln as PrcsPrb - sd_ln + 1.;
let sq_e_vl = (e_vl_cf * (1. - gt_cdf(z))) as Prb;
if sq_e_vl < ungp_algn_e_vl_pr.0 {
let mtch_nm = (mx_extnsn.3).1 as PrcsPrb;
let z = (mtch_nm - sd_ln * cntxt_mtch_prb as PrcsPrb) / (sd_ln * cntxt_mtch_prb as PrcsPrb * (1. - cntxt_mtch_prb as PrcsPrb)).sqrt();
let strct_e_vl = (e_vl_cf * (1. - gt_cdf(z))) as Prb;
if strct_e_vl < ungp_algn_e_vl_pr.1 {
pr_algns.push(mx_extnsn);
}
}
}
pr_algns
}
#[inline]
fn gt_rdc_sds(cntxt_mtch_prb: Prb, bs_rt: AlgnScr, sq_pr: &(&BSq, &BSq), prb_dst_sqs: &Vec<&PrbDstSq>, sds: &mut Sds) {
let rf_sq_ln = sq_pr.0.len() - 1;
let mut is_prcs_fnsh = false;
while !is_prcs_fnsh {
let sd_nm = sds.len();
if sd_nm == 1 {
break;
}
let mut sd_indx_pr = (0, 0);
let mut sd_indx = 0;
let mut is_mrg_sd_pr_fnd = false;
let mut is_rmv_sd_fnd = false;
for (i, sd_1) in (&sds[.. sd_nm - 1]).iter().enumerate() {
for (j, sd_2) in (&sds[i + 1 ..]).iter().enumerate() {
if sd_1.2 == sd_2.2 {
if ((sd_1.0).0).0 <= ((sd_2.0).0).0 && ((sd_2.0).0).0 <= ((sd_1.0).0).1 + 1 && ((sd_1.0).1).0 <= ((sd_2.0).1).0 && ((sd_2.0).1).0 <= ((sd_1.0).1).1 + 1 {
if ((sd_2.0).0).1 <= ((sd_1.0).0).1 && ((sd_2.0).1).1 <= ((sd_1.0).1).1 {
sd_indx = i + 1 + j;
is_rmv_sd_fnd = true;
break;
} else if ((sd_1.0).0).1 - ((sd_2.0).0).0 == ((sd_1.0).1).1 - ((sd_2.0).1).0 {
sd_indx_pr = (i, i + 1 + j);
is_mrg_sd_pr_fnd = true;
break;
}
} else if ((sd_2.0).0).0 <= ((sd_1.0).0).0 && ((sd_1.0).0).0 <= ((sd_2.0).0).1 + 1 && ((sd_2.0).1).0 <= ((sd_1.0).1).0 && ((sd_1.0).1).0 <= ((sd_2.0).1).1 + 1 {
if ((sd_1.0).0).1 <= ((sd_2.0).0).1 && ((sd_1.0).1).1 <= ((sd_2.0).1).1 {
sd_indx = i;
is_rmv_sd_fnd = true;
break;
} else if ((sd_2.0).0).1 - ((sd_1.0).0).0 == ((sd_2.0).1).1 - ((sd_1.0).1).0 {
sd_indx_pr = (i + 1 + j, i);
is_mrg_sd_pr_fnd = true;
break;
}
}
}
}
if is_mrg_sd_pr_fnd || is_rmv_sd_fnd {
break;
}
}
if is_mrg_sd_pr_fnd {
sds[sd_indx_pr.0].1 += sds[sd_indx_pr.1].1;
(sds[sd_indx_pr.0].3).0 += (sds[sd_indx_pr.1].3).0;
(sds[sd_indx_pr.0].3).1 += (sds[sd_indx_pr.1].3).1;
let rf_sq = if sds[sd_indx_pr.0].2 {prb_dst_sqs[0]} else {prb_dst_sqs[1]};
let trgt_sq = if sds[sd_indx_pr.0].2 {prb_dst_sqs[1]} else {prb_dst_sqs[2]};
for (i, j) in (((sds[sd_indx_pr.1].0).0).0 .. ((sds[sd_indx_pr.0].0).0).1 + 1).zip(((sds[sd_indx_pr.1].0).1).0 .. ((sds[sd_indx_pr.0].0).1).1 + 1) {
let rf_sq_bs = if sds[sd_indx_pr.0].2 {sq_pr.0[i]} else {gt_cmplmnt_bs(sq_pr.0[rf_sq_ln - 1 - i])};
let algn_scr_pr = (if rf_sq_bs == sq_pr.1[j] {(sds[sd_indx_pr.0].3).0 -= 1; MTCH_SCR} else {MSMTCH_SCR}, gt_strct_scr(&rf_sq[i], &trgt_sq[j], cntxt_mtch_prb));
if algn_scr_pr.1 == MTCH_SCR {
(sds[sd_indx_pr.0].3).1 -= 1;
}
sds[sd_indx_pr.0].1 -= gt_blnc_scr(&algn_scr_pr, bs_rt);
}
((sds[sd_indx_pr.0].0).0).1 = ((sds[sd_indx_pr.1].0).0).1;
((sds[sd_indx_pr.0].0).1).1 = ((sds[sd_indx_pr.1].0).1).1;
sds.remove(sd_indx_pr.1);
} else if is_rmv_sd_fnd {
sds.remove(sd_indx);
} else {
is_prcs_fnsh = true;
}
}
sds.sort_by(|sd_1, sd_2| sd_2.1.partial_cmp(&sd_1.1).expect("Failed to compare 2 seeds"));
let mut is_rmv_fnsh = false;
while !is_rmv_fnsh {
let sd_nm = sds.len();
if sd_nm == 1 {
break;
}
let mut sd_indx = 0;
let mut is_rmv_sd_fnd = false;
for (i, sd_1) in (&sds[.. sd_nm - 1]).iter().enumerate() {
for (j, sd_2) in (&sds[i + 1 ..]).iter().enumerate() {
if sd_1.2 == sd_2.2 {
if (((sd_1.0).0).0 <= ((sd_2.0).0).0 && ((sd_2.0).0).0 <= ((sd_1.0).0).1) || (((sd_1.0).1).0 <= ((sd_2.0).1).0 && ((sd_2.0).1).0 <= ((sd_1.0).1).1) || ((((sd_2.0).0).0 <= ((sd_1.0).0).0 && ((sd_1.0).0).0 <= ((sd_2.0).0).1) || (((sd_2.0).1).0 <= ((sd_1.0).1).0 && ((sd_1.0).1).0 <= ((sd_2.0).1).1)) {
sd_indx = i + 1 + j;
is_rmv_sd_fnd = true;
break;
}
}
}
if is_rmv_sd_fnd {
break;
}
}
if is_rmv_sd_fnd {
sds.remove(sd_indx);
} else {
is_rmv_fnsh = true;
}
}
}
#[inline]
fn gt_extnd_algns(srchs_rvrs_strnd: bool, gp_algn_x_drp: AlgnScr, cntxt_mtch_prb: Prb, bs_rt: AlgnScr, gp_algn_e_vl_pr: &(Prb, Prb), algn_scr_schm: &AlgnScrSchm, sq_pr: &(&BSq, &BSq), prb_dst_sqs: &Vec<&PrbDstSq>, sds: &Sds) -> PrAlgns {
let (rf_sq, trgt_sq) = *sq_pr;
let trgt_sq_prb_dst_sq = if srchs_rvrs_strnd {prb_dst_sqs[2]} else {prb_dst_sqs[1]};
let (rf_sq_ln, trgt_sq_ln) = (rf_sq.len() - 1, trgt_sq.len());
let (gp_opn_pnlty, gp_extnsn_pnlty) = *algn_scr_schm;
let mut pr_algns = Vec::new();
for sd in sds {
let rf_sq_prb_dst_sq = if sd.2 {prb_dst_sqs[0]} else {prb_dst_sqs[1]};
let sd_ln = ((sd.0).0).1 - ((sd.0).0).0 + 1;
let mut pr_algn: PrAlgn = ((if sd.2 {(&rf_sq[((sd.0).0).0 .. ((sd.0).0).1 + 1]).iter().map(|&elm| elm).collect()} else {(&rf_sq[rf_sq_ln - ((sd.0).0).1 .. rf_sq_ln - ((sd.0).0).0 + 1]).iter().rev().map(|&elm| gt_cmplmnt_bs(elm)).collect()}, (&trgt_sq[((sd.0).1).0 .. ((sd.0).1).1 + 1]).iter().map(|&elm| elm).collect()), sd.0, sd.1, sd.2, (((sd.3).0, sd_ln - (sd.3).0), ((sd.3).1, sd_ln - (sd.3).1)));
let mut mx_extnsn = pr_algn.clone();
let mut is_extnsn_fnsh = false;
while !is_extnsn_fnsh {
if ((pr_algn.1).0).0 > 0 && ((pr_algn.1).1).0 > 0 {
let nxt_ps_pr = (if sd.2 {((pr_algn.1).0).0 - 1} else {rf_sq_ln - 1 - (((pr_algn.1).0).0 - 1)}, ((pr_algn.1).1).0 - 1);
let nxt_bs_pr = (if sd.2 {rf_sq[nxt_ps_pr.0]} else {gt_cmplmnt_bs(rf_sq[nxt_ps_pr.0])}, trgt_sq[nxt_ps_pr.1]);
let algn_scr_pr = (if nxt_bs_pr.0 == nxt_bs_pr.1 {MTCH_SCR} else {MSMTCH_SCR}, gt_strct_scr(&rf_sq_prb_dst_sq[nxt_ps_pr.0], &trgt_sq_prb_dst_sq[nxt_ps_pr.1], cntxt_mtch_prb));
let algn_scr = gt_blnc_scr(&algn_scr_pr, bs_rt);
let strt_pr = ((pr_algn.0).0[0], (pr_algn.0).1[0]);
let scr_2_rf_sq_gp_insrt = gp_extnsn_pnlty + if strt_pr.0 == GP {0.} else {gp_opn_pnlty};
let scr_2_trgt_sq_gp_insrt = gp_extnsn_pnlty + if strt_pr.1 == GP {0.} else {gp_opn_pnlty};
let mx_scr = algn_scr.max(scr_2_rf_sq_gp_insrt.max(scr_2_trgt_sq_gp_insrt));
if mx_scr == algn_scr {
if nxt_bs_pr.0 == nxt_bs_pr.1 {
((pr_algn.4).0).0 += 1;
} else {
((pr_algn.4).0).1 += 1;
}
if algn_scr_pr.1 == MTCH_SCR {
((pr_algn.4).1).0 += 1;
} else {
((pr_algn.4).1).1 += 1;
}
(pr_algn.0).0.insert(0, nxt_bs_pr.0);
(pr_algn.0).1.insert(0, nxt_bs_pr.1);
((pr_algn.1).0).0 -= 1;
((pr_algn.1).1).0 -= 1;
} else if mx_scr == scr_2_rf_sq_gp_insrt {
(pr_algn.0).0.insert(0, GP);
(pr_algn.0).1.insert(0, nxt_bs_pr.1);
((pr_algn.1).1).0 -= 1;
} else {
(pr_algn.0).0.insert(0, nxt_bs_pr.0);
(pr_algn.0).1.insert(0, GP);
((pr_algn.1).0).0 -= 1;
}
pr_algn.2 += mx_scr;
if mx_extnsn.2 < pr_algn.2 {
mx_extnsn = pr_algn.clone();
}
if mx_extnsn.2 - pr_algn.2 >= gp_algn_x_drp {
is_extnsn_fnsh = true;
}
} else {
is_extnsn_fnsh = true;
}
}
pr_algn = mx_extnsn.clone();
let mut is_extnsn_fnsh = false;
while !is_extnsn_fnsh {
if ((pr_algn.1).0).1 < rf_sq_ln - 1 && ((pr_algn.1).1).1 < trgt_sq_ln - 1 {
let nxt_ps_pr = (if sd.2 {((pr_algn.1).0).1 + 1} else {rf_sq_ln - 1 - (((pr_algn.1).0).1 + 1)}, ((pr_algn.1).1).1 + 1);
let nxt_bs_pr = (if sd.2 {rf_sq[nxt_ps_pr.0]} else {gt_cmplmnt_bs(rf_sq[nxt_ps_pr.0])}, trgt_sq[nxt_ps_pr.1]);
let algn_scr_pr = (if nxt_bs_pr.0 == nxt_bs_pr.1 {MTCH_SCR} else {MSMTCH_SCR}, gt_strct_scr(&rf_sq_prb_dst_sq[nxt_ps_pr.0], &trgt_sq_prb_dst_sq[nxt_ps_pr.1], cntxt_mtch_prb));
let algn_scr = gt_blnc_scr(&algn_scr_pr, bs_rt);
let end_pr = (*(pr_algn.0).0.last().expect("Failed to get last elem."), *(pr_algn.0).1.last().expect("Failed to get last elem."));
let scr_2_rf_sq_gp_insrt = gp_extnsn_pnlty + if end_pr.0 == GP {0.} else {gp_opn_pnlty};
let scr_2_trgt_sq_gp_insrt = gp_extnsn_pnlty + if end_pr.1 == GP {0.} else {gp_opn_pnlty};
let mx_scr = algn_scr.max(scr_2_rf_sq_gp_insrt.max(scr_2_trgt_sq_gp_insrt));
if mx_scr == algn_scr {
if nxt_bs_pr.0 == nxt_bs_pr.1 {
((pr_algn.4).0).0 += 1;
} else {
((pr_algn.4).0).1 += 1;
}
if algn_scr_pr.1 == MTCH_SCR {
((pr_algn.4).1).0 += 1;
} else {
((pr_algn.4).1).1 += 1;
}
(pr_algn.0).0.push(nxt_bs_pr.0);
(pr_algn.0).1.push(nxt_bs_pr.1);
((pr_algn.1).0).1 += 1;
((pr_algn.1).1).1 += 1;
} else if mx_scr == scr_2_rf_sq_gp_insrt {
(pr_algn.0).0.push(GP);
(pr_algn.0).1.push(nxt_bs_pr.1);
((pr_algn.1).1).1 += 1;
} else {
(pr_algn.0).0.push(nxt_bs_pr.0);
(pr_algn.0).1.push(GP);
((pr_algn.1).0).1 += 1;
}
pr_algn.2 += mx_scr;
if mx_extnsn.2 < pr_algn.2 {
mx_extnsn = pr_algn.clone();
}
if mx_extnsn.2 - pr_algn.2 >= gp_algn_x_drp {
is_extnsn_fnsh = true;
}
} else {
is_extnsn_fnsh = true;
}
}
let algn_ln_wtht_gps = (((mx_extnsn.4).0).0 + ((mx_extnsn.4).0).1) as PrcsPrb;
let mtch_nm = ((mx_extnsn.4).0).0 as PrcsPrb;
let z = (mtch_nm - algn_ln_wtht_gps * BS_MTCH_PRB) / (algn_ln_wtht_gps * BS_MTCH_PRB * BS_MSMTCH_PRB).sqrt();
let e_vl_cf = trgt_sq_ln as PrcsPrb - algn_ln_wtht_gps + 1.;
let sq_e_vl = (e_vl_cf * (1. - gt_cdf(z))) as Prb;
if sq_e_vl < gp_algn_e_vl_pr.0 {
let mtch_nm = ((mx_extnsn.4).1).0 as PrcsPrb;
let z = (mtch_nm - algn_ln_wtht_gps * cntxt_mtch_prb as PrcsPrb) / (algn_ln_wtht_gps * cntxt_mtch_prb as PrcsPrb * (1. - cntxt_mtch_prb as PrcsPrb)).sqrt();
let strct_e_vl = (e_vl_cf * (1. - gt_cdf(z))) as Prb;
if strct_e_vl < gp_algn_e_vl_pr.1 {
pr_algns.push(mx_extnsn);
}
}
}
pr_algns
}
#[inline]
fn gt_rdc_algns(pr_algns: &mut PrAlgns) {
pr_algns.sort_by(|pr_algn_1, pr_algn_2| pr_algn_2.2.partial_cmp(&pr_algn_1.2).expect("Failed to compare 2 alignments"));
let mut is_rmv_fnsh = false;
while !is_rmv_fnsh {
let pr_algn_nm = pr_algns.len();
if pr_algn_nm == 1 {
break;
}
let mut algn_indx = 0;
let mut is_rmv_algn_fnd = false;
for (i, pr_algn_1) in (&pr_algns[.. pr_algn_nm - 1]).iter().enumerate() {
for (j, pr_algn_2) in (&pr_algns[i + 1 ..]).iter().enumerate() {
if pr_algn_1.2 == pr_algn_2.2 {
if (((pr_algn_1.1).0).0 <= ((pr_algn_2.1).0).0 && ((pr_algn_2.1).0).0 <= ((pr_algn_1.1).0).1) || (((pr_algn_1.1).1).0 <= ((pr_algn_2.1).1).0 && ((pr_algn_2.1).1).0 <= ((pr_algn_1.1).1).1) || ((((pr_algn_2.1).0).0 <= ((pr_algn_1.1).0).0 && ((pr_algn_1.1).0).0 <= ((pr_algn_2.1).0).1) || (((pr_algn_2.1).1).0 <= ((pr_algn_1.1).1).0 && ((pr_algn_1.1).1).0 <= ((pr_algn_2.1).1).1)) {
algn_indx = i + 1 + j;
is_rmv_algn_fnd = true;
break;
}
}
}
if is_rmv_algn_fnd {
break;
}
}
if is_rmv_algn_fnd {
pr_algns.remove(algn_indx);
} else {
is_rmv_fnsh = true;
}
}
pr_algns.sort_by_key(|ky| gt_nm_sqr(((ky.1).0).0) + gt_nm_sqr(((ky.1).1).0));
}
#[inline]
fn gt_fnl_pr_algns(sq_pr: &(&BSq, &BSq), prb_dst_sqs: &Vec<&PrbDstSq>, algn_scr_schm: &AlgnScrSchm, cntxt_mtch_prb: Prb, bs_rt: AlgnScr, fnl_gp_algn_e_vl_pr: &(Prb, Prb), is_sm_glbl: bool, srchs_rvrs_strnd: bool, pr_algns: &PrAlgns) -> FnlPrAlgns {
let mut fnl_pr_algns = Vec::new();
let (rf_sq, trgt_sq) = *sq_pr;
let rf_sq_ln = rf_sq.len() - 1;
let trgt_sq_ln = trgt_sq.len();
let trgt_sq_prb_dst_sq = if srchs_rvrs_strnd {prb_dst_sqs[2]} else {prb_dst_sqs[1]};
let (gp_opn_pnlty, gp_extnsn_pnlty) = *algn_scr_schm;
let strnds = if srchs_rvrs_strnd {vec![true, false]} else {vec![true]};
let pr_algn_st_ln = pr_algns.len();
if !is_sm_glbl && ((srchs_rvrs_strnd && pr_algn_st_ln == 2 && pr_algns[0].3 != pr_algns[1].3) || (!srchs_rvrs_strnd && pr_algn_st_ln == 1)) {
for pr_algn in pr_algns {
let mut fnl_pr_algn = (pr_algn.0.clone(), pr_algn.1, pr_algn.2, pr_algn.3, (rf_sq_ln, trgt_sq_ln), (0., 0.));
let algn_ln_wtht_gps = (((pr_algn.4).0).0 + ((pr_algn.4).0).1) as PrcsPrb;
let mtch_nm = ((pr_algn.4).0).0 as PrcsPrb;
let z = (mtch_nm - algn_ln_wtht_gps * BS_MTCH_PRB) / (algn_ln_wtht_gps * BS_MTCH_PRB * BS_MSMTCH_PRB).sqrt();
let e_vl_cf = trgt_sq_ln as PrcsPrb - algn_ln_wtht_gps + 1.;
(fnl_pr_algn.5).0 = (e_vl_cf * (1. - gt_cdf(z))) as Prb;
let mtch_nm = ((pr_algn.4).1).0 as PrcsPrb;
let z = (mtch_nm - algn_ln_wtht_gps * cntxt_mtch_prb as PrcsPrb) / (algn_ln_wtht_gps * cntxt_mtch_prb as PrcsPrb * (1. - cntxt_mtch_prb as PrcsPrb)).sqrt();
(fnl_pr_algn.5).1 = (e_vl_cf * (1. - gt_cdf(z))) as Prb;
if (fnl_pr_algn.5).0 < fnl_gp_algn_e_vl_pr.0 && (fnl_pr_algn.5).1 < fnl_gp_algn_e_vl_pr.1 {
fnl_pr_algns.push(fnl_pr_algn);
}
}
fnl_pr_algns
} else {
for strnd in strnds {
let rf_sq_prb_dst_sq = if strnd {prb_dst_sqs[0]} else {prb_dst_sqs[1]};
let mut dp_mtrx = vec![vec![(MN_ALGN_SCR, DGNL); trgt_sq_ln + 1]; rf_sq_ln + 1];
let dp_mtrx_clmn_ln = dp_mtrx.len();
let dp_mtrx_rw_ln = dp_mtrx[0].len();
dp_mtrx[0][0].0 = 0.;
dp_mtrx[1][0].0 = 0.;
dp_mtrx[0][1].0 = if is_sm_glbl {gp_opn_pnlty + gp_extnsn_pnlty} else {0.};
for i in 2 .. dp_mtrx_clmn_ln {
dp_mtrx[i][0].0 = 0.;
}
for i in 2 .. dp_mtrx_rw_ln {
dp_mtrx[0][i].0 = if is_sm_glbl {dp_mtrx[0][i - 1].0 + gp_extnsn_pnlty} else {0.};
}
let mut fl_ps_pr = (0, 0);
let mut mx_scr_ps_pr = (MN_ALGN_SCR, fl_ps_pr);
for pr_algn in pr_algns {
if pr_algn.3 == strnd {
if ((pr_algn.1).0).0 > 0 && ((pr_algn.1).1).0 > 0 {
for i in fl_ps_pr.0 .. ((pr_algn.1).0).0 {
for j in fl_ps_pr.1 .. ((pr_algn.1).1).0 {
let ps = if strnd {i} else {rf_sq_ln - 1 - i};
let rf_sq_bs = if strnd {rf_sq[ps]} else {gt_cmplmnt_bs(rf_sq[ps])};
let algn_scr_pr = (if rf_sq_bs == trgt_sq[j] {MTCH_SCR} else {MSMTCH_SCR}, gt_strct_scr(&rf_sq_prb_dst_sq[ps], &trgt_sq_prb_dst_sq[j], cntxt_mtch_prb));
let dgnl_scr = dp_mtrx[i][j].0 + gt_blnc_scr(&algn_scr_pr, bs_rt);
let vrtcl_scr = if dp_mtrx[i][j + 1].0 == MN_ALGN_SCR {dp_mtrx[i][j + 1].0} else {dp_mtrx[i][j + 1].0 + gp_extnsn_pnlty + if dp_mtrx[i][j + 1].1 == VRTCL {0.} else {gp_opn_pnlty}};
let hrzntl_scr = if dp_mtrx[i + 1][j].0 == MN_ALGN_SCR {dp_mtrx[i + 1][j].0} else {dp_mtrx[i + 1][j].0 + gp_extnsn_pnlty + if dp_mtrx[i + 1][j].1 == HRZNTL {0.} else {gp_opn_pnlty}};
let mx_scr = dgnl_scr.max(vrtcl_scr.max(hrzntl_scr));
let mx_scr = if is_sm_glbl {mx_scr} else {mx_scr.max(0.)};
dp_mtrx[i + 1][j + 1].0 = mx_scr;
dp_mtrx[i + 1][j + 1].1 = if (!is_sm_glbl && mx_scr == 0.) || mx_scr == dgnl_scr {DGNL} else if mx_scr == vrtcl_scr {VRTCL} else {HRZNTL};
if !is_sm_glbl && mx_scr_ps_pr.0 < mx_scr {
mx_scr_ps_pr = (mx_scr, (i + 1, j + 1));
}
}
}
}
let mut ps_pr = (((pr_algn.1).0).0, ((pr_algn.1).1).0);
for (&algn_elm_1, &algn_elm_2) in (pr_algn.0).0.iter().zip((pr_algn.0).1.iter()) {
let mtrx_cl = dp_mtrx[ps_pr.0][ps_pr.1];
let mut mx_scr;
if algn_elm_1 != GP && algn_elm_2 != GP {
mx_scr = mtrx_cl.0 + gt_blnc_scr(&(if algn_elm_1 == algn_elm_2 {MTCH_SCR} else {MSMTCH_SCR}, gt_strct_scr(&rf_sq_prb_dst_sq[ps_pr.0], &trgt_sq_prb_dst_sq[ps_pr.1], cntxt_mtch_prb)), bs_rt);
if !is_sm_glbl {
mx_scr = mx_scr.max(0.);
}
ps_pr.0 += 1;
ps_pr.1 += 1;
} else if algn_elm_1 == GP {
mx_scr = mtrx_cl.0 + gp_extnsn_pnlty + if mtrx_cl.1 == VRTCL {0.} else {gp_opn_pnlty};
if !is_sm_glbl {
mx_scr = mx_scr.max(0.);
}
ps_pr.1 += 1;
dp_mtrx[ps_pr.0][ps_pr.1].1 = VRTCL;
} else {
mx_scr = mtrx_cl.0 + gp_extnsn_pnlty + if mtrx_cl.1 == HRZNTL {0.} else {gp_opn_pnlty};
if !is_sm_glbl {
mx_scr = mx_scr.max(0.);
}
ps_pr.0 += 1;
dp_mtrx[ps_pr.0][ps_pr.1].1 = HRZNTL;
}
dp_mtrx[ps_pr.0][ps_pr.1].0 = mx_scr;
if !is_sm_glbl && mx_scr_ps_pr.0 < mx_scr {
mx_scr_ps_pr = (mx_scr, ps_pr);
}
}
fl_ps_pr = (((pr_algn.1).0).1 + 1, ((pr_algn.1).1).1 + 1);
}
}
if fl_ps_pr.0 < rf_sq_ln && fl_ps_pr.1 < trgt_sq_ln {
for i in fl_ps_pr.0 .. rf_sq_ln {
for j in fl_ps_pr.1 .. trgt_sq_ln {
let ps_pr = (if strnd {i} else {rf_sq_ln - 1 - i}, j);
let rf_sq_bs = if strnd {rf_sq[ps_pr.0]} else {gt_cmplmnt_bs(rf_sq[ps_pr.0])};
let algn_scr_pr = (if rf_sq_bs == trgt_sq[ps_pr.1] {MTCH_SCR} else {MSMTCH_SCR}, gt_strct_scr(&rf_sq_prb_dst_sq[ps_pr.0], &trgt_sq_prb_dst_sq[ps_pr.1], cntxt_mtch_prb));
let dgnl_scr = if dp_mtrx[i][j].0 == MN_ALGN_SCR {dp_mtrx[i][j].0} else {dp_mtrx[i][j].0 + gt_blnc_scr(&algn_scr_pr, bs_rt)};
let vrtcl_scr = if dp_mtrx[i][j + 1].0 == MN_ALGN_SCR {dp_mtrx[i][j + 1].0} else {dp_mtrx[i][j + 1].0 + gp_extnsn_pnlty + if dp_mtrx[i][j + 1].1 == VRTCL {0.} else {gp_opn_pnlty}};
let hrzntl_scr = if dp_mtrx[i + 1][j].0 == MN_ALGN_SCR {dp_mtrx[i + 1][j].0} else {dp_mtrx[i + 1][j].0 + gp_extnsn_pnlty + if dp_mtrx[i + 1][j].1 == HRZNTL {0.} else {gp_opn_pnlty}};
let mx_scr = dgnl_scr.max(vrtcl_scr.max(hrzntl_scr));
let mx_scr = if is_sm_glbl {mx_scr} else {mx_scr.max(0.)};
dp_mtrx[i + 1][j + 1].0 = mx_scr;
dp_mtrx[i + 1][j + 1].1 = if (!is_sm_glbl && mx_scr == 0.) || mx_scr == dgnl_scr {DGNL} else if mx_scr == vrtcl_scr {VRTCL} else {HRZNTL};
if !is_sm_glbl && mx_scr_ps_pr.0 < mx_scr {
mx_scr_ps_pr = (mx_scr, (i + 1, j + 1));
}
}
}
}
if is_sm_glbl {
if fl_ps_pr.0 == rf_sq_ln && fl_ps_pr.1 < trgt_sq_ln {
for j in fl_ps_pr.1 .. trgt_sq_ln {
dp_mtrx[rf_sq_ln][j + 1].0 = if dp_mtrx[rf_sq_ln][j].0 == MN_ALGN_SCR {dp_mtrx[rf_sq_ln][j].0} else {dp_mtrx[rf_sq_ln][j].0 + gp_extnsn_pnlty + if dp_mtrx[rf_sq_ln][j].1 == HRZNTL {0.} else {gp_opn_pnlty}};
}
}
for i in 1 .. dp_mtrx_clmn_ln {
let scr = dp_mtrx[i][trgt_sq_ln].0;
if mx_scr_ps_pr.0 < scr {
mx_scr_ps_pr = (scr, (i, trgt_sq_ln));
}
}
}
let mut fnl_pr_algn = (Vec::new(), Vec::new());
let (mut i, mut j) = mx_scr_ps_pr.1;
let (mut prvs_i, mut prvs_j) = (i, j);
let mut scr_prfls = ((0., 0.), 0.);
while i > 0 || j > 0 {
if (!is_sm_glbl && dp_mtrx[i][j].0 == 0.) || (is_sm_glbl && j == 0) {
break;
}
prvs_i = i;
prvs_j = j;
if j == 0 {
let ps = if strnd {i - 1} else {rf_sq_ln - 1 - (i - 1)};
let rf_sq_bs = if strnd {rf_sq[ps]} else {gt_cmplmnt_bs(rf_sq[ps])};
fnl_pr_algn.0.insert(0, rf_sq_bs);
fnl_pr_algn.1.insert(0, GP);
i -= 1;
continue;
} else if i == 0 {
fnl_pr_algn.0.insert(0, GP);
fnl_pr_algn.1.insert(0, trgt_sq[j - 1]);
j -= 1;
continue;
}
let src = dp_mtrx[i][j].1;
let ps = if strnd {i - 1} else {rf_sq_ln - 1 - (i - 1)};
let rf_sq_bs = if strnd {rf_sq[ps]} else {gt_cmplmnt_bs(rf_sq[ps])};
if src == DGNL {
fnl_pr_algn.0.insert(0, rf_sq_bs);
fnl_pr_algn.1.insert(0, trgt_sq[j - 1]);
if rf_sq_bs == trgt_sq[j - 1] {
(scr_prfls.0).0 += 1.;
} else {
(scr_prfls.0).1 += 1.;
}
if gt_jsd(&rf_sq_prb_dst_sq[i - 1], &trgt_sq_prb_dst_sq[j - 1]) < cntxt_mtch_prb {
scr_prfls.1 += 1.;
}
i -= 1;
j -= 1;
} else if src == VRTCL {
fnl_pr_algn.0.insert(0, rf_sq_bs);
fnl_pr_algn.1.insert(0, GP);
i -= 1;
} else {
fnl_pr_algn.0.insert(0, GP);
fnl_pr_algn.1.insert(0, trgt_sq[j - 1]);
j -= 1;
}
}
let algn_ln_wtht_gps = (scr_prfls.0).0 + (scr_prfls.0).1;
let z = ((scr_prfls.0).0 - algn_ln_wtht_gps * BS_MTCH_PRB) / (algn_ln_wtht_gps * BS_MTCH_PRB * BS_MSMTCH_PRB).sqrt();
let e_vl_cf = trgt_sq_ln as PrcsPrb - algn_ln_wtht_gps + 1.;
let sq_e_vl = (e_vl_cf * (1. - gt_cdf(z))) as Prb;
if sq_e_vl < fnl_gp_algn_e_vl_pr.0 {
let z = (scr_prfls.1 - algn_ln_wtht_gps * cntxt_mtch_prb as PrcsPrb) / (algn_ln_wtht_gps * cntxt_mtch_prb as PrcsPrb * (1. - cntxt_mtch_prb as PrcsPrb)).sqrt();
let strct_e_vl = (e_vl_cf * (1. - gt_cdf(z))) as Prb;
if strct_e_vl < fnl_gp_algn_e_vl_pr.1 {
let fnl_pr_algn = (fnl_pr_algn, ((if prvs_i == 0 {0} else {prvs_i - 1}, (mx_scr_ps_pr.1).0 - 1), (if prvs_j == 0 {0} else {prvs_j - 1}, (mx_scr_ps_pr.1).1 - 1)), mx_scr_ps_pr.0, strnd, (rf_sq_ln, trgt_sq_ln), (sq_e_vl, strct_e_vl));
fnl_pr_algns.push(fnl_pr_algn);
}
}
}
fnl_pr_algns
}
}
#[inline]
fn gt_strnd<'a>(strnd: Strnd) -> &'a str {
if strnd {"+"} else {"-"}
}
#[inline]
fn wrt_algns(tmp_dr: &Path, id_pr: &(&Id, &Id), fnl_pr_algns: &FnlPrAlgns, db_sz: usize, rf_sq_nm: usize) {
let tmp_otpt_fl = (id_pr.0.clone() + "_" + &id_pr.1).replace("/", "");
let mut tmp_otpt_fl = File::create(tmp_dr.join(&tmp_otpt_fl).to_str().expect("Failed to parse temp. file path")).expect("Failed to create temp. output file");
for fnl_pr_algn in fnl_pr_algns {
let (ref sq_pr, ref strt_end_pr, scr, strnd, ref sq_ln_pr, e_vl_pr) = *fnl_pr_algn;
let algn_ln = sq_pr.0.len();
let e_vl_cf = (db_sz - rf_sq_nm * (algn_ln - 1)) as Prb;
let bfr = format!("a score={} E1={:e} E2={:e}\n", scr, e_vl_cf * e_vl_pr.0, e_vl_cf * e_vl_pr.1);
let _ = tmp_otpt_fl.write_all(bfr.as_bytes());
let bfr = format!("s {} {} {} {} {} {}\n", &id_pr.0, (strt_end_pr.0).0, ((strt_end_pr.0).1 - (strt_end_pr.0).0 + 1), gt_strnd(strnd), sq_ln_pr.0, std::str::from_utf8(&sq_pr.0).expect("Failed to get alignment"));
let _ = tmp_otpt_fl.write_all(bfr.as_bytes());
let bfr = format!("s {} {} {} + {} {}\n\n", &id_pr.1, (strt_end_pr.1).0, ((strt_end_pr.1).1 - (strt_end_pr.1).0 + 1), sq_ln_pr.1, std::str::from_utf8(&sq_pr.1).expect("Failed to get alignment"));
let _ = tmp_otpt_fl.write_all(bfr.as_bytes());
}
}
#[inline]
fn gt_nm_sqr(nm: usize) -> usize {
nm * nm
}
#[inline]
fn gt_jsd(prb_dst_1: &PrbDst, prb_dst_2: &PrbDst) -> Dst {
let mut md_dst = [0.; PRB_DST_DM];
for (&prb_1, &prb_2, md_prb) in multizip((prb_dst_1.iter(), prb_dst_2.iter(), &mut md_dst)) {
*md_prb = (prb_1 + prb_2) / 2.;
}
let jsd = ((gt_kld(prb_dst_1, &md_dst) + gt_kld(prb_dst_2, &md_dst)) / 2.).sqrt();
jsd
}
#[inline]
fn gt_kld(prb_dst_1: &PrbDst, prb_dst_2: &PrbDst) -> Dst {
prb_dst_1.iter().zip(prb_dst_2.iter()).fold(0., |acmltr, (&prb_1, &prb_2)| acmltr + if prb_1 <= 0. || prb_2 <= 0. {0.} else {prb_1 * log2_raw(prb_1 / prb_2)})
}
#[inline]
fn gt_strct_scr(prb_dst_1: &PrbDst, prb_dst_2: &PrbDst, cntxt_mtch_prb: Prb) -> AlgnScr {
if gt_jsd(prb_dst_1, prb_dst_2) < cntxt_mtch_prb {
MTCH_SCR
} else {
MSMTCH_SCR
}
}
#[inline]
fn is_msk_elm(elm: BElm) -> bool {
match elm {
A => false,
T => false,
U => false,
C => false,
G => false,
_ => true,
}
}
#[inline]
fn gt_blnc_scr(scr_pr: &(AlgnScr, AlgnScr), bs_rt: AlgnScr) -> AlgnScr {
bs_rt * scr_pr.0 + (1. - bs_rt) * scr_pr.1
}
#[inline]
fn gt_cdf(intgrt_upr_bnd: PrcsPrb) -> PrcsPrb {
1. / (1. + (-1.7 * intgrt_upr_bnd).exp())
}