1extern crate bio;
2extern crate getopts;
3extern crate hashbrown;
4extern crate itertools;
5extern crate ndarray;
6extern crate num_cpus;
7extern crate rna_algos;
8extern crate scoped_threadpool;
9
10pub use bio::io::fasta::Reader;
11pub use getopts::Options;
12pub use hashbrown::HashSet;
13pub use itertools::multizip;
14pub use ndarray::prelude::*;
15pub use rna_algos::compiled_align_scores::*;
16pub use rna_algos::durbin_algo::*;
17pub use rna_algos::mccaskill_algo::*;
18pub use rna_algos::utils::*;
19pub use scoped_threadpool::Pool;
20pub use std::cmp::Ord;
21pub use std::fs::create_dir;
22pub use std::fs::File;
23pub use std::io::prelude::*;
24pub use std::io::BufWriter;
25pub use std::marker::{Send, Sync};
26pub use std::path::Path;
27pub use std::str::from_utf8_unchecked;
28
29pub type ContextProfs = Array2<Prob>;
30pub type ContextProf = Array1<Prob>;
31pub type ContextProfSetPair = (ContextProfs, ContextProfs);
32pub type PosQuadMat<T> = HashSet<PosQuad<T>>;
33pub type PosPairMatSet<T> = HashMap<PosPair<T>, PosPairMat<T>>;
34pub type PosPairMat<T> = HashSet<PosPair<T>>;
35pub type ProbMat4d<T> = HashMap<PosQuad<T>, Prob>;
36pub type SumMat4d<T> = HashMap<PosQuad<T>, Sum>;
37pub type LoopSumsMat<T> = HashMap<PosPair<T>, LoopSums>;
38
39#[derive(Clone)]
40pub struct LoopSums {
41 pub sum_seqalign: Sum,
42 pub sum_seqalign_multibranch: Sum,
43 pub sum_multibranch: Sum,
44 pub sum_1st_pairmatches: Sum,
45 pub sum_1ormore_pairmatches: Sum,
46 pub sum_0ormore_pairmatches: Sum,
47}
48
49#[derive(Clone)]
50pub struct AlignfoldSums<T> {
51 pub sums_close: SumMat4d<T>,
52 pub sums_accessible_external: SumMat4d<T>,
53 pub sums_accessible_multibranch: SumMat4d<T>,
54 pub forward_sums_external: SparseSumMat<T>,
55 pub backward_sums_external: SparseSumMat<T>,
56 pub forward_sums_external2: SparseSumMat<T>,
57 pub backward_sums_external2: SparseSumMat<T>,
58 pub forward_sums_hashed_poss: LoopSumsHashedPoss<T>,
59 pub backward_sums_hashed_poss: LoopSumsHashedPoss<T>,
60 pub forward_sums_hashed_poss2: LoopSumsHashedPoss<T>,
61 pub backward_sums_hashed_poss2: LoopSumsHashedPoss<T>,
62}
63
64pub type ScoreMat = Vec<Vec<Score>>;
65
66pub struct AlignfoldScores<T> {
67 pub loopmatch_scores: SparseScoreMat<T>,
68 pub pairmatch_scores: ScoreMat4d<T>,
69 pub range_insert_scores: ScoreMat,
70 pub range_insert_scores2: ScoreMat,
71}
72
73pub type ParamSetsHashedIds<T> = HashMap<RnaIdPair, AlignfoldScores<T>>;
74pub type Probs4dHashedIds<T> = HashMap<RnaIdPair, ProbMat4d<T>>;
75pub type ProbMats<T> = Vec<SparseProbMat<T>>;
76pub type Prob1dMats = Vec<Probs>;
77pub type Arg = String;
78pub type Args = Vec<Arg>;
79pub type FastaId = String;
80pub type SeqPair<'a> = (SeqSlice<'a>, SeqSlice<'a>);
81pub type SeqSlices<'a> = Vec<SeqSlice<'a>>;
82pub type ScorePair = (Score, Score);
83pub type SparseScoreMat<T> = HashMap<PosPair<T>, Score>;
84pub type PosPairsHashedPoss<T> = HashMap<PosPair<T>, PosPair<T>>;
85pub type BoolsHashedPoss<T> = HashMap<PosPair<T>, bool>;
86pub type ProbMatPair<'a, T> = (&'a SparseProbMat<T>, &'a SparseProbMat<T>);
87pub type FoldScoreSetPair<'a, T> = (&'a FoldScores<T>, &'a FoldScores<T>);
88pub type NumThreads = u32;
89
90pub struct AlignfoldProbMats<T> {
91 pub basepair_probs_pair: SparseProbMatPair<T>,
92 pub context_profs_pair: ContextProfSetPair,
93 pub pairmatch_probs: ProbMat4d<T>,
94 pub loopmatch_probs: SparseProbMat<T>,
95 pub match_probs: SparseProbMat<T>,
96}
97
98#[derive(Clone)]
99pub struct AlignfoldProbMatsAvg<T> {
100 pub basepair_probs: SparseProbMat<T>,
101 pub context_profs: ContextProfs,
102}
103
104pub type SparseProbMatPair<T> = (SparseProbMat<T>, SparseProbMat<T>);
105pub type ProbMatSetsAvg<T> = Vec<AlignfoldProbMatsAvg<T>>;
106pub type AlignfoldProbsHashedIds<T> = HashMap<RnaIdPair, AlignfoldProbMats<T>>;
107pub type Poss<T> = Vec<T>;
108pub type LoopSumsHashedPoss<T> = HashMap<PosPair<T>, LoopSumsMat<T>>;
109
110#[derive(Clone)]
111pub struct MatchProbMats<T> {
112 pub loopmatch_probs: SparseProbMat<T>,
113 pub pairmatch_probs: ProbMat4d<T>,
114 pub match_probs: SparseProbMat<T>,
115}
116
117pub type MatchProbsHashedIds<T> = HashMap<RnaIdPair, MatchProbMats<T>>;
118pub type SparseProbsHashedIds<T> = HashMap<RnaIdPair, SparseProbMat<T>>;
119pub type PosQuadsHashedLens<T> = HashMap<PosPair<T>, PosPairMat<T>>;
120pub type SparsePoss<T> = HashSet<T>;
121pub type SparsePosSets<T> = HashMap<T, SparsePoss<T>>;
122
123pub type OutputsSparsePossGetter<T> = (
124 PosPairMatSet<T>,
125 PosPairMatSet<T>,
126 PosQuadMat<T>,
127 PosQuadsHashedLens<T>,
128 SparsePosSets<T>,
129 SparsePosSets<T>,
130);
131
132pub type InputsAlignfoldProbsGetter<'a, T> = (
133 &'a PosPair<T>,
134 &'a AlignfoldScores<T>,
135 &'a PosPair<T>,
136 &'a AlignfoldSums<T>,
137 &'a FoldScoreSetPair<'a, T>,
138 bool,
139 Sum,
140 &'a PosQuadsHashedLens<T>,
141 bool,
142 &'a PosPairMatSet<T>,
143 &'a PosPairMatSet<T>,
144 &'a SparsePosSets<T>,
145 &'a SparsePosSets<T>,
146 &'a AlignScores,
147);
148
149pub type InputsConsprobCore<'a, T> = (
150 &'a PosPair<T>,
151 &'a AlignfoldScores<T>,
152 &'a PosPair<T>,
153 &'a FoldScoreSetPair<'a, T>,
154 bool,
155 &'a PosPairMatSet<T>,
156 &'a PosPairMatSet<T>,
157 &'a PosQuadsHashedLens<T>,
158 bool,
159 &'a SparsePosSets<T>,
160 &'a SparsePosSets<T>,
161 &'a AlignScores,
162);
163
164pub type Inputs2loopSumsGetter<'a, T> = (
165 &'a PosQuad<T>,
166 &'a AlignfoldSums<T>,
167 bool,
168 &'a PosPairMatSet<T>,
169 &'a FoldScoreSetPair<'a, T>,
170 &'a AlignfoldScores<T>,
171 &'a SparsePosSets<T>,
172 &'a SparsePosSets<T>,
173 &'a AlignScores,
174);
175
176pub type InputsLoopSumsGetter<'a, T> = (
177 &'a AlignfoldScores<T>,
178 &'a PosQuad<T>,
179 &'a mut AlignfoldSums<T>,
180 bool,
181 &'a PosPairMatSet<T>,
182 &'a SparsePosSets<T>,
183 &'a SparsePosSets<T>,
184 &'a AlignScores,
185);
186
187pub type InputsInsideSumsGetter<'a, T> = (
188 &'a PosPair<T>,
189 &'a AlignfoldScores<T>,
190 &'a PosPair<T>,
191 &'a FoldScoreSetPair<'a, T>,
192 &'a PosPairMatSet<T>,
193 &'a PosPairMatSet<T>,
194 &'a PosQuadsHashedLens<T>,
195 &'a SparsePosSets<T>,
196 &'a SparsePosSets<T>,
197 &'a AlignScores,
198);
199
200impl<T: HashIndex> AlignfoldProbMats<T> {
201 pub fn origin() -> AlignfoldProbMats<T> {
202 AlignfoldProbMats {
203 basepair_probs_pair: (SparseProbMat::<T>::default(), SparseProbMat::<T>::default()),
204 context_profs_pair: (ContextProfs::default((0, 0)), ContextProfs::default((0, 0))),
205 pairmatch_probs: ProbMat4d::<T>::default(),
206 loopmatch_probs: SparseProbMat::<T>::default(),
207 match_probs: SparseProbMat::<T>::default(),
208 }
209 }
210
211 pub fn new(seq_len_pair: &(usize, usize)) -> AlignfoldProbMats<T> {
212 let neg_infs_pair = (
213 NEG_INFINITY * ContextProfs::ones((seq_len_pair.0, NUM_CONTEXTS)),
214 NEG_INFINITY * ContextProfs::ones((seq_len_pair.1, NUM_CONTEXTS)),
215 );
216 AlignfoldProbMats {
217 basepair_probs_pair: (SparseProbMat::<T>::default(), SparseProbMat::<T>::default()),
218 context_profs_pair: neg_infs_pair,
219 pairmatch_probs: ProbMat4d::<T>::default(),
220 loopmatch_probs: SparseProbMat::<T>::default(),
221 match_probs: SparseProbMat::<T>::default(),
222 }
223 }
224}
225
226impl<T: HashIndex> AlignfoldProbMatsAvg<T> {
227 pub fn origin() -> AlignfoldProbMatsAvg<T> {
228 AlignfoldProbMatsAvg {
229 basepair_probs: SparseProbMat::<T>::default(),
230 context_profs: ContextProfs::default((0, 0)),
231 }
232 }
233
234 pub fn new(seq_len: usize) -> AlignfoldProbMatsAvg<T> {
235 AlignfoldProbMatsAvg {
236 basepair_probs: SparseProbMat::<T>::default(),
237 context_profs: ContextProfs::zeros((seq_len, NUM_CONTEXTS)),
238 }
239 }
240}
241
242impl<T: HashIndex> AlignfoldScores<T> {
243 pub fn origin() -> AlignfoldScores<T> {
244 let scores = SparseScoreMat::<T>::default();
245 let score_mat = ScoreMat4d::<T>::default();
246 let insert_scores = Vec::new();
247 AlignfoldScores {
248 loopmatch_scores: scores,
249 pairmatch_scores: score_mat,
250 range_insert_scores: insert_scores.clone(),
251 range_insert_scores2: insert_scores,
252 }
253 }
254
255 pub fn new(
256 seq_pair: &SeqPair,
257 pos_quads: &PosQuadMat<T>,
258 match_probs: &SparseProbMat<T>,
259 align_scores: &AlignScores,
260 ) -> AlignfoldScores<T> {
261 let seq_len_pair = (
262 T::from_usize(seq_pair.0.len()).unwrap(),
263 T::from_usize(seq_pair.1.len()).unwrap(),
264 );
265 let mut alignfold_scores = AlignfoldScores::<T>::origin();
266 let mat = vec![
267 vec![NEG_INFINITY; seq_len_pair.0.to_usize().unwrap()];
268 seq_len_pair.0.to_usize().unwrap()
269 ];
270 alignfold_scores.range_insert_scores = mat;
271 let mat = vec![
272 vec![NEG_INFINITY; seq_len_pair.1.to_usize().unwrap()];
273 seq_len_pair.1.to_usize().unwrap()
274 ];
275 alignfold_scores.range_insert_scores2 = mat;
276 for i in range(T::one(), seq_len_pair.1 - T::one()) {
277 let long_i = i.to_usize().unwrap();
278 let base = seq_pair.1[long_i];
279 let mut sum = align_scores.insert_scores[base];
280 alignfold_scores.range_insert_scores2[long_i][long_i] = sum;
281 for j in range(i + T::one(), seq_len_pair.1 - T::one()) {
282 let long_j = j.to_usize().unwrap();
283 let base = seq_pair.1[long_j];
284 let term = align_scores.insert_scores[base] + align_scores.insert_extend_score;
285 sum += term;
286 alignfold_scores.range_insert_scores2[long_i][long_j] = sum;
287 }
288 }
289 for i in range(T::one(), seq_len_pair.0 - T::one()) {
290 let long_i = i.to_usize().unwrap();
291 let base = seq_pair.0[long_i];
292 let term = align_scores.insert_scores[base];
293 let mut sum = term;
294 alignfold_scores.range_insert_scores[long_i][long_i] = sum;
295 for j in range(i + T::one(), seq_len_pair.0 - T::one()) {
296 let long_j = j.to_usize().unwrap();
297 let base = seq_pair.0[long_j];
298 let term = align_scores.insert_scores[base] + align_scores.insert_extend_score;
299 sum += term;
300 alignfold_scores.range_insert_scores[long_i][long_j] = sum;
301 }
302 }
303 for pos_pair in match_probs.keys() {
304 let &(i, j) = pos_pair;
305 let (long_i, long_j) = (i.to_usize().unwrap(), j.to_usize().unwrap());
306 let base_pair = (seq_pair.0[long_i], seq_pair.1[long_j]);
307 alignfold_scores.loopmatch_scores.insert(
308 *pos_pair,
309 align_scores.match_scores[base_pair.0][base_pair.1],
310 );
311 }
312 for &(i, j, k, l) in pos_quads {
313 let (long_i, long_j, long_k, long_l) = (
314 i.to_usize().unwrap(),
315 j.to_usize().unwrap(),
316 k.to_usize().unwrap(),
317 l.to_usize().unwrap(),
318 );
319 let pos_quad = (i, j, k, l);
320 let base_pair = (seq_pair.0[long_i], seq_pair.0[long_j]);
321 let base_pair2 = (seq_pair.1[long_k], seq_pair.1[long_l]);
322 alignfold_scores.pairmatch_scores.insert(
323 pos_quad,
324 align_scores.match_scores[base_pair.0][base_pair2.0]
325 + align_scores.match_scores[base_pair.1][base_pair2.1],
326 );
327 }
328 alignfold_scores
329 }
330}
331
332impl Default for LoopSums {
333 fn default() -> Self {
334 Self::new()
335 }
336}
337
338impl LoopSums {
339 pub fn new() -> LoopSums {
340 LoopSums {
341 sum_seqalign: NEG_INFINITY,
342 sum_seqalign_multibranch: NEG_INFINITY,
343 sum_multibranch: NEG_INFINITY,
344 sum_1st_pairmatches: NEG_INFINITY,
345 sum_1ormore_pairmatches: NEG_INFINITY,
346 sum_0ormore_pairmatches: NEG_INFINITY,
347 }
348 }
349}
350
351impl<T: HashIndex> Default for AlignfoldSums<T> {
352 fn default() -> Self {
353 Self::new()
354 }
355}
356
357impl<T: HashIndex> AlignfoldSums<T> {
358 pub fn new() -> AlignfoldSums<T> {
359 let sum_mat = SumMat4d::<T>::default();
360 let sums = SparseSumMat::<T>::default();
361 let loop_sums_hashed_poss = LoopSumsHashedPoss::<T>::default();
362 AlignfoldSums {
363 sums_close: sum_mat.clone(),
364 sums_accessible_external: sum_mat.clone(),
365 sums_accessible_multibranch: sum_mat,
366 forward_sums_external: sums.clone(),
367 backward_sums_external: sums.clone(),
368 forward_sums_external2: sums.clone(),
369 backward_sums_external2: sums,
370 forward_sums_hashed_poss: loop_sums_hashed_poss.clone(),
371 backward_sums_hashed_poss: loop_sums_hashed_poss.clone(),
372 forward_sums_hashed_poss2: loop_sums_hashed_poss.clone(),
373 backward_sums_hashed_poss2: loop_sums_hashed_poss,
374 }
375 }
376}
377
378impl<T> Default for MatchProbMats<T> {
379 fn default() -> Self {
380 Self::new()
381 }
382}
383
384impl<T> MatchProbMats<T> {
385 pub fn new() -> MatchProbMats<T> {
386 MatchProbMats {
387 loopmatch_probs: SparseProbMat::<T>::default(),
388 pairmatch_probs: ProbMat4d::<T>::default(),
389 match_probs: SparseProbMat::<T>::default(),
390 }
391 }
392}
393
394pub const DEFAULT_MIN_BASEPAIR_PROB: Prob = 0.01;
395pub const DEFAULT_MIN_MATCH_PROB: Prob = 0.01;
396pub const BASEPAIR_PROBS_FILE: &str = "basepair_probs.dat";
397pub const UNPAIR_PROBS_FILE_HAIRPIN: &str = "unpair_probs_hairpin.dat";
398pub const UNPAIR_PROBS_FILE_BULGE: &str = "unpair_probs_bulge.dat";
399pub const UNPAIR_PROBS_FILE_INTERIOR: &str = "unpair_probs_interior.dat";
400pub const UNPAIR_PROBS_FILE_MULTIBRANCH: &str = "unpair_probs_multibranch.dat";
401pub const UNPAIR_PROBS_FILE_EXTERNAL: &str = "unpair_probs_external.dat";
402pub const BASEPAIR_PROBS_FILE2: &str = "basepair_probs2.dat";
403pub const PAIRMATCH_PROBS_FILE: &str = "pairmatch_probs.dat";
404pub const LOOPMATCH_PROBS_FILE: &str = "loopmatch_probs.dat";
405pub const MATCH_PROBS_FILE: &str = "match_probs.dat";
406pub const README_FILE: &str = "README.md";
407pub const README_CONTENTS: &str = "# basepair_probs.dat\n
408This file contains average probabilistic consistency based on posterior nucleotide pair-matching probabilities. You can treat this average probabilistic consistency like conventional nucleotide base-pairing probabilities. Nucleotide positions are indexed starting from zero.\n\n
409# basepair_probs2.dat\n
410This file contains average probabilistic consistency per nucleotide. This average probabilistic consistency is obtained by marginalizing each nucleotide for average probabilistic consistency in \"basepair_probs.dat.\"\n\n
411# unpair_probs_x.dat\n
412This file type contains average probabilistic consistency per nucleotide. This average probabilistic consistency is for nucleotide unpairing and under the structural context \"x.\" \"hairpin,\" \"bulge,\" \"interior,\" \"multibranch,\" \"external\" stand for hairpin loops, bulge loops, interior loops, multi-loops, external loops, respectively.\n\n
413# pairmatch_probs.dat\n
414This file contains posterior nucleotide pair-matching probabilities.\n\n
415# loopmatch_probs.dat\n
416This file contains posterior nucleotide loop-matching probabilities.\n\n
417match_probs.dat\n
418This file contains posterior nucleotide matching probabilities.";
419pub const INSERT2MATCH_SCORE: Score = MATCH2INSERT_SCORE;
420pub const NUM_CONTEXTS: usize = 6;
421pub const CONTEXT_INDEX_HAIRPIN: usize = 0;
422pub const CONTEXT_INDEX_BULGE: usize = 1;
423pub const CONTEXT_INDEX_INTERIOR: usize = 2;
424pub const CONTEXT_INDEX_EXTERNAL: usize = 3;
425pub const CONTEXT_INDEX_BASEPAIR: usize = 4;
426pub const CONTEXT_INDEX_MULTIBRANCH: usize = 5;
427pub const EXAMPLE_FASTA_FILE_PATH: &str = "assets/sampled_trnas.fa";
428pub const EPSILON: Prob = 0.00_1;
429pub const PROB_BOUND_LOWER: Prob = -EPSILON;
430pub const PROB_BOUND_UPPER: Prob = 1. + EPSILON;
431
432pub fn consprob_core<T>(inputs: InputsConsprobCore<T>) -> AlignfoldProbMats<T>
433where
434 T: HashIndex,
435{
436 let (
437 seq_len_pair,
438 alignfold_scores,
439 max_basepair_span_pair,
440 fold_scores_pair,
441 produces_context_profs,
442 forward_pos_pairs,
443 backward_pos_pairs,
444 pos_quads_hashed_lens,
445 produces_match_probs,
446 matchable_poss,
447 matchable_poss2,
448 align_scores,
449 ) = inputs;
450 let (alignfold_sums, global_sum) = get_alignfold_sums::<T>((
451 seq_len_pair,
452 alignfold_scores,
453 max_basepair_span_pair,
454 fold_scores_pair,
455 forward_pos_pairs,
456 backward_pos_pairs,
457 pos_quads_hashed_lens,
458 matchable_poss,
459 matchable_poss2,
460 align_scores,
461 ));
462 get_alignfold_probs::<T>((
463 seq_len_pair,
464 alignfold_scores,
465 max_basepair_span_pair,
466 &alignfold_sums,
467 fold_scores_pair,
468 produces_context_profs,
469 global_sum,
470 pos_quads_hashed_lens,
471 produces_match_probs,
472 forward_pos_pairs,
473 backward_pos_pairs,
474 matchable_poss,
475 matchable_poss2,
476 align_scores,
477 ))
478}
479
480pub fn get_alignfold_sums<T>(inputs: InputsInsideSumsGetter<T>) -> (AlignfoldSums<T>, Sum)
481where
482 T: HashIndex,
483{
484 let (
485 seq_len_pair,
486 alignfold_scores,
487 max_basepair_span_pair,
488 fold_scores_pair,
489 forward_pos_pairs,
490 backward_pos_pairs,
491 pos_quads_hashed_lens,
492 matchable_poss,
493 matchable_poss2,
494 align_scores,
495 ) = inputs;
496 let mut alignfold_sums = AlignfoldSums::<T>::new();
497 for substr_len in range_inclusive(
498 T::from_usize(MIN_SPAN_HAIRPIN_CLOSE).unwrap(),
499 max_basepair_span_pair.0,
500 ) {
501 for substr_len2 in range_inclusive(
502 T::from_usize(MIN_SPAN_HAIRPIN_CLOSE).unwrap(),
503 max_basepair_span_pair.1,
504 ) {
505 if let Some(pos_pairs) = pos_quads_hashed_lens.get(&(substr_len, substr_len2)) {
506 for &(i, k) in pos_pairs {
507 let (j, l) = (i + substr_len - T::one(), k + substr_len2 - T::one());
508 let (long_i, long_j, long_k, long_l) = (
509 i.to_usize().unwrap(),
510 j.to_usize().unwrap(),
511 k.to_usize().unwrap(),
512 l.to_usize().unwrap(),
513 );
514 let pos_quad = (i, j, k, l);
515 let pairmatch_score = alignfold_scores.pairmatch_scores[&pos_quad];
516 let computes_forward_sums = true;
517 let (sum_seqalign, sum_multibranch) = get_loop_sums::<T>((
518 alignfold_scores,
519 &pos_quad,
520 &mut alignfold_sums,
521 computes_forward_sums,
522 forward_pos_pairs,
523 matchable_poss,
524 matchable_poss2,
525 align_scores,
526 ));
527 let computes_forward_sums = false;
528 let _ = get_loop_sums::<T>((
529 alignfold_scores,
530 &pos_quad,
531 &mut alignfold_sums,
532 computes_forward_sums,
533 backward_pos_pairs,
534 matchable_poss,
535 matchable_poss2,
536 align_scores,
537 ));
538 let mut sum = NEG_INFINITY;
539 let score = pairmatch_score
540 + fold_scores_pair.0.hairpin_scores[&(i, j)]
541 + fold_scores_pair.1.hairpin_scores[&(k, l)]
542 + sum_seqalign;
543 logsumexp(&mut sum, score);
544 let forward_sums = &alignfold_sums.forward_sums_hashed_poss2[&(i, k)];
545 let backward_sums = &alignfold_sums.backward_sums_hashed_poss2[&(j, l)];
546 let min = T::from_usize(MIN_SPAN_HAIRPIN_CLOSE).unwrap();
547 let min_len_pair = (
548 if substr_len <= min + T::from_usize(MAX_2LOOP_LEN + 2).unwrap() {
549 min
550 } else {
551 substr_len - T::from_usize(MAX_2LOOP_LEN + 2).unwrap()
552 },
553 if substr_len2 <= min + T::from_usize(MAX_2LOOP_LEN + 2).unwrap() {
554 min
555 } else {
556 substr_len2 - T::from_usize(MAX_2LOOP_LEN + 2).unwrap()
557 },
558 );
559 for substr_len3 in range(min_len_pair.0, substr_len - T::one()) {
560 for substr_len4 in range(min_len_pair.1, substr_len2 - T::one()) {
561 if let Some(pos_pairs2) = pos_quads_hashed_lens.get(&(substr_len3, substr_len4)) {
562 for &(m, o) in pos_pairs2 {
563 let (n, p) = (m + substr_len3 - T::one(), o + substr_len4 - T::one());
564 if !(i < m && n < j && k < o && p < l) {
565 continue;
566 }
567 let (long_m, long_n, long_o, long_p) = (
568 m.to_usize().unwrap(),
569 n.to_usize().unwrap(),
570 o.to_usize().unwrap(),
571 p.to_usize().unwrap(),
572 );
573 if long_m - long_i - 1 + long_j - long_n - 1 > MAX_2LOOP_LEN {
574 continue;
575 }
576 if long_o - long_k - 1 + long_l - long_p - 1 > MAX_2LOOP_LEN {
577 continue;
578 }
579 let pos_quad2 = (m, n, o, p);
580 if let Some(&x) = alignfold_sums.sums_close.get(&pos_quad2) {
581 let mut forward_term = NEG_INFINITY;
582 let mut backward_term = forward_term;
583 let pos_pair2 = (m - T::one(), o - T::one());
584 if let Some(x) = forward_sums.get(&pos_pair2) {
585 logsumexp(&mut forward_term, x.sum_seqalign);
586 }
587 let pos_pair2 = (n + T::one(), p + T::one());
588 if let Some(x) = backward_sums.get(&pos_pair2) {
589 logsumexp(&mut backward_term, x.sum_seqalign);
590 }
591 let sum_2loop = forward_term + backward_term;
592 let twoloop_score = fold_scores_pair.0.twoloop_scores[&(i, j, m, n)];
593 let twoloop_score2 = fold_scores_pair.1.twoloop_scores[&(k, l, o, p)];
594 let x = pairmatch_score + twoloop_score + twoloop_score2 + x;
595 logsumexp(&mut sum, x + sum_2loop);
596 }
597 }
598 }
599 }
600 }
601 let multibranch_close_score = fold_scores_pair.0.multibranch_close_scores[&(i, j)];
602 let multibranch_close_score2 = fold_scores_pair.1.multibranch_close_scores[&(k, l)];
603 let score =
604 pairmatch_score + multibranch_close_score + multibranch_close_score2 + sum_multibranch;
605 logsumexp(&mut sum, score);
606 if sum > NEG_INFINITY {
607 alignfold_sums.sums_close.insert(pos_quad, sum);
608 let accessible_score = fold_scores_pair.0.accessible_scores[&(i, j)];
609 let accessible_score2 = fold_scores_pair.1.accessible_scores[&(k, l)];
610 sum += accessible_score + accessible_score2;
611 alignfold_sums
612 .sums_accessible_external
613 .insert(pos_quad, sum);
614 alignfold_sums
615 .sums_accessible_multibranch
616 .insert(pos_quad, sum + 2. * COEFF_NUM_BRANCHES);
617 }
618 }
619 }
620 }
621 }
622 let leftmost_pos_pair = (T::zero(), T::zero());
623 let rightmost_pos_pair = (seq_len_pair.0 - T::one(), seq_len_pair.1 - T::one());
624 alignfold_sums
625 .forward_sums_external
626 .insert(leftmost_pos_pair, 0.);
627 alignfold_sums
628 .backward_sums_external
629 .insert(rightmost_pos_pair, 0.);
630 for i in range(T::zero(), seq_len_pair.0 - T::one()) {
631 for j in range(T::zero(), seq_len_pair.1 - T::one()) {
632 let pos_pair = (i, j);
633 if pos_pair == leftmost_pos_pair {
634 continue;
635 }
636 let mut sum = NEG_INFINITY;
637 if let Some(x) = forward_pos_pairs.get(&pos_pair) {
638 for &(k, l) in x {
639 let pos_pair2 = (k - T::one(), l - T::one());
640 let pos_quad = (k, i, l, j);
641 if let Some(&x) = alignfold_sums.sums_accessible_external.get(&pos_quad) {
642 if let Some(&y) = alignfold_sums.forward_sums_external2.get(&pos_pair2) {
643 let y = x + y;
644 logsumexp(&mut sum, y);
645 }
646 }
647 }
648 }
649 if i > T::zero() && j > T::zero() {
650 if let Some(&loopmatch_score) = alignfold_scores.loopmatch_scores.get(&pos_pair) {
651 let mut sum2 = NEG_INFINITY;
652 let pos_pair2 = (i - T::one(), j - T::one());
653 let long_pos_pair2 = (
654 pos_pair2.0.to_usize().unwrap(),
655 pos_pair2.1.to_usize().unwrap(),
656 );
657 let begins_sum = pos_pair2 == leftmost_pos_pair;
658 if let Some(&x) = alignfold_sums.forward_sums_external.get(&pos_pair2) {
659 let x = x
660 + if begins_sum {
661 align_scores.init_match_score
662 } else {
663 align_scores.match2match_score
664 };
665 logsumexp(&mut sum2, x);
666 }
667 if let Some(x) = matchable_poss.get(&pos_pair2.0) {
668 for &x in x {
669 if x >= pos_pair2.1 {
670 continue;
671 }
672 let pos_pair3 = (pos_pair2.0, x);
673 if let Some(&y) = alignfold_sums.forward_sums_external.get(&pos_pair3) {
674 let long_x = x.to_usize().unwrap();
675 let begins_sum = pos_pair3 == leftmost_pos_pair;
676 let z = alignfold_scores.range_insert_scores2[long_x + 1][long_pos_pair2.1]
677 + if begins_sum {
678 align_scores.init_insert_score
679 } else {
680 align_scores.match2insert_score
681 };
682 let z = y + z + align_scores.match2insert_score;
683 logsumexp(&mut sum2, z);
684 }
685 }
686 }
687 if let Some(x) = matchable_poss2.get(&pos_pair2.1) {
688 for &x in x {
689 if x >= pos_pair2.0 {
690 continue;
691 }
692 let pos_pair3 = (x, pos_pair2.1);
693 if let Some(&y) = alignfold_sums.forward_sums_external.get(&pos_pair3) {
694 let long_x = x.to_usize().unwrap();
695 let begins_sum = pos_pair3 == leftmost_pos_pair;
696 let z = alignfold_scores.range_insert_scores[long_x + 1][long_pos_pair2.0]
697 + if begins_sum {
698 align_scores.init_insert_score
699 } else {
700 align_scores.match2insert_score
701 };
702 let z = y + z + align_scores.match2insert_score;
703 logsumexp(&mut sum2, z);
704 }
705 }
706 }
707 if sum2 > NEG_INFINITY {
708 alignfold_sums
709 .forward_sums_external2
710 .insert(pos_pair2, sum2);
711 }
712 let term = sum2 + loopmatch_score;
713 logsumexp(&mut sum, term);
714 if sum > NEG_INFINITY {
715 alignfold_sums.forward_sums_external.insert(pos_pair, sum);
716 }
717 }
718 }
719 }
720 }
721 let mut final_sum = NEG_INFINITY;
722 let pos_pair2 = (
723 rightmost_pos_pair.0 - T::one(),
724 rightmost_pos_pair.1 - T::one(),
725 );
726 let long_pos_pair2 = (
727 pos_pair2.0.to_usize().unwrap(),
728 pos_pair2.1.to_usize().unwrap(),
729 );
730 if let Some(&x) = alignfold_sums.forward_sums_external.get(&pos_pair2) {
731 logsumexp(&mut final_sum, x);
732 }
733 if let Some(x) = matchable_poss.get(&pos_pair2.0) {
734 for &x in x {
735 if x >= pos_pair2.1 {
736 continue;
737 }
738 if let Some(&y) = alignfold_sums.forward_sums_external.get(&(pos_pair2.0, x)) {
739 let long_x = x.to_usize().unwrap();
740 let z = alignfold_scores.range_insert_scores2[long_x + 1][long_pos_pair2.1]
741 + align_scores.match2insert_score;
742 let z = y + z;
743 logsumexp(&mut final_sum, z);
744 }
745 }
746 }
747 if let Some(x) = matchable_poss2.get(&pos_pair2.1) {
748 for &x in x {
749 if x >= pos_pair2.0 {
750 continue;
751 }
752 if let Some(&y) = alignfold_sums.forward_sums_external.get(&(x, pos_pair2.1)) {
753 let long_x = x.to_usize().unwrap();
754 let z = alignfold_scores.range_insert_scores[long_x + 1][long_pos_pair2.0]
755 + align_scores.match2insert_score;
756 let z = y + z;
757 logsumexp(&mut final_sum, z);
758 }
759 }
760 }
761 for i in range(T::one(), seq_len_pair.0).rev() {
762 for j in range(T::one(), seq_len_pair.1).rev() {
763 let pos_pair = (i, j);
764 if pos_pair == rightmost_pos_pair {
765 continue;
766 }
767 let mut sum = NEG_INFINITY;
768 if let Some(x) = backward_pos_pairs.get(&pos_pair) {
769 for &(k, l) in x {
770 let pos_pair2 = (k + T::one(), l + T::one());
771 let pos_quad = (i, k, j, l);
772 if let Some(&x) = alignfold_sums.sums_accessible_external.get(&pos_quad) {
773 if let Some(&y) = alignfold_sums.backward_sums_external2.get(&pos_pair2) {
774 let y = x + y;
775 logsumexp(&mut sum, y);
776 }
777 }
778 }
779 }
780 if i < seq_len_pair.0 - T::one() && j < seq_len_pair.1 - T::one() {
781 if let Some(&loopmatch_score) = alignfold_scores.loopmatch_scores.get(&pos_pair) {
782 let mut sum2 = NEG_INFINITY;
783 let pos_pair2 = (i + T::one(), j + T::one());
784 let long_pos_pair2 = (
785 pos_pair2.0.to_usize().unwrap(),
786 pos_pair2.1.to_usize().unwrap(),
787 );
788 let ends_sum = pos_pair2 == rightmost_pos_pair;
789 if let Some(&x) = alignfold_sums.backward_sums_external.get(&pos_pair2) {
790 let x = x
791 + if ends_sum {
792 0.
793 } else {
794 align_scores.match2match_score
795 };
796 logsumexp(&mut sum2, x);
797 }
798 if let Some(x) = matchable_poss.get(&pos_pair2.0) {
799 for &x in x {
800 if x <= pos_pair2.1 {
801 continue;
802 }
803 let pos_pair3 = (pos_pair2.0, x);
804 if let Some(&y) = alignfold_sums.backward_sums_external.get(&pos_pair3) {
805 let long_x = x.to_usize().unwrap();
806 let ends_sum = pos_pair3 == rightmost_pos_pair;
807 let z = alignfold_scores.range_insert_scores2[long_pos_pair2.1][long_x - 1]
808 + if ends_sum {
809 0.
810 } else {
811 align_scores.match2insert_score
812 };
813 let z = y + z + align_scores.match2insert_score;
814 logsumexp(&mut sum2, z);
815 }
816 }
817 }
818 if let Some(x) = matchable_poss2.get(&pos_pair2.1) {
819 for &x in x {
820 if x <= pos_pair2.0 {
821 continue;
822 }
823 let pos_pair3 = (x, pos_pair2.1);
824 if let Some(&y) = alignfold_sums.backward_sums_external.get(&pos_pair3) {
825 let long_x = x.to_usize().unwrap();
826 let ends_sum = pos_pair3 == rightmost_pos_pair;
827 let z = alignfold_scores.range_insert_scores[long_pos_pair2.0][long_x - 1]
828 + if ends_sum {
829 0.
830 } else {
831 align_scores.match2insert_score
832 };
833 let z = y + z + align_scores.match2insert_score;
834 logsumexp(&mut sum2, z);
835 }
836 }
837 }
838 if sum2 > NEG_INFINITY {
839 alignfold_sums
840 .backward_sums_external2
841 .insert(pos_pair2, sum2);
842 }
843 let term = sum2 + loopmatch_score;
844 logsumexp(&mut sum, term);
845 if sum > NEG_INFINITY {
846 alignfold_sums.backward_sums_external.insert(pos_pair, sum);
847 }
848 }
849 }
850 }
851 }
852 (alignfold_sums, final_sum)
853}
854
855pub fn get_loop_sums<T>(inputs: InputsLoopSumsGetter<T>) -> (Sum, Sum)
856where
857 T: HashIndex,
858{
859 let (
860 alignfold_scores,
861 pos_quad,
862 alignfold_sums,
863 computes_forward_sums,
864 pos_pairs,
865 matchable_poss,
866 matchable_poss2,
867 align_scores,
868 ) = inputs;
869 let &(i, j, k, l) = pos_quad;
870 let leftmost_pos_pair = if computes_forward_sums {
871 (i, k)
872 } else {
873 (i + T::one(), k + T::one())
874 };
875 let rightmost_pos_pair = if computes_forward_sums {
876 (j - T::one(), l - T::one())
877 } else {
878 (j, l)
879 };
880 let sums_hashed_poss = if computes_forward_sums {
881 &mut alignfold_sums.forward_sums_hashed_poss
882 } else {
883 &mut alignfold_sums.backward_sums_hashed_poss
884 };
885 let sums_hashed_poss2 = if computes_forward_sums {
886 &mut alignfold_sums.forward_sums_hashed_poss2
887 } else {
888 &mut alignfold_sums.backward_sums_hashed_poss2
889 };
890 if !sums_hashed_poss.contains_key(&if computes_forward_sums {
891 leftmost_pos_pair
892 } else {
893 rightmost_pos_pair
894 }) {
895 sums_hashed_poss.insert(
896 if computes_forward_sums {
897 leftmost_pos_pair
898 } else {
899 rightmost_pos_pair
900 },
901 LoopSumsMat::<T>::new(),
902 );
903 }
904 if !sums_hashed_poss2.contains_key(&if computes_forward_sums {
905 leftmost_pos_pair
906 } else {
907 rightmost_pos_pair
908 }) {
909 sums_hashed_poss2.insert(
910 if computes_forward_sums {
911 leftmost_pos_pair
912 } else {
913 rightmost_pos_pair
914 },
915 LoopSumsMat::<T>::new(),
916 );
917 }
918 let sums_mat = &mut sums_hashed_poss
919 .get_mut(&if computes_forward_sums {
920 leftmost_pos_pair
921 } else {
922 rightmost_pos_pair
923 })
924 .unwrap();
925 let sums_mat2 = &mut sums_hashed_poss2
926 .get_mut(&if computes_forward_sums {
927 leftmost_pos_pair
928 } else {
929 rightmost_pos_pair
930 })
931 .unwrap();
932 let iter: Poss<T> = if computes_forward_sums {
933 range(i, j).collect()
934 } else {
935 range_inclusive(i + T::one(), j).rev().collect()
936 };
937 let iter2: Poss<T> = if computes_forward_sums {
938 range(k, l).collect()
939 } else {
940 range_inclusive(k + T::one(), l).rev().collect()
941 };
942 for &u in iter.iter() {
943 for &v in iter2.iter() {
944 let pos_pair = (u, v);
945 if sums_mat.contains_key(&pos_pair) {
946 continue;
947 }
948 let mut sums = LoopSums::new();
949 if (computes_forward_sums && u == i && v == k) || (!computes_forward_sums && u == j && v == l)
950 {
951 sums.sum_seqalign = 0.;
952 sums.sum_0ormore_pairmatches = 0.;
953 sums_mat.insert(pos_pair, sums);
954 continue;
955 }
956 let mut sum_multibranch = NEG_INFINITY;
957 let mut sum_1st_pairmatches = sum_multibranch;
958 let mut sum = sum_multibranch;
959 if let Some(pos_pairs) = pos_pairs.get(&pos_pair) {
960 for &(m, n) in pos_pairs {
961 if computes_forward_sums {
962 if !(i < m && k < n) {
963 continue;
964 }
965 } else if !(m < j && n < l) {
966 continue;
967 }
968 let pos_pair2 = if computes_forward_sums {
969 (m - T::one(), n - T::one())
970 } else {
971 (m + T::one(), n + T::one())
972 };
973 let pos_quad2 = if computes_forward_sums {
974 (m, u, n, v)
975 } else {
976 (u, m, v, n)
977 };
978 if let Some(&x) = alignfold_sums.sums_accessible_multibranch.get(&pos_quad2) {
979 if let Some(y) = sums_mat2.get(&pos_pair2) {
980 let z = x + y.sum_1ormore_pairmatches;
981 logsumexp(&mut sum_multibranch, z);
982 let z = x + y.sum_seqalign;
983 logsumexp(&mut sum_1st_pairmatches, z);
984 }
985 }
986 }
987 }
988 let pos_pair2 = if computes_forward_sums {
989 (u - T::one(), v - T::one())
990 } else {
991 (u + T::one(), v + T::one())
992 };
993 let long_pos_pair2 = (
994 pos_pair2.0.to_usize().unwrap(),
995 pos_pair2.1.to_usize().unwrap(),
996 );
997 if let Some(&loopmatch_score) = alignfold_scores.loopmatch_scores.get(&pos_pair) {
998 let mut sums2 = LoopSums::new();
999 let mut sum_seqalign2 = NEG_INFINITY;
1000 let mut sum_multibranch2 = sum_seqalign2;
1001 let mut sum_1st_pairmatches2 = sum_seqalign2;
1002 let mut sum2 = sum_seqalign2;
1003 if let Some(x) = sums_mat.get(&pos_pair2) {
1004 let y = x.sum_multibranch + align_scores.match2match_score;
1005 logsumexp(&mut sum_multibranch2, y);
1006 let y = x.sum_1st_pairmatches + align_scores.match2match_score;
1007 logsumexp(&mut sum_1st_pairmatches2, y);
1008 let y = x.sum_seqalign + align_scores.match2match_score;
1009 logsumexp(&mut sum_seqalign2, y);
1010 }
1011 if let Some(x) = matchable_poss.get(&pos_pair2.0) {
1012 for &x in x {
1013 if computes_forward_sums && x >= pos_pair2.1
1014 || (!computes_forward_sums && x <= pos_pair2.1)
1015 {
1016 continue;
1017 }
1018 if let Some(y) = sums_mat.get(&(pos_pair2.0, x)) {
1019 let long_x = x.to_usize().unwrap();
1020 let z = if computes_forward_sums {
1021 alignfold_scores.range_insert_scores2[long_x + 1][long_pos_pair2.1]
1022 } else {
1023 alignfold_scores.range_insert_scores2[long_pos_pair2.1][long_x - 1]
1024 } + align_scores.match2insert_score;
1025 let x = y.sum_multibranch + align_scores.match2insert_score + z;
1026 logsumexp(&mut sum_multibranch2, x);
1027 let x = y.sum_1st_pairmatches + align_scores.match2insert_score + z;
1028 logsumexp(&mut sum_1st_pairmatches2, x);
1029 let x = y.sum_seqalign + align_scores.match2insert_score + z;
1030 logsumexp(&mut sum_seqalign2, x);
1031 }
1032 }
1033 }
1034 if let Some(x) = matchable_poss2.get(&pos_pair2.1) {
1035 for &x in x {
1036 if computes_forward_sums && x >= pos_pair2.0
1037 || (!computes_forward_sums && x <= pos_pair2.0)
1038 {
1039 continue;
1040 }
1041 if let Some(y) = sums_mat.get(&(x, pos_pair2.1)) {
1042 let long_x = x.to_usize().unwrap();
1043 let z = if computes_forward_sums {
1044 alignfold_scores.range_insert_scores[long_x + 1][long_pos_pair2.0]
1045 } else {
1046 alignfold_scores.range_insert_scores[long_pos_pair2.0][long_x - 1]
1047 } + align_scores.match2insert_score;
1048 let x = y.sum_multibranch + align_scores.match2insert_score + z;
1049 logsumexp(&mut sum_multibranch2, x);
1050 let x = y.sum_1st_pairmatches + align_scores.match2insert_score + z;
1051 logsumexp(&mut sum_1st_pairmatches2, x);
1052 let x = y.sum_seqalign + align_scores.match2insert_score + z;
1053 logsumexp(&mut sum_seqalign2, x);
1054 }
1055 }
1056 }
1057 sums2.sum_multibranch = sum_multibranch2;
1058 logsumexp(&mut sum2, sum_multibranch2);
1059 sums2.sum_1st_pairmatches = sum_1st_pairmatches2;
1060 logsumexp(&mut sum2, sum_1st_pairmatches2);
1061 sums2.sum_1ormore_pairmatches = sum2;
1062 sums2.sum_seqalign = sum_seqalign2;
1063 logsumexp(&mut sum2, sum_seqalign2);
1064 sums2.sum_0ormore_pairmatches = sum2;
1065 if has_valid_sums(&sums2) {
1066 sums_mat2.insert(pos_pair2, sums2);
1067 }
1068 let term = sum_multibranch2 + loopmatch_score;
1069 logsumexp(&mut sum_multibranch, term);
1070 sums.sum_multibranch = sum_multibranch;
1071 logsumexp(&mut sum, sum_multibranch);
1072 let term = sum_1st_pairmatches2 + loopmatch_score;
1073 logsumexp(&mut sum_1st_pairmatches, term);
1074 sums.sum_1st_pairmatches = sum_1st_pairmatches;
1075 logsumexp(&mut sum, sum_1st_pairmatches);
1076 sums.sum_1ormore_pairmatches = sum;
1077 let sum_seqalign = sum_seqalign2 + loopmatch_score;
1078 sums.sum_seqalign = sum_seqalign;
1079 logsumexp(&mut sum, sum_seqalign);
1080 sums.sum_0ormore_pairmatches = sum;
1081 if has_valid_sums(&sums) {
1082 sums_mat.insert(pos_pair, sums);
1083 }
1084 }
1085 }
1086 }
1087 let mut final_sum_seqalign = NEG_INFINITY;
1088 let mut final_sum_multibranch = final_sum_seqalign;
1089 if computes_forward_sums {
1090 let pos_pair2 = rightmost_pos_pair;
1091 let long_pos_pair2 = (
1092 pos_pair2.0.to_usize().unwrap(),
1093 pos_pair2.1.to_usize().unwrap(),
1094 );
1095 if let Some(x) = sums_mat.get(&pos_pair2) {
1096 let y = x.sum_multibranch + align_scores.match2match_score;
1097 logsumexp(&mut final_sum_multibranch, y);
1098 let y = x.sum_seqalign + align_scores.match2match_score;
1099 logsumexp(&mut final_sum_seqalign, y);
1100 }
1101 if let Some(x) = matchable_poss.get(&pos_pair2.0) {
1102 for &x in x {
1103 if x >= pos_pair2.1 {
1104 continue;
1105 }
1106 if let Some(y) = sums_mat.get(&(pos_pair2.0, x)) {
1107 let long_x = x.to_usize().unwrap();
1108 let z = alignfold_scores.range_insert_scores2[long_x + 1][long_pos_pair2.1]
1109 + align_scores.match2insert_score;
1110 let x = y.sum_multibranch + align_scores.match2insert_score + z;
1111 logsumexp(&mut final_sum_multibranch, x);
1112 let x = y.sum_seqalign + align_scores.match2insert_score + z;
1113 logsumexp(&mut final_sum_seqalign, x);
1114 }
1115 }
1116 }
1117 if let Some(x) = matchable_poss2.get(&pos_pair2.1) {
1118 for &x in x {
1119 if x >= pos_pair2.0 {
1120 continue;
1121 }
1122 if let Some(y) = sums_mat.get(&(x, pos_pair2.1)) {
1123 let long_x = x.to_usize().unwrap();
1124 let z = alignfold_scores.range_insert_scores[long_x + 1][long_pos_pair2.0]
1125 + align_scores.match2insert_score;
1126 let x = y.sum_multibranch + align_scores.match2insert_score + z;
1127 logsumexp(&mut final_sum_multibranch, x);
1128 let x = y.sum_seqalign + align_scores.match2insert_score + z;
1129 logsumexp(&mut final_sum_seqalign, x);
1130 }
1131 }
1132 }
1133 }
1134 (final_sum_seqalign, final_sum_multibranch)
1135}
1136
1137pub fn get_2loop_sums<T>(inputs: Inputs2loopSumsGetter<T>) -> (SparseSumMat<T>, SparseSumMat<T>)
1138where
1139 T: HashIndex,
1140{
1141 let (
1142 pos_quad,
1143 alignfold_sums,
1144 computes_forward_sums,
1145 pos_pairs,
1146 fold_scores_pair,
1147 alignfold_scores,
1148 matchable_poss,
1149 matchable_poss2,
1150 align_scores,
1151 ) = inputs;
1152 let &(i, j, k, l) = pos_quad;
1153 let leftmost_pos_pair = if computes_forward_sums {
1154 (i, k)
1155 } else {
1156 (i + T::one(), k + T::one())
1157 };
1158 let rightmost_pos_pair = if computes_forward_sums {
1159 (j - T::one(), l - T::one())
1160 } else {
1161 (j, l)
1162 };
1163 let sums_hashed_poss = if computes_forward_sums {
1164 &alignfold_sums.forward_sums_hashed_poss2
1165 } else {
1166 &alignfold_sums.backward_sums_hashed_poss2
1167 };
1168 let sums = &sums_hashed_poss[&if computes_forward_sums {
1169 leftmost_pos_pair
1170 } else {
1171 rightmost_pos_pair
1172 }];
1173 let iter: Poss<T> = if computes_forward_sums {
1174 range(i, j).collect()
1175 } else {
1176 range_inclusive(i + T::one(), j).rev().collect()
1177 };
1178 let iter2: Poss<T> = if computes_forward_sums {
1179 range(k, l).collect()
1180 } else {
1181 range_inclusive(k + T::one(), l).rev().collect()
1182 };
1183 let mut sum_mat = SparseSumMat::<T>::default();
1184 let mut sum_mat2 = sum_mat.clone();
1185 for &u in iter.iter() {
1186 for &v in iter2.iter() {
1187 let pos_pair = (u, v);
1188 if (computes_forward_sums && u == i && v == k) || (!computes_forward_sums && u == j && v == l)
1189 {
1190 continue;
1191 }
1192 let mut sum = NEG_INFINITY;
1193 if let Some(x) = pos_pairs.get(&pos_pair) {
1194 for &(m, n) in x {
1195 if computes_forward_sums {
1196 if !(i < m && k < n) {
1197 continue;
1198 }
1199 } else if !(m < j && n < l) {
1200 continue;
1201 }
1202 let pos_pair2 = if computes_forward_sums {
1203 (m - T::one(), n - T::one())
1204 } else {
1205 (m + T::one(), n + T::one())
1206 };
1207 let pos_quad2 = if computes_forward_sums {
1208 (m, u, n, v)
1209 } else {
1210 (u, m, v, n)
1211 };
1212 if pos_quad2.0 - i - T::one() + j - pos_quad2.1 - T::one()
1213 > T::from_usize(MAX_2LOOP_LEN).unwrap()
1214 {
1215 continue;
1216 }
1217 if pos_quad2.2 - k - T::one() + l - pos_quad2.3 - T::one()
1218 > T::from_usize(MAX_2LOOP_LEN).unwrap()
1219 {
1220 continue;
1221 }
1222 if let Some(&x) = alignfold_sums.sums_close.get(&pos_quad2) {
1223 if let Some(y) = sums.get(&pos_pair2) {
1224 let twoloop_score =
1225 fold_scores_pair.0.twoloop_scores[&(i, j, pos_quad2.0, pos_quad2.1)];
1226 let twoloop_score2 =
1227 fold_scores_pair.1.twoloop_scores[&(k, l, pos_quad2.2, pos_quad2.3)];
1228 let y = x + y.sum_seqalign + twoloop_score + twoloop_score2;
1229 logsumexp(&mut sum, y);
1230 }
1231 }
1232 }
1233 }
1234 let pos_pair2 = if computes_forward_sums {
1235 (u - T::one(), v - T::one())
1236 } else {
1237 (u + T::one(), v + T::one())
1238 };
1239 let long_pos_pair2 = (
1240 pos_pair2.0.to_usize().unwrap(),
1241 pos_pair2.1.to_usize().unwrap(),
1242 );
1243 if let Some(&loopmatch_score) = alignfold_scores.loopmatch_scores.get(&pos_pair) {
1244 let mut sum2 = NEG_INFINITY;
1245 if let Some(&x) = sum_mat.get(&pos_pair2) {
1246 let x = x + align_scores.match2match_score;
1247 logsumexp(&mut sum2, x);
1248 }
1249 if let Some(x) = matchable_poss.get(&pos_pair2.0) {
1250 for &x in x {
1251 if computes_forward_sums && x >= pos_pair2.1
1252 || (!computes_forward_sums && x <= pos_pair2.1)
1253 {
1254 continue;
1255 }
1256 if let Some(&y) = sum_mat.get(&(pos_pair2.0, x)) {
1257 let long_x = x.to_usize().unwrap();
1258 let z = if computes_forward_sums {
1259 alignfold_scores.range_insert_scores2[long_x + 1][long_pos_pair2.1]
1260 } else {
1261 alignfold_scores.range_insert_scores2[long_pos_pair2.1][long_x - 1]
1262 } + align_scores.match2insert_score;
1263 let z = y + align_scores.match2insert_score + z;
1264 logsumexp(&mut sum2, z);
1265 }
1266 }
1267 }
1268 if let Some(x) = matchable_poss2.get(&pos_pair2.1) {
1269 for &x in x {
1270 if computes_forward_sums && x >= pos_pair2.0
1271 || (!computes_forward_sums && x <= pos_pair2.0)
1272 {
1273 continue;
1274 }
1275 if let Some(&y) = sum_mat.get(&(x, pos_pair2.1)) {
1276 let long_x = x.to_usize().unwrap();
1277 let z = if computes_forward_sums {
1278 alignfold_scores.range_insert_scores[long_x + 1][long_pos_pair2.0]
1279 } else {
1280 alignfold_scores.range_insert_scores[long_pos_pair2.0][long_x - 1]
1281 } + align_scores.match2insert_score;
1282 let z = y + align_scores.match2insert_score + z;
1283 logsumexp(&mut sum2, z);
1284 }
1285 }
1286 }
1287 if sum2 > NEG_INFINITY {
1288 sum_mat2.insert(pos_pair2, sum2);
1289 }
1290 let term = sum2 + loopmatch_score;
1291 logsumexp(&mut sum, term);
1292 if sum > NEG_INFINITY {
1293 sum_mat.insert(pos_pair, sum);
1294 }
1295 }
1296 }
1297 }
1298 (sum_mat, sum_mat2)
1299}
1300
1301pub fn has_valid_sums(x: &LoopSums) -> bool {
1302 x.sum_seqalign > NEG_INFINITY
1303 || x.sum_multibranch > NEG_INFINITY
1304 || x.sum_1st_pairmatches > NEG_INFINITY
1305}
1306
1307pub fn get_alignfold_probs<T>(inputs: InputsAlignfoldProbsGetter<T>) -> AlignfoldProbMats<T>
1308where
1309 T: HashIndex,
1310{
1311 let (
1312 seq_len_pair,
1313 alignfold_scores,
1314 max_basepair_span_pair,
1315 alignfold_sums,
1316 fold_scores_pair,
1317 produces_context_profs,
1318 global_sum,
1319 pos_quads_hashed_lens,
1320 produces_match_probs,
1321 forward_pos_pairs,
1322 backward_pos_pairs,
1323 matchable_poss,
1324 matchable_poss2,
1325 align_scores,
1326 ) = inputs;
1327 let mut alignfold_outside_sums = SumMat4d::<T>::default();
1328 let mut alignfold_probs = AlignfoldProbMats::<T>::new(&(
1329 seq_len_pair.0.to_usize().unwrap(),
1330 seq_len_pair.1.to_usize().unwrap(),
1331 ));
1332 let mut prob_coeffs_multibranch = SumMat4d::<T>::default();
1333 let mut prob_coeffs_multibranch2 = prob_coeffs_multibranch.clone();
1334 let leftmost_pos_pair = (T::zero(), T::zero());
1335 let rightmost_pos_pair = (seq_len_pair.0 - T::one(), seq_len_pair.1 - T::one());
1336 for substr_len in range_inclusive(
1337 T::from_usize(MIN_SPAN_HAIRPIN_CLOSE).unwrap(),
1338 max_basepair_span_pair.0,
1339 )
1340 .rev()
1341 {
1342 for substr_len2 in range_inclusive(
1343 T::from_usize(MIN_SPAN_HAIRPIN_CLOSE).unwrap(),
1344 max_basepair_span_pair.1,
1345 )
1346 .rev()
1347 {
1348 if let Some(pos_pairs) = pos_quads_hashed_lens.get(&(substr_len, substr_len2)) {
1349 for &(i, k) in pos_pairs {
1350 let (j, l) = (i + substr_len - T::one(), k + substr_len2 - T::one());
1351 let pos_quad = (i, j, k, l);
1352 if let Some(&sum_close) = alignfold_sums.sums_close.get(&pos_quad) {
1353 let (long_i, long_j, long_k, long_l) = (
1354 i.to_usize().unwrap(),
1355 j.to_usize().unwrap(),
1356 k.to_usize().unwrap(),
1357 l.to_usize().unwrap(),
1358 );
1359 let prob_coeff = sum_close - global_sum;
1360 let mut sum = NEG_INFINITY;
1361 let mut forward_term = sum;
1362 let mut backward_term = sum;
1363 let pos_pair2 = (i - T::one(), k - T::one());
1364 if let Some(&x) = alignfold_sums.forward_sums_external2.get(&pos_pair2) {
1365 logsumexp(&mut forward_term, x);
1366 }
1367 let pos_pair2 = (j + T::one(), l + T::one());
1368 if let Some(&x) = alignfold_sums.backward_sums_external2.get(&pos_pair2) {
1369 logsumexp(&mut backward_term, x);
1370 }
1371 let sum_external = forward_term + backward_term;
1372 if sum_external > NEG_INFINITY {
1373 let coeff = alignfold_sums.sums_accessible_external[&pos_quad] - sum_close;
1374 sum = coeff + sum_external;
1375 }
1376 for substr_len3 in range_inclusive(
1377 substr_len + T::from_usize(2).unwrap(),
1378 (substr_len + T::from_usize(MAX_2LOOP_LEN + 2).unwrap())
1379 .min(max_basepair_span_pair.0),
1380 ) {
1381 for substr_len4 in range_inclusive(
1382 substr_len2 + T::from_usize(2).unwrap(),
1383 (substr_len2 + T::from_usize(MAX_2LOOP_LEN + 2).unwrap())
1384 .min(max_basepair_span_pair.1),
1385 ) {
1386 if let Some(pos_pairs2) = pos_quads_hashed_lens.get(&(substr_len3, substr_len4)) {
1387 for &(m, o) in pos_pairs2 {
1388 let (n, p) = (m + substr_len3 - T::one(), o + substr_len4 - T::one());
1389 if !(m < i && j < n && o < k && l < p) {
1390 continue;
1391 }
1392 let (long_m, long_n, long_o, long_p) = (
1393 m.to_usize().unwrap(),
1394 n.to_usize().unwrap(),
1395 o.to_usize().unwrap(),
1396 p.to_usize().unwrap(),
1397 );
1398 if long_n - long_j - 1 + long_i - long_m - 1 > MAX_2LOOP_LEN {
1399 continue;
1400 }
1401 if long_p - long_l - 1 + long_k - long_o - 1 > MAX_2LOOP_LEN {
1402 continue;
1403 }
1404 let pos_quad2 = (m, n, o, p);
1405 if let Some(&outside_sum) = alignfold_outside_sums.get(&pos_quad2) {
1406 let forward_sums = &alignfold_sums.forward_sums_hashed_poss2[&(m, o)];
1407 let backward_sums = &alignfold_sums.backward_sums_hashed_poss2[&(n, p)];
1408 let mut forward_term = NEG_INFINITY;
1409 let mut backward_term = forward_term;
1410 let pos_pair2 = (i - T::one(), k - T::one());
1411 if let Some(x) = forward_sums.get(&pos_pair2) {
1412 logsumexp(&mut forward_term, x.sum_seqalign);
1413 }
1414 let pos_pair2 = (j + T::one(), l + T::one());
1415 if let Some(x) = backward_sums.get(&pos_pair2) {
1416 logsumexp(&mut backward_term, x.sum_seqalign);
1417 }
1418 let sum_2loop = forward_term + backward_term;
1419 if sum_2loop > NEG_INFINITY {
1420 let pairmatch_score = alignfold_scores.pairmatch_scores[&pos_quad2];
1421 let twoloop_score = fold_scores_pair.0.twoloop_scores[&(m, n, i, j)];
1422 let twoloop_score2 = fold_scores_pair.1.twoloop_scores[&(o, p, k, l)];
1423 let coeff = pairmatch_score + twoloop_score + twoloop_score2 + outside_sum;
1424 let sum_2loop = coeff + sum_2loop;
1425 logsumexp(&mut sum, sum_2loop);
1426 if produces_context_profs {
1427 let loop_len_pair = (long_i - long_m - 1, long_n - long_j - 1);
1428 let found_bulge_loop = (loop_len_pair.0 == 0) ^ (loop_len_pair.1 == 0);
1429 let found_interior_loop = loop_len_pair.0 > 0 && loop_len_pair.1 > 0;
1430 let pairmatch_prob_2loop = prob_coeff + sum_2loop;
1431 for q in long_m + 1..long_i {
1432 if found_bulge_loop {
1433 logsumexp(
1434 &mut alignfold_probs.context_profs_pair.0[(q, CONTEXT_INDEX_BULGE)],
1435 pairmatch_prob_2loop,
1436 );
1437 } else if found_interior_loop {
1438 logsumexp(
1439 &mut alignfold_probs.context_profs_pair.0
1440 [(q, CONTEXT_INDEX_INTERIOR)],
1441 pairmatch_prob_2loop,
1442 );
1443 }
1444 }
1445 for q in long_j + 1..long_n {
1446 if found_bulge_loop {
1447 logsumexp(
1448 &mut alignfold_probs.context_profs_pair.0[(q, CONTEXT_INDEX_BULGE)],
1449 pairmatch_prob_2loop,
1450 );
1451 } else if found_interior_loop {
1452 logsumexp(
1453 &mut alignfold_probs.context_profs_pair.0
1454 [(q, CONTEXT_INDEX_INTERIOR)],
1455 pairmatch_prob_2loop,
1456 );
1457 }
1458 }
1459 let loop_len_pair = (long_k - long_o - 1, long_p - long_l - 1);
1460 let found_bulge_loop = (loop_len_pair.0 == 0) ^ (loop_len_pair.1 == 0);
1461 let found_interior_loop = loop_len_pair.0 > 0 && loop_len_pair.1 > 0;
1462 for q in long_o + 1..long_k {
1463 if found_bulge_loop {
1464 logsumexp(
1465 &mut alignfold_probs.context_profs_pair.1[(q, CONTEXT_INDEX_BULGE)],
1466 pairmatch_prob_2loop,
1467 );
1468 } else if found_interior_loop {
1469 logsumexp(
1470 &mut alignfold_probs.context_profs_pair.1
1471 [(q, CONTEXT_INDEX_INTERIOR)],
1472 pairmatch_prob_2loop,
1473 );
1474 }
1475 }
1476 for q in long_l + 1..long_p {
1477 if found_bulge_loop {
1478 logsumexp(
1479 &mut alignfold_probs.context_profs_pair.1[(q, CONTEXT_INDEX_BULGE)],
1480 pairmatch_prob_2loop,
1481 );
1482 } else if found_interior_loop {
1483 logsumexp(
1484 &mut alignfold_probs.context_profs_pair.1
1485 [(q, CONTEXT_INDEX_INTERIOR)],
1486 pairmatch_prob_2loop,
1487 );
1488 }
1489 }
1490 }
1491 }
1492 }
1493 }
1494 }
1495 }
1496 }
1497 let sum_ratio = alignfold_sums.sums_accessible_multibranch[&pos_quad] - sum_close;
1498 for (&(u, v), x) in &alignfold_sums.forward_sums_hashed_poss2 {
1499 if !(u < i && v < k) {
1500 continue;
1501 }
1502 let pos_quad2 = (u, j, v, l);
1503 let mut forward_term = NEG_INFINITY;
1504 let mut forward_term2 = forward_term;
1505 let pos_pair2 = (i - T::one(), k - T::one());
1506 if let Some(x) = x.get(&pos_pair2) {
1507 logsumexp(&mut forward_term, x.sum_1ormore_pairmatches);
1508 logsumexp(&mut forward_term2, x.sum_seqalign);
1509 }
1510 let mut sum_multibranch = NEG_INFINITY;
1511 if let Some(x) = prob_coeffs_multibranch.get(&pos_quad2) {
1512 let x = x + sum_ratio + forward_term;
1513 logsumexp(&mut sum_multibranch, x);
1514 }
1515 if let Some(x) = prob_coeffs_multibranch2.get(&pos_quad2) {
1516 let x = x + sum_ratio + forward_term2;
1517 logsumexp(&mut sum_multibranch, x);
1518 }
1519 if sum_multibranch > NEG_INFINITY {
1520 logsumexp(&mut sum, sum_multibranch);
1521 }
1522 }
1523 if sum > NEG_INFINITY {
1524 alignfold_outside_sums.insert(pos_quad, sum);
1525 let pairmatch_prob = prob_coeff + sum;
1526 if produces_match_probs {
1527 alignfold_probs
1528 .pairmatch_probs
1529 .insert(pos_quad, pairmatch_prob);
1530 match alignfold_probs.match_probs.get_mut(&(i, k)) {
1531 Some(x) => {
1532 logsumexp(x, pairmatch_prob);
1533 }
1534 None => {
1535 alignfold_probs.match_probs.insert((i, k), pairmatch_prob);
1536 }
1537 }
1538 match alignfold_probs.match_probs.get_mut(&(j, l)) {
1539 Some(x) => {
1540 logsumexp(x, pairmatch_prob);
1541 }
1542 None => {
1543 alignfold_probs.match_probs.insert((j, l), pairmatch_prob);
1544 }
1545 }
1546 }
1547 match alignfold_probs.basepair_probs_pair.0.get_mut(&(i, j)) {
1548 Some(x) => {
1549 logsumexp(x, pairmatch_prob);
1550 }
1551 None => {
1552 alignfold_probs
1553 .basepair_probs_pair
1554 .0
1555 .insert((i, j), pairmatch_prob);
1556 }
1557 }
1558 match alignfold_probs.basepair_probs_pair.1.get_mut(&(k, l)) {
1559 Some(x) => {
1560 logsumexp(x, pairmatch_prob);
1561 }
1562 None => {
1563 alignfold_probs
1564 .basepair_probs_pair
1565 .1
1566 .insert((k, l), pairmatch_prob);
1567 }
1568 }
1569 if produces_context_profs {
1570 logsumexp(
1571 &mut alignfold_probs.context_profs_pair.0[(long_i, CONTEXT_INDEX_BASEPAIR)],
1572 pairmatch_prob,
1573 );
1574 logsumexp(
1575 &mut alignfold_probs.context_profs_pair.0[(long_j, CONTEXT_INDEX_BASEPAIR)],
1576 pairmatch_prob,
1577 );
1578 logsumexp(
1579 &mut alignfold_probs.context_profs_pair.1[(long_k, CONTEXT_INDEX_BASEPAIR)],
1580 pairmatch_prob,
1581 );
1582 logsumexp(
1583 &mut alignfold_probs.context_profs_pair.1[(long_l, CONTEXT_INDEX_BASEPAIR)],
1584 pairmatch_prob,
1585 );
1586 }
1587 let pairmatch_score = alignfold_scores.pairmatch_scores[&pos_quad];
1588 let multibranch_close_score = fold_scores_pair.0.multibranch_close_scores[&(i, j)];
1589 let multibranch_close_score2 = fold_scores_pair.1.multibranch_close_scores[&(k, l)];
1590 let coeff =
1591 sum + pairmatch_score + multibranch_close_score + multibranch_close_score2;
1592 let backward_sums = &alignfold_sums.backward_sums_hashed_poss2[&(j, l)];
1593 for pos_pair in alignfold_scores.loopmatch_scores.keys() {
1594 let &(u, v) = pos_pair;
1595 if !(i < u && u < j && k < v && v < l) {
1596 continue;
1597 }
1598 let mut backward_term = NEG_INFINITY;
1599 let mut backward_term2 = backward_term;
1600 let pos_pair2 = (u + T::one(), v + T::one());
1601 if let Some(x) = backward_sums.get(&pos_pair2) {
1602 logsumexp(&mut backward_term, x.sum_0ormore_pairmatches);
1603 logsumexp(&mut backward_term2, x.sum_1ormore_pairmatches);
1604 }
1605 let pos_quad2 = (i, u, k, v);
1606 let x = coeff + backward_term;
1607 match prob_coeffs_multibranch.get_mut(&pos_quad2) {
1608 Some(y) => {
1609 logsumexp(y, x);
1610 }
1611 None => {
1612 prob_coeffs_multibranch.insert(pos_quad2, x);
1613 }
1614 }
1615 let x = coeff + backward_term2;
1616 match prob_coeffs_multibranch2.get_mut(&pos_quad2) {
1617 Some(y) => {
1618 logsumexp(y, x);
1619 }
1620 None => {
1621 prob_coeffs_multibranch2.insert(pos_quad2, x);
1622 }
1623 }
1624 }
1625 }
1626 }
1627 }
1628 }
1629 }
1630 }
1631 for x in alignfold_probs.basepair_probs_pair.0.values_mut() {
1632 *x = expf(*x);
1633 }
1634 for x in alignfold_probs.basepair_probs_pair.1.values_mut() {
1635 *x = expf(*x);
1636 }
1637 if produces_context_profs || produces_match_probs {
1638 let mut unpair_probs_range_external =
1639 (SparseProbMat::<T>::default(), SparseProbMat::<T>::default());
1640 let mut unpair_probs_range_hairpin =
1641 (SparseProbMat::<T>::default(), SparseProbMat::<T>::default());
1642 for u in range(T::zero(), seq_len_pair.0 - T::one()) {
1643 let long_u = u.to_usize().unwrap();
1644 for v in range(T::zero(), seq_len_pair.1 - T::one()) {
1645 let pos_pair = (u, v);
1646 let long_v = v.to_usize().unwrap();
1647 let pos_pair2 = (u + T::one(), v + T::one());
1648 let long_pos_pair2 = (
1649 pos_pair2.0.to_usize().unwrap(),
1650 pos_pair2.1.to_usize().unwrap(),
1651 );
1652 if u > T::zero() && v > T::zero() {
1653 let pos_pair_loopmatch = (u - T::one(), v - T::one());
1654 if let Some(&loopmatch_score) = alignfold_scores.loopmatch_scores.get(&pos_pair) {
1655 let mut forward_term = NEG_INFINITY;
1656 if let Some(&x) = alignfold_sums
1657 .forward_sums_external2
1658 .get(&pos_pair_loopmatch)
1659 {
1660 logsumexp(&mut forward_term, x);
1661 }
1662 let mut backward_term = NEG_INFINITY;
1663 if let Some(&x) = alignfold_sums.backward_sums_external2.get(&pos_pair2) {
1664 logsumexp(&mut backward_term, x);
1665 }
1666 let loopmatch_prob_external =
1667 loopmatch_score + forward_term + backward_term - global_sum;
1668 if produces_context_profs {
1669 logsumexp(
1670 &mut alignfold_probs.context_profs_pair.0[(long_u, CONTEXT_INDEX_EXTERNAL)],
1671 loopmatch_prob_external,
1672 );
1673 logsumexp(
1674 &mut alignfold_probs.context_profs_pair.1[(long_v, CONTEXT_INDEX_EXTERNAL)],
1675 loopmatch_prob_external,
1676 );
1677 }
1678 if produces_match_probs {
1679 match alignfold_probs.loopmatch_probs.get_mut(&pos_pair) {
1680 Some(x) => {
1681 logsumexp(x, loopmatch_prob_external);
1682 }
1683 None => {
1684 alignfold_probs
1685 .loopmatch_probs
1686 .insert(pos_pair, loopmatch_prob_external);
1687 }
1688 }
1689 }
1690 }
1691 }
1692 if produces_context_profs {
1693 if let Some(&sum) = alignfold_sums.forward_sums_external.get(&pos_pair) {
1694 let begins_sum = pos_pair == leftmost_pos_pair;
1695 let forward_term = sum
1696 + if begins_sum {
1697 align_scores.init_insert_score
1698 } else {
1699 align_scores.match2insert_score
1700 };
1701 if let Some(x) = matchable_poss.get(&pos_pair2.0) {
1702 for &x in x {
1703 if x <= pos_pair2.1 {
1704 continue;
1705 }
1706 let pos_pair3 = (pos_pair2.0, x);
1707 if let Some(&y) = alignfold_sums.backward_sums_external.get(&pos_pair3) {
1708 let long_x = x.to_usize().unwrap();
1709 let ends_sum = pos_pair3 == rightmost_pos_pair;
1710 let z = alignfold_scores.range_insert_scores2[long_pos_pair2.1][long_x - 1]
1711 + if ends_sum {
1712 0.
1713 } else {
1714 align_scores.match2insert_score
1715 };
1716 let z = forward_term + y + z - global_sum;
1717 let pos_pair4 = (pos_pair2.1, x - T::one());
1718 match unpair_probs_range_external.1.get_mut(&pos_pair4) {
1719 Some(x) => {
1720 logsumexp(x, z);
1721 }
1722 None => {
1723 unpair_probs_range_external.1.insert(pos_pair4, z);
1724 }
1725 }
1726 }
1727 }
1728 }
1729 if let Some(x) = matchable_poss2.get(&pos_pair2.1) {
1730 for &x in x {
1731 if x <= pos_pair2.0 {
1732 continue;
1733 }
1734 let pos_pair3 = (x, pos_pair2.1);
1735 if let Some(&y) = alignfold_sums.backward_sums_external.get(&pos_pair3) {
1736 let long_x = x.to_usize().unwrap();
1737 let ends_sum = pos_pair3 == rightmost_pos_pair;
1738 let z = alignfold_scores.range_insert_scores[long_pos_pair2.0][long_x - 1]
1739 + if ends_sum {
1740 0.
1741 } else {
1742 align_scores.match2insert_score
1743 };
1744 let z = forward_term + y + z - global_sum;
1745 let pos_pair4 = (pos_pair2.0, x - T::one());
1746 match unpair_probs_range_external.0.get_mut(&pos_pair4) {
1747 Some(x) => {
1748 logsumexp(x, z);
1749 }
1750 None => {
1751 unpair_probs_range_external.0.insert(pos_pair4, z);
1752 }
1753 }
1754 }
1755 }
1756 }
1757 }
1758 }
1759 }
1760 }
1761 for (pos_quad, &outside_sum) in &alignfold_outside_sums {
1762 let (i, j, k, l) = *pos_quad;
1763 let pairmatch_score = alignfold_scores.pairmatch_scores[pos_quad];
1764 let prob_coeff = outside_sum - global_sum + pairmatch_score;
1765 let hairpin_score = match fold_scores_pair.0.hairpin_scores.get(&(i, j)) {
1766 Some(&x) => x,
1767 None => NEG_INFINITY,
1768 };
1769 let hairpin_score2 = match fold_scores_pair.1.hairpin_scores.get(&(k, l)) {
1770 Some(&x) => x,
1771 None => NEG_INFINITY,
1772 };
1773 let prob_coeff_hairpin = prob_coeff + hairpin_score + hairpin_score2;
1774 let multibranch_close_score = fold_scores_pair.0.multibranch_close_scores[&(i, j)];
1775 let multibranch_close_score2 = fold_scores_pair.1.multibranch_close_scores[&(k, l)];
1776 let prob_coeff_multibranch = prob_coeff + multibranch_close_score + multibranch_close_score2;
1777 let forward_sums = &alignfold_sums.forward_sums_hashed_poss[&(i, k)];
1778 let backward_sums = &alignfold_sums.backward_sums_hashed_poss[&(j, l)];
1779 let forward_sums2 = &alignfold_sums.forward_sums_hashed_poss2[&(i, k)];
1780 let backward_sums2 = &alignfold_sums.backward_sums_hashed_poss2[&(j, l)];
1781 let (_, forward_sums_2loop2) = if produces_match_probs {
1782 let computes_forward_sums = true;
1783 get_2loop_sums((
1784 pos_quad,
1785 alignfold_sums,
1786 computes_forward_sums,
1787 forward_pos_pairs,
1788 fold_scores_pair,
1789 alignfold_scores,
1790 matchable_poss,
1791 matchable_poss2,
1792 align_scores,
1793 ))
1794 } else {
1795 (SparseSumMat::<T>::default(), SparseSumMat::<T>::default())
1796 };
1797 let (_, backward_sums_2loop2) = if produces_match_probs {
1798 let computes_forward_sums = false;
1799 get_2loop_sums((
1800 pos_quad,
1801 alignfold_sums,
1802 computes_forward_sums,
1803 backward_pos_pairs,
1804 fold_scores_pair,
1805 alignfold_scores,
1806 matchable_poss,
1807 matchable_poss2,
1808 align_scores,
1809 ))
1810 } else {
1811 (SparseSumMat::<T>::default(), SparseSumMat::<T>::default())
1812 };
1813 for u in range(i, j) {
1814 let long_u = u.to_usize().unwrap();
1815 for v in range(k, l) {
1816 let pos_pair = (u, v);
1817 let long_v = v.to_usize().unwrap();
1818 let pos_pair2 = (u + T::one(), v + T::one());
1819 let long_pos_pair2 = (
1820 pos_pair2.0.to_usize().unwrap(),
1821 pos_pair2.1.to_usize().unwrap(),
1822 );
1823 if let Some(&loopmatch_score) = alignfold_scores.loopmatch_scores.get(&pos_pair) {
1824 let mut backward_term_match_seqalign = NEG_INFINITY;
1825 let mut backward_term_match_multibranch = backward_term_match_seqalign;
1826 let mut backward_term_match_1ormore = backward_term_match_seqalign;
1827 let mut backward_term_match_0ormore = backward_term_match_seqalign;
1828 let mut backward_term_match_2loop = backward_term_match_seqalign;
1829 if let Some(x) = backward_sums2.get(&pos_pair2) {
1830 logsumexp(&mut backward_term_match_seqalign, x.sum_seqalign);
1831 if produces_match_probs {
1832 logsumexp(&mut backward_term_match_multibranch, x.sum_multibranch);
1833 logsumexp(&mut backward_term_match_1ormore, x.sum_1ormore_pairmatches);
1834 logsumexp(&mut backward_term_match_0ormore, x.sum_0ormore_pairmatches);
1835 }
1836 }
1837 if produces_match_probs {
1838 if let Some(&x) = backward_sums_2loop2.get(&pos_pair2) {
1839 logsumexp(&mut backward_term_match_2loop, x);
1840 }
1841 }
1842 let pos_pair_loopmatch = (u - T::one(), v - T::one());
1843 let mut loopmatch_prob_hairpin = NEG_INFINITY;
1844 let mut loopmatch_prob_multibranch = loopmatch_prob_hairpin;
1845 let mut loopmatch_prob_2loop = loopmatch_prob_hairpin;
1846 if let Some(x) = forward_sums2.get(&pos_pair_loopmatch) {
1847 let y = prob_coeff_hairpin
1848 + loopmatch_score
1849 + x.sum_seqalign
1850 + backward_term_match_seqalign;
1851 logsumexp(&mut loopmatch_prob_hairpin, y);
1852 if produces_match_probs {
1853 let y = prob_coeff_multibranch
1854 + loopmatch_score
1855 + x.sum_seqalign
1856 + backward_term_match_multibranch;
1857 logsumexp(&mut loopmatch_prob_multibranch, y);
1858 let y = prob_coeff_multibranch
1859 + loopmatch_score
1860 + x.sum_1st_pairmatches
1861 + backward_term_match_1ormore;
1862 logsumexp(&mut loopmatch_prob_multibranch, y);
1863 let y = prob_coeff_multibranch
1864 + loopmatch_score
1865 + x.sum_multibranch
1866 + backward_term_match_0ormore;
1867 logsumexp(&mut loopmatch_prob_multibranch, y);
1868 let y = prob_coeff + loopmatch_score + x.sum_seqalign + backward_term_match_2loop;
1869 logsumexp(&mut loopmatch_prob_2loop, y);
1870 }
1871 }
1872 if produces_context_profs {
1873 logsumexp(
1874 &mut alignfold_probs.context_profs_pair.0[(long_u, CONTEXT_INDEX_HAIRPIN)],
1875 loopmatch_prob_hairpin,
1876 );
1877 logsumexp(
1878 &mut alignfold_probs.context_profs_pair.1[(long_v, CONTEXT_INDEX_HAIRPIN)],
1879 loopmatch_prob_hairpin,
1880 );
1881 }
1882 if produces_match_probs {
1883 if let Some(&x) = forward_sums_2loop2.get(&pos_pair_loopmatch) {
1884 let x = prob_coeff + loopmatch_score + x + backward_term_match_seqalign;
1885 logsumexp(&mut loopmatch_prob_2loop, x);
1886 }
1887 let mut prob = NEG_INFINITY;
1888 logsumexp(&mut prob, loopmatch_prob_hairpin);
1889 logsumexp(&mut prob, loopmatch_prob_multibranch);
1890 logsumexp(&mut prob, loopmatch_prob_2loop);
1891 match alignfold_probs.loopmatch_probs.get_mut(&pos_pair) {
1892 Some(x) => {
1893 logsumexp(x, prob);
1894 }
1895 None => {
1896 alignfold_probs.loopmatch_probs.insert(pos_pair, prob);
1897 }
1898 }
1899 }
1900 }
1901 if produces_context_profs {
1902 if let Some(sums) = forward_sums.get(&pos_pair) {
1903 let forward_term = sums.sum_seqalign + align_scores.match2insert_score;
1904 if let Some(x) = matchable_poss.get(&pos_pair2.0) {
1905 for &x in x {
1906 if x <= pos_pair2.1 {
1907 continue;
1908 }
1909 let pos_pair3 = (pos_pair2.0, x);
1910 if let Some(y) = backward_sums.get(&pos_pair3) {
1911 let long_x = x.to_usize().unwrap();
1912 let z = alignfold_scores.range_insert_scores2[long_pos_pair2.1][long_x - 1]
1913 + align_scores.match2insert_score;
1914 let z = prob_coeff_hairpin + forward_term + y.sum_seqalign + z;
1915 let pos_pair4 = (pos_pair2.1, x - T::one());
1916 match unpair_probs_range_hairpin.1.get_mut(&pos_pair4) {
1917 Some(x) => {
1918 logsumexp(x, z);
1919 }
1920 None => {
1921 unpair_probs_range_hairpin.1.insert(pos_pair4, z);
1922 }
1923 }
1924 }
1925 }
1926 }
1927 if let Some(x) = matchable_poss2.get(&pos_pair2.1) {
1928 for &x in x {
1929 if x <= pos_pair2.0 {
1930 continue;
1931 }
1932 let pos_pair3 = (x, pos_pair2.1);
1933 if let Some(y) = backward_sums.get(&pos_pair3) {
1934 let long_x = x.to_usize().unwrap();
1935 let z = alignfold_scores.range_insert_scores[long_pos_pair2.0][long_x - 1]
1936 + align_scores.match2insert_score;
1937 let z = prob_coeff_hairpin + forward_term + z + y.sum_seqalign;
1938 let pos_pair4 = (pos_pair2.0, x - T::one());
1939 match unpair_probs_range_hairpin.0.get_mut(&pos_pair4) {
1940 Some(x) => {
1941 logsumexp(x, z);
1942 }
1943 None => {
1944 unpair_probs_range_hairpin.0.insert(pos_pair4, z);
1945 }
1946 }
1947 }
1948 }
1949 }
1950 }
1951 }
1952 }
1953 }
1954 }
1955 if produces_context_profs {
1956 for (pos_pair, &x) in &unpair_probs_range_external.0 {
1957 for i in range_inclusive(pos_pair.0, pos_pair.1) {
1958 let long_i = i.to_usize().unwrap();
1959 logsumexp(
1960 &mut alignfold_probs.context_profs_pair.0[(long_i, CONTEXT_INDEX_EXTERNAL)],
1961 x,
1962 );
1963 }
1964 }
1965 for (pos_pair, &x) in &unpair_probs_range_external.1 {
1966 for i in range_inclusive(pos_pair.0, pos_pair.1) {
1967 let long_i = i.to_usize().unwrap();
1968 logsumexp(
1969 &mut alignfold_probs.context_profs_pair.1[(long_i, CONTEXT_INDEX_EXTERNAL)],
1970 x,
1971 );
1972 }
1973 }
1974 for (pos_pair, &x) in &unpair_probs_range_hairpin.0 {
1975 for i in range_inclusive(pos_pair.0, pos_pair.1) {
1976 let long_i = i.to_usize().unwrap();
1977 logsumexp(
1978 &mut alignfold_probs.context_profs_pair.0[(long_i, CONTEXT_INDEX_HAIRPIN)],
1979 x,
1980 );
1981 }
1982 }
1983 for (pos_pair, &x) in &unpair_probs_range_hairpin.1 {
1984 for i in range_inclusive(pos_pair.0, pos_pair.1) {
1985 let long_i = i.to_usize().unwrap();
1986 logsumexp(
1987 &mut alignfold_probs.context_profs_pair.1[(long_i, CONTEXT_INDEX_HAIRPIN)],
1988 x,
1989 );
1990 }
1991 }
1992 alignfold_probs
1993 .context_profs_pair
1994 .0
1995 .slice_mut(s![.., ..CONTEXT_INDEX_MULTIBRANCH])
1996 .mapv_inplace(expf);
1997 let fold = 1.
1998 - alignfold_probs
1999 .context_profs_pair
2000 .0
2001 .slice_mut(s![.., ..CONTEXT_INDEX_MULTIBRANCH])
2002 .sum_axis(Axis(1));
2003 alignfold_probs
2004 .context_profs_pair
2005 .0
2006 .slice_mut(s![.., CONTEXT_INDEX_MULTIBRANCH])
2007 .assign(&fold);
2008 alignfold_probs
2009 .context_profs_pair
2010 .1
2011 .slice_mut(s![.., ..CONTEXT_INDEX_MULTIBRANCH])
2012 .mapv_inplace(expf);
2013 let fold = 1.
2014 - alignfold_probs
2015 .context_profs_pair
2016 .1
2017 .slice_mut(s![.., ..CONTEXT_INDEX_MULTIBRANCH])
2018 .sum_axis(Axis(1));
2019 alignfold_probs
2020 .context_profs_pair
2021 .1
2022 .slice_mut(s![.., CONTEXT_INDEX_MULTIBRANCH])
2023 .assign(&fold);
2024 }
2025 if produces_match_probs {
2026 for (pos_pair, x) in alignfold_probs.loopmatch_probs.iter_mut() {
2027 match alignfold_probs.match_probs.get_mut(pos_pair) {
2028 Some(y) => {
2029 logsumexp(y, *x);
2030 *y = expf(*y);
2031 }
2032 None => {
2033 alignfold_probs.match_probs.insert(*pos_pair, expf(*x));
2034 }
2035 }
2036 *x = expf(*x);
2037 }
2038 for x in alignfold_probs.pairmatch_probs.values_mut() {
2039 *x = expf(*x);
2040 }
2041 }
2042 }
2043 alignfold_probs
2044}
2045
2046pub fn pair_probs2avg_probs<T>(
2047 alignfold_probs_hashed_ids: &AlignfoldProbsHashedIds<T>,
2048 rna_id: RnaId,
2049 num_rnas: usize,
2050 unpair_probs_len: usize,
2051 produces_context_profs: bool,
2052) -> AlignfoldProbMatsAvg<T>
2053where
2054 T: HashIndex,
2055{
2056 let weight = 1. / (num_rnas - 1) as Prob;
2057 let mut alignfold_probs_avg = AlignfoldProbMatsAvg::new(unpair_probs_len);
2058 for rna_id2 in 0..num_rnas {
2059 if rna_id == rna_id2 {
2060 continue;
2061 }
2062 let rna_id_pair = if rna_id < rna_id2 {
2063 (rna_id, rna_id2)
2064 } else {
2065 (rna_id2, rna_id)
2066 };
2067 let alignfold_probs = &alignfold_probs_hashed_ids[&rna_id_pair];
2068 let basepair_probs = if rna_id < rna_id2 {
2069 &alignfold_probs.basepair_probs_pair.0
2070 } else {
2071 &alignfold_probs.basepair_probs_pair.1
2072 };
2073 for (x, &y) in basepair_probs.iter() {
2074 let y = weight * y;
2075 match alignfold_probs_avg.basepair_probs.get_mut(x) {
2076 Some(x) => {
2077 *x += y;
2078 }
2079 None => {
2080 alignfold_probs_avg.basepair_probs.insert(*x, y);
2081 }
2082 }
2083 }
2084 if produces_context_profs {
2085 let context_profs = if rna_id < rna_id2 {
2086 &alignfold_probs.context_profs_pair.0
2087 } else {
2088 &alignfold_probs.context_profs_pair.1
2089 };
2090 alignfold_probs_avg.context_profs =
2091 alignfold_probs_avg.context_profs + weight * context_profs;
2092 }
2093 }
2094 alignfold_probs_avg
2095}
2096
2097pub fn get_max_basepair_span<T>(basepair_probs: &SparseProbMat<T>) -> T
2098where
2099 T: HashIndex,
2100{
2101 let max_basepair_span = basepair_probs.keys().map(|x| x.1 - x.0 + T::one()).max();
2102 match max_basepair_span {
2103 Some(x) => x,
2104 None => T::zero(),
2105 }
2106}
2107
2108pub fn filter_basepair_probs<T>(
2109 basepair_probs: &SparseProbMat<T>,
2110 min_basepair_prob: Prob,
2111) -> SparseProbMat<T>
2112where
2113 T: HashIndex,
2114{
2115 basepair_probs
2116 .iter()
2117 .filter(|(_, &x)| x >= min_basepair_prob)
2118 .map(|(x, &y)| ((x.0 + T::one(), x.1 + T::one()), y))
2119 .collect()
2120}
2121
2122pub fn filter_match_probs<T>(match_probs: &ProbMat, min_match_prob: Prob) -> SparseProbMat<T>
2123where
2124 T: HashIndex,
2125{
2126 let mut match_probs_sparse = SparseProbMat::<T>::default();
2127 for (i, x) in match_probs.iter().enumerate() {
2128 let i = T::from_usize(i).unwrap();
2129 for (j, &x) in x.iter().enumerate() {
2130 if x >= min_match_prob {
2131 let j = T::from_usize(j).unwrap();
2132 match_probs_sparse.insert((i, j), x);
2133 }
2134 }
2135 }
2136 match_probs_sparse
2137}
2138
2139pub fn consprob<'a, T>(
2140 thread_pool: &mut Pool,
2141 seqs: &SeqSlices<'a>,
2142 min_basepair_prob: Prob,
2143 min_match_prob: Prob,
2144 produces_context_profs: bool,
2145 produces_match_probs: bool,
2146 align_scores: &AlignScores,
2147) -> (ProbMatSetsAvg<T>, MatchProbsHashedIds<T>)
2148where
2149 T: HashIndex,
2150{
2151 let num_seqs = seqs.len();
2152 let mut basepair_prob_mats = vec![SparseProbMat::<T>::new(); num_seqs];
2153 let mut max_basepair_spans = vec![T::zero(); num_seqs];
2154 let mut fold_score_sets = vec![FoldScores::<T>::new(); num_seqs];
2155 let uses_contra_model = false;
2156 let allows_short_hairpins = false;
2157 thread_pool.scoped(|scope| {
2158 for (x, y, z, a) in multizip((
2159 basepair_prob_mats.iter_mut(),
2160 max_basepair_spans.iter_mut(),
2161 seqs.iter(),
2162 fold_score_sets.iter_mut(),
2163 )) {
2164 let b = z.len();
2165 scope.execute(move || {
2166 let (c, d) = mccaskill_algo(
2167 &z[1..b - 1],
2168 uses_contra_model,
2169 allows_short_hairpins,
2170 &FoldScoreSets::new(0.),
2171 );
2172 *x = filter_basepair_probs::<T>(&c, min_basepair_prob);
2173 *a = filter_fold_scores(&d, x);
2174 *y = get_max_basepair_span::<T>(x);
2175 });
2176 }
2177 });
2178 let mut alignfold_probs_hashed_ids = AlignfoldProbsHashedIds::<T>::default();
2179 let mut match_probs_hashed_ids = SparseProbsHashedIds::<T>::default();
2180 for x in 0..num_seqs {
2181 for y in x + 1..num_seqs {
2182 let y = (x, y);
2183 alignfold_probs_hashed_ids.insert(y, AlignfoldProbMats::<T>::origin());
2184 match_probs_hashed_ids.insert(y, SparseProbMat::<T>::default());
2185 }
2186 }
2187 thread_pool.scoped(|x| {
2188 for (y, z) in match_probs_hashed_ids.iter_mut() {
2189 let y = (seqs[y.0], seqs[y.1]);
2190 x.execute(move || {
2191 *z = filter_match_probs(&durbin_algo(&y, align_scores), min_match_prob);
2192 });
2193 }
2194 });
2195 thread_pool.scoped(|x| {
2196 for (y, z) in alignfold_probs_hashed_ids.iter_mut() {
2197 let seq_pair = (seqs[y.0], seqs[y.1]);
2198 let max_basepair_span_pair = (max_basepair_spans[y.0], max_basepair_spans[y.1]);
2199 let basepair_probs_pair = (&basepair_prob_mats[y.0], &basepair_prob_mats[y.1]);
2200 let fold_scores_pair = (&fold_score_sets[y.0], &fold_score_sets[y.1]);
2201 let seq_len_pair = (
2202 T::from_usize(seq_pair.0.len()).unwrap(),
2203 T::from_usize(seq_pair.1.len()).unwrap(),
2204 );
2205 let match_probs = &match_probs_hashed_ids[y];
2206 let (
2207 forward_pos_pairs,
2208 backward_pos_pairs,
2209 pos_quad_mat,
2210 pos_quads_hashed_lens,
2211 matchable_poss,
2212 matchable_poss2,
2213 ) = get_sparse_poss(&basepair_probs_pair, match_probs, &seq_len_pair);
2214 x.execute(move || {
2215 let alignfold_scores =
2216 AlignfoldScores::<T>::new(&seq_pair, &pos_quad_mat, match_probs, align_scores);
2217 *z = consprob_core::<T>((
2218 &seq_len_pair,
2219 &alignfold_scores,
2220 &max_basepair_span_pair,
2221 &fold_scores_pair,
2222 produces_context_profs,
2223 &forward_pos_pairs,
2224 &backward_pos_pairs,
2225 &pos_quads_hashed_lens,
2226 produces_match_probs,
2227 &matchable_poss,
2228 &matchable_poss2,
2229 align_scores,
2230 ));
2231 });
2232 }
2233 });
2234 let mut alignfold_prob_mats_avg = vec![AlignfoldProbMatsAvg::<T>::origin(); num_seqs];
2235 thread_pool.scoped(|x| {
2236 let y = &alignfold_probs_hashed_ids;
2237 for (z, a) in alignfold_prob_mats_avg.iter_mut().enumerate() {
2238 let b = seqs[z].len();
2239 x.execute(move || {
2240 *a = pair_probs2avg_probs::<T>(y, z, num_seqs, b, produces_context_profs);
2241 });
2242 }
2243 });
2244 let mut match_probs_hashed_ids = MatchProbsHashedIds::<T>::default();
2245 if produces_match_probs {
2246 for x in 0..num_seqs {
2247 for y in x + 1..num_seqs {
2248 let y = (x, y);
2249 let z = &alignfold_probs_hashed_ids[&y];
2250 let mut a = MatchProbMats::<T>::new();
2251 a.loopmatch_probs = z.loopmatch_probs.clone();
2252 a.pairmatch_probs = z.pairmatch_probs.clone();
2253 a.match_probs = z.match_probs.clone();
2254 match_probs_hashed_ids.insert(y, a);
2255 }
2256 }
2257 }
2258 (alignfold_prob_mats_avg, match_probs_hashed_ids)
2259}
2260
2261pub fn filter_fold_scores<T>(
2262 fold_scores_dense: &FoldScores<T>,
2263 basepair_prob_mat: &SparseProbMat<T>,
2264) -> FoldScores<T>
2265where
2266 T: HashIndex,
2267{
2268 let mut fold_scores = FoldScores::new();
2269 fold_scores.hairpin_scores = fold_scores_dense
2270 .hairpin_scores
2271 .iter()
2272 .map(|(x, &y)| ((x.0 + T::one(), x.1 + T::one()), y))
2273 .filter(|(x, _)| basepair_prob_mat.contains_key(x))
2274 .collect();
2275 fold_scores.twoloop_scores = fold_scores_dense
2276 .twoloop_scores
2277 .iter()
2278 .map(|(x, &y)| {
2279 (
2280 (
2281 x.0 + T::one(),
2282 x.1 + T::one(),
2283 x.2 + T::one(),
2284 x.3 + T::one(),
2285 ),
2286 y,
2287 )
2288 })
2289 .filter(|(x, _)| {
2290 basepair_prob_mat.contains_key(&(x.0, x.1)) && basepair_prob_mat.contains_key(&(x.2, x.3))
2291 })
2292 .collect();
2293 fold_scores.multibranch_close_scores = fold_scores_dense
2294 .multibranch_close_scores
2295 .iter()
2296 .map(|(x, &y)| ((x.0 + T::one(), x.1 + T::one()), y))
2297 .filter(|(x, _)| basepair_prob_mat.contains_key(x))
2298 .collect();
2299 fold_scores.accessible_scores = fold_scores_dense
2300 .accessible_scores
2301 .iter()
2302 .map(|(x, &y)| ((x.0 + T::one(), x.1 + T::one()), y))
2303 .filter(|(x, _)| basepair_prob_mat.contains_key(x))
2304 .collect();
2305 fold_scores
2306}
2307
2308pub fn write_alignfold_prob_mats<T>(
2309 output_dir_path: &Path,
2310 alignfold_prob_mats_avg: &ProbMatSetsAvg<T>,
2311 match_probs_hashed_ids: &MatchProbsHashedIds<T>,
2312 produces_context_profs: bool,
2313 produces_match_probs: bool,
2314) where
2315 T: HashIndex,
2316{
2317 if !output_dir_path.exists() {
2318 let _ = create_dir(output_dir_path);
2319 }
2320 let basepair_probs_file_path = output_dir_path.join(BASEPAIR_PROBS_FILE);
2321 let mut writer_basepair_prob = BufWriter::new(File::create(basepair_probs_file_path).unwrap());
2322 let mut buf_basepair_prob = String::new();
2323 for (rna_id, alignfold_probs_avg) in alignfold_prob_mats_avg.iter().enumerate() {
2324 let mut buf_rna_id = format!("\n\n>{}\n", rna_id);
2325 for (x, &y) in alignfold_probs_avg.basepair_probs.iter() {
2326 buf_rna_id.push_str(&format!("{},{},{} ", x.0 - T::one(), x.1 - T::one(), y));
2327 }
2328 buf_basepair_prob.push_str(&buf_rna_id);
2329 }
2330 let _ = writer_basepair_prob.write_all(buf_basepair_prob.as_bytes());
2331 if produces_context_profs {
2332 let basepair_prob_file_path2 = output_dir_path.join(BASEPAIR_PROBS_FILE2);
2333 let mut writer_basepair_prob2 = BufWriter::new(File::create(basepair_prob_file_path2).unwrap());
2334 let unpair_prob_file_path = output_dir_path.join(UNPAIR_PROBS_FILE_HAIRPIN);
2335 let mut writer_unpair_prob_hairpin =
2336 BufWriter::new(File::create(unpair_prob_file_path).unwrap());
2337 let unpair_prob_file_path = output_dir_path.join(UNPAIR_PROBS_FILE_BULGE);
2338 let mut writer_unpair_prob_bulge = BufWriter::new(File::create(unpair_prob_file_path).unwrap());
2339 let unpair_prob_file_path = output_dir_path.join(UNPAIR_PROBS_FILE_INTERIOR);
2340 let mut writer_unpair_prob_interior =
2341 BufWriter::new(File::create(unpair_prob_file_path).unwrap());
2342 let unpair_prob_file_path = output_dir_path.join(UNPAIR_PROBS_FILE_MULTIBRANCH);
2343 let mut writer_unpair_prob_multibranch =
2344 BufWriter::new(File::create(unpair_prob_file_path).unwrap());
2345 let unpair_prob_file_path = output_dir_path.join(UNPAIR_PROBS_FILE_EXTERNAL);
2346 let mut writer_unpair_prob_external =
2347 BufWriter::new(File::create(unpair_prob_file_path).unwrap());
2348 let mut buf_basepair_prob2 = String::new();
2349 let mut buf_unpair_prob_hairpin = String::new();
2350 let mut buf_unpair_prob_bulge = buf_unpair_prob_hairpin.clone();
2351 let mut buf_unpair_prob_interior = buf_unpair_prob_hairpin.clone();
2352 let mut buf_unpair_prob_external = buf_unpair_prob_hairpin.clone();
2353 let mut buf_unpair_prob_multibranch = buf_unpair_prob_hairpin.clone();
2354 for (rna_id, alignfold_probs_avg) in alignfold_prob_mats_avg.iter().enumerate() {
2355 let mut buf_rna_id = format!("\n\n>{}\n", rna_id);
2356 let slice = alignfold_probs_avg
2357 .context_profs
2358 .slice(s![.., CONTEXT_INDEX_BASEPAIR]);
2359 let slice_len = slice.len();
2360 for (i, &x) in slice.iter().enumerate() {
2361 if i == 0 || i == slice_len - 1 {
2362 continue;
2363 }
2364 buf_rna_id.push_str(&format!("{},{} ", i - 1, x));
2365 }
2366 buf_basepair_prob2.push_str(&buf_rna_id);
2367 let mut buf_rna_id = format!("\n\n>{}\n", rna_id);
2368 let slice = alignfold_probs_avg
2369 .context_profs
2370 .slice(s![.., CONTEXT_INDEX_HAIRPIN]);
2371 let slice_len = slice.len();
2372 for (i, &x) in slice.iter().enumerate() {
2373 if i == 0 || i == slice_len - 1 {
2374 continue;
2375 }
2376 buf_rna_id.push_str(&format!("{},{} ", i - 1, x));
2377 }
2378 buf_unpair_prob_hairpin.push_str(&buf_rna_id);
2379 let mut buf_rna_id = format!("\n\n>{}\n", rna_id);
2380 let slice = alignfold_probs_avg
2381 .context_profs
2382 .slice(s![.., CONTEXT_INDEX_BULGE]);
2383 let slice_len = slice.len();
2384 for (i, &x) in slice.iter().enumerate() {
2385 if i == 0 || i == slice_len - 1 {
2386 continue;
2387 }
2388 buf_rna_id.push_str(&format!("{},{} ", i - 1, x));
2389 }
2390 buf_unpair_prob_bulge.push_str(&buf_rna_id);
2391 let mut buf_rna_id = format!("\n\n>{}\n", rna_id);
2392 let slice = alignfold_probs_avg
2393 .context_profs
2394 .slice(s![.., CONTEXT_INDEX_INTERIOR]);
2395 let slice_len = slice.len();
2396 for (i, &x) in slice.iter().enumerate() {
2397 if i == 0 || i == slice_len - 1 {
2398 continue;
2399 }
2400 buf_rna_id.push_str(&format!("{},{} ", i - 1, x));
2401 }
2402 buf_unpair_prob_interior.push_str(&buf_rna_id);
2403 let mut buf_rna_id = format!("\n\n>{}\n", rna_id);
2404 let slice = alignfold_probs_avg
2405 .context_profs
2406 .slice(s![.., CONTEXT_INDEX_MULTIBRANCH]);
2407 let slice_len = slice.len();
2408 for (i, &x) in slice.iter().enumerate() {
2409 if i == 0 || i == slice_len - 1 {
2410 continue;
2411 }
2412 buf_rna_id.push_str(&format!("{},{} ", i - 1, x));
2413 }
2414 buf_unpair_prob_multibranch.push_str(&buf_rna_id);
2415 let mut buf_rna_id = format!("\n\n>{}\n", rna_id);
2416 let slice = alignfold_probs_avg
2417 .context_profs
2418 .slice(s![.., CONTEXT_INDEX_EXTERNAL]);
2419 let slice_len = slice.len();
2420 for (i, &x) in slice.iter().enumerate() {
2421 if i == 0 || i == slice_len - 1 {
2422 continue;
2423 }
2424 buf_rna_id.push_str(&format!("{},{} ", i - 1, x));
2425 }
2426 buf_unpair_prob_external.push_str(&buf_rna_id);
2427 }
2428 let _ = writer_basepair_prob2.write_all(buf_basepair_prob2.as_bytes());
2429 let _ = writer_unpair_prob_hairpin.write_all(buf_unpair_prob_hairpin.as_bytes());
2430 let _ = writer_unpair_prob_bulge.write_all(buf_unpair_prob_bulge.as_bytes());
2431 let _ = writer_unpair_prob_interior.write_all(buf_unpair_prob_interior.as_bytes());
2432 let _ = writer_unpair_prob_multibranch.write_all(buf_unpair_prob_multibranch.as_bytes());
2433 let _ = writer_unpair_prob_external.write_all(buf_unpair_prob_external.as_bytes());
2434 }
2435 if produces_match_probs {
2436 let loopmatch_probs_file = output_dir_path.join(LOOPMATCH_PROBS_FILE);
2437 let pairmatch_probs_file = output_dir_path.join(PAIRMATCH_PROBS_FILE);
2438 let match_probs_file = output_dir_path.join(MATCH_PROBS_FILE);
2439 let mut writer_loopmatch_prob = BufWriter::new(File::create(loopmatch_probs_file).unwrap());
2440 let mut writer_pairmatch_prob = BufWriter::new(File::create(pairmatch_probs_file).unwrap());
2441 let mut writer_match_prob = BufWriter::new(File::create(match_probs_file).unwrap());
2442 let mut buf_loopmatch_prob = String::new();
2443 let mut buf_pairmatch_prob = String::new();
2444 let mut buf_match_prob = String::new();
2445 for (rna_id_pair, match_probs) in match_probs_hashed_ids.iter() {
2446 let mut buf_rna_id_pair = format!("\n\n>{},{}\n", rna_id_pair.0, rna_id_pair.1);
2447 for (x, &y) in match_probs.loopmatch_probs.iter() {
2448 buf_rna_id_pair.push_str(&format!("{},{},{} ", x.0 - T::one(), x.1 - T::one(), y));
2449 }
2450 buf_loopmatch_prob.push_str(&buf_rna_id_pair);
2451 let mut buf_rna_id_pair = format!("\n\n>{},{}\n", rna_id_pair.0, rna_id_pair.1);
2452 for (x, &y) in match_probs.pairmatch_probs.iter() {
2453 buf_rna_id_pair.push_str(&format!(
2454 "{},{},{},{},{} ",
2455 x.0 - T::one(),
2456 x.1 - T::one(),
2457 x.2 - T::one(),
2458 x.3 - T::one(),
2459 y
2460 ));
2461 }
2462 buf_pairmatch_prob.push_str(&buf_rna_id_pair);
2463 let mut buf_rna_id_pair = format!("\n\n>{},{}\n", rna_id_pair.0, rna_id_pair.1);
2464 for (x, &y) in match_probs.match_probs.iter() {
2465 buf_rna_id_pair.push_str(&format!("{},{},{} ", x.0 - T::one(), x.1 - T::one(), y));
2466 }
2467 buf_match_prob.push_str(&buf_rna_id_pair);
2468 }
2469 let _ = writer_loopmatch_prob.write_all(buf_loopmatch_prob.as_bytes());
2470 let _ = writer_pairmatch_prob.write_all(buf_pairmatch_prob.as_bytes());
2471 let _ = writer_match_prob.write_all(buf_match_prob.as_bytes());
2472 }
2473}
2474
2475pub fn get_sparse_poss<T>(
2476 basepair_probs_pair: &ProbMatPair<T>,
2477 match_probs: &SparseProbMat<T>,
2478 seq_len_pair: &(T, T),
2479) -> OutputsSparsePossGetter<T>
2480where
2481 T: HashIndex,
2482{
2483 let mut forward_pos_pairs = PosPairMatSet::<T>::default();
2484 let mut backward_pos_pairs = PosPairMatSet::<T>::default();
2485 let mut pos_quads = PosQuadMat::<T>::default();
2486 let mut pos_quads_hashed_lens = PosQuadsHashedLens::<T>::default();
2487 let mut matchable_poss = SparsePosSets::<T>::default();
2488 let mut matchable_poss2 = SparsePosSets::<T>::default();
2489 for pos_pair in basepair_probs_pair.0.keys() {
2490 for pos_pair2 in basepair_probs_pair.1.keys() {
2491 let forward_pos_pair = (pos_pair.0, pos_pair2.0);
2492 let backward_pos_pair = (pos_pair.1, pos_pair2.1);
2493 if !match_probs.contains_key(&forward_pos_pair) {
2494 continue;
2495 }
2496 if !match_probs.contains_key(&backward_pos_pair) {
2497 continue;
2498 }
2499 match forward_pos_pairs.get_mut(&backward_pos_pair) {
2500 Some(x) => {
2501 x.insert(forward_pos_pair);
2502 }
2503 None => {
2504 let mut x = PosPairMat::<T>::default();
2505 x.insert(forward_pos_pair);
2506 forward_pos_pairs.insert(backward_pos_pair, x);
2507 }
2508 }
2509 match backward_pos_pairs.get_mut(&forward_pos_pair) {
2510 Some(x) => {
2511 x.insert(backward_pos_pair);
2512 }
2513 None => {
2514 let mut x = PosPairMat::<T>::default();
2515 x.insert(backward_pos_pair);
2516 backward_pos_pairs.insert(forward_pos_pair, x);
2517 }
2518 }
2519 pos_quads.insert((pos_pair.0, pos_pair.1, pos_pair2.0, pos_pair2.1));
2520 let substr_len_pair = (
2521 pos_pair.1 - pos_pair.0 + T::one(),
2522 pos_pair2.1 - pos_pair2.0 + T::one(),
2523 );
2524 let left_pos_pair = (pos_pair.0, pos_pair2.0);
2525 match pos_quads_hashed_lens.get_mut(&substr_len_pair) {
2526 Some(x) => {
2527 x.insert(left_pos_pair);
2528 }
2529 None => {
2530 let mut x = PosPairMat::<T>::default();
2531 x.insert(left_pos_pair);
2532 pos_quads_hashed_lens.insert(substr_len_pair, x);
2533 }
2534 }
2535 }
2536 }
2537 for x in match_probs.keys() {
2538 match matchable_poss.get_mut(&x.0) {
2539 Some(y) => {
2540 y.insert(x.1);
2541 }
2542 None => {
2543 let mut y = SparsePoss::<T>::default();
2544 y.insert(x.1);
2545 matchable_poss.insert(x.0, y);
2546 }
2547 }
2548 match matchable_poss2.get_mut(&x.1) {
2549 Some(y) => {
2550 y.insert(x.0);
2551 }
2552 None => {
2553 let mut y = SparsePoss::<T>::default();
2554 y.insert(x.0);
2555 matchable_poss2.insert(x.1, y);
2556 }
2557 }
2558 }
2559 let mut matchable_poss_leftmost = SparsePoss::<T>::default();
2560 matchable_poss_leftmost.insert(T::zero());
2561 matchable_poss.insert(T::zero(), matchable_poss_leftmost.clone());
2562 matchable_poss2.insert(T::zero(), matchable_poss_leftmost);
2563 let mut matchable_poss_rightmost = SparsePoss::<T>::default();
2564 matchable_poss_rightmost.insert(seq_len_pair.1 - T::one());
2565 matchable_poss.insert(seq_len_pair.0 - T::one(), matchable_poss_rightmost);
2566 let mut matchable_poss_rightmost = SparsePoss::<T>::default();
2567 matchable_poss_rightmost.insert(seq_len_pair.0 - T::one());
2568 matchable_poss2.insert(seq_len_pair.1 - T::one(), matchable_poss_rightmost);
2569 (
2570 forward_pos_pairs,
2571 backward_pos_pairs,
2572 pos_quads,
2573 pos_quads_hashed_lens,
2574 matchable_poss,
2575 matchable_poss2,
2576 )
2577}
2578
2579pub fn write_readme(output_dir_path: &Path, readme_contents: &String) {
2580 let readme_file_path = output_dir_path.join(README_FILE);
2581 let mut writer = BufWriter::new(File::create(readme_file_path).unwrap());
2582 let buf = String::from(readme_contents);
2583 let _ = writer.write_all(buf.as_bytes());
2584}