rna_algos/
utils.rs

1pub use bio::io::fasta::Reader;
2pub use hashbrown::HashMap;
3pub use itertools::multizip;
4pub use num::{
5  range, range_inclusive, Bounded, FromPrimitive, Integer, One, PrimInt, ToPrimitive, Unsigned,
6  Zero,
7};
8pub use rna_ss_params::compiled_scores_contra::*;
9pub use rna_ss_params::compiled_scores_turner::*;
10pub use rna_ss_params::utils::*;
11pub use scoped_threadpool::Pool;
12pub use std::cmp::{max, min};
13pub use std::env;
14pub use std::f32::NEG_INFINITY;
15pub use std::fmt::Display;
16pub use std::fs::create_dir;
17pub use std::fs::File;
18pub use std::hash::Hash;
19pub use std::io::prelude::*;
20pub use std::io::{BufReader, BufWriter};
21pub use std::path::Path;
22pub use std::str::from_utf8_unchecked;
23
24pub trait HashIndex:
25  Unsigned
26  + PrimInt
27  + Hash
28  + FromPrimitive
29  + ToPrimitive
30  + Clone
31  + Integer
32  + Eq
33  + One
34  + Ord
35  + Display
36  + Sync
37  + Send
38{
39}
40impl<T: Unsigned + PrimInt + Hash + FromPrimitive + Integer + One + Ord + Display + Sync + Send>
41  HashIndex for T
42{
43}
44
45pub type PosPair<T> = (T, T);
46pub type PosQuad<T> = (T, T, T, T);
47type Arg = String;
48pub type Args = Vec<Arg>;
49pub type FastaId = String;
50#[derive(Clone)]
51pub struct FastaRecord {
52  pub fasta_id: FastaId,
53  pub seq: Seq,
54}
55#[derive(Debug)]
56pub struct Align<T> {
57  pub cols: Cols,
58  pub pos_map_sets: PosMapSets<T>,
59}
60pub type PosMaps<T> = Vec<T>;
61pub type PosMapSets<T> = Vec<PosMaps<T>>;
62pub type FastaRecords = Vec<FastaRecord>;
63pub type SeqSlice<'a> = &'a [Base];
64pub type NumThreads = u32;
65pub type SparseProbMat<T> = HashMap<PosPair<T>, Prob>;
66pub type SeqId = String;
67pub type SeqIds = Vec<SeqId>;
68pub type Col = Vec<Base>;
69pub type Cols = Vec<Col>;
70pub type Sum4dMat<T> = HashMap<PosQuad<T>, Sum>;
71pub type SparseSumMat<T> = HashMap<PosPair<T>, Sum>;
72pub type PosPairs<T> = Vec<PosPair<T>>;
73pub type FoldChar = u8;
74pub type FoldStr = Vec<FoldChar>;
75pub type Seq = Vec<Base>;
76pub type SeqPair<'a> = (SeqSlice<'a>, SeqSlice<'a>);
77pub type Prob = f32;
78pub type Sum = Prob;
79pub type Probs = Vec<Prob>;
80pub type ProbMat = Vec<Probs>;
81pub type Sums = Vec<Sum>;
82pub type SumMat = Vec<Sums>;
83pub type Pos = usize;
84pub type Base = usize;
85pub type MatchScores = [[Score; NUM_BASES]; NUM_BASES];
86pub type InsertScores = [Score; NUM_BASES];
87pub type RnaId = usize;
88pub type RnaIdPair = (RnaId, RnaId);
89pub type ProbMatsHashedIds = HashMap<RnaIdPair, ProbMat>;
90
91#[derive(Clone, Debug)]
92pub struct FoldScoreSets {
93  // The CONTRAfold model.
94  pub hairpin_scores_len: HairpinScoresLen,
95  pub bulge_scores_len: BulgeScoresLen,
96  pub interior_scores_len: InteriorScoresLen,
97  pub interior_scores_symmetric: InteriorScoresSymmetric,
98  pub interior_scores_asymmetric: InteriorScoresAsymmetric,
99  pub stack_scores: StackScores,
100  pub terminal_mismatch_scores: TerminalMismatchScores,
101  pub dangling_scores_left: DanglingScores,
102  pub dangling_scores_right: DanglingScores,
103  pub helix_close_scores: HelixCloseScores,
104  pub basepair_scores: BasepairScores,
105  pub interior_scores_explicit: InteriorScoresExplicit,
106  pub bulge_scores_0x1: BulgeScores0x1,
107  pub interior_scores_1x1: InteriorScores1x1Contra,
108  pub multibranch_score_base: Score,
109  pub multibranch_score_basepair: Score,
110  pub multibranch_score_unpair: Score,
111  pub external_score_basepair: Score,
112  pub external_score_unpair: Score,
113  // The cumulative parameters of the CONTRAfold model.
114  pub hairpin_scores_len_cumulative: HairpinScoresLen,
115  pub bulge_scores_len_cumulative: BulgeScoresLen,
116  pub interior_scores_len_cumulative: InteriorScoresLen,
117  pub interior_scores_symmetric_cumulative: InteriorScoresSymmetric,
118  pub interior_scores_asymmetric_cumulative: InteriorScoresAsymmetric,
119}
120
121pub const LOGSUMEXP_THRESHOLD_UPPER: Score = 11.862_479;
122pub const PSEUDO_BASE: Base = U + 1 as Base;
123pub const UNPAIR: FoldChar = b'.';
124pub const BASEPAIR_LEFT: FoldChar = b'(';
125pub const BASEPAIR_RIGHT: FoldChar = b')';
126pub const EXAMPLE_FASTA_FILE_PATH: &str = "assets/sampled_trnas.fa";
127pub const EPSILON: Prob = 0.00_1;
128pub const PROB_BOUND_LOWER: Prob = -EPSILON;
129pub const PROB_BOUND_UPPER: Prob = 1. + EPSILON;
130
131impl FastaRecord {
132  pub fn origin() -> FastaRecord {
133    FastaRecord {
134      fasta_id: FastaId::new(),
135      seq: Seq::new(),
136    }
137  }
138
139  pub fn new(fasta_id: FastaId, seq: Seq) -> FastaRecord {
140    FastaRecord {
141      fasta_id: fasta_id,
142      seq: seq,
143    }
144  }
145}
146
147impl<T> Default for Align<T> {
148  fn default() -> Self {
149    Self::new()
150  }
151}
152
153impl<T> Align<T> {
154  pub fn new() -> Align<T> {
155    Align {
156      cols: Cols::new(),
157      pos_map_sets: PosMapSets::<T>::new(),
158    }
159  }
160}
161
162pub fn has_canonical_basepair(x: &Basepair) -> bool {
163  matches!(*x, AU | CG | GC | GU | UA | UG)
164}
165
166pub fn get_hairpin_score(seq: SeqSlice, pos_pair_close: &(usize, usize)) -> Score {
167  let hairpin = &seq[pos_pair_close.0..pos_pair_close.1 + 1];
168  let special_hairpin_score = get_special_hairpin_score(hairpin);
169  if special_hairpin_score > NEG_INFINITY {
170    special_hairpin_score
171  } else {
172    let hairpin_len = pos_pair_close.1 - pos_pair_close.0 - 1;
173    let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
174    let hairpin_score = if hairpin_len == MIN_HAIRPIN_LEN {
175      HAIRPIN_SCORES_INIT[hairpin_len]
176    } else {
177      let terminal_mismatch = (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]);
178      let hairpin_score_init = if hairpin_len <= MAX_HAIRPIN_LEN_EXTRAPOLATION {
179        HAIRPIN_SCORES_INIT[hairpin_len]
180      } else {
181        HAIRPIN_SCORES_INIT[MIN_HAIRPIN_LEN_EXTRAPOLATION - 1]
182          + COEFF_HAIRPIN_LEN_EXTRAPOLATION
183            * (hairpin_len as Score / (MIN_HAIRPIN_LEN_EXTRAPOLATION - 1) as Score).ln()
184      };
185      hairpin_score_init
186        + TERMINAL_MISMATCH_SCORES_HAIRPIN[basepair_close.0][basepair_close.1][terminal_mismatch.0]
187          [terminal_mismatch.1]
188    };
189    hairpin_score
190      + if matches_augu(&basepair_close) {
191        HELIX_AUGU_END_PENALTY
192      } else {
193        0.
194      }
195  }
196}
197
198pub fn get_special_hairpin_score(seq: SeqSlice) -> Score {
199  for x in HAIRPIN_SCORES_SPECIAL.iter() {
200    if x.0 == seq {
201      return x.1;
202    }
203  }
204  NEG_INFINITY
205}
206
207pub fn get_2loop_score(
208  seq: SeqSlice,
209  pos_pair_close: &(usize, usize),
210  pos_pair_accessible: &(usize, usize),
211) -> Score {
212  if pos_pair_close.0 + 1 == pos_pair_accessible.0 && pos_pair_close.1 - 1 == pos_pair_accessible.1
213  {
214    get_stack_score(seq, pos_pair_close, pos_pair_accessible)
215  } else if pos_pair_close.0 + 1 == pos_pair_accessible.0
216    || pos_pair_close.1 - 1 == pos_pair_accessible.1
217  {
218    get_bulge_score(seq, pos_pair_close, pos_pair_accessible)
219  } else {
220    get_interior_score(seq, pos_pair_close, pos_pair_accessible)
221  }
222}
223
224fn get_stack_score(
225  seq: SeqSlice,
226  pos_pair_close: &(usize, usize),
227  pos_pair_accessible: &(usize, usize),
228) -> Score {
229  let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
230  let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
231  STACK_SCORES[basepair_close.0][basepair_close.1][basepair_accessible.0][basepair_accessible.1]
232}
233
234fn get_bulge_score(
235  seq: SeqSlice,
236  pos_pair_close: &(usize, usize),
237  pos_pair_accessible: &(usize, usize),
238) -> Score {
239  let bulge_len =
240    pos_pair_accessible.0 - pos_pair_close.0 + pos_pair_close.1 - pos_pair_accessible.1 - 2;
241  if bulge_len == 1 {
242    BULGE_SCORES_INIT[bulge_len] + get_stack_score(seq, pos_pair_close, pos_pair_accessible)
243  } else {
244    let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
245    let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
246    BULGE_SCORES_INIT[bulge_len]
247      + if matches_augu(&basepair_close) {
248        HELIX_AUGU_END_PENALTY
249      } else {
250        0.
251      }
252      + if matches_augu(&basepair_accessible) {
253        HELIX_AUGU_END_PENALTY
254      } else {
255        0.
256      }
257  }
258}
259
260fn get_interior_score(
261  seq: SeqSlice,
262  pos_pair_close: &(usize, usize),
263  pos_pair_accessible: &(usize, usize),
264) -> Score {
265  let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
266  let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
267  let loop_len_pair = (
268    pos_pair_accessible.0 - pos_pair_close.0 - 1,
269    pos_pair_close.1 - pos_pair_accessible.1 - 1,
270  );
271  let interior_len = loop_len_pair.0 + loop_len_pair.1;
272  match loop_len_pair {
273    (1, 1) => {
274      let interior = (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]);
275      INTERIOR_SCORES_1X1[basepair_close.0][basepair_close.1][interior.0][interior.1]
276        [basepair_accessible.0][basepair_accessible.1]
277    }
278    (1, 2) => {
279      let interior = (
280        (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]),
281        seq[pos_pair_close.1 - 2],
282      );
283      INTERIOR_SCORES_1X2[basepair_close.0][basepair_close.1][(interior.0).0][(interior.0).1]
284        [interior.1][basepair_accessible.0][basepair_accessible.1]
285    }
286    (2, 1) => {
287      let interior = (
288        (seq[pos_pair_close.1 - 1], seq[pos_pair_close.0 + 2]),
289        seq[pos_pair_close.0 + 1],
290      );
291      let basepair_accessible_inverse = invert_basepair(&basepair_accessible);
292      let basepair_close_inverse = invert_basepair(&basepair_close);
293      INTERIOR_SCORES_1X2[basepair_accessible_inverse.0][basepair_accessible_inverse.1]
294        [(interior.0).0][(interior.0).1][interior.1][basepair_close_inverse.0]
295        [basepair_close_inverse.1]
296    }
297    (2, 2) => {
298      let interior = (
299        (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]),
300        (seq[pos_pair_close.0 + 2], seq[pos_pair_close.1 - 2]),
301      );
302      INTERIOR_SCORES_2X2[basepair_close.0][basepair_close.1][(interior.0).0][(interior.0).1]
303        [(interior.1).0][(interior.1).1][basepair_accessible.0][basepair_accessible.1]
304    }
305    _ => {
306      INTERIOR_SCORES_INIT[interior_len]
307        + (NINIO_COEFF * get_abs_diff(loop_len_pair.0, loop_len_pair.1) as Score).max(NINIO_MAX)
308        + get_interior_mismatch_score(seq, pos_pair_close, pos_pair_accessible, &loop_len_pair)
309        + if matches_augu(&basepair_close) {
310          HELIX_AUGU_END_PENALTY
311        } else {
312          0.
313        }
314        + if matches_augu(&basepair_accessible) {
315          HELIX_AUGU_END_PENALTY
316        } else {
317          0.
318        }
319    }
320  }
321}
322
323pub fn invert_basepair(x: &Basepair) -> Basepair {
324  (x.1, x.0)
325}
326
327pub fn get_abs_diff(x: usize, y: usize) -> usize {
328  max(x, y) - min(x, y)
329}
330
331fn get_interior_mismatch_score(
332  seq: SeqSlice,
333  pos_pair_close: &(usize, usize),
334  pos_pair_accessible: &(usize, usize),
335  loop_len_pair: &(usize, usize),
336) -> Score {
337  let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
338  let basepair_accessible = (seq[pos_pair_accessible.1], seq[pos_pair_accessible.0]);
339  let terminal_mismatch = (
340    (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]),
341    (
342      seq[pos_pair_accessible.1 + 1],
343      seq[pos_pair_accessible.0 - 1],
344    ),
345  );
346  match *loop_len_pair {
347    (1, _) | (_, 1) => {
348      TERMINAL_MISMATCH_SCORES_1XMANY[basepair_close.0][basepair_close.1][(terminal_mismatch.0).0]
349        [(terminal_mismatch.0).1]
350        + TERMINAL_MISMATCH_SCORES_1XMANY[basepair_accessible.0][basepair_accessible.1]
351          [(terminal_mismatch.1).0][(terminal_mismatch.1).1]
352    }
353    (2, 3) | (3, 2) => {
354      TERMINAL_MISMATCH_SCORES_2X3[basepair_close.0][basepair_close.1][(terminal_mismatch.0).0]
355        [(terminal_mismatch.0).1]
356        + TERMINAL_MISMATCH_SCORES_2X3[basepair_accessible.0][basepair_accessible.1]
357          [(terminal_mismatch.1).0][(terminal_mismatch.1).1]
358    }
359    _ => {
360      TERMINAL_MISMATCH_SCORES_INTERIOR[basepair_close.0][basepair_close.1][(terminal_mismatch.0).0]
361        [(terminal_mismatch.0).1]
362        + TERMINAL_MISMATCH_SCORES_INTERIOR[basepair_accessible.0][basepair_accessible.1]
363          [(terminal_mismatch.1).0][(terminal_mismatch.1).1]
364    }
365  }
366}
367
368pub fn get_multibranch_close_score(seq: SeqSlice, pos_pair_close: &(usize, usize)) -> Score {
369  let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
370  let basepair_close_inverse = invert_basepair(&basepair_close);
371  let basepair_stack_inverse =
372    invert_basepair(&(seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]));
373  let terminal_mismatch_score = TERMINAL_MISMATCH_SCORES_MULTIBRANCH[basepair_close_inverse.0]
374    [basepair_close_inverse.1][basepair_stack_inverse.0][basepair_stack_inverse.1];
375  INIT_MULTIBRANCH_BASE
376    + terminal_mismatch_score
377    + if matches_augu(&basepair_close) {
378      HELIX_AUGU_END_PENALTY
379    } else {
380      0.
381    }
382}
383
384pub fn get_accessible_score(
385  seq: SeqSlice,
386  pos_pair_accessible: &(usize, usize),
387  uses_sentinel_bases: bool,
388) -> Score {
389  let seq_len = seq.len();
390  let end_5prime = usize::from(uses_sentinel_bases);
391  let end_3prime = seq_len - if uses_sentinel_bases { 2 } else { 1 };
392  let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
393  let score = if pos_pair_accessible.0 > end_5prime && pos_pair_accessible.1 < end_3prime {
394    TERMINAL_MISMATCH_SCORES_MULTIBRANCH[basepair_accessible.0][basepair_accessible.1]
395      [seq[pos_pair_accessible.0 - 1]][seq[pos_pair_accessible.1 + 1]]
396  } else if pos_pair_accessible.0 > end_5prime {
397    DANGLING_SCORES_5PRIME[basepair_accessible.0][basepair_accessible.1]
398      [seq[pos_pair_accessible.0 - 1]]
399  } else if pos_pair_accessible.1 < end_3prime {
400    DANGLING_SCORES_3PRIME[basepair_accessible.0][basepair_accessible.1]
401      [seq[pos_pair_accessible.1 + 1]]
402  } else {
403    0.
404  };
405  score
406    + if matches_augu(&basepair_accessible) {
407      HELIX_AUGU_END_PENALTY
408    } else {
409      0.
410    }
411}
412
413pub fn get_hairpin_score_contra(
414  seq: SeqSlice,
415  pos_pair_close: &(usize, usize),
416  fold_score_sets: &FoldScoreSets,
417) -> Score {
418  let hairpin_len = pos_pair_close.1 - pos_pair_close.0 - 1;
419  fold_score_sets.hairpin_scores_len_cumulative[hairpin_len.min(MAX_LOOP_LEN)]
420    + get_junction_score_single(seq, pos_pair_close, fold_score_sets)
421}
422
423pub fn get_2loop_score_contra(
424  seq: SeqSlice,
425  pos_pair_close: &(usize, usize),
426  pos_pair_accessible: &(usize, usize),
427  fold_score_sets: &FoldScoreSets,
428) -> Score {
429  let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
430  let score = if pos_pair_close.0 + 1 == pos_pair_accessible.0
431    && pos_pair_close.1 - 1 == pos_pair_accessible.1
432  {
433    get_stack_score_contra(seq, pos_pair_close, pos_pair_accessible, fold_score_sets)
434  } else if pos_pair_close.0 + 1 == pos_pair_accessible.0
435    || pos_pair_close.1 - 1 == pos_pair_accessible.1
436  {
437    get_bulge_score_contra(seq, pos_pair_close, pos_pair_accessible, fold_score_sets)
438  } else {
439    get_interior_score_contra(seq, pos_pair_close, pos_pair_accessible, fold_score_sets)
440  };
441  score + fold_score_sets.basepair_scores[basepair_accessible.0][basepair_accessible.1]
442}
443
444pub fn get_stack_score_contra(
445  seq: SeqSlice,
446  pos_pair_close: &(usize, usize),
447  pos_pair_accessible: &(usize, usize),
448  fold_score_sets: &FoldScoreSets,
449) -> Score {
450  let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
451  let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
452  fold_score_sets.stack_scores[basepair_close.0][basepair_close.1][basepair_accessible.0]
453    [basepair_accessible.1]
454}
455
456pub fn get_bulge_score_contra(
457  seq: SeqSlice,
458  pos_pair_close: &(usize, usize),
459  pos_pair_accessible: &(usize, usize),
460  fold_score_sets: &FoldScoreSets,
461) -> Score {
462  let bulge_len =
463    pos_pair_accessible.0 - pos_pair_close.0 + pos_pair_close.1 - pos_pair_accessible.1 - 2;
464  let score = if bulge_len == 1 {
465    fold_score_sets.bulge_scores_0x1[if pos_pair_accessible.0 - pos_pair_close.0 - 1 == 1 {
466      seq[pos_pair_close.0 + 1]
467    } else {
468      seq[pos_pair_close.1 - 1]
469    }]
470  } else {
471    0.
472  };
473  score
474    + fold_score_sets.bulge_scores_len_cumulative[bulge_len - 1]
475    + get_junction_score_single(seq, pos_pair_close, fold_score_sets)
476    + get_junction_score_single(
477      seq,
478      &(pos_pair_accessible.1, pos_pair_accessible.0),
479      fold_score_sets,
480    )
481}
482
483pub fn get_interior_score_contra(
484  seq: SeqSlice,
485  pos_pair_close: &(usize, usize),
486  pos_pair_accessible: &(usize, usize),
487  fold_score_sets: &FoldScoreSets,
488) -> Score {
489  let loop_len_pair = (
490    pos_pair_accessible.0 - pos_pair_close.0 - 1,
491    pos_pair_close.1 - pos_pair_accessible.1 - 1,
492  );
493  let interior_len = loop_len_pair.0 + loop_len_pair.1;
494  let score = if loop_len_pair.0 == loop_len_pair.1 {
495    let score_1x1 = if interior_len == 2 {
496      fold_score_sets.interior_scores_1x1[seq[pos_pair_close.0 + 1]][seq[pos_pair_close.1 - 1]]
497    } else {
498      0.
499    };
500    score_1x1 + fold_score_sets.interior_scores_symmetric_cumulative[loop_len_pair.0 - 1]
501  } else {
502    fold_score_sets.interior_scores_asymmetric_cumulative
503      [get_abs_diff(loop_len_pair.0, loop_len_pair.1) - 1]
504  };
505  let score_explicit =
506    if loop_len_pair.0 <= MAX_INTERIOR_EXPLICIT && loop_len_pair.1 <= MAX_INTERIOR_EXPLICIT {
507      fold_score_sets.interior_scores_explicit[loop_len_pair.0 - 1][loop_len_pair.1 - 1]
508    } else {
509      0.
510    };
511  score
512    + score_explicit
513    + fold_score_sets.interior_scores_len_cumulative[interior_len - 2]
514    + get_junction_score_single(seq, pos_pair_close, fold_score_sets)
515    + get_junction_score_single(
516      seq,
517      &(pos_pair_accessible.1, pos_pair_accessible.0),
518      fold_score_sets,
519    )
520}
521
522pub fn get_junction_score(
523  seq: SeqSlice,
524  pos_pair: &(usize, usize),
525  uses_sentinel_bases: bool,
526  fold_score_sets: &FoldScoreSets,
527) -> Score {
528  let seq_len = seq.len();
529  let basepair = (seq[pos_pair.0], seq[pos_pair.1]);
530  let end_5prime = usize::from(uses_sentinel_bases);
531  let end_3prime = seq_len - if uses_sentinel_bases { 2 } else { 1 };
532  get_helix_close_score(&basepair, fold_score_sets)
533    + if pos_pair.0 < end_3prime {
534      fold_score_sets.dangling_scores_left[basepair.0][basepair.1][seq[pos_pair.0 + 1]]
535    } else {
536      0.
537    }
538    + if pos_pair.1 > end_5prime {
539      fold_score_sets.dangling_scores_right[basepair.0][basepair.1][seq[pos_pair.1 - 1]]
540    } else {
541      0.
542    }
543}
544
545pub fn get_junction_score_single(x: SeqSlice, y: &(usize, usize), z: &FoldScoreSets) -> Score {
546  let a = (x[y.0], x[y.1]);
547  get_helix_close_score(&a, z) + get_terminal_mismatch_score(&a, &(x[y.0 + 1], x[y.1 - 1]), z)
548}
549
550pub fn get_helix_close_score(x: &Basepair, y: &FoldScoreSets) -> Score {
551  y.helix_close_scores[x.0][x.1]
552}
553
554pub fn get_terminal_mismatch_score(x: &Basepair, y: &Basepair, z: &FoldScoreSets) -> Score {
555  z.terminal_mismatch_scores[x.0][x.1][y.0][y.1]
556}
557
558pub fn matches_augu(x: &Basepair) -> bool {
559  *x == AU || *x == UA || *x == GU || *x == UG
560}
561
562pub fn bytes2seq(x: &[Char]) -> Seq {
563  let mut y = Seq::new();
564  for &x in x {
565    let x = match x {
566      A_LOWER | A_UPPER => A,
567      C_LOWER | C_UPPER => C,
568      G_LOWER | G_UPPER => G,
569      U_LOWER | U_UPPER => U,
570      _ => {
571        panic!();
572      }
573    };
574    y.push(x);
575  }
576  y
577}
578
579#[inline]
580pub fn logsumexp(sum: &mut Score, x: Score) {
581  if !x.is_finite() {
582    return;
583  }
584  *sum = if !sum.is_finite() {
585    x
586  } else {
587    let y = sum.min(x);
588    let z = sum.max(x) - y;
589    y + if z >= LOGSUMEXP_THRESHOLD_UPPER {
590      z
591    } else {
592      // Equivalent to z.exp().ln_1p()
593      ln_exp_1p(z)
594    }
595  };
596}
597
598/*
599 * Approximated (x.exp() + 1).ln() from CONTRAfold, eliminating ln() and exp()
600 * (assuming 0 <= x <= LOGSUMEXP_THRESHOLD_UPPER)
601 */
602#[inline]
603pub fn ln_exp_1p(x: Score) -> Score {
604  if x < 3.379_25 {
605    if x < 1.632_015_8 {
606      if x < 0.661_536_75 {
607        ((-0.0065591595 * x + 0.127_644_27) * x + 0.499_655_46) * x + 0.693_154_2
608      } else {
609        ((-0.015_515_756 * x + 0.144_677_56) * x + 0.488_293_98) * x + 0.695_809_3
610      }
611    } else if x < 2.491_258_9 {
612      ((-0.012_890_925 * x + 0.130_102_83) * x + 0.515_039_86) * x + 0.679_558_6
613    } else {
614      ((-0.0072142647 * x + 0.087_754_086) * x + 0.620_870_8) * x + 0.590_967_6
615    }
616  } else if x < 5.789_071 {
617    if x < 4.426_169 {
618      ((-0.0031455354 * x + 0.046_722_945) * x + 0.759_253_2) * x + 0.434_879_45
619    } else {
620      ((-0.0010110698 * x + 0.018_594_341) * x + 0.883_173_05) * x + 0.252_369_55
621    }
622  } else if x < 7.816_272_7 {
623    ((-0.000_196_278 * x + 0.0046084408) * x + 0.963_443_2) * x + 0.098_314_89
624  } else {
625    ((-0.0000113994 * x + 0.0003734731) * x + 0.995_910_7) * x + 0.0149855051
626  }
627}
628
629// Approximated x.exp() from CONTRAfold
630#[inline]
631pub fn expf(x: Score) -> Score {
632  if x < -2.491_503_5 {
633    if x < -5.862_282_3 {
634      if x < -9.91152 {
635        0.
636      } else {
637        ((0.0000803850 * x + 0.002_162_743) * x + 0.019_470_856) * x + 0.058_808_003
638      }
639    } else if x < -3.839_663 {
640      ((0.0013889414 * x + 0.024_467_647) * x + 0.147_129_06) * x + 0.304_275_78
641    } else {
642      ((0.0072335607 * x + 0.090_600_27) * x + 0.398_311_14) * x + 0.624_595_94
643    }
644  } else if x < -0.672_505_3 {
645    if x < -1.480_537_5 {
646      ((0.023_241_036 * x + 0.208_564_6) * x + 0.690_636_8) * x + 0.868_232_25
647    } else {
648      ((0.057_378_277 * x + 0.358_025_85) * x + 0.912_113_3) * x + 0.979_309_2
649    }
650  } else if x < 0. {
651    ((0.119_917_594 * x + 0.481_566_82) * x + 0.997_599_2) * x + 0.999_950_5
652  } else {
653    x.exp()
654  }
655}
656
657pub fn read_align_clustal(clustal_file_path: &Path) -> (Cols, SeqIds) {
658  let mut cols = Cols::new();
659  let mut seq_ids = SeqIds::new();
660  let reader = BufReader::new(File::open(clustal_file_path).unwrap());
661  let mut seq_pointer = 0;
662  let mut pos_pointer = 0;
663  let mut has_read_seq_ids = false;
664  for (i, line) in reader.lines().enumerate() {
665    let line = line.unwrap();
666    if i == 0 || line.is_empty() || line.starts_with(' ') {
667      if !cols.is_empty() {
668        seq_pointer = 0;
669        pos_pointer = cols.len();
670        has_read_seq_ids = true;
671      }
672      continue;
673    }
674    let mut lines = line.split_whitespace();
675    let line = lines.next().unwrap();
676    if !has_read_seq_ids {
677      seq_ids.push(String::from(line));
678    }
679    let line = lines.next().unwrap();
680    if seq_pointer == 0 {
681      for x in line.chars() {
682        cols.push(vec![align_char2base(x as Char)]);
683      }
684      seq_pointer += 1;
685    } else {
686      for (j, x) in line.chars().enumerate() {
687        cols[pos_pointer + j].push(align_char2base(x as Char));
688      }
689    }
690  }
691  (cols, seq_ids)
692}
693
694pub fn read_align_fasta(fasta_file_path: &Path) -> (Cols, SeqIds) {
695  let mut cols = Cols::new();
696  let mut seq_ids = SeqIds::new();
697  let reader = BufReader::new(File::open(fasta_file_path).unwrap());
698  let mut seqs = Vec::<Seq>::new();
699  for (i, split) in reader.split(b'>').enumerate() {
700    let split = String::from_utf8(split.unwrap()).unwrap();
701    if i == 0 {
702      continue;
703    }
704    let splits: Vec<&str> = split.split_whitespace().collect();
705    let seq_id = splits[0];
706    seq_ids.push(SeqId::from(seq_id));
707    let seq = splits[1..].join("");
708    let seq = seq.chars().map(|x| align_char2base(x as Char)).collect();
709    seqs.push(seq);
710  }
711  let align_len = seqs[0].len();
712  for i in 0..align_len {
713    let x = seqs.iter().map(|x| x[i]).collect();
714    cols.push(x);
715  }
716  (cols, seq_ids)
717}
718
719pub fn read_align_stockholm(stockholm_file_path: &Path) -> (Cols, SeqIds) {
720  let mut cols = Cols::new();
721  let mut seq_ids = SeqIds::new();
722  let reader = BufReader::new(File::open(stockholm_file_path).unwrap());
723  let mut seqs = Vec::<Seq>::new();
724  for line in reader.lines() {
725    let line = line.unwrap();
726    if line.is_empty() || line.starts_with('#') {
727      continue;
728    } else if line.starts_with("//") {
729      break;
730    }
731    let lines: Vec<&str> = line.split_whitespace().collect();
732    let seq_id = lines[0];
733    seq_ids.push(SeqId::from(seq_id));
734    let seq = lines[1];
735    let seq = seq.chars().map(|x| align_char2base(x as Char)).collect();
736    seqs.push(seq);
737  }
738  let align_len = seqs[0].len();
739  for i in 0..align_len {
740    let x = seqs.iter().map(|x| x[i]).collect();
741    cols.push(x);
742  }
743  (cols, seq_ids)
744}
745
746pub fn align_char2base(x: Char) -> Base {
747  match x {
748    A_LOWER | A_UPPER => A,
749    C_LOWER | C_UPPER => C,
750    G_LOWER | G_UPPER => G,
751    U_LOWER | U_UPPER => U,
752    _ => PSEUDO_BASE,
753  }
754}