1pub use bio::io::fasta::Reader;
2pub use hashbrown::HashMap;
3pub use itertools::multizip;
4pub use num::{
5 range, range_inclusive, Bounded, FromPrimitive, Integer, One, PrimInt, ToPrimitive, Unsigned,
6 Zero,
7};
8pub use rna_ss_params::compiled_scores_contra::*;
9pub use rna_ss_params::compiled_scores_turner::*;
10pub use rna_ss_params::utils::*;
11pub use scoped_threadpool::Pool;
12pub use std::cmp::{max, min};
13pub use std::env;
14pub use std::f32::NEG_INFINITY;
15pub use std::fmt::Display;
16pub use std::fs::create_dir;
17pub use std::fs::File;
18pub use std::hash::Hash;
19pub use std::io::prelude::*;
20pub use std::io::{BufReader, BufWriter};
21pub use std::path::Path;
22pub use std::str::from_utf8_unchecked;
23
24pub trait HashIndex:
25 Unsigned
26 + PrimInt
27 + Hash
28 + FromPrimitive
29 + ToPrimitive
30 + Clone
31 + Integer
32 + Eq
33 + One
34 + Ord
35 + Display
36 + Sync
37 + Send
38{
39}
40impl<T: Unsigned + PrimInt + Hash + FromPrimitive + Integer + One + Ord + Display + Sync + Send>
41 HashIndex for T
42{
43}
44
45pub type PosPair<T> = (T, T);
46pub type PosQuad<T> = (T, T, T, T);
47type Arg = String;
48pub type Args = Vec<Arg>;
49pub type FastaId = String;
50#[derive(Clone)]
51pub struct FastaRecord {
52 pub fasta_id: FastaId,
53 pub seq: Seq,
54}
55#[derive(Debug)]
56pub struct Align<T> {
57 pub cols: Cols,
58 pub pos_map_sets: PosMapSets<T>,
59}
60pub type PosMaps<T> = Vec<T>;
61pub type PosMapSets<T> = Vec<PosMaps<T>>;
62pub type FastaRecords = Vec<FastaRecord>;
63pub type SeqSlice<'a> = &'a [Base];
64pub type NumThreads = u32;
65pub type SparseProbMat<T> = HashMap<PosPair<T>, Prob>;
66pub type SeqId = String;
67pub type SeqIds = Vec<SeqId>;
68pub type Col = Vec<Base>;
69pub type Cols = Vec<Col>;
70pub type Sum4dMat<T> = HashMap<PosQuad<T>, Sum>;
71pub type SparseSumMat<T> = HashMap<PosPair<T>, Sum>;
72pub type PosPairs<T> = Vec<PosPair<T>>;
73pub type FoldChar = u8;
74pub type FoldStr = Vec<FoldChar>;
75pub type Seq = Vec<Base>;
76pub type SeqPair<'a> = (SeqSlice<'a>, SeqSlice<'a>);
77pub type Prob = f32;
78pub type Sum = Prob;
79pub type Probs = Vec<Prob>;
80pub type ProbMat = Vec<Probs>;
81pub type Sums = Vec<Sum>;
82pub type SumMat = Vec<Sums>;
83pub type Pos = usize;
84pub type Base = usize;
85pub type MatchScores = [[Score; NUM_BASES]; NUM_BASES];
86pub type InsertScores = [Score; NUM_BASES];
87pub type RnaId = usize;
88pub type RnaIdPair = (RnaId, RnaId);
89pub type ProbMatsHashedIds = HashMap<RnaIdPair, ProbMat>;
90
91#[derive(Clone, Debug)]
92pub struct FoldScoreSets {
93 pub hairpin_scores_len: HairpinScoresLen,
95 pub bulge_scores_len: BulgeScoresLen,
96 pub interior_scores_len: InteriorScoresLen,
97 pub interior_scores_symmetric: InteriorScoresSymmetric,
98 pub interior_scores_asymmetric: InteriorScoresAsymmetric,
99 pub stack_scores: StackScores,
100 pub terminal_mismatch_scores: TerminalMismatchScores,
101 pub dangling_scores_left: DanglingScores,
102 pub dangling_scores_right: DanglingScores,
103 pub helix_close_scores: HelixCloseScores,
104 pub basepair_scores: BasepairScores,
105 pub interior_scores_explicit: InteriorScoresExplicit,
106 pub bulge_scores_0x1: BulgeScores0x1,
107 pub interior_scores_1x1: InteriorScores1x1Contra,
108 pub multibranch_score_base: Score,
109 pub multibranch_score_basepair: Score,
110 pub multibranch_score_unpair: Score,
111 pub external_score_basepair: Score,
112 pub external_score_unpair: Score,
113 pub hairpin_scores_len_cumulative: HairpinScoresLen,
115 pub bulge_scores_len_cumulative: BulgeScoresLen,
116 pub interior_scores_len_cumulative: InteriorScoresLen,
117 pub interior_scores_symmetric_cumulative: InteriorScoresSymmetric,
118 pub interior_scores_asymmetric_cumulative: InteriorScoresAsymmetric,
119}
120
121pub const LOGSUMEXP_THRESHOLD_UPPER: Score = 11.862_479;
122pub const PSEUDO_BASE: Base = U + 1 as Base;
123pub const UNPAIR: FoldChar = b'.';
124pub const BASEPAIR_LEFT: FoldChar = b'(';
125pub const BASEPAIR_RIGHT: FoldChar = b')';
126pub const EXAMPLE_FASTA_FILE_PATH: &str = "assets/sampled_trnas.fa";
127pub const EPSILON: Prob = 0.00_1;
128pub const PROB_BOUND_LOWER: Prob = -EPSILON;
129pub const PROB_BOUND_UPPER: Prob = 1. + EPSILON;
130
131impl FastaRecord {
132 pub fn origin() -> FastaRecord {
133 FastaRecord {
134 fasta_id: FastaId::new(),
135 seq: Seq::new(),
136 }
137 }
138
139 pub fn new(fasta_id: FastaId, seq: Seq) -> FastaRecord {
140 FastaRecord {
141 fasta_id: fasta_id,
142 seq: seq,
143 }
144 }
145}
146
147impl<T> Default for Align<T> {
148 fn default() -> Self {
149 Self::new()
150 }
151}
152
153impl<T> Align<T> {
154 pub fn new() -> Align<T> {
155 Align {
156 cols: Cols::new(),
157 pos_map_sets: PosMapSets::<T>::new(),
158 }
159 }
160}
161
162pub fn has_canonical_basepair(x: &Basepair) -> bool {
163 matches!(*x, AU | CG | GC | GU | UA | UG)
164}
165
166pub fn get_hairpin_score(seq: SeqSlice, pos_pair_close: &(usize, usize)) -> Score {
167 let hairpin = &seq[pos_pair_close.0..pos_pair_close.1 + 1];
168 let special_hairpin_score = get_special_hairpin_score(hairpin);
169 if special_hairpin_score > NEG_INFINITY {
170 special_hairpin_score
171 } else {
172 let hairpin_len = pos_pair_close.1 - pos_pair_close.0 - 1;
173 let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
174 let hairpin_score = if hairpin_len == MIN_HAIRPIN_LEN {
175 HAIRPIN_SCORES_INIT[hairpin_len]
176 } else {
177 let terminal_mismatch = (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]);
178 let hairpin_score_init = if hairpin_len <= MAX_HAIRPIN_LEN_EXTRAPOLATION {
179 HAIRPIN_SCORES_INIT[hairpin_len]
180 } else {
181 HAIRPIN_SCORES_INIT[MIN_HAIRPIN_LEN_EXTRAPOLATION - 1]
182 + COEFF_HAIRPIN_LEN_EXTRAPOLATION
183 * (hairpin_len as Score / (MIN_HAIRPIN_LEN_EXTRAPOLATION - 1) as Score).ln()
184 };
185 hairpin_score_init
186 + TERMINAL_MISMATCH_SCORES_HAIRPIN[basepair_close.0][basepair_close.1][terminal_mismatch.0]
187 [terminal_mismatch.1]
188 };
189 hairpin_score
190 + if matches_augu(&basepair_close) {
191 HELIX_AUGU_END_PENALTY
192 } else {
193 0.
194 }
195 }
196}
197
198pub fn get_special_hairpin_score(seq: SeqSlice) -> Score {
199 for x in HAIRPIN_SCORES_SPECIAL.iter() {
200 if x.0 == seq {
201 return x.1;
202 }
203 }
204 NEG_INFINITY
205}
206
207pub fn get_2loop_score(
208 seq: SeqSlice,
209 pos_pair_close: &(usize, usize),
210 pos_pair_accessible: &(usize, usize),
211) -> Score {
212 if pos_pair_close.0 + 1 == pos_pair_accessible.0 && pos_pair_close.1 - 1 == pos_pair_accessible.1
213 {
214 get_stack_score(seq, pos_pair_close, pos_pair_accessible)
215 } else if pos_pair_close.0 + 1 == pos_pair_accessible.0
216 || pos_pair_close.1 - 1 == pos_pair_accessible.1
217 {
218 get_bulge_score(seq, pos_pair_close, pos_pair_accessible)
219 } else {
220 get_interior_score(seq, pos_pair_close, pos_pair_accessible)
221 }
222}
223
224fn get_stack_score(
225 seq: SeqSlice,
226 pos_pair_close: &(usize, usize),
227 pos_pair_accessible: &(usize, usize),
228) -> Score {
229 let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
230 let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
231 STACK_SCORES[basepair_close.0][basepair_close.1][basepair_accessible.0][basepair_accessible.1]
232}
233
234fn get_bulge_score(
235 seq: SeqSlice,
236 pos_pair_close: &(usize, usize),
237 pos_pair_accessible: &(usize, usize),
238) -> Score {
239 let bulge_len =
240 pos_pair_accessible.0 - pos_pair_close.0 + pos_pair_close.1 - pos_pair_accessible.1 - 2;
241 if bulge_len == 1 {
242 BULGE_SCORES_INIT[bulge_len] + get_stack_score(seq, pos_pair_close, pos_pair_accessible)
243 } else {
244 let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
245 let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
246 BULGE_SCORES_INIT[bulge_len]
247 + if matches_augu(&basepair_close) {
248 HELIX_AUGU_END_PENALTY
249 } else {
250 0.
251 }
252 + if matches_augu(&basepair_accessible) {
253 HELIX_AUGU_END_PENALTY
254 } else {
255 0.
256 }
257 }
258}
259
260fn get_interior_score(
261 seq: SeqSlice,
262 pos_pair_close: &(usize, usize),
263 pos_pair_accessible: &(usize, usize),
264) -> Score {
265 let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
266 let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
267 let loop_len_pair = (
268 pos_pair_accessible.0 - pos_pair_close.0 - 1,
269 pos_pair_close.1 - pos_pair_accessible.1 - 1,
270 );
271 let interior_len = loop_len_pair.0 + loop_len_pair.1;
272 match loop_len_pair {
273 (1, 1) => {
274 let interior = (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]);
275 INTERIOR_SCORES_1X1[basepair_close.0][basepair_close.1][interior.0][interior.1]
276 [basepair_accessible.0][basepair_accessible.1]
277 }
278 (1, 2) => {
279 let interior = (
280 (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]),
281 seq[pos_pair_close.1 - 2],
282 );
283 INTERIOR_SCORES_1X2[basepair_close.0][basepair_close.1][(interior.0).0][(interior.0).1]
284 [interior.1][basepair_accessible.0][basepair_accessible.1]
285 }
286 (2, 1) => {
287 let interior = (
288 (seq[pos_pair_close.1 - 1], seq[pos_pair_close.0 + 2]),
289 seq[pos_pair_close.0 + 1],
290 );
291 let basepair_accessible_inverse = invert_basepair(&basepair_accessible);
292 let basepair_close_inverse = invert_basepair(&basepair_close);
293 INTERIOR_SCORES_1X2[basepair_accessible_inverse.0][basepair_accessible_inverse.1]
294 [(interior.0).0][(interior.0).1][interior.1][basepair_close_inverse.0]
295 [basepair_close_inverse.1]
296 }
297 (2, 2) => {
298 let interior = (
299 (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]),
300 (seq[pos_pair_close.0 + 2], seq[pos_pair_close.1 - 2]),
301 );
302 INTERIOR_SCORES_2X2[basepair_close.0][basepair_close.1][(interior.0).0][(interior.0).1]
303 [(interior.1).0][(interior.1).1][basepair_accessible.0][basepair_accessible.1]
304 }
305 _ => {
306 INTERIOR_SCORES_INIT[interior_len]
307 + (NINIO_COEFF * get_abs_diff(loop_len_pair.0, loop_len_pair.1) as Score).max(NINIO_MAX)
308 + get_interior_mismatch_score(seq, pos_pair_close, pos_pair_accessible, &loop_len_pair)
309 + if matches_augu(&basepair_close) {
310 HELIX_AUGU_END_PENALTY
311 } else {
312 0.
313 }
314 + if matches_augu(&basepair_accessible) {
315 HELIX_AUGU_END_PENALTY
316 } else {
317 0.
318 }
319 }
320 }
321}
322
323pub fn invert_basepair(x: &Basepair) -> Basepair {
324 (x.1, x.0)
325}
326
327pub fn get_abs_diff(x: usize, y: usize) -> usize {
328 max(x, y) - min(x, y)
329}
330
331fn get_interior_mismatch_score(
332 seq: SeqSlice,
333 pos_pair_close: &(usize, usize),
334 pos_pair_accessible: &(usize, usize),
335 loop_len_pair: &(usize, usize),
336) -> Score {
337 let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
338 let basepair_accessible = (seq[pos_pair_accessible.1], seq[pos_pair_accessible.0]);
339 let terminal_mismatch = (
340 (seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]),
341 (
342 seq[pos_pair_accessible.1 + 1],
343 seq[pos_pair_accessible.0 - 1],
344 ),
345 );
346 match *loop_len_pair {
347 (1, _) | (_, 1) => {
348 TERMINAL_MISMATCH_SCORES_1XMANY[basepair_close.0][basepair_close.1][(terminal_mismatch.0).0]
349 [(terminal_mismatch.0).1]
350 + TERMINAL_MISMATCH_SCORES_1XMANY[basepair_accessible.0][basepair_accessible.1]
351 [(terminal_mismatch.1).0][(terminal_mismatch.1).1]
352 }
353 (2, 3) | (3, 2) => {
354 TERMINAL_MISMATCH_SCORES_2X3[basepair_close.0][basepair_close.1][(terminal_mismatch.0).0]
355 [(terminal_mismatch.0).1]
356 + TERMINAL_MISMATCH_SCORES_2X3[basepair_accessible.0][basepair_accessible.1]
357 [(terminal_mismatch.1).0][(terminal_mismatch.1).1]
358 }
359 _ => {
360 TERMINAL_MISMATCH_SCORES_INTERIOR[basepair_close.0][basepair_close.1][(terminal_mismatch.0).0]
361 [(terminal_mismatch.0).1]
362 + TERMINAL_MISMATCH_SCORES_INTERIOR[basepair_accessible.0][basepair_accessible.1]
363 [(terminal_mismatch.1).0][(terminal_mismatch.1).1]
364 }
365 }
366}
367
368pub fn get_multibranch_close_score(seq: SeqSlice, pos_pair_close: &(usize, usize)) -> Score {
369 let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
370 let basepair_close_inverse = invert_basepair(&basepair_close);
371 let basepair_stack_inverse =
372 invert_basepair(&(seq[pos_pair_close.0 + 1], seq[pos_pair_close.1 - 1]));
373 let terminal_mismatch_score = TERMINAL_MISMATCH_SCORES_MULTIBRANCH[basepair_close_inverse.0]
374 [basepair_close_inverse.1][basepair_stack_inverse.0][basepair_stack_inverse.1];
375 INIT_MULTIBRANCH_BASE
376 + terminal_mismatch_score
377 + if matches_augu(&basepair_close) {
378 HELIX_AUGU_END_PENALTY
379 } else {
380 0.
381 }
382}
383
384pub fn get_accessible_score(
385 seq: SeqSlice,
386 pos_pair_accessible: &(usize, usize),
387 uses_sentinel_bases: bool,
388) -> Score {
389 let seq_len = seq.len();
390 let end_5prime = usize::from(uses_sentinel_bases);
391 let end_3prime = seq_len - if uses_sentinel_bases { 2 } else { 1 };
392 let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
393 let score = if pos_pair_accessible.0 > end_5prime && pos_pair_accessible.1 < end_3prime {
394 TERMINAL_MISMATCH_SCORES_MULTIBRANCH[basepair_accessible.0][basepair_accessible.1]
395 [seq[pos_pair_accessible.0 - 1]][seq[pos_pair_accessible.1 + 1]]
396 } else if pos_pair_accessible.0 > end_5prime {
397 DANGLING_SCORES_5PRIME[basepair_accessible.0][basepair_accessible.1]
398 [seq[pos_pair_accessible.0 - 1]]
399 } else if pos_pair_accessible.1 < end_3prime {
400 DANGLING_SCORES_3PRIME[basepair_accessible.0][basepair_accessible.1]
401 [seq[pos_pair_accessible.1 + 1]]
402 } else {
403 0.
404 };
405 score
406 + if matches_augu(&basepair_accessible) {
407 HELIX_AUGU_END_PENALTY
408 } else {
409 0.
410 }
411}
412
413pub fn get_hairpin_score_contra(
414 seq: SeqSlice,
415 pos_pair_close: &(usize, usize),
416 fold_score_sets: &FoldScoreSets,
417) -> Score {
418 let hairpin_len = pos_pair_close.1 - pos_pair_close.0 - 1;
419 fold_score_sets.hairpin_scores_len_cumulative[hairpin_len.min(MAX_LOOP_LEN)]
420 + get_junction_score_single(seq, pos_pair_close, fold_score_sets)
421}
422
423pub fn get_2loop_score_contra(
424 seq: SeqSlice,
425 pos_pair_close: &(usize, usize),
426 pos_pair_accessible: &(usize, usize),
427 fold_score_sets: &FoldScoreSets,
428) -> Score {
429 let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
430 let score = if pos_pair_close.0 + 1 == pos_pair_accessible.0
431 && pos_pair_close.1 - 1 == pos_pair_accessible.1
432 {
433 get_stack_score_contra(seq, pos_pair_close, pos_pair_accessible, fold_score_sets)
434 } else if pos_pair_close.0 + 1 == pos_pair_accessible.0
435 || pos_pair_close.1 - 1 == pos_pair_accessible.1
436 {
437 get_bulge_score_contra(seq, pos_pair_close, pos_pair_accessible, fold_score_sets)
438 } else {
439 get_interior_score_contra(seq, pos_pair_close, pos_pair_accessible, fold_score_sets)
440 };
441 score + fold_score_sets.basepair_scores[basepair_accessible.0][basepair_accessible.1]
442}
443
444pub fn get_stack_score_contra(
445 seq: SeqSlice,
446 pos_pair_close: &(usize, usize),
447 pos_pair_accessible: &(usize, usize),
448 fold_score_sets: &FoldScoreSets,
449) -> Score {
450 let basepair_close = (seq[pos_pair_close.0], seq[pos_pair_close.1]);
451 let basepair_accessible = (seq[pos_pair_accessible.0], seq[pos_pair_accessible.1]);
452 fold_score_sets.stack_scores[basepair_close.0][basepair_close.1][basepair_accessible.0]
453 [basepair_accessible.1]
454}
455
456pub fn get_bulge_score_contra(
457 seq: SeqSlice,
458 pos_pair_close: &(usize, usize),
459 pos_pair_accessible: &(usize, usize),
460 fold_score_sets: &FoldScoreSets,
461) -> Score {
462 let bulge_len =
463 pos_pair_accessible.0 - pos_pair_close.0 + pos_pair_close.1 - pos_pair_accessible.1 - 2;
464 let score = if bulge_len == 1 {
465 fold_score_sets.bulge_scores_0x1[if pos_pair_accessible.0 - pos_pair_close.0 - 1 == 1 {
466 seq[pos_pair_close.0 + 1]
467 } else {
468 seq[pos_pair_close.1 - 1]
469 }]
470 } else {
471 0.
472 };
473 score
474 + fold_score_sets.bulge_scores_len_cumulative[bulge_len - 1]
475 + get_junction_score_single(seq, pos_pair_close, fold_score_sets)
476 + get_junction_score_single(
477 seq,
478 &(pos_pair_accessible.1, pos_pair_accessible.0),
479 fold_score_sets,
480 )
481}
482
483pub fn get_interior_score_contra(
484 seq: SeqSlice,
485 pos_pair_close: &(usize, usize),
486 pos_pair_accessible: &(usize, usize),
487 fold_score_sets: &FoldScoreSets,
488) -> Score {
489 let loop_len_pair = (
490 pos_pair_accessible.0 - pos_pair_close.0 - 1,
491 pos_pair_close.1 - pos_pair_accessible.1 - 1,
492 );
493 let interior_len = loop_len_pair.0 + loop_len_pair.1;
494 let score = if loop_len_pair.0 == loop_len_pair.1 {
495 let score_1x1 = if interior_len == 2 {
496 fold_score_sets.interior_scores_1x1[seq[pos_pair_close.0 + 1]][seq[pos_pair_close.1 - 1]]
497 } else {
498 0.
499 };
500 score_1x1 + fold_score_sets.interior_scores_symmetric_cumulative[loop_len_pair.0 - 1]
501 } else {
502 fold_score_sets.interior_scores_asymmetric_cumulative
503 [get_abs_diff(loop_len_pair.0, loop_len_pair.1) - 1]
504 };
505 let score_explicit =
506 if loop_len_pair.0 <= MAX_INTERIOR_EXPLICIT && loop_len_pair.1 <= MAX_INTERIOR_EXPLICIT {
507 fold_score_sets.interior_scores_explicit[loop_len_pair.0 - 1][loop_len_pair.1 - 1]
508 } else {
509 0.
510 };
511 score
512 + score_explicit
513 + fold_score_sets.interior_scores_len_cumulative[interior_len - 2]
514 + get_junction_score_single(seq, pos_pair_close, fold_score_sets)
515 + get_junction_score_single(
516 seq,
517 &(pos_pair_accessible.1, pos_pair_accessible.0),
518 fold_score_sets,
519 )
520}
521
522pub fn get_junction_score(
523 seq: SeqSlice,
524 pos_pair: &(usize, usize),
525 uses_sentinel_bases: bool,
526 fold_score_sets: &FoldScoreSets,
527) -> Score {
528 let seq_len = seq.len();
529 let basepair = (seq[pos_pair.0], seq[pos_pair.1]);
530 let end_5prime = usize::from(uses_sentinel_bases);
531 let end_3prime = seq_len - if uses_sentinel_bases { 2 } else { 1 };
532 get_helix_close_score(&basepair, fold_score_sets)
533 + if pos_pair.0 < end_3prime {
534 fold_score_sets.dangling_scores_left[basepair.0][basepair.1][seq[pos_pair.0 + 1]]
535 } else {
536 0.
537 }
538 + if pos_pair.1 > end_5prime {
539 fold_score_sets.dangling_scores_right[basepair.0][basepair.1][seq[pos_pair.1 - 1]]
540 } else {
541 0.
542 }
543}
544
545pub fn get_junction_score_single(x: SeqSlice, y: &(usize, usize), z: &FoldScoreSets) -> Score {
546 let a = (x[y.0], x[y.1]);
547 get_helix_close_score(&a, z) + get_terminal_mismatch_score(&a, &(x[y.0 + 1], x[y.1 - 1]), z)
548}
549
550pub fn get_helix_close_score(x: &Basepair, y: &FoldScoreSets) -> Score {
551 y.helix_close_scores[x.0][x.1]
552}
553
554pub fn get_terminal_mismatch_score(x: &Basepair, y: &Basepair, z: &FoldScoreSets) -> Score {
555 z.terminal_mismatch_scores[x.0][x.1][y.0][y.1]
556}
557
558pub fn matches_augu(x: &Basepair) -> bool {
559 *x == AU || *x == UA || *x == GU || *x == UG
560}
561
562pub fn bytes2seq(x: &[Char]) -> Seq {
563 let mut y = Seq::new();
564 for &x in x {
565 let x = match x {
566 A_LOWER | A_UPPER => A,
567 C_LOWER | C_UPPER => C,
568 G_LOWER | G_UPPER => G,
569 U_LOWER | U_UPPER => U,
570 _ => {
571 panic!();
572 }
573 };
574 y.push(x);
575 }
576 y
577}
578
579#[inline]
580pub fn logsumexp(sum: &mut Score, x: Score) {
581 if !x.is_finite() {
582 return;
583 }
584 *sum = if !sum.is_finite() {
585 x
586 } else {
587 let y = sum.min(x);
588 let z = sum.max(x) - y;
589 y + if z >= LOGSUMEXP_THRESHOLD_UPPER {
590 z
591 } else {
592 ln_exp_1p(z)
594 }
595 };
596}
597
598#[inline]
603pub fn ln_exp_1p(x: Score) -> Score {
604 if x < 3.379_25 {
605 if x < 1.632_015_8 {
606 if x < 0.661_536_75 {
607 ((-0.0065591595 * x + 0.127_644_27) * x + 0.499_655_46) * x + 0.693_154_2
608 } else {
609 ((-0.015_515_756 * x + 0.144_677_56) * x + 0.488_293_98) * x + 0.695_809_3
610 }
611 } else if x < 2.491_258_9 {
612 ((-0.012_890_925 * x + 0.130_102_83) * x + 0.515_039_86) * x + 0.679_558_6
613 } else {
614 ((-0.0072142647 * x + 0.087_754_086) * x + 0.620_870_8) * x + 0.590_967_6
615 }
616 } else if x < 5.789_071 {
617 if x < 4.426_169 {
618 ((-0.0031455354 * x + 0.046_722_945) * x + 0.759_253_2) * x + 0.434_879_45
619 } else {
620 ((-0.0010110698 * x + 0.018_594_341) * x + 0.883_173_05) * x + 0.252_369_55
621 }
622 } else if x < 7.816_272_7 {
623 ((-0.000_196_278 * x + 0.0046084408) * x + 0.963_443_2) * x + 0.098_314_89
624 } else {
625 ((-0.0000113994 * x + 0.0003734731) * x + 0.995_910_7) * x + 0.0149855051
626 }
627}
628
629#[inline]
631pub fn expf(x: Score) -> Score {
632 if x < -2.491_503_5 {
633 if x < -5.862_282_3 {
634 if x < -9.91152 {
635 0.
636 } else {
637 ((0.0000803850 * x + 0.002_162_743) * x + 0.019_470_856) * x + 0.058_808_003
638 }
639 } else if x < -3.839_663 {
640 ((0.0013889414 * x + 0.024_467_647) * x + 0.147_129_06) * x + 0.304_275_78
641 } else {
642 ((0.0072335607 * x + 0.090_600_27) * x + 0.398_311_14) * x + 0.624_595_94
643 }
644 } else if x < -0.672_505_3 {
645 if x < -1.480_537_5 {
646 ((0.023_241_036 * x + 0.208_564_6) * x + 0.690_636_8) * x + 0.868_232_25
647 } else {
648 ((0.057_378_277 * x + 0.358_025_85) * x + 0.912_113_3) * x + 0.979_309_2
649 }
650 } else if x < 0. {
651 ((0.119_917_594 * x + 0.481_566_82) * x + 0.997_599_2) * x + 0.999_950_5
652 } else {
653 x.exp()
654 }
655}
656
657pub fn read_align_clustal(clustal_file_path: &Path) -> (Cols, SeqIds) {
658 let mut cols = Cols::new();
659 let mut seq_ids = SeqIds::new();
660 let reader = BufReader::new(File::open(clustal_file_path).unwrap());
661 let mut seq_pointer = 0;
662 let mut pos_pointer = 0;
663 let mut has_read_seq_ids = false;
664 for (i, line) in reader.lines().enumerate() {
665 let line = line.unwrap();
666 if i == 0 || line.is_empty() || line.starts_with(' ') {
667 if !cols.is_empty() {
668 seq_pointer = 0;
669 pos_pointer = cols.len();
670 has_read_seq_ids = true;
671 }
672 continue;
673 }
674 let mut lines = line.split_whitespace();
675 let line = lines.next().unwrap();
676 if !has_read_seq_ids {
677 seq_ids.push(String::from(line));
678 }
679 let line = lines.next().unwrap();
680 if seq_pointer == 0 {
681 for x in line.chars() {
682 cols.push(vec![align_char2base(x as Char)]);
683 }
684 seq_pointer += 1;
685 } else {
686 for (j, x) in line.chars().enumerate() {
687 cols[pos_pointer + j].push(align_char2base(x as Char));
688 }
689 }
690 }
691 (cols, seq_ids)
692}
693
694pub fn read_align_fasta(fasta_file_path: &Path) -> (Cols, SeqIds) {
695 let mut cols = Cols::new();
696 let mut seq_ids = SeqIds::new();
697 let reader = BufReader::new(File::open(fasta_file_path).unwrap());
698 let mut seqs = Vec::<Seq>::new();
699 for (i, split) in reader.split(b'>').enumerate() {
700 let split = String::from_utf8(split.unwrap()).unwrap();
701 if i == 0 {
702 continue;
703 }
704 let splits: Vec<&str> = split.split_whitespace().collect();
705 let seq_id = splits[0];
706 seq_ids.push(SeqId::from(seq_id));
707 let seq = splits[1..].join("");
708 let seq = seq.chars().map(|x| align_char2base(x as Char)).collect();
709 seqs.push(seq);
710 }
711 let align_len = seqs[0].len();
712 for i in 0..align_len {
713 let x = seqs.iter().map(|x| x[i]).collect();
714 cols.push(x);
715 }
716 (cols, seq_ids)
717}
718
719pub fn read_align_stockholm(stockholm_file_path: &Path) -> (Cols, SeqIds) {
720 let mut cols = Cols::new();
721 let mut seq_ids = SeqIds::new();
722 let reader = BufReader::new(File::open(stockholm_file_path).unwrap());
723 let mut seqs = Vec::<Seq>::new();
724 for line in reader.lines() {
725 let line = line.unwrap();
726 if line.is_empty() || line.starts_with('#') {
727 continue;
728 } else if line.starts_with("//") {
729 break;
730 }
731 let lines: Vec<&str> = line.split_whitespace().collect();
732 let seq_id = lines[0];
733 seq_ids.push(SeqId::from(seq_id));
734 let seq = lines[1];
735 let seq = seq.chars().map(|x| align_char2base(x as Char)).collect();
736 seqs.push(seq);
737 }
738 let align_len = seqs[0].len();
739 for i in 0..align_len {
740 let x = seqs.iter().map(|x| x[i]).collect();
741 cols.push(x);
742 }
743 (cols, seq_ids)
744}
745
746pub fn align_char2base(x: Char) -> Base {
747 match x {
748 A_LOWER | A_UPPER => A,
749 C_LOWER | C_UPPER => C,
750 G_LOWER | G_UPPER => G,
751 U_LOWER | U_UPPER => U,
752 _ => PSEUDO_BASE,
753 }
754}