1use bio::bio_types::strand::Strand;
2use rayon::prelude::*;
3
4use crate::{
5 constants::{
6 INITIAL_MAX_SCORE, MIN_MOTIF_SCORE, MOTIF_THRESHOLD_OFFSET, RBS_DOWNSTREAM_DISTANCE,
7 RBS_UPSTREAM_DISTANCE,
8 },
9 sequence::{calculate_kmer_index, shine_dalgarno_exact, shine_dalgarno_mm},
10 types::{CodonType, Motif, Node, Training},
11};
12
13fn calculate_rbs_scores(
15 seq: &[u8],
16 pos: usize,
17 target_pos: usize,
18 rbs_weights: &[f64],
19) -> [usize; 2] {
20 [
21 shine_dalgarno_exact(seq, pos, target_pos, rbs_weights),
22 shine_dalgarno_mm(seq, pos, target_pos, rbs_weights),
23 ]
24}
25
26pub fn rbs_score(
39 seq: &[u8],
40 reverse_complement_encoded_sequence: &[u8],
41 sequence_length: usize,
42 nodes: &mut [Node],
43 training: &Training,
44) {
45 nodes
47 .par_iter_mut()
48 .enumerate()
49 .filter(|(_, node)| node.position.codon_type != CodonType::Stop && !node.position.is_edge)
50 .for_each(|(_, node)| {
51 node.motif_info.ribosome_binding_sites[0] = 0;
52 node.motif_info.ribosome_binding_sites[1] = 0;
53
54 match node.position.strand {
55 Strand::Forward => {
56 let search_start = node.position.index.saturating_sub(RBS_UPSTREAM_DISTANCE);
57 let search_end = node.position.index.saturating_sub(RBS_DOWNSTREAM_DISTANCE);
58
59 for j in search_start..=search_end {
60 let cur_sc = calculate_rbs_scores(
61 seq,
62 j,
63 node.position.index,
64 &*training.rbs_weights,
65 );
66
67 if cur_sc[0] > node.motif_info.ribosome_binding_sites[0] {
68 node.motif_info.ribosome_binding_sites[0] = cur_sc[0];
69 }
70 if cur_sc[1] > node.motif_info.ribosome_binding_sites[1] {
71 node.motif_info.ribosome_binding_sites[1] = cur_sc[1];
72 }
73 }
74 }
75 Strand::Reverse => {
76 let upstream_offset = RBS_UPSTREAM_DISTANCE + 1;
77 let downstream_offset = RBS_DOWNSTREAM_DISTANCE + 1;
78 if node.position.index >= sequence_length {
79 return; }
81 let start_pos = if sequence_length >= node.position.index + upstream_offset {
82 sequence_length - node.position.index - upstream_offset
83 } else {
84 0 };
86
87 let end_pos = if sequence_length >= node.position.index + downstream_offset {
88 sequence_length - node.position.index - downstream_offset
89 } else {
90 0 };
92 let target_pos = sequence_length - 1 - node.position.index;
93
94 for j in start_pos..=end_pos {
95 if !(0..sequence_length).contains(&j) {
96 continue;
97 }
98
99 let cur_sc = calculate_rbs_scores(
100 reverse_complement_encoded_sequence,
101 j,
102 target_pos,
103 &*training.rbs_weights,
104 );
105
106 if cur_sc[0] > node.motif_info.ribosome_binding_sites[0] {
107 node.motif_info.ribosome_binding_sites[0] = cur_sc[0];
108 }
109 if cur_sc[1] > node.motif_info.ribosome_binding_sites[1] {
110 node.motif_info.ribosome_binding_sites[1] = cur_sc[1];
111 }
112 }
113 }
114 Strand::Unknown => unreachable!(),
115 }
116 });
117}
118
119pub fn find_best_upstream_motif(
137 training: &Training,
138 seq: &[u8],
139 reverse_complement_encoded_sequence: &[u8],
140 sequence_length: usize,
141 node: &mut Node,
142 stage: usize,
143) {
144 if node.position.codon_type == CodonType::Stop || node.position.is_edge {
145 return;
146 }
147
148 let (wseq, start) = match node.position.strand {
149 Strand::Forward => (seq, node.position.index),
150 Strand::Reverse => (
151 reverse_complement_encoded_sequence,
152 sequence_length - 1 - node.position.index,
153 ),
154 Strand::Unknown => unreachable!(),
155 };
156
157 let mut max_sc = INITIAL_MAX_SCORE;
158 let mut max_spacendx = 0;
159 let mut max_spacer = 0;
160 let mut max_ndx = 0;
161 let mut max_len = 0;
162
163 for i in (0..=3).rev() {
165 let motif_len = i + 3;
166
167 let search_start: isize = start as isize - 18 - i;
169 let search_end: isize = start as isize - 6 - i;
170
171 for j in search_start..=search_end {
172 if j < 0 {
173 continue;
174 }
175 if (j as usize) + (motif_len as usize) > sequence_length {
178 continue;
179 }
180 let spacer = start as isize - j - i - 3;
181 let spacendx = if j <= start as isize - 16 - i {
182 3
183 } else if j <= start as isize - 14 - i {
184 2
185 } else if j >= start as isize - 7 - i {
186 1
187 } else {
188 0
189 };
190
191 let index = calculate_kmer_index(motif_len as usize, wseq, j as usize);
192 let score = training.motif_weights[i as usize][spacendx][index];
193
194 if score > max_sc {
195 max_sc = score;
196 max_spacendx = spacendx;
197 max_spacer = spacer;
198 max_ndx = index;
199 max_len = motif_len;
200 }
201 }
202 }
203
204 let is_stage_two = stage == 2;
206 let is_poor_motif =
207 max_sc == MIN_MOTIF_SCORE || max_sc < training.no_motif_weight + MOTIF_THRESHOLD_OFFSET;
208
209 let effective_max_sc = max_sc;
212
213 node.motif_info.best_motif = if is_stage_two && is_poor_motif {
214 Motif {
215 index: 0,
216 length: 0,
217 space_index: 0,
218 spacer: 0,
219 score: training.no_motif_weight,
220 }
221 } else {
222 Motif {
223 index: max_ndx,
224 length: max_len as usize,
225 space_index: max_spacendx,
226 spacer: max_spacer as usize,
227 score: effective_max_sc,
228 }
229 };
230}
231
232#[cfg(test)]
233mod tests {
234 use super::*;
235 use crate::types::*;
236 use bio::bio_types::strand::Strand;
237
238 fn create_test_node(strand: Strand, index: usize, codon_type: CodonType) -> Node {
239 Node {
240 position: NodePosition {
241 index,
242 strand,
243 codon_type,
244 stop_value: (index + 100) as isize,
245 is_edge: false,
246 },
247 scores: NodeScores::default(),
248 state: NodeState::default(),
249 motif_info: NodeMotifInfo {
250 ribosome_binding_sites: [0; 2],
251 best_motif: Motif::default(),
252 },
253 }
254 }
255
256 fn create_test_training() -> Training {
257 Training {
258 gc_content: 0.5,
259 translation_table: 11,
260 uses_shine_dalgarno: true,
261 start_type_weights: [1.0, 2.0, 3.0],
262 rbs_weights: Box::new([1.0; 28]),
263 upstream_composition: Box::new([[0.25; 4]; 32]),
264 motif_weights: Box::new([[[1.0; 4096]; 4]; 4]),
265 no_motif_weight: 0.5,
266 start_weight_factor: 4.35,
267 gc_bias_factors: [1.0; 3],
268 gene_dicodon_table: Box::new([0.0; 4096]),
269 total_dicodons: 0,
270 }
271 }
272
273 #[test]
274 fn test_calculate_rbs_scores() {
275 let seq = vec![0, 1, 2, 3, 0, 1, 2, 3]; let rbs_weights = [
277 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
278 ];
279 let pos = 2;
280 let target_pos = 6;
281
282 let scores = calculate_rbs_scores(&seq, pos, target_pos, &rbs_weights);
283
284 assert_eq!(scores.len(), 2);
285 }
286
287 #[test]
288 fn test_calculate_rbs_scores_boundary_conditions() {
289 let seq = vec![0, 1, 2, 3];
290 let rbs_weights = [1.0; 16];
291
292 let scores = calculate_rbs_scores(&seq, 0, 3, &rbs_weights);
294 assert_eq!(scores.len(), 2);
295
296 let scores = calculate_rbs_scores(&seq, 2, 3, &rbs_weights);
298 assert_eq!(scores.len(), 2);
299 }
300
301 #[test]
302 fn test_rbs_score_forward_strand() {
303 let seq = vec![
304 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0,
305 1, 2, 3,
306 ];
307 let reverse_seq = vec![
308 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3,
309 2, 1, 0,
310 ];
311 let training = create_test_training();
312
313 let mut nodes = vec![create_test_node(Strand::Forward, 25, CodonType::Atg)];
314
315 rbs_score(&seq, &reverse_seq, seq.len(), &mut nodes, &training);
316
317 assert_eq!(nodes[0].motif_info.ribosome_binding_sites.len(), 2);
318 }
319
320 #[test]
321 fn test_rbs_score_reverse_strand() {
322 let seq = vec![0; 60]; let reverse_seq = vec![3; 60];
325 let training = create_test_training();
326
327 let mut nodes = vec![
328 create_test_node(Strand::Reverse, 35, CodonType::Atg), ];
330
331 rbs_score(&seq, &reverse_seq, seq.len(), &mut nodes, &training);
332
333 assert_eq!(nodes[0].motif_info.ribosome_binding_sites.len(), 2);
334 }
335
336 #[test]
337 fn test_rbs_score_stop_codon_skipped() {
338 let seq = vec![0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3];
339 let reverse_seq = vec![3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0];
340 let training = create_test_training();
341
342 let mut nodes = vec![create_test_node(Strand::Forward, 6, CodonType::Stop)];
343
344 let original_rbs = nodes[0].motif_info.ribosome_binding_sites;
345 rbs_score(&seq, &reverse_seq, seq.len(), &mut nodes, &training);
346
347 assert_eq!(nodes[0].motif_info.ribosome_binding_sites, original_rbs);
348 }
349
350 #[test]
351 fn test_rbs_score_edge_node_skipped() {
352 let seq = vec![0, 1, 2, 3, 0, 1, 2, 3];
353 let reverse_seq = vec![3, 2, 1, 0, 3, 2, 1, 0];
354 let training = create_test_training();
355
356 let mut node = create_test_node(Strand::Forward, 6, CodonType::Atg);
357 node.position.is_edge = true;
358 let mut nodes = vec![node];
359
360 let original_rbs = nodes[0].motif_info.ribosome_binding_sites;
361 rbs_score(&seq, &reverse_seq, seq.len(), &mut nodes, &training);
362
363 assert_eq!(nodes[0].motif_info.ribosome_binding_sites, original_rbs);
364 }
365
366 #[test]
367 fn test_rbs_score_parallel_processing() {
368 let seq = vec![0; 80];
370 let reverse_seq = vec![3; 80];
371 let training = create_test_training();
372
373 let mut nodes = vec![
374 create_test_node(Strand::Forward, 30, CodonType::Atg),
375 create_test_node(Strand::Reverse, 50, CodonType::Atg),
376 create_test_node(Strand::Forward, 70, CodonType::Atg),
377 ];
378
379 rbs_score(&seq, &reverse_seq, seq.len(), &mut nodes, &training);
380
381 for node in &nodes {
382 assert_eq!(node.motif_info.ribosome_binding_sites.len(), 2);
383 }
384 }
385
386 #[test]
387 fn test_find_best_upstream_motif_stop_codon() {
388 let seq = vec![0, 1, 2, 3, 0, 1, 2, 3];
389 let reverse_seq = vec![3, 2, 1, 0, 3, 2, 1, 0];
390 let training = create_test_training();
391
392 let mut node = create_test_node(Strand::Forward, 6, CodonType::Stop);
393 let original_motif_index = node.motif_info.best_motif.index;
394 let original_motif_length = node.motif_info.best_motif.length;
395
396 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 1);
397
398 assert_eq!(node.motif_info.best_motif.index, original_motif_index);
399 assert_eq!(node.motif_info.best_motif.length, original_motif_length);
400 }
401
402 #[test]
403 fn test_find_best_upstream_motif_edge_node() {
404 let seq = vec![0, 1, 2, 3, 0, 1, 2, 3];
405 let reverse_seq = vec![3, 2, 1, 0, 3, 2, 1, 0];
406 let training = create_test_training();
407
408 let mut node = create_test_node(Strand::Forward, 6, CodonType::Atg);
409 node.position.is_edge = true;
410 let original_motif_index = node.motif_info.best_motif.index;
411 let original_motif_length = node.motif_info.best_motif.length;
412
413 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 1);
414
415 assert_eq!(node.motif_info.best_motif.index, original_motif_index);
416 assert_eq!(node.motif_info.best_motif.length, original_motif_length);
417 }
418
419 #[test]
420 fn test_find_best_upstream_motif_forward_strand() {
421 let seq = vec![0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3];
422 let reverse_seq = vec![3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0];
423 let training = create_test_training();
424
425 let mut node = create_test_node(Strand::Forward, 15, CodonType::Atg);
426
427 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 1);
428
429 }
431
432 #[test]
433 fn test_find_best_upstream_motif_reverse_strand() {
434 let seq = vec![0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3];
435 let reverse_seq = vec![3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0];
436 let training = create_test_training();
437
438 let mut node = create_test_node(Strand::Reverse, 15, CodonType::Atg);
439
440 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 1);
441
442 }
444
445 #[test]
446 fn test_find_best_upstream_motif_stage_two_filtering() {
447 let mut training = create_test_training();
448 training.no_motif_weight = 10.0; let seq = vec![0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3];
451 let reverse_seq = vec![3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0];
452
453 let mut node = create_test_node(Strand::Forward, 15, CodonType::Atg);
454
455 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 2);
456
457 if node.motif_info.best_motif.score < training.no_motif_weight + MOTIF_THRESHOLD_OFFSET {
458 assert_eq!(node.motif_info.best_motif.length, 0);
459 assert_eq!(node.motif_info.best_motif.score, training.no_motif_weight);
460 }
461 }
462
463 #[test]
464 fn test_find_best_upstream_motif_stage_one_accepts_all() {
465 let training = create_test_training();
466 let seq = vec![0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3];
467 let reverse_seq = vec![3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0];
468
469 let mut node = create_test_node(Strand::Forward, 15, CodonType::Atg);
470
471 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 1);
472
473 }
475
476 #[test]
477 fn test_find_best_upstream_motif_empty_sequence() {
478 let seq = vec![];
479 let reverse_seq = vec![];
480 let training = create_test_training();
481
482 let mut node = create_test_node(Strand::Forward, 0, CodonType::Atg);
483
484 find_best_upstream_motif(&training, &seq, &reverse_seq, 0, &mut node, 1);
485
486 }
489
490 #[test]
491 fn test_find_best_upstream_motif_short_sequence() {
492 let seq = vec![0, 1, 2];
493 let reverse_seq = vec![2, 1, 0];
494 let training = create_test_training();
495
496 let mut node = create_test_node(Strand::Forward, 2, CodonType::Atg);
497
498 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 1);
499
500 }
503
504 #[test]
505 fn test_motif_spacer_classification() {
506 let training = create_test_training();
507 let seq = vec![
508 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
509 ];
510 let reverse_seq = vec![
511 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0,
512 ];
513
514 let mut node = create_test_node(Strand::Forward, 20, CodonType::Atg);
515
516 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 1);
517
518 assert!(node.motif_info.best_motif.space_index <= 3);
519 }
520
521 #[test]
522 fn test_motif_length_range() {
523 let training = create_test_training();
524 let seq = vec![
525 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
526 ];
527 let reverse_seq = vec![
528 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0,
529 ];
530
531 let mut node = create_test_node(Strand::Forward, 20, CodonType::Atg);
532
533 find_best_upstream_motif(&training, &seq, &reverse_seq, seq.len(), &mut node, 1);
534
535 assert!(node.motif_info.best_motif.length <= 6);
536 }
537}