1use bio::bio_types::strand::Strand;
2
3use crate::{
4 constants::{MAXIMUM_SAME_OVERLAP, OPERON_DISTANCE, OVERLAP_PENALTY_FACTOR},
5 types::{CodonType, Node, Training},
6};
7
8pub fn record_overlapping_starts(nodes: &mut [Node], training: &Training, flag: bool) {
9 let node_count = nodes.len();
10
11 for i in 0..node_count {
12 for j in 0..3 {
13 nodes[i].state.start_pointers[j] = None;
14 }
15
16 if nodes[i].position.codon_type != CodonType::Stop || nodes[i].position.is_edge {
17 continue;
18 }
19
20 let mut max_sc = -100.0;
21 if nodes[i].position.strand == Strand::Forward {
22 if i + 3 < i32::MAX as usize {
23 let start_j = i + 3;
24 for j in (0..=start_j).rev() {
25 if j >= node_count || nodes[j].position.index > nodes[i].position.index + 2 {
26 continue;
27 }
28
29 if nodes[j].position.index + MAXIMUM_SAME_OVERLAP < nodes[i].position.index {
30 break;
31 }
32
33 if nodes[j].position.strand == Strand::Forward
34 && nodes[j].position.codon_type != CodonType::Stop
35 {
36 if nodes[j].position.stop_value <= nodes[i].position.index as isize {
37 continue;
38 }
39
40 let frame = nodes[j].position.index % 3;
41
42 if !flag && nodes[i].state.start_pointers[frame].is_none() {
43 nodes[i].state.start_pointers[frame] = Some(j);
44 } else if flag {
45 let score = nodes[j].scores.coding_score
46 + nodes[j].scores.start_score
47 + intergenic_mod(&nodes[i], &nodes[j], training);
48 if score > max_sc {
49 nodes[i].state.start_pointers[frame] = Some(j);
50 max_sc = nodes[j].scores.coding_score
51 + nodes[j].scores.start_score
52 + intergenic_mod(&nodes[i], &nodes[j], training);
53 }
54 }
55 }
56 }
57 }
58 } else {
59 for j_signed in ((i as isize) - 3)..(node_count as isize) {
60 if j_signed < 0 {
61 continue;
62 }
63 let j = j_signed as usize;
64
65 if nodes[j].position.index < nodes[i].position.index.saturating_sub(2) {
67 continue;
68 }
69 if nodes[j].position.index > nodes[i].position.index + MAXIMUM_SAME_OVERLAP {
70 break;
71 }
72
73 if nodes[j].position.strand == Strand::Reverse
74 && nodes[j].position.codon_type != CodonType::Stop
75 {
76 if nodes[j].position.stop_value >= nodes[i].position.index as isize {
77 continue;
78 }
79
80 let frame = nodes[j].position.index % 3;
81
82 if !flag && nodes[i].state.start_pointers[frame].is_none() {
83 nodes[i].state.start_pointers[frame] = Some(j);
84 } else if flag {
85 let score = nodes[j].scores.coding_score
86 + nodes[j].scores.start_score
87 + intergenic_mod(&nodes[j], &nodes[i], training);
88 if score > max_sc {
89 nodes[i].state.start_pointers[frame] = Some(j);
90 max_sc = score; }
92 }
93 }
94 }
95 }
96 }
97}
98
99#[inline]
101pub fn intergenic_mod(n1: &Node, n2: &Node, training: &Training) -> f64 {
102 let dist = n1.position.index.abs_diff(n2.position.index) as f64;
103 let mut rval = 0.0;
104 let ovlp = is_overlap(n1, n2);
105
106 if is_operon_like(n1, n2) {
108 rval += calculate_operon_bonus(n1, n2);
109 }
110
111 if dist > 3.0 * OPERON_DISTANCE || n1.position.strand != n2.position.strand {
113 rval -= OVERLAP_PENALTY_FACTOR * training.start_weight_factor;
114 } else if (dist <= OPERON_DISTANCE && !ovlp) || dist < OPERON_DISTANCE / 4.0 {
115 rval +=
116 (2.0 - dist / OPERON_DISTANCE) * OVERLAP_PENALTY_FACTOR * training.start_weight_factor;
117 }
118
119 rval
120}
121
122fn is_operon_like(n1: &Node, n2: &Node) -> bool {
124 n1.position.strand == n2.position.strand
125 && (n1.position.index + 2 == n2.position.index
126 || n1.position.index == n2.position.index + 1)
127}
128
129fn is_overlap(n1: &Node, n2: &Node) -> bool {
131 (n1.position.strand == Strand::Forward
132 && n2.position.strand == Strand::Forward
133 && n1.position.index + 2 >= n2.position.index)
134 || (n1.position.strand == Strand::Reverse
135 && n2.position.strand == Strand::Reverse
136 && n1.position.index >= n2.position.index + 2)
137}
138
139fn calculate_operon_bonus(n1: &Node, n2: &Node) -> f64 {
141 let mut bonus = 0.0;
142
143 if n1.position.strand == Strand::Forward && n2.scores.ribosome_binding_score < 0.0 {
144 bonus -= n2.scores.ribosome_binding_score;
145 }
146 if n1.position.strand == Strand::Reverse && n1.scores.ribosome_binding_score < 0.0 {
147 bonus -= n1.scores.ribosome_binding_score;
148 }
149 if n1.position.strand == Strand::Forward && n2.scores.upstream_score < 0.0 {
150 bonus -= n2.scores.upstream_score;
151 }
152 if n1.position.strand == Strand::Reverse && n1.scores.upstream_score < 0.0 {
153 bonus -= n1.scores.upstream_score;
154 }
155
156 bonus
157}
158
159#[cfg(test)]
160mod tests {
161 use super::*;
162 use crate::types::*;
163 use bio::bio_types::strand::Strand;
164
165 fn create_test_node(
166 strand: Strand,
167 index: usize,
168 codon_type: CodonType,
169 stop_value: isize,
170 ) -> Node {
171 Node {
172 position: NodePosition {
173 index,
174 strand,
175 codon_type,
176 stop_value,
177 is_edge: false,
178 },
179 scores: NodeScores {
180 coding_score: 1.0,
181 start_score: 0.5,
182 ribosome_binding_score: 0.3,
183 upstream_score: 0.2,
184 ..Default::default()
185 },
186 state: NodeState::default(),
187 motif_info: NodeMotifInfo::default(),
188 }
189 }
190
191 fn create_test_training() -> Training {
192 Training {
193 gc_content: 0.5,
194 translation_table: 11,
195 uses_shine_dalgarno: true,
196 start_type_weights: [2.0, 1.5, 1.0],
197 rbs_weights: Box::new([1.0; 28]),
198 upstream_composition: Box::new([[0.25; 4]; 32]),
199 motif_weights: Box::new([[[1.0; 4096]; 4]; 4]),
200 no_motif_weight: 0.5,
201 start_weight_factor: 4.35,
202 gc_bias_factors: [1.0; 3],
203 gene_dicodon_table: Box::new([1.0; 4096]),
204 total_dicodons: 0,
205 }
206 }
207
208 #[test]
209 fn test_record_overlapping_starts_forward_strand() {
210 let training = create_test_training();
211 let mut nodes = vec![
212 create_test_node(Strand::Forward, 10, CodonType::Atg, 100),
213 create_test_node(Strand::Forward, 15, CodonType::Gtg, 105),
214 create_test_node(Strand::Forward, 50, CodonType::Stop, 50),
215 ];
216
217 record_overlapping_starts(&mut nodes, &training, false);
218
219 assert!(
221 nodes[2].state.start_pointers.iter().any(|p| p.is_some())
222 || nodes[2].state.start_pointers.iter().all(|p| p.is_none())
223 );
224 }
225
226 #[test]
227 fn test_record_overlapping_starts_reverse_strand() {
228 let training = create_test_training();
229 let mut nodes = vec![
230 create_test_node(Strand::Reverse, 50, CodonType::Atg, 10),
231 create_test_node(Strand::Reverse, 45, CodonType::Gtg, 5),
232 create_test_node(Strand::Reverse, 20, CodonType::Stop, 20),
233 ];
234
235 record_overlapping_starts(&mut nodes, &training, false);
236
237 assert!(
239 nodes[2].state.start_pointers.iter().any(|p| p.is_some())
240 || nodes[2].state.start_pointers.iter().all(|p| p.is_none())
241 );
242 }
243
244 #[test]
245 fn test_record_overlapping_starts_with_scoring() {
246 let training = create_test_training();
247 let mut nodes = vec![
248 create_test_node(Strand::Forward, 10, CodonType::Atg, 100),
249 create_test_node(Strand::Forward, 15, CodonType::Gtg, 105),
250 create_test_node(Strand::Forward, 50, CodonType::Stop, 50),
251 ];
252
253 record_overlapping_starts(&mut nodes, &training, true);
255
256 assert_eq!(nodes.len(), 3);
258 }
259
260 #[test]
261 fn test_record_overlapping_starts_edge_nodes() {
262 let training = create_test_training();
263 let mut nodes = vec![create_test_node(Strand::Forward, 10, CodonType::Atg, 100)];
264 nodes[0].position.is_edge = true;
265
266 record_overlapping_starts(&mut nodes, &training, false);
267
268 assert!(nodes[0].state.start_pointers.iter().all(|p| p.is_none()));
269 }
270
271 #[test]
272 fn test_record_overlapping_starts_non_stop_codons() {
273 let training = create_test_training();
274 let mut nodes = vec![
275 create_test_node(Strand::Forward, 10, CodonType::Atg, 100),
276 create_test_node(Strand::Forward, 15, CodonType::Gtg, 105),
277 ];
278
279 record_overlapping_starts(&mut nodes, &training, false);
280
281 assert!(nodes[0].state.start_pointers.iter().all(|p| p.is_none()));
282 assert!(nodes[1].state.start_pointers.iter().all(|p| p.is_none()));
283 }
284
285 #[test]
286 fn test_intergenic_mod_same_strand() {
287 let training = create_test_training();
288 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
289 let n2 = create_test_node(Strand::Forward, 15, CodonType::Gtg, 105);
290
291 let score = intergenic_mod(&n1, &n2, &training);
292
293 assert!(score.is_finite());
295 }
296
297 #[test]
298 fn test_intergenic_mod_different_strands() {
299 let training = create_test_training();
300 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
301 let n2 = create_test_node(Strand::Reverse, 15, CodonType::Gtg, 5);
302
303 let score = intergenic_mod(&n1, &n2, &training);
304
305 assert!(score <= 0.0);
306 }
307
308 #[test]
309 fn test_intergenic_mod_large_distance() {
310 let training = create_test_training();
311 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
312 let n2 = create_test_node(Strand::Forward, 1000, CodonType::Gtg, 1100);
313
314 let score = intergenic_mod(&n1, &n2, &training);
315
316 assert!(score <= 0.0);
317 }
318
319 #[test]
320 fn test_intergenic_mod_operon_distance() {
321 let training = create_test_training();
322 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
323 let n2 = create_test_node(
324 Strand::Forward,
325 10 + (OPERON_DISTANCE / 2.0) as usize,
326 CodonType::Gtg,
327 105,
328 );
329
330 let score = intergenic_mod(&n1, &n2, &training);
331
332 assert!(score >= 0.0);
334 }
335
336 #[test]
337 fn test_is_operon_like_adjacent_nodes() {
338 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
339 let n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
340
341 assert!(is_operon_like(&n1, &n2));
342 }
343
344 #[test]
345 fn test_is_operon_like_consecutive_nodes() {
346 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
347 let n2 = create_test_node(Strand::Forward, 9, CodonType::Gtg, 105);
348
349 assert!(is_operon_like(&n1, &n2));
350 }
351
352 #[test]
353 fn test_is_operon_like_different_strands() {
354 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
355 let n2 = create_test_node(Strand::Reverse, 12, CodonType::Gtg, 5);
356
357 assert!(!is_operon_like(&n1, &n2));
358 }
359
360 #[test]
361 fn test_is_operon_like_distant_nodes() {
362 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
363 let n2 = create_test_node(Strand::Forward, 20, CodonType::Gtg, 105);
364
365 assert!(!is_operon_like(&n1, &n2));
366 }
367
368 #[test]
369 fn test_is_overlap_forward_strand() {
370 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
371 let n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
372
373 assert!(is_overlap(&n1, &n2));
374 }
375
376 #[test]
377 fn test_is_overlap_reverse_strand() {
378 let n1 = create_test_node(Strand::Reverse, 15, CodonType::Atg, 5);
379 let n2 = create_test_node(Strand::Reverse, 12, CodonType::Gtg, 2);
380
381 assert!(is_overlap(&n1, &n2));
382 }
383
384 #[test]
385 fn test_is_overlap_no_overlap() {
386 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
387 let n2 = create_test_node(Strand::Forward, 20, CodonType::Gtg, 105);
388
389 assert!(!is_overlap(&n1, &n2));
390 }
391
392 #[test]
393 fn test_is_overlap_different_strands() {
394 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
395 let n2 = create_test_node(Strand::Reverse, 12, CodonType::Gtg, 5);
396
397 assert!(!is_overlap(&n1, &n2));
398 }
399
400 #[test]
401 fn test_calculate_operon_bonus_forward_negative_rbs() {
402 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
403 let mut n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
404 n2.scores.ribosome_binding_score = -0.5;
405
406 let bonus = calculate_operon_bonus(&n1, &n2);
407
408 assert!(bonus > 0.0);
410 }
411
412 #[test]
413 fn test_calculate_operon_bonus_reverse_negative_rbs() {
414 let mut n1 = create_test_node(Strand::Reverse, 15, CodonType::Atg, 5);
415 let n2 = create_test_node(Strand::Reverse, 12, CodonType::Gtg, 2);
416 n1.scores.ribosome_binding_score = -0.3;
417
418 let bonus = calculate_operon_bonus(&n1, &n2);
419
420 assert!(bonus > 0.0);
422 }
423
424 #[test]
425 fn test_calculate_operon_bonus_negative_upstream() {
426 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
427 let mut n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
428 n2.scores.upstream_score = -0.4;
429
430 let bonus = calculate_operon_bonus(&n1, &n2);
431
432 assert!(bonus > 0.0);
434 }
435
436 #[test]
437 fn test_calculate_operon_bonus_positive_scores() {
438 let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
439 let n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
440
441 let bonus = calculate_operon_bonus(&n1, &n2);
442
443 assert_eq!(bonus, 0.0);
445 }
446
447 #[test]
448 fn test_record_overlapping_starts_empty_nodes() {
449 let training = create_test_training();
450 let mut nodes = vec![];
451
452 record_overlapping_starts(&mut nodes, &training, false);
453
454 assert_eq!(nodes.len(), 0);
456 }
457
458 #[test]
459 fn test_record_overlapping_starts_single_node() {
460 let training = create_test_training();
461 let mut nodes = vec![create_test_node(Strand::Forward, 10, CodonType::Stop, 10)];
462
463 record_overlapping_starts(&mut nodes, &training, false);
464
465 assert!(nodes[0].state.start_pointers.iter().all(|p| p.is_none()));
466 }
467
468 #[test]
469 fn test_record_overlapping_starts_maximum_overlap() {
470 let training = create_test_training();
471 let mut nodes = vec![
472 create_test_node(Strand::Forward, 10, CodonType::Atg, 100),
473 create_test_node(
474 Strand::Forward,
475 10 + MAXIMUM_SAME_OVERLAP + 5,
476 CodonType::Stop,
477 50,
478 ),
479 ];
480
481 record_overlapping_starts(&mut nodes, &training, false);
482
483 assert!(nodes[1].state.start_pointers.iter().all(|p| p.is_none()));
485 }
486}