orphos_core/node/
overlaps.rs

1use bio::bio_types::strand::Strand;
2
3use crate::{
4    constants::{MAXIMUM_SAME_OVERLAP, OPERON_DISTANCE, OVERLAP_PENALTY_FACTOR},
5    types::{CodonType, Node, Training},
6};
7
8pub fn record_overlapping_starts(nodes: &mut [Node], training: &Training, flag: bool) {
9    let node_count = nodes.len();
10
11    for i in 0..node_count {
12        for j in 0..3 {
13            nodes[i].state.start_pointers[j] = None;
14        }
15
16        if nodes[i].position.codon_type != CodonType::Stop || nodes[i].position.is_edge {
17            continue;
18        }
19
20        let mut max_sc = -100.0;
21        if nodes[i].position.strand == Strand::Forward {
22            if i + 3 < i32::MAX as usize {
23                let start_j = i + 3;
24                for j in (0..=start_j).rev() {
25                    if j >= node_count || nodes[j].position.index > nodes[i].position.index + 2 {
26                        continue;
27                    }
28
29                    if nodes[j].position.index + MAXIMUM_SAME_OVERLAP < nodes[i].position.index {
30                        break;
31                    }
32
33                    if nodes[j].position.strand == Strand::Forward
34                        && nodes[j].position.codon_type != CodonType::Stop
35                    {
36                        if nodes[j].position.stop_value <= nodes[i].position.index as isize {
37                            continue;
38                        }
39
40                        let frame = nodes[j].position.index % 3;
41
42                        if !flag && nodes[i].state.start_pointers[frame].is_none() {
43                            nodes[i].state.start_pointers[frame] = Some(j);
44                        } else if flag {
45                            let score = nodes[j].scores.coding_score
46                                + nodes[j].scores.start_score
47                                + intergenic_mod(&nodes[i], &nodes[j], training);
48                            if score > max_sc {
49                                nodes[i].state.start_pointers[frame] = Some(j);
50                                max_sc = nodes[j].scores.coding_score
51                                    + nodes[j].scores.start_score
52                                    + intergenic_mod(&nodes[i], &nodes[j], training);
53                            }
54                        }
55                    }
56                }
57            }
58        } else {
59            for j_signed in ((i as isize) - 3)..(node_count as isize) {
60                if j_signed < 0 {
61                    continue;
62                }
63                let j = j_signed as usize;
64
65                // same geometric filters as before
66                if nodes[j].position.index < nodes[i].position.index.saturating_sub(2) {
67                    continue;
68                }
69                if nodes[j].position.index > nodes[i].position.index + MAXIMUM_SAME_OVERLAP {
70                    break;
71                }
72
73                if nodes[j].position.strand == Strand::Reverse
74                    && nodes[j].position.codon_type != CodonType::Stop
75                {
76                    if nodes[j].position.stop_value >= nodes[i].position.index as isize {
77                        continue;
78                    }
79
80                    let frame = nodes[j].position.index % 3;
81
82                    if !flag && nodes[i].state.start_pointers[frame].is_none() {
83                        nodes[i].state.start_pointers[frame] = Some(j);
84                    } else if flag {
85                        let score = nodes[j].scores.coding_score
86                            + nodes[j].scores.start_score
87                            + intergenic_mod(&nodes[j], &nodes[i], training);
88                        if score > max_sc {
89                            nodes[i].state.start_pointers[frame] = Some(j);
90                            max_sc = score; // no need to recompute
91                        }
92                    }
93                }
94            }
95        }
96    }
97}
98
99/// Calculate intergenic modification score for operons and overlaps
100#[inline]
101pub fn intergenic_mod(n1: &Node, n2: &Node, training: &Training) -> f64 {
102    let dist = n1.position.index.abs_diff(n2.position.index) as f64;
103    let mut rval = 0.0;
104    let ovlp = is_overlap(n1, n2);
105
106    // Check for operon-like overlaps
107    if is_operon_like(n1, n2) {
108        rval += calculate_operon_bonus(n1, n2);
109    }
110
111    // Apply distance-based bonuses/penalties
112    if dist > 3.0 * OPERON_DISTANCE || n1.position.strand != n2.position.strand {
113        rval -= OVERLAP_PENALTY_FACTOR * training.start_weight_factor;
114    } else if (dist <= OPERON_DISTANCE && !ovlp) || dist < OPERON_DISTANCE / 4.0 {
115        rval +=
116            (2.0 - dist / OPERON_DISTANCE) * OVERLAP_PENALTY_FACTOR * training.start_weight_factor;
117    }
118
119    rval
120}
121
122/// Check if two nodes form an operon-like structure
123fn is_operon_like(n1: &Node, n2: &Node) -> bool {
124    n1.position.strand == n2.position.strand
125        && (n1.position.index + 2 == n2.position.index
126            || n1.position.index == n2.position.index + 1)
127}
128
129/// Check if two nodes overlap
130fn is_overlap(n1: &Node, n2: &Node) -> bool {
131    (n1.position.strand == Strand::Forward
132        && n2.position.strand == Strand::Forward
133        && n1.position.index + 2 >= n2.position.index)
134        || (n1.position.strand == Strand::Reverse
135            && n2.position.strand == Strand::Reverse
136            && n1.position.index >= n2.position.index + 2)
137}
138
139/// Calculate operon bonus
140fn calculate_operon_bonus(n1: &Node, n2: &Node) -> f64 {
141    let mut bonus = 0.0;
142
143    if n1.position.strand == Strand::Forward && n2.scores.ribosome_binding_score < 0.0 {
144        bonus -= n2.scores.ribosome_binding_score;
145    }
146    if n1.position.strand == Strand::Reverse && n1.scores.ribosome_binding_score < 0.0 {
147        bonus -= n1.scores.ribosome_binding_score;
148    }
149    if n1.position.strand == Strand::Forward && n2.scores.upstream_score < 0.0 {
150        bonus -= n2.scores.upstream_score;
151    }
152    if n1.position.strand == Strand::Reverse && n1.scores.upstream_score < 0.0 {
153        bonus -= n1.scores.upstream_score;
154    }
155
156    bonus
157}
158
159#[cfg(test)]
160mod tests {
161    use super::*;
162    use crate::types::*;
163    use bio::bio_types::strand::Strand;
164
165    fn create_test_node(
166        strand: Strand,
167        index: usize,
168        codon_type: CodonType,
169        stop_value: isize,
170    ) -> Node {
171        Node {
172            position: NodePosition {
173                index,
174                strand,
175                codon_type,
176                stop_value,
177                is_edge: false,
178            },
179            scores: NodeScores {
180                coding_score: 1.0,
181                start_score: 0.5,
182                ribosome_binding_score: 0.3,
183                upstream_score: 0.2,
184                ..Default::default()
185            },
186            state: NodeState::default(),
187            motif_info: NodeMotifInfo::default(),
188        }
189    }
190
191    fn create_test_training() -> Training {
192        Training {
193            gc_content: 0.5,
194            translation_table: 11,
195            uses_shine_dalgarno: true,
196            start_type_weights: [2.0, 1.5, 1.0],
197            rbs_weights: Box::new([1.0; 28]),
198            upstream_composition: Box::new([[0.25; 4]; 32]),
199            motif_weights: Box::new([[[1.0; 4096]; 4]; 4]),
200            no_motif_weight: 0.5,
201            start_weight_factor: 4.35,
202            gc_bias_factors: [1.0; 3],
203            gene_dicodon_table: Box::new([1.0; 4096]),
204            total_dicodons: 0,
205        }
206    }
207
208    #[test]
209    fn test_record_overlapping_starts_forward_strand() {
210        let training = create_test_training();
211        let mut nodes = vec![
212            create_test_node(Strand::Forward, 10, CodonType::Atg, 100),
213            create_test_node(Strand::Forward, 15, CodonType::Gtg, 105),
214            create_test_node(Strand::Forward, 50, CodonType::Stop, 50),
215        ];
216
217        record_overlapping_starts(&mut nodes, &training, false);
218
219        // Check that start pointers are set for stop codon nodes
220        assert!(
221            nodes[2].state.start_pointers.iter().any(|p| p.is_some())
222                || nodes[2].state.start_pointers.iter().all(|p| p.is_none())
223        );
224    }
225
226    #[test]
227    fn test_record_overlapping_starts_reverse_strand() {
228        let training = create_test_training();
229        let mut nodes = vec![
230            create_test_node(Strand::Reverse, 50, CodonType::Atg, 10),
231            create_test_node(Strand::Reverse, 45, CodonType::Gtg, 5),
232            create_test_node(Strand::Reverse, 20, CodonType::Stop, 20),
233        ];
234
235        record_overlapping_starts(&mut nodes, &training, false);
236
237        // Check that start pointers are initialized
238        assert!(
239            nodes[2].state.start_pointers.iter().any(|p| p.is_some())
240                || nodes[2].state.start_pointers.iter().all(|p| p.is_none())
241        );
242    }
243
244    #[test]
245    fn test_record_overlapping_starts_with_scoring() {
246        let training = create_test_training();
247        let mut nodes = vec![
248            create_test_node(Strand::Forward, 10, CodonType::Atg, 100),
249            create_test_node(Strand::Forward, 15, CodonType::Gtg, 105),
250            create_test_node(Strand::Forward, 50, CodonType::Stop, 50),
251        ];
252
253        // Test with scoring enabled (flag = true)
254        record_overlapping_starts(&mut nodes, &training, true);
255
256        // Verify nodes are processed without panicking
257        assert_eq!(nodes.len(), 3);
258    }
259
260    #[test]
261    fn test_record_overlapping_starts_edge_nodes() {
262        let training = create_test_training();
263        let mut nodes = vec![create_test_node(Strand::Forward, 10, CodonType::Atg, 100)];
264        nodes[0].position.is_edge = true;
265
266        record_overlapping_starts(&mut nodes, &training, false);
267
268        assert!(nodes[0].state.start_pointers.iter().all(|p| p.is_none()));
269    }
270
271    #[test]
272    fn test_record_overlapping_starts_non_stop_codons() {
273        let training = create_test_training();
274        let mut nodes = vec![
275            create_test_node(Strand::Forward, 10, CodonType::Atg, 100),
276            create_test_node(Strand::Forward, 15, CodonType::Gtg, 105),
277        ];
278
279        record_overlapping_starts(&mut nodes, &training, false);
280
281        assert!(nodes[0].state.start_pointers.iter().all(|p| p.is_none()));
282        assert!(nodes[1].state.start_pointers.iter().all(|p| p.is_none()));
283    }
284
285    #[test]
286    fn test_intergenic_mod_same_strand() {
287        let training = create_test_training();
288        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
289        let n2 = create_test_node(Strand::Forward, 15, CodonType::Gtg, 105);
290
291        let score = intergenic_mod(&n1, &n2, &training);
292
293        // Should return a valid score
294        assert!(score.is_finite());
295    }
296
297    #[test]
298    fn test_intergenic_mod_different_strands() {
299        let training = create_test_training();
300        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
301        let n2 = create_test_node(Strand::Reverse, 15, CodonType::Gtg, 5);
302
303        let score = intergenic_mod(&n1, &n2, &training);
304
305        assert!(score <= 0.0);
306    }
307
308    #[test]
309    fn test_intergenic_mod_large_distance() {
310        let training = create_test_training();
311        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
312        let n2 = create_test_node(Strand::Forward, 1000, CodonType::Gtg, 1100);
313
314        let score = intergenic_mod(&n1, &n2, &training);
315
316        assert!(score <= 0.0);
317    }
318
319    #[test]
320    fn test_intergenic_mod_operon_distance() {
321        let training = create_test_training();
322        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
323        let n2 = create_test_node(
324            Strand::Forward,
325            10 + (OPERON_DISTANCE / 2.0) as usize,
326            CodonType::Gtg,
327            105,
328        );
329
330        let score = intergenic_mod(&n1, &n2, &training);
331
332        // Should get operon bonus
333        assert!(score >= 0.0);
334    }
335
336    #[test]
337    fn test_is_operon_like_adjacent_nodes() {
338        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
339        let n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
340
341        assert!(is_operon_like(&n1, &n2));
342    }
343
344    #[test]
345    fn test_is_operon_like_consecutive_nodes() {
346        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
347        let n2 = create_test_node(Strand::Forward, 9, CodonType::Gtg, 105);
348
349        assert!(is_operon_like(&n1, &n2));
350    }
351
352    #[test]
353    fn test_is_operon_like_different_strands() {
354        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
355        let n2 = create_test_node(Strand::Reverse, 12, CodonType::Gtg, 5);
356
357        assert!(!is_operon_like(&n1, &n2));
358    }
359
360    #[test]
361    fn test_is_operon_like_distant_nodes() {
362        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
363        let n2 = create_test_node(Strand::Forward, 20, CodonType::Gtg, 105);
364
365        assert!(!is_operon_like(&n1, &n2));
366    }
367
368    #[test]
369    fn test_is_overlap_forward_strand() {
370        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
371        let n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
372
373        assert!(is_overlap(&n1, &n2));
374    }
375
376    #[test]
377    fn test_is_overlap_reverse_strand() {
378        let n1 = create_test_node(Strand::Reverse, 15, CodonType::Atg, 5);
379        let n2 = create_test_node(Strand::Reverse, 12, CodonType::Gtg, 2);
380
381        assert!(is_overlap(&n1, &n2));
382    }
383
384    #[test]
385    fn test_is_overlap_no_overlap() {
386        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
387        let n2 = create_test_node(Strand::Forward, 20, CodonType::Gtg, 105);
388
389        assert!(!is_overlap(&n1, &n2));
390    }
391
392    #[test]
393    fn test_is_overlap_different_strands() {
394        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
395        let n2 = create_test_node(Strand::Reverse, 12, CodonType::Gtg, 5);
396
397        assert!(!is_overlap(&n1, &n2));
398    }
399
400    #[test]
401    fn test_calculate_operon_bonus_forward_negative_rbs() {
402        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
403        let mut n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
404        n2.scores.ribosome_binding_score = -0.5;
405
406        let bonus = calculate_operon_bonus(&n1, &n2);
407
408        // Should get bonus for negative RBS score
409        assert!(bonus > 0.0);
410    }
411
412    #[test]
413    fn test_calculate_operon_bonus_reverse_negative_rbs() {
414        let mut n1 = create_test_node(Strand::Reverse, 15, CodonType::Atg, 5);
415        let n2 = create_test_node(Strand::Reverse, 12, CodonType::Gtg, 2);
416        n1.scores.ribosome_binding_score = -0.3;
417
418        let bonus = calculate_operon_bonus(&n1, &n2);
419
420        // Should get bonus for negative RBS score
421        assert!(bonus > 0.0);
422    }
423
424    #[test]
425    fn test_calculate_operon_bonus_negative_upstream() {
426        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
427        let mut n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
428        n2.scores.upstream_score = -0.4;
429
430        let bonus = calculate_operon_bonus(&n1, &n2);
431
432        // Should get bonus for negative upstream score
433        assert!(bonus > 0.0);
434    }
435
436    #[test]
437    fn test_calculate_operon_bonus_positive_scores() {
438        let n1 = create_test_node(Strand::Forward, 10, CodonType::Atg, 100);
439        let n2 = create_test_node(Strand::Forward, 12, CodonType::Gtg, 105);
440
441        let bonus = calculate_operon_bonus(&n1, &n2);
442
443        // No bonus for positive scores
444        assert_eq!(bonus, 0.0);
445    }
446
447    #[test]
448    fn test_record_overlapping_starts_empty_nodes() {
449        let training = create_test_training();
450        let mut nodes = vec![];
451
452        record_overlapping_starts(&mut nodes, &training, false);
453
454        // Should handle empty input gracefully
455        assert_eq!(nodes.len(), 0);
456    }
457
458    #[test]
459    fn test_record_overlapping_starts_single_node() {
460        let training = create_test_training();
461        let mut nodes = vec![create_test_node(Strand::Forward, 10, CodonType::Stop, 10)];
462
463        record_overlapping_starts(&mut nodes, &training, false);
464
465        assert!(nodes[0].state.start_pointers.iter().all(|p| p.is_none()));
466    }
467
468    #[test]
469    fn test_record_overlapping_starts_maximum_overlap() {
470        let training = create_test_training();
471        let mut nodes = vec![
472            create_test_node(Strand::Forward, 10, CodonType::Atg, 100),
473            create_test_node(
474                Strand::Forward,
475                10 + MAXIMUM_SAME_OVERLAP + 5,
476                CodonType::Stop,
477                50,
478            ),
479        ];
480
481        record_overlapping_starts(&mut nodes, &training, false);
482
483        // Should handle maximum overlap distance
484        assert!(nodes[1].state.start_pointers.iter().all(|p| p.is_none()));
485    }
486}