orphos_core/node/
mod.rs

//! Node management and scoring for gene prediction.
//!
//! This module contains all functionality related to creating, scoring, and managing
//! gene prediction nodes. Nodes represent potential start and stop codons in the
//! sequence and are used in dynamic programming to find optimal gene predictions.
6
7mod creation;
8mod dicodon;
9mod management;
10mod motifs;
11mod overlaps;
12mod record_gc_bias;
13mod scoring;
14mod utilities;
15mod validation;
16
17pub use creation::add_nodes;
18pub use dicodon::calculate_dicodon_gene;
19pub use management::*;
20pub use motifs::{find_best_upstream_motif, rbs_score};
21pub use overlaps::{intergenic_mod, record_overlapping_starts};
22pub use record_gc_bias::record_gc_bias;
23pub use scoring::{raw_coding_score, score_nodes};
24pub use utilities::{reset_node_scores, sort_nodes_by_position};
25
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::*;
    use bio::bio_types::strand::Strand;

    /// Builds the `Training` fixture shared by the tests below.
    ///
    /// Scalar parameters are fixed (GC content 0.5, translation table 11, Shine-Dalgarno
    /// enabled) and every lookup table is filled with a single constant.
    fn create_test_training() -> Training {
        Training {
            // Scalar parameters.
            gc_content: 0.5,
            translation_table: 11,
            uses_shine_dalgarno: true,
            no_motif_weight: 0.5,
            start_weight_factor: 4.35,
            total_dicodons: 0,
            // Small fixed-size weight arrays.
            start_type_weights: [2.0, 1.5, 1.0],
            gc_bias_factors: [1.0; 3],
            // Boxed, constant-filled tables.
            rbs_weights: Box::new([1.0; 28]),
            upstream_composition: Box::new([[0.25; 4]; 32]),
            motif_weights: Box::new([[[1.0; 4096]; 4]; 4]),
            gene_dicodon_table: Box::new([1.0; 4096]),
        }
    }

    /// Builds the baseline `Node` fixture: a forward-strand, non-edge ATG node with
    /// `index` 25 and `stop_value` 35, default scores/state and no motif information.
    fn create_test_node() -> Node {
        let position = NodePosition {
            index: 25,
            stop_value: 35,
            strand: Strand::Forward,
            codon_type: CodonType::Atg,
            is_edge: false,
        };
        let motif_info = NodeMotifInfo {
            ribosome_binding_sites: [0; 2],
            best_motif: Motif::default(),
        };

        Node {
            position,
            scores: NodeScores::default(),
            state: NodeState::default(),
            motif_info,
        }
    }

    /// Smoke test: the scoring and utility functions reachable through `use super::*`
    /// can be invoked on a tiny sequence. No results are asserted; the test passes as
    /// long as every call returns.
    #[test]
    fn test_module_exports_exist() {
        use crate::sequence::encoded::EncodedSequence;

        let seq = vec![0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 3, 0, 0];
        let seq_len = seq.len();
        let reverse_seq = vec![3; seq_len];
        let training = create_test_training();
        // NOTE(review): the fixture node sits at index 25 / stop_value 35, past the end of
        // this 13-element sequence; how the callees treat that is not visible here - confirm.
        let mut nodes = vec![create_test_node()];

        // `score_nodes` takes the sequence bundled as an `EncodedSequence`.
        let encoded_sequence = EncodedSequence {
            forward_sequence: seq.clone(),
            reverse_complement_sequence: reverse_seq.clone(),
            unknown_sequence: vec![0; seq_len],
            masks: vec![],
            gc_content: 0.5,
            sequence_length: seq_len,
        };

        // Each function is actually called; the return value of `score_nodes` is
        // deliberately discarded because only reachability is being checked.
        let _ = score_nodes(&encoded_sequence, &mut nodes, &training, false, false);
        raw_coding_score(&seq, &reverse_seq, seq_len, &mut nodes, &training);
        // `calc_orf_gc` has no explicit `pub use`; presumably it arrives via `management::*`.
        calc_orf_gc(&seq, seq_len, &mut nodes);
        reset_node_scores(&mut nodes);
        sort_nodes_by_position(&mut nodes);
    }

    /// `reset_node_scores` must zero the coding, upstream, RBS, type and total scores of a
    /// node whose scores were all non-zero beforehand.
    #[test]
    fn test_reset_node_scores_functionality() {
        // Start from the shared fixture and override only what this test needs.
        let mut node = create_test_node();
        node.position.index = 0;
        node.position.stop_value = 12;
        node.scores = NodeScores {
            coding_score: 10.0,
            upstream_score: 5.0,
            ribosome_binding_score: 3.0,
            type_score: 2.0,
            total_score: 20.0,
            gc_content: 0.5,
            start_score: 1.0,
            gc_frame_scores: [0.4, 0.5, 0.6],
        };
        let mut nodes = vec![node];

        reset_node_scores(&mut nodes);

        let scores = &nodes[0].scores;
        assert_eq!(scores.coding_score, 0.0);
        assert_eq!(scores.upstream_score, 0.0);
        assert_eq!(scores.ribosome_binding_score, 0.0);
        assert_eq!(scores.type_score, 0.0);
        assert_eq!(scores.total_score, 0.0);
    }

    /// `sort_nodes_by_position` must reorder two nodes given as (30, 10) into (10, 30)
    /// by `position.index`.
    #[test]
    fn test_sort_nodes_by_position_functionality() {
        // Both nodes are the shared fixture with different coordinates.
        let mut downstream = create_test_node();
        downstream.position.index = 30;
        downstream.position.stop_value = 40;

        let mut upstream = create_test_node();
        upstream.position.index = 10;
        upstream.position.stop_value = 20;

        // Deliberately inserted out of order.
        let mut nodes = vec![downstream, upstream];

        sort_nodes_by_position(&mut nodes);

        let (first, second) = (nodes[0].position.index, nodes[1].position.index);
        assert!(first <= second);
        assert_eq!(first, 10);
        assert_eq!(second, 30);
    }

    /// Runs coding-score calculation, ORF GC calculation, score reset and sorting in
    /// sequence on one node, checking that the coding score is finite and that the reset
    /// leaves `total_score` at zero.
    #[test]
    fn test_node_module_integration() {
        let seq = vec![
            0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 3, 0, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
        ];
        let seq_len = seq.len();
        let reverse_seq = vec![3; seq_len];
        let training = create_test_training();
        // NOTE(review): the fixture node's stop_value (35) exceeds this 28-element
        // sequence; confirm that is the intended input for these calls.
        let mut nodes = vec![create_test_node()];

        // Scoring must yield a finite value (neither NaN nor infinite).
        raw_coding_score(&seq, &reverse_seq, seq_len, &mut nodes, &training);
        assert!(nodes[0].scores.coding_score.is_finite());

        // GC calculation is only exercised; its output is not inspected.
        calc_orf_gc(&seq, seq_len, &mut nodes);

        reset_node_scores(&mut nodes);
        assert_eq!(nodes[0].scores.total_score, 0.0);

        // Sorting a single node only needs to complete without panicking.
        sort_nodes_by_position(&mut nodes);
    }
}