1use std::marker::PhantomData;
2use std::path::Path;
3
4use crate::algorithms::dynamic_programming::eliminate_bad_genes;
5use crate::algorithms::dynamic_programming::predict_genes;
6use crate::algorithms::gene_finding::GeneBuilder;
7use crate::config::OrphosConfig;
8use crate::constants::MIN_SEQUENCE_LENGTH;
9use crate::metagenomic::bins;
10use crate::metagenomic::get_preset_training_ref;
11use crate::node::{
12 add_nodes, calculate_dicodon_gene, raw_coding_score, rbs_score, record_gc_bias,
13 record_overlapping_starts, reset_node_scores, score_nodes, sort_nodes_by_position,
14};
15use crate::results::{OrphosResults, SequenceInfo};
16use crate::sequence::calc_most_gc_frame;
17use crate::sequence::encoded::EncodedSequence;
18use crate::sequence::read_fasta_sequences;
19use crate::training::non_sd_training::train_starts_nonsd;
20use crate::training::sd_training::train_starts_sd;
21use crate::training::should_use_sd;
22use crate::types::Gene;
23use crate::types::{CodonType, Node, OrphosError, Training};
24use bio::bio_types::strand::Strand;
25
26pub trait TrainingState {}
32
33#[derive(Debug, Clone)]
38pub struct Untrained;
39
40#[derive(Debug, Clone)]
45pub struct Trained;
46
47impl TrainingState for Untrained {}
48impl TrainingState for Trained {}
49
50#[derive(Debug, Default)]
85pub struct Orphos<S: TrainingState> {
86 pub config: OrphosConfig,
88 training: Option<Training>,
90 _state: PhantomData<S>,
92}
93
94pub type UntrainedOrphos = Orphos<Untrained>;
98
99pub type TrainedOrphos = Orphos<Trained>;
103
104impl UntrainedOrphos {
105 pub fn new() -> Self {
115 Self {
116 config: OrphosConfig::default(),
117 training: None,
118 _state: PhantomData,
119 }
120 }
121
122 pub fn with_config(config: OrphosConfig) -> Result<Self, OrphosError> {
148 if config.circular && config.closed_ends {
149 return Err(OrphosError::InvalidSequence(
150 "Invalid configuration: circular and closed_ends cannot both be true".to_string(),
151 ));
152 }
153
154 let orphos = Self {
155 config,
156 training: None,
157 _state: PhantomData,
158 };
159
160 if let Some(num_threads) = orphos.config.num_threads {
161 rayon::ThreadPoolBuilder::new()
162 .num_threads(num_threads)
163 .build_global()
164 .map_err(|e| {
165 OrphosError::InvalidSequence(format!("Failed to configure thread pool: {}", e))
166 })?;
167 }
168
169 Ok(orphos)
170 }
171
172 pub fn train_single_genome(
210 &mut self,
211 encoded_sequence: &EncodedSequence,
212 ) -> Result<TrainedOrphos, OrphosError> {
213 let sequence_length = encoded_sequence.sequence_length;
214 if sequence_length < MIN_SEQUENCE_LENGTH {
215 return Err(OrphosError::InvalidSequence(format!(
216 "Sequence too short for gene prediction: {} bp (minimum {} bp required)",
217 sequence_length, MIN_SEQUENCE_LENGTH
218 )));
219 }
220
221 if !self.config.quiet {
222 eprintln!(
223 "Training on single genome ({} bp, {:.2}% GC)...",
224 sequence_length,
225 encoded_sequence.gc_content * 100.0
226 );
227 }
228 let mut training = Training {
229 gc_content: 0.0,
230 ..Default::default()
231 };
232 training.uses_shine_dalgarno = false;
233 training.gc_bias_factors = [0.0; 3];
234
235 training.gc_content = encoded_sequence.gc_content;
236
237 let mut nodes = Vec::new();
238 let num_nodes = add_nodes(
239 encoded_sequence,
240 &mut nodes,
241 self.config.closed_ends,
242 self.config.circular,
243 &training,
244 )?;
245
246 if !self.config.quiet {
247 eprintln!(
248 "Located {} potential start/stop nodes, closed {}",
249 num_nodes, self.config.closed_ends
250 );
251 }
252
253 sort_nodes_by_position(&mut nodes);
254
255 let gc_frame = calc_most_gc_frame(&encoded_sequence.forward_sequence, sequence_length);
256
257 record_gc_bias(&gc_frame, &mut nodes, &mut training);
258
259 if !self.config.quiet {
260 eprintln!(
261 "Frame bias scores: {:.8} {:.8} {:.8}",
262 training.gc_bias_factors[0],
263 training.gc_bias_factors[1],
264 training.gc_bias_factors[2]
265 );
266 }
267
268 record_overlapping_starts(&mut nodes, &training, false);
269
270 let initial_path = predict_genes(&mut nodes, &training, false).unwrap_or(0);
271 calculate_dicodon_gene(
272 &mut training,
273 &encoded_sequence.forward_sequence,
274 &encoded_sequence.reverse_complement_sequence,
275 sequence_length,
276 &nodes,
277 initial_path,
278 );
279
280 raw_coding_score(
281 &encoded_sequence.forward_sequence,
282 &encoded_sequence.reverse_complement_sequence,
283 sequence_length,
284 &mut nodes,
285 &training,
286 );
287
288 rbs_score(
289 &encoded_sequence.forward_sequence,
290 &encoded_sequence.reverse_complement_sequence,
291 sequence_length,
292 &mut nodes,
293 &training,
294 );
295
296 train_starts_sd(
297 &encoded_sequence.forward_sequence,
298 &encoded_sequence.reverse_complement_sequence,
299 sequence_length,
300 &nodes,
301 &mut training,
302 );
303 training.uses_shine_dalgarno = should_use_sd(&training);
304 if self.config.force_non_sd {
305 training.uses_shine_dalgarno = false;
306 }
307
308 if !training.uses_shine_dalgarno {
309 train_starts_nonsd(
310 &encoded_sequence.forward_sequence,
311 &encoded_sequence.reverse_complement_sequence,
312 sequence_length,
313 &mut nodes,
314 &mut training,
315 );
316 }
317
318 if !self.config.quiet {
319 eprintln!("Training complete!");
320 }
321
322 Ok(Orphos {
323 config: self.config.clone(),
324 training: Some(training),
325 _state: PhantomData,
326 })
327 }
328
329 pub fn train_meta_genome(
330 &mut self,
331 _encoded_sequence: &EncodedSequence,
332 ) -> Result<TrainedOrphos, OrphosError> {
333 if !self.config.quiet {
334 eprintln!("Request: Metagenomic, Phase: Training");
335 eprintln!("Initializing training files...");
336 }
337 if !self.config.quiet {
339 eprintln!("Metagenomic training initialized.");
340 }
341 Ok(Orphos {
342 config: self.config.clone(),
343 training: None,
344 _state: PhantomData,
345 })
346 }
347}
348
349impl TrainedOrphos {
350 pub const fn new(config: OrphosConfig, training: Training) -> Self {
373 Self {
374 config,
375 training: Some(training),
376 _state: PhantomData,
377 }
378 }
379
380 fn find_genes_single(
431 &self,
432 encoded_sequence: &EncodedSequence,
433 ) -> Result<Vec<Gene>, OrphosError> {
434 let mut nodes = Vec::new();
435 let training = self
436 .training
437 .as_ref()
438 .ok_or_else(|| OrphosError::InvalidSequence("Orphos is not trained".to_string()))?;
439
440 let _num_nodes = add_nodes(
441 encoded_sequence,
442 &mut nodes,
443 self.config.closed_ends,
444 self.config.circular,
445 training,
446 )?;
447
448 sort_nodes_by_position(&mut nodes);
449
450 score_nodes(
453 encoded_sequence,
454 &mut nodes,
455 training,
456 self.config.closed_ends,
457 false,
458 )?;
459
460 record_overlapping_starts(&mut nodes, training, true);
461
462 let gene_path = match predict_genes(&mut nodes, training, true) {
463 Some(path) => path,
464 None => {
465 return Ok(vec![]);
467 }
468 };
469
470 eliminate_bad_genes(&mut nodes, Some(gene_path), training);
471
472 let genes = GeneBuilder::from_nodes(&nodes, gene_path, training, 1)
473 .with_tweaked_starts()
474 .with_annotations()
475 .build();
476
477 Ok(genes)
478 }
479
480 fn find_genes_meta(
481 &self,
482 encoded_sequence: &EncodedSequence,
483 ) -> Result<(Vec<Gene>, usize), OrphosError> {
484 if !self.config.quiet {
485 eprintln!("Request: Metagenomic, Phase: Gene Finding");
486 }
487 let mut low = 0.88495 * encoded_sequence.gc_content - 0.0102337;
488 if low > 0.65 {
489 low = 0.65;
490 }
491 let mut high = 0.86596 * encoded_sequence.gc_content + 0.1131991;
492 if high < 0.35 {
493 high = 0.35;
494 }
495 let mut max_score = -100.0;
496 let mut max_phase = 0;
497
498 let mut nodes = Vec::new();
499 let mut genes = Vec::new();
500
501 for (i, _bin) in bins().iter().enumerate() {
502 let preset_training = get_preset_training_ref(i).unwrap();
503 if i == 0
504 || preset_training.translation_table
505 != get_preset_training_ref(i - 1).unwrap().translation_table
506 {
507 let _num_nodes = add_nodes(
508 encoded_sequence,
509 &mut nodes,
510 self.config.closed_ends,
511 self.config.circular,
512 preset_training,
513 )?;
514 sort_nodes_by_position(&mut nodes);
515 }
516 if preset_training.gc_content < low || preset_training.gc_content > high {
517 continue;
518 }
519 reset_node_scores(&mut nodes);
520 score_nodes(
521 encoded_sequence,
522 &mut nodes,
523 preset_training,
524 self.config.closed_ends,
525 true,
526 )?;
527 record_overlapping_starts(&mut nodes, preset_training, true);
528 let gene_path = predict_genes(&mut nodes, preset_training, true);
529 if let Some(path) = gene_path
530 && let Some(node) = nodes.get(path)
531 && node.scores.total_score > max_score
532 {
533 max_phase = i;
534 max_score = node.scores.total_score;
535 eliminate_bad_genes(&mut nodes, gene_path, preset_training);
536 genes = GeneBuilder::from_nodes(&nodes, path, preset_training, 1)
537 .with_tweaked_starts()
538 .with_annotations()
539 .build();
540 }
541 }
543 nodes.clear();
544 let best_training = get_preset_training_ref(max_phase).unwrap();
545 let _ = add_nodes(
546 encoded_sequence,
547 &mut nodes,
548 self.config.closed_ends,
549 self.config.circular,
550 best_training,
551 )?;
552 sort_nodes_by_position(&mut nodes);
553 score_nodes(
554 encoded_sequence,
555 &mut nodes,
556 best_training,
557 self.config.closed_ends,
558 true,
559 )?;
560 update_display_scores(&mut genes, &nodes);
564 Ok((genes, max_phase))
565 }
566}
567
568fn update_display_scores(genes: &mut [Gene], nodes: &[Node]) {
571 for gene in genes.iter_mut() {
572 let target_pos = if gene.coordinates.strand == Strand::Forward {
574 gene.coordinates.begin.saturating_sub(1)
575 } else {
576 gene.coordinates.end.saturating_sub(1)
577 };
578
579 if let Some(start_node) = nodes.iter().find(|n| {
580 n.position.index == target_pos
581 && n.position.strand == gene.coordinates.strand
582 && n.position.codon_type != CodonType::Stop
583 }) {
584 gene.display_score =
585 Some(start_node.scores.coding_score + start_node.scores.start_score);
586 }
587 }
588}
589
590#[derive(Debug)]
656pub struct OrphosAnalyzer {
657 pub config: OrphosConfig,
659}
660
661impl OrphosAnalyzer {
662 pub const fn new(config: OrphosConfig) -> Self {
676 Self { config }
677 }
678
679 pub fn analyze_fasta_file<P: AsRef<Path>>(
713 &mut self,
714 path: P,
715 ) -> Result<Vec<OrphosResults>, OrphosError> {
716 let sequences = read_fasta_sequences(path.as_ref().to_str().unwrap())?;
717
718 let mut results = Vec::new();
719 for (header, description, seq_bytes) in sequences {
720 let result = self.analyze_sequence_bytes(&seq_bytes, header, description)?;
721 results.push(result);
722 }
723 Ok(results)
724 }
725
726 pub fn analyze_sequence(
764 &mut self,
765 sequence: &str,
766 header: Option<String>,
767 ) -> Result<OrphosResults, OrphosError> {
768 let seq_bytes = sequence.as_bytes();
769 let header = header.unwrap_or_else(|| "Orphos_Seq_1".to_string());
770
771 self.analyze_sequence_bytes(seq_bytes, header, None)
772 }
773
774 pub fn analyze_sequence_bytes(
817 &mut self,
818 sequence: &[u8],
819 header: String,
820 description: Option<String>,
821 ) -> Result<OrphosResults, OrphosError> {
822 let sequence_length = sequence.len();
823 let encoded_sequence = self.encode_sequence(sequence);
824
825 let mut untrained_orphos = UntrainedOrphos::with_config(self.config.clone())?;
826 let trained_orphos = if self.config.metagenomic {
827 untrained_orphos.train_meta_genome(&encoded_sequence)?
828 } else {
829 untrained_orphos.train_single_genome(&encoded_sequence)?
830 };
831 let (genes, metagenomic_model, best_training) = if self.config.metagenomic {
832 let (genes, best_phase) = trained_orphos.find_genes_meta(&encoded_sequence)?;
833 let bin = &bins()[best_phase];
834 let training = get_preset_training_ref(best_phase).unwrap();
835 let model_desc = format!(
836 "{}|{}|{}|{:.1}|{}|{}",
837 bin.id,
838 bin.name,
839 bin.domain,
840 bin.gc_percent,
841 training.translation_table,
842 if training.uses_shine_dalgarno { 1 } else { 0 }
843 );
844 (genes, Some(model_desc), training.clone())
845 } else {
846 let genes = trained_orphos.find_genes_single(&encoded_sequence)?;
847 let training = trained_orphos.training.clone().unwrap_or_default();
848 (genes, None, training)
849 };
850
851 Ok(OrphosResults {
852 genes: genes.clone(),
853 training_used: best_training,
854 sequence_info: SequenceInfo {
855 length: sequence_length,
856 gc_content: encoded_sequence.gc_content,
857 num_genes: genes.len(),
858 header,
859 description,
860 },
861 metagenomic_model,
862 })
863 }
864
865 fn encode_sequence(&self, sequence: &[u8]) -> EncodedSequence {
879 if self.config.mask_n_runs {
880 EncodedSequence::with_masking(sequence)
881 } else {
882 EncodedSequence::without_masking(sequence)
883 }
884 }
885}
886
887#[cfg(test)]
888mod tests {
889 use super::*;
890 use crate::config::OutputFormat;
891 use crate::constants::TEST_SEQUENCE_REPEAT_FACTOR;
892 use std::env;
893 use std::fs;
894
895 fn create_test_sequence() -> Vec<u8> {
897 "ATGAAACGTAAATAG".as_bytes().to_vec()
899 }
900
901 fn create_training_sequence() -> Vec<u8> {
903 let basic_gene = "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG";
905 basic_gene
906 .repeat(TEST_SEQUENCE_REPEAT_FACTOR)
907 .as_bytes()
908 .to_vec()
909 }
910
911 fn create_encoded_sequence_for_test(seq: &[u8]) -> EncodedSequence {
913 EncodedSequence::without_masking(seq)
914 }
915
916 #[test]
917 fn test_training_state_traits() {
918 let _untrained: Untrained = Untrained;
920 let _trained: Trained = Trained;
921
922 let untrained_clone = _untrained.clone();
924 let trained_clone = _trained.clone();
925
926 assert_eq!(format!("{:?}", untrained_clone), "Untrained");
927 assert_eq!(format!("{:?}", trained_clone), "Trained");
928 }
929
930 #[test]
931 fn test_untrained_orphos_new() {
932 let orphos = UntrainedOrphos::new();
933
934 assert!(!orphos.config.metagenomic);
935 assert!(!orphos.config.closed_ends);
936 assert!(!orphos.config.mask_n_runs);
937 assert!(!orphos.config.force_non_sd);
938 assert!(!orphos.config.quiet);
939 assert_eq!(orphos.config.output_format, OutputFormat::Genbank);
940 assert!(orphos.training.is_none());
941 }
942
943 #[test]
944 fn test_untrained_orphos_with_config() {
945 let config = OrphosConfig {
946 metagenomic: true,
947 closed_ends: true,
948 quiet: true,
949 ..OrphosConfig::default()
950 };
951
952 let result = UntrainedOrphos::with_config(config.clone());
953 assert!(result.is_ok());
954
955 let orphos = result.unwrap();
956 assert!(orphos.config.metagenomic);
957 assert!(orphos.config.closed_ends);
958 assert!(orphos.config.quiet);
959 assert!(orphos.training.is_none());
960 }
961
962 #[test]
963 fn test_untrained_orphos_with_thread_config() {
964 let config = OrphosConfig {
965 num_threads: Some(2),
966 ..OrphosConfig::default()
967 };
968
969 let result = UntrainedOrphos::with_config(config);
970 let _ = result;
973 }
974
975 #[test]
976 fn test_untrained_orphos_with_invalid_thread_config() {
977 let config = OrphosConfig {
978 num_threads: Some(0),
979 ..OrphosConfig::default()
980 };
981
982 let result = UntrainedOrphos::with_config(config);
983 let _ = result;
986 }
987
988 #[test]
989 fn test_untrained_orphos_with_conflicting_topology_config() {
990 let config = OrphosConfig {
991 circular: true,
992 closed_ends: true,
993 ..OrphosConfig::default()
994 };
995
996 let result = UntrainedOrphos::with_config(config);
997 assert!(result.is_err());
998 if let Err(OrphosError::InvalidSequence(msg)) = result {
999 assert!(msg.contains("circular"));
1000 assert!(msg.contains("closed_ends"));
1001 } else {
1002 panic!("Expected InvalidSequence error");
1003 }
1004 }
1005
1006 #[test]
1007 fn test_trained_orphos_new() {
1008 let config = OrphosConfig::default();
1009 let training = Training::default();
1010
1011 let orphos = TrainedOrphos::new(config, training);
1012
1013 assert!(orphos.training.is_some());
1014 }
1015
1016 #[test]
1017 fn test_train_single_genome_basic() {
1018 let mut orphos = UntrainedOrphos::new();
1019 let sequence = create_training_sequence();
1020 let encoded_sequence = create_encoded_sequence_for_test(&sequence);
1021
1022 let result = orphos.train_single_genome(&encoded_sequence);
1023
1024 assert!(result.is_ok());
1025 let trained = result.unwrap();
1026 assert!(trained.training.is_some());
1027 }
1028
1029 #[test]
1030 fn test_train_single_genome_quiet_mode() {
1031 let config = OrphosConfig {
1032 quiet: true,
1033 ..OrphosConfig::default()
1034 };
1035 let mut orphos = UntrainedOrphos::with_config(config).unwrap();
1036
1037 let sequence = create_training_sequence();
1038 let encoded_sequence = create_encoded_sequence_for_test(&sequence);
1039
1040 let result = orphos.train_single_genome(&encoded_sequence);
1041
1042 assert!(result.is_ok());
1043 }
1044
1045 #[test]
1046 fn test_train_single_genome_force_non_sd() {
1047 let config = OrphosConfig {
1048 force_non_sd: true,
1049 ..OrphosConfig::default()
1050 };
1051 let mut orphos = UntrainedOrphos::with_config(config).unwrap();
1052
1053 let sequence = create_training_sequence();
1054 let encoded_sequence = create_encoded_sequence_for_test(&sequence);
1055
1056 let result = orphos.train_single_genome(&encoded_sequence);
1057
1058 assert!(result.is_ok());
1059 let trained = result.unwrap();
1060 let training = trained.training.unwrap();
1061 assert!(!training.uses_shine_dalgarno); }
1063
1064 #[test]
1065 fn test_trained_orphos_find_genes_single() {
1066 let mut orphos = UntrainedOrphos::new();
1068 let sequence = create_training_sequence();
1069 let encoded_sequence = create_encoded_sequence_for_test(&sequence);
1070
1071 let trained = orphos.train_single_genome(&encoded_sequence).unwrap();
1072
1073 let test_seq = create_test_sequence();
1075 let test_encoded_sequence = create_encoded_sequence_for_test(&test_seq);
1076
1077 let result = trained.find_genes_single(&test_encoded_sequence);
1078
1079 assert!(result.is_ok());
1080 let _genes = result.unwrap();
1081 }
1083
1084 #[test]
1085 fn test_trained_orphos_find_genes_without_training() {
1086 let config = OrphosConfig::default();
1087 let orphos = TrainedOrphos {
1088 config,
1089 training: None,
1090 _state: PhantomData,
1091 };
1092
1093 let test_seq = create_test_sequence();
1094 let test_encoded_sequence = create_encoded_sequence_for_test(&test_seq);
1095
1096 let result = orphos.find_genes_single(&test_encoded_sequence);
1097
1098 assert!(result.is_err());
1099 if let Err(OrphosError::InvalidSequence(msg)) = result {
1100 assert!(msg.contains("not trained"));
1101 } else {
1102 panic!("Expected InvalidSequence error");
1103 }
1104 }
1105
1106 #[test]
1107 fn test_orphos_analyzer_new() {
1108 let config = OrphosConfig::default();
1109 let analyzer = OrphosAnalyzer::new(config.clone());
1110
1111 assert_eq!(analyzer.config.metagenomic, config.metagenomic);
1112 assert_eq!(analyzer.config.closed_ends, config.closed_ends);
1113 }
1114
1115 #[test]
1116 fn test_analyze_sequence_basic() {
1117 let config = OrphosConfig::default();
1118 let mut analyzer = OrphosAnalyzer::new(config);
1119
1120 let sequence =
1121 "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
1122
1123 let result = analyzer.analyze_sequence(&sequence, None);
1124 assert!(result.is_ok());
1125
1126 let analysis = result.unwrap();
1127 assert_eq!(analysis.sequence_info.header, "Orphos_Seq_1");
1128 assert_eq!(analysis.sequence_info.length, sequence.len());
1129 assert!(
1130 analysis.sequence_info.gc_content >= 0.0 && analysis.sequence_info.gc_content <= 1.0
1131 );
1132 assert!(analysis.metagenomic_model.is_none());
1133 }
1134
1135 #[test]
1136 fn test_analyze_sequence_with_header() {
1137 let config = OrphosConfig::default();
1138 let mut analyzer = OrphosAnalyzer::new(config);
1139
1140 let sequence =
1141 "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
1142 let header = Some("test_sequence".to_string());
1143
1144 let result = analyzer.analyze_sequence(&sequence, header);
1145 assert!(result.is_ok());
1146
1147 let analysis = result.unwrap();
1148 assert_eq!(analysis.sequence_info.header, "test_sequence");
1149 }
1150
1151 #[test]
1152 fn test_analyze_sequence_bytes() {
1153 let config = OrphosConfig::default();
1154 let mut analyzer = OrphosAnalyzer::new(config);
1155
1156 let sequence =
1157 "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
1158 let seq_bytes = sequence.as_bytes();
1159 let header = "test_sequence".to_string();
1160 let description = Some("Test description".to_string());
1161
1162 let result =
1163 analyzer.analyze_sequence_bytes(seq_bytes, header.clone(), description.clone());
1164 assert!(result.is_ok());
1165
1166 let analysis = result.unwrap();
1167 assert_eq!(analysis.sequence_info.header, header);
1168 assert_eq!(analysis.sequence_info.description, description);
1169 assert_eq!(analysis.sequence_info.length, seq_bytes.len());
1170 }
1171
1172 #[test]
1173 fn test_analyze_sequence_metagenomic_config() {
1174 let config = OrphosConfig {
1175 metagenomic: true,
1176 ..OrphosConfig::default()
1177 };
1178 let mut analyzer = OrphosAnalyzer::new(config);
1179
1180 let sequence =
1181 "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
1182
1183 let result = analyzer.analyze_sequence(&sequence, None);
1184 assert!(result.is_ok());
1185
1186 let analysis = result.unwrap();
1187 assert!(analysis.metagenomic_model.is_some());
1188 let model = analysis.metagenomic_model.unwrap();
1190 assert!(
1191 model.contains("|"),
1192 "Expected metagenomic model description with | separator"
1193 );
1194 }
1195
1196 #[test]
1234 fn test_analyze_fasta_file_not_found() {
1235 let config = OrphosConfig::default();
1236 let mut analyzer = OrphosAnalyzer::new(config);
1237
1238 let result = analyzer.analyze_fasta_file("nonexistent_file.fa");
1239 assert!(result.is_err());
1240 }
1241
1242 #[test]
1243 fn test_analyze_fasta_file_metagenomic() {
1244 let config = OrphosConfig {
1245 metagenomic: true,
1246 ..OrphosConfig::default()
1247 };
1248 let mut analyzer = OrphosAnalyzer::new(config);
1249
1250 let fasta_content = ">test_seq\nATCG\n";
1252 let temp_dir = env::temp_dir();
1253 let temp_file = temp_dir.join("test_metagenomic.fa");
1254 fs::write(&temp_file, fasta_content).unwrap();
1255
1256 let result = analyzer.analyze_fasta_file(&temp_file);
1257 assert!(result.is_ok());
1258
1259 let results = result.unwrap();
1260 assert_eq!(results.len(), 1); assert!(results[0].genes.is_empty()); let _ = fs::remove_file(temp_file);
1264 }
1265
1266 #[test]
1267 fn test_analyze_fasta_file_single_genome() {
1268 let config = OrphosConfig::default();
1269 let mut analyzer = OrphosAnalyzer::new(config);
1270
1271 let sequence =
1273 "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
1274 let fasta_content = format!(">test_seq\n{}\n", sequence);
1275 let temp_dir = env::temp_dir();
1276 let temp_file = temp_dir.join("test_single_genome.fa");
1277 fs::write(&temp_file, fasta_content).unwrap();
1278
1279 let result = analyzer.analyze_fasta_file(&temp_file);
1280 assert!(result.is_ok());
1281
1282 let results = result.unwrap();
1283 assert_eq!(results.len(), 1);
1284 assert_eq!(results[0].sequence_info.header, "test_seq");
1285
1286 let _ = fs::remove_file(temp_file);
1287 }
1288
1289 #[test]
1290 fn test_analyze_empty_sequence() {
1291 let config = OrphosConfig::default();
1292 let mut analyzer = OrphosAnalyzer::new(config);
1293
1294 let sequence = "";
1295 let result = analyzer.analyze_sequence(sequence, None);
1296
1297 assert!(result.is_err());
1298 if let Err(e) = result {
1299 match e {
1301 OrphosError::InvalidSequence(msg) => {
1302 assert!(msg.contains("too short"));
1303 }
1304 _ => panic!("Expected InvalidSequence error for empty sequence"),
1305 }
1306 }
1307 }
1308
1309 #[test]
1310 fn test_analyze_very_short_sequence() {
1311 let config = OrphosConfig::default();
1312 let mut analyzer = OrphosAnalyzer::new(config);
1313
1314 let sequence = "ATG"; let result = analyzer.analyze_sequence(sequence, None);
1316
1317 assert!(result.is_err());
1318 if let Err(e) = result {
1319 match e {
1321 OrphosError::InvalidSequence(msg) => {
1322 assert!(msg.contains("too short"));
1323 }
1324 _ => panic!("Expected InvalidSequence error for very short sequence"),
1325 }
1326 }
1327 }
1328
1329 #[test]
1330 fn test_config_cloning() {
1331 let config1 = OrphosConfig::default();
1332 let orphos1 = UntrainedOrphos::with_config(config1.clone()).unwrap();
1333
1334 let config2 = orphos1.config.clone();
1335 let _orphos2 = UntrainedOrphos::with_config(config2).unwrap();
1336 }
1337
1338 #[test]
1339 fn test_debug_formatting() {
1340 let orphos = UntrainedOrphos::new();
1341 let debug_str = format!("{:?}", orphos);
1342 assert!(debug_str.contains("Orphos"));
1343 assert!(debug_str.contains("config"));
1344
1345 let analyzer = OrphosAnalyzer::new(OrphosConfig::default());
1346 let debug_str2 = format!("{:?}", analyzer);
1347 assert!(debug_str2.contains("OrphosAnalyzer"));
1348 assert!(debug_str2.contains("config"));
1349 }
1350
1351 #[test]
1352 fn test_type_aliases() {
1353 let _untrained: UntrainedOrphos = UntrainedOrphos::new();
1355 let _trained: TrainedOrphos =
1356 TrainedOrphos::new(OrphosConfig::default(), Training::default());
1357
1358 assert_eq!(
1359 std::any::type_name::<UntrainedOrphos>(),
1360 std::any::type_name::<Orphos<Untrained>>()
1361 );
1362 assert_eq!(
1363 std::any::type_name::<TrainedOrphos>(),
1364 std::any::type_name::<Orphos<Trained>>()
1365 );
1366 }
1367
1368 #[test]
1369 fn test_training_state_phantom_data() {
1370 let untrained = UntrainedOrphos::new();
1371 let trained = TrainedOrphos::new(OrphosConfig::default(), Training::default());
1372
1373 assert_eq!(std::mem::size_of_val(&untrained._state), 0);
1374 assert_eq!(std::mem::size_of_val(&trained._state), 0);
1375 }
1376
1377 #[test]
1378 fn test_analyzer_multiple_sequences() {
1379 let config = OrphosConfig::default();
1380 let mut analyzer = OrphosAnalyzer::new(config);
1381
1382 let sequence1 =
1383 "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(150);
1384 let sequence2 =
1385 "ATGCCCGGGAAATTTCCCGGGAAATTTCCCGGGAAATTTCCCGGGAAATTTCCCGGGAAATAG".repeat(200);
1386
1387 let result1 = analyzer.analyze_sequence(&sequence1, Some("seq1".to_string()));
1388 let result2 = analyzer.analyze_sequence(&sequence2, Some("seq2".to_string()));
1389
1390 assert!(result1.is_ok());
1391 assert!(result2.is_ok());
1392
1393 let analysis1 = result1.unwrap();
1394 let analysis2 = result2.unwrap();
1395
1396 assert_eq!(analysis1.sequence_info.header, "seq1");
1397 assert_eq!(analysis2.sequence_info.header, "seq2");
1398 assert_ne!(
1399 analysis1.sequence_info.length,
1400 analysis2.sequence_info.length
1401 );
1402 }
1403
1404 #[test]
1405 fn test_error_handling_edge_cases() {
1406 let config = OrphosConfig::default();
1407 let mut analyzer = OrphosAnalyzer::new(config);
1408
1409 let invalid_sequence = "ATCGXYZ123";
1411 let result = analyzer.analyze_sequence(invalid_sequence, None);
1412
1413 let _ = result;
1415 }
1416}