use std::marker::PhantomData;
use std::path::Path;
use crate::algorithms::dynamic_programming::eliminate_bad_genes;
use crate::algorithms::dynamic_programming::predict_genes;
use crate::algorithms::gene_finding::GeneBuilder;
use crate::config::OrphosConfig;
use crate::constants::MIN_SEQUENCE_LENGTH;
use crate::metagenomic::bins;
use crate::metagenomic::get_preset_training_ref;
use crate::node::{
add_nodes, calculate_dicodon_gene, raw_coding_score, rbs_score, record_gc_bias,
record_overlapping_starts, reset_node_scores, score_nodes, sort_nodes_by_position,
};
use crate::results::{OrphosResults, SequenceInfo};
use crate::sequence::calc_most_gc_frame;
use crate::sequence::encoded::EncodedSequence;
use crate::sequence::read_fasta_sequences;
use crate::training::non_sd_training::train_starts_nonsd;
use crate::training::sd_training::train_starts_sd;
use crate::training::should_use_sd;
use crate::types::Gene;
use crate::types::{CodonType, Node, OrphosError, Training};
use bio::bio_types::strand::Strand;
/// Marker trait implemented by the zero-sized types that encode, at the type level,
/// whether an [`Orphos`] value has gone through training.
pub trait TrainingState {}

/// State marker used by [`Orphos`] values that have not been trained.
#[derive(Debug, Clone)]
pub struct Untrained;

impl TrainingState for Untrained {}

/// State marker used by [`Orphos`] values returned from a training step.
#[derive(Debug, Clone)]
pub struct Trained;

impl TrainingState for Trained {}
/// Gene-prediction engine whose training state is tracked by the type parameter `S`.
#[derive(Debug, Default)]
pub struct Orphos<S>
where
    S: TrainingState,
{
    /// Run-time options consulted by training and gene finding.
    pub config: OrphosConfig,
    /// Training parameters, when present. The untrained constructors and
    /// `train_meta_genome` store `None`; `train_single_genome` and
    /// `TrainedOrphos::new` store `Some`.
    training: Option<Training>,
    /// Zero-sized marker that carries the state type.
    _state: PhantomData<S>,
}

/// An [`Orphos`] in the [`Untrained`] state.
pub type UntrainedOrphos = Orphos<Untrained>;

/// An [`Orphos`] in the [`Trained`] state.
pub type TrainedOrphos = Orphos<Trained>;
impl UntrainedOrphos {
pub fn new() -> Self {
Self {
config: OrphosConfig::default(),
training: None,
_state: PhantomData,
}
}
/// Creates an untrained instance from an explicit configuration.
///
/// When `config.num_threads` is set, rayon's global thread pool is configured with that
/// size. rayon rejects a second `build_global` call in the same process, so the pool is
/// built at most once here: after the first successful call, later calls skip the thread
/// pool setup (and therefore ignore a different `num_threads`) instead of failing. This
/// matters because one instance is created per analysed sequence.
///
/// # Errors
///
/// Returns [`OrphosError::InvalidSequence`] when `circular` and `closed_ends` are both
/// set, or when the global thread pool could not be built (for example because something
/// else in the process initialised it first).
pub fn with_config(config: OrphosConfig) -> Result<Self, OrphosError> {
    // Records whether this function already built rayon's global pool in this process.
    // A mutex (rather than a plain flag) serialises concurrent first calls so that only
    // one of them attempts the build.
    static GLOBAL_POOL_CONFIGURED: std::sync::Mutex<bool> = std::sync::Mutex::new(false);

    if config.circular && config.closed_ends {
        return Err(OrphosError::InvalidSequence(
            "Invalid configuration: circular and closed_ends cannot both be true".to_string(),
        ));
    }

    if let Some(num_threads) = config.num_threads {
        // The guarded value is a plain bool, so a poisoned lock is still safe to reuse.
        let mut configured = GLOBAL_POOL_CONFIGURED
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        if !*configured {
            rayon::ThreadPoolBuilder::new()
                .num_threads(num_threads)
                .build_global()
                .map_err(|e| {
                    OrphosError::InvalidSequence(format!(
                        "Failed to configure thread pool: {}",
                        e
                    ))
                })?;
            *configured = true;
        }
    }

    Ok(Self {
        config,
        training: None,
        _state: PhantomData,
    })
}
pub fn train_single_genome(
&mut self,
encoded_sequence: &EncodedSequence,
) -> Result<TrainedOrphos, OrphosError> {
let sequence_length = encoded_sequence.sequence_length;
if sequence_length < MIN_SEQUENCE_LENGTH {
return Err(OrphosError::InvalidSequence(format!(
"Sequence too short for gene prediction: {} bp (minimum {} bp required)",
sequence_length, MIN_SEQUENCE_LENGTH
)));
}
if !self.config.quiet {
eprintln!(
"Training on single genome ({} bp, {:.2}% GC)...",
sequence_length,
encoded_sequence.gc_content * 100.0
);
}
let mut training = Training {
gc_content: 0.0,
..Default::default()
};
training.uses_shine_dalgarno = false;
training.gc_bias_factors = [0.0; 3];
training.gc_content = encoded_sequence.gc_content;
let mut nodes = Vec::new();
let num_nodes = add_nodes(
encoded_sequence,
&mut nodes,
self.config.closed_ends,
self.config.circular,
&training,
)?;
if !self.config.quiet {
eprintln!(
"Located {} potential start/stop nodes, closed {}",
num_nodes, self.config.closed_ends
);
}
sort_nodes_by_position(&mut nodes);
let gc_frame = calc_most_gc_frame(&encoded_sequence.forward_sequence, sequence_length);
record_gc_bias(&gc_frame, &mut nodes, &mut training);
if !self.config.quiet {
eprintln!(
"Frame bias scores: {:.8} {:.8} {:.8}",
training.gc_bias_factors[0],
training.gc_bias_factors[1],
training.gc_bias_factors[2]
);
}
record_overlapping_starts(&mut nodes, &training, false);
let initial_path = predict_genes(&mut nodes, &training, false).unwrap_or(0);
calculate_dicodon_gene(
&mut training,
&encoded_sequence.forward_sequence,
&encoded_sequence.reverse_complement_sequence,
sequence_length,
&nodes,
initial_path,
);
raw_coding_score(
&encoded_sequence.forward_sequence,
&encoded_sequence.reverse_complement_sequence,
sequence_length,
&mut nodes,
&training,
);
rbs_score(
&encoded_sequence.forward_sequence,
&encoded_sequence.reverse_complement_sequence,
sequence_length,
&mut nodes,
&training,
);
train_starts_sd(
&encoded_sequence.forward_sequence,
&encoded_sequence.reverse_complement_sequence,
sequence_length,
&nodes,
&mut training,
);
training.uses_shine_dalgarno = should_use_sd(&training);
if self.config.force_non_sd {
training.uses_shine_dalgarno = false;
}
if !training.uses_shine_dalgarno {
train_starts_nonsd(
&encoded_sequence.forward_sequence,
&encoded_sequence.reverse_complement_sequence,
sequence_length,
&mut nodes,
&mut training,
);
}
if !self.config.quiet {
eprintln!("Training complete!");
}
Ok(Orphos {
config: self.config.clone(),
training: Some(training),
_state: PhantomData,
})
}
pub fn train_meta_genome(
&mut self,
_encoded_sequence: &EncodedSequence,
) -> Result<TrainedOrphos, OrphosError> {
if !self.config.quiet {
eprintln!("Request: Metagenomic, Phase: Training");
eprintln!("Initializing training files...");
}
if !self.config.quiet {
eprintln!("Metagenomic training initialized.");
}
Ok(Orphos {
config: self.config.clone(),
training: None,
_state: PhantomData,
})
}
}
impl TrainedOrphos {
pub const fn new(config: OrphosConfig, training: Training) -> Self {
Self {
config,
training: Some(training),
_state: PhantomData,
}
}
/// Predicts genes in `encoded_sequence` using the stored single-genome training data.
///
/// Returns an empty vector when `predict_genes` finds no path.
///
/// # Errors
///
/// Returns [`OrphosError::InvalidSequence`] when no training data is stored, and
/// propagates errors from `add_nodes` and `score_nodes`.
fn find_genes_single(
    &self,
    encoded_sequence: &EncodedSequence,
) -> Result<Vec<Gene>, OrphosError> {
    let Some(training) = self.training.as_ref() else {
        return Err(OrphosError::InvalidSequence(
            "Orphos is not trained".to_string(),
        ));
    };
    let closed_ends = self.config.closed_ends;

    let mut nodes = Vec::new();
    add_nodes(
        encoded_sequence,
        &mut nodes,
        closed_ends,
        self.config.circular,
        training,
    )?;
    sort_nodes_by_position(&mut nodes);
    score_nodes(encoded_sequence, &mut nodes, training, closed_ends, false)?;
    record_overlapping_starts(&mut nodes, training, true);

    let Some(gene_path) = predict_genes(&mut nodes, training, true) else {
        return Ok(Vec::new());
    };
    eliminate_bad_genes(&mut nodes, Some(gene_path), training);

    let builder = GeneBuilder::from_nodes(&nodes, gene_path, training, 1);
    Ok(builder.with_tweaked_starts().with_annotations().build())
}
fn find_genes_meta(
&self,
encoded_sequence: &EncodedSequence,
) -> Result<(Vec<Gene>, usize), OrphosError> {
if !self.config.quiet {
eprintln!("Request: Metagenomic, Phase: Gene Finding");
}
let mut low = 0.88495 * encoded_sequence.gc_content - 0.0102337;
if low > 0.65 {
low = 0.65;
}
let mut high = 0.86596 * encoded_sequence.gc_content + 0.1131991;
if high < 0.35 {
high = 0.35;
}
let mut max_score = -100.0;
let mut max_phase = 0;
let mut nodes = Vec::new();
let mut genes = Vec::new();
for (i, _bin) in bins().iter().enumerate() {
let preset_training = get_preset_training_ref(i).unwrap();
if i == 0
|| preset_training.translation_table
!= get_preset_training_ref(i - 1).unwrap().translation_table
{
let _num_nodes = add_nodes(
encoded_sequence,
&mut nodes,
self.config.closed_ends,
self.config.circular,
preset_training,
)?;
sort_nodes_by_position(&mut nodes);
}
if preset_training.gc_content < low || preset_training.gc_content > high {
continue;
}
reset_node_scores(&mut nodes);
score_nodes(
encoded_sequence,
&mut nodes,
preset_training,
self.config.closed_ends,
true,
)?;
record_overlapping_starts(&mut nodes, preset_training, true);
let gene_path = predict_genes(&mut nodes, preset_training, true);
if let Some(path) = gene_path
&& let Some(node) = nodes.get(path)
&& node.scores.total_score > max_score
{
max_phase = i;
max_score = node.scores.total_score;
eliminate_bad_genes(&mut nodes, gene_path, preset_training);
genes = GeneBuilder::from_nodes(&nodes, path, preset_training, 1)
.with_tweaked_starts()
.with_annotations()
.build();
}
}
nodes.clear();
let best_training = get_preset_training_ref(max_phase).unwrap();
let _ = add_nodes(
encoded_sequence,
&mut nodes,
self.config.closed_ends,
self.config.circular,
best_training,
)?;
sort_nodes_by_position(&mut nodes);
score_nodes(
encoded_sequence,
&mut nodes,
best_training,
self.config.closed_ends,
true,
)?;
update_display_scores(&mut genes, &nodes);
Ok((genes, max_phase))
}
}
/// Sets each gene's `display_score` from the first matching non-stop node.
///
/// The node must sit at `begin - 1` for forward-strand genes or `end - 1` otherwise
/// (saturating at zero) and be on the gene's strand. The score is the sum of that node's
/// coding and start scores. Genes without a matching node are left unchanged.
fn update_display_scores(genes: &mut [Gene], nodes: &[Node]) {
    for gene in genes {
        let is_forward = gene.coordinates.strand == Strand::Forward;
        let anchor = if is_forward {
            gene.coordinates.begin
        } else {
            gene.coordinates.end
        };
        let target_pos = anchor.saturating_sub(1);

        let matching = nodes.iter().find(|node| {
            node.position.index == target_pos
                && node.position.strand == gene.coordinates.strand
                && node.position.codon_type != CodonType::Stop
        });

        if let Some(node) = matching {
            gene.display_score = Some(node.scores.coding_score + node.scores.start_score);
        }
    }
}
/// High-level entry point that runs training and gene finding for whole sequences,
/// choosing single-genome or metagenomic mode from its configuration.
#[derive(Debug)]
pub struct OrphosAnalyzer {
/// Configuration copied into the engine built for each analysed sequence.
pub config: OrphosConfig,
}
impl OrphosAnalyzer {
pub const fn new(config: OrphosConfig) -> Self {
Self { config }
}
pub fn analyze_fasta_file<P: AsRef<Path>>(
&mut self,
path: P,
) -> Result<Vec<OrphosResults>, OrphosError> {
let sequences = read_fasta_sequences(path.as_ref().to_str().unwrap())?;
let mut results = Vec::new();
for (header, description, seq_bytes) in sequences {
let result = self.analyze_sequence_bytes(&seq_bytes, header, description)?;
results.push(result);
}
Ok(results)
}
/// Analyses a sequence given as text.
///
/// The header defaults to `"Orphos_Seq_1"` when `header` is `None`; no description is
/// attached. See [`Self::analyze_sequence_bytes`] for the analysis itself.
pub fn analyze_sequence(
    &mut self,
    sequence: &str,
    header: Option<String>,
) -> Result<OrphosResults, OrphosError> {
    let header = match header {
        Some(name) => name,
        None => "Orphos_Seq_1".to_string(),
    };
    self.analyze_sequence_bytes(sequence.as_bytes(), header, None)
}
/// Encodes, trains on, and predicts genes for one sequence.
///
/// A fresh engine is built from `self.config` on every call. In metagenomic mode the
/// result's `metagenomic_model` holds a `|`-separated description of the winning preset
/// bin (id, name, domain, GC percent, translation table, SD flag) and `training_used` is
/// a copy of that preset. Otherwise `metagenomic_model` is `None` and `training_used` is
/// the training learned from this sequence.
///
/// # Errors
///
/// Propagates errors from configuration validation, training, and gene finding.
///
/// # Panics
///
/// In metagenomic mode, panics if `get_preset_training_ref` has no entry for the winning
/// bin index.
pub fn analyze_sequence_bytes(
    &mut self,
    sequence: &[u8],
    header: String,
    description: Option<String>,
) -> Result<OrphosResults, OrphosError> {
    let sequence_length = sequence.len();
    let encoded_sequence = self.encode_sequence(sequence);

    let mut untrained_orphos = UntrainedOrphos::with_config(self.config.clone())?;
    let trained_orphos = if self.config.metagenomic {
        untrained_orphos.train_meta_genome(&encoded_sequence)?
    } else {
        untrained_orphos.train_single_genome(&encoded_sequence)?
    };

    let (genes, metagenomic_model, training_used) = if self.config.metagenomic {
        let (genes, best_phase) = trained_orphos.find_genes_meta(&encoded_sequence)?;
        let bin = &bins()[best_phase];
        let training = get_preset_training_ref(best_phase).unwrap();
        let model_desc = format!(
            "{}|{}|{}|{:.1}|{}|{}",
            bin.id,
            bin.name,
            bin.domain,
            bin.gc_percent,
            training.translation_table,
            if training.uses_shine_dalgarno { 1 } else { 0 }
        );
        (genes, Some(model_desc), training.clone())
    } else {
        let genes = trained_orphos.find_genes_single(&encoded_sequence)?;
        let training = trained_orphos.training.clone().unwrap_or_default();
        (genes, None, training)
    };

    // Take the count first so the gene vector can be moved into the result, not cloned.
    let num_genes = genes.len();
    Ok(OrphosResults {
        genes,
        training_used,
        sequence_info: SequenceInfo {
            length: sequence_length,
            gc_content: encoded_sequence.gc_content,
            num_genes,
            header,
            description,
        },
        metagenomic_model,
    })
}
/// Encodes raw sequence bytes, with masking when `config.mask_n_runs` is set.
fn encode_sequence(&self, sequence: &[u8]) -> EncodedSequence {
    match self.config.mask_n_runs {
        true => EncodedSequence::with_masking(sequence),
        false => EncodedSequence::without_masking(sequence),
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::config::OutputFormat;
use crate::constants::TEST_SEQUENCE_REPEAT_FACTOR;
use std::env;
use std::fs;
/// Returns a 15-base sequence (`ATG AAA CGT AAA TAG`) used as a tiny prediction input.
fn create_test_sequence() -> Vec<u8> {
    b"ATGAAACGTAAATAG".to_vec()
}
/// Returns one gene-like unit repeated `TEST_SEQUENCE_REPEAT_FACTOR` times, as bytes.
fn create_training_sequence() -> Vec<u8> {
    const UNIT: &str = "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG";
    UNIT.repeat(TEST_SEQUENCE_REPEAT_FACTOR).into_bytes()
}
// Encodes `seq` without masking, for use as test input.
fn create_encoded_sequence_for_test(seq: &[u8]) -> EncodedSequence {
EncodedSequence::without_masking(seq)
}
#[test]
fn test_training_state_traits() {
    // Both markers must be cloneable and print their own name through `Debug`.
    let untrained_copy = Untrained.clone();
    let trained_copy = Trained.clone();
    assert_eq!(format!("{:?}", untrained_copy), "Untrained");
    assert_eq!(format!("{:?}", trained_copy), "Trained");
}
#[test]
fn test_untrained_orphos_new() {
    // `new` must expose the default configuration and carry no training data.
    let orphos = UntrainedOrphos::new();
    let config = &orphos.config;
    assert!(!config.metagenomic);
    assert!(!config.closed_ends);
    assert!(!config.mask_n_runs);
    assert!(!config.force_non_sd);
    assert!(!config.quiet);
    assert_eq!(config.output_format, OutputFormat::Genbank);
    assert!(orphos.training.is_none());
}
#[test]
fn test_untrained_orphos_with_config() {
    // A valid custom configuration is accepted and stored unchanged.
    let result = UntrainedOrphos::with_config(OrphosConfig {
        metagenomic: true,
        closed_ends: true,
        quiet: true,
        ..OrphosConfig::default()
    });
    assert!(result.is_ok());

    let orphos = result.unwrap();
    assert!(orphos.config.metagenomic);
    assert!(orphos.config.closed_ends);
    assert!(orphos.config.quiet);
    assert!(orphos.training.is_none());
}
#[test]
fn test_untrained_orphos_with_thread_config() {
    // Only checks that requesting a thread count does not panic; the outcome is ignored.
    let config = OrphosConfig {
        num_threads: Some(2),
        ..OrphosConfig::default()
    };
    let _ = UntrainedOrphos::with_config(config);
}
#[test]
fn test_untrained_orphos_with_invalid_thread_config() {
    // Only checks that a zero thread count does not panic; the outcome is ignored.
    let config = OrphosConfig {
        num_threads: Some(0),
        ..OrphosConfig::default()
    };
    let _ = UntrainedOrphos::with_config(config);
}
#[test]
fn test_untrained_orphos_with_conflicting_topology_config() {
    // `circular` together with `closed_ends` must be rejected with a descriptive message.
    let result = UntrainedOrphos::with_config(OrphosConfig {
        circular: true,
        closed_ends: true,
        ..OrphosConfig::default()
    });
    assert!(result.is_err());

    let Err(OrphosError::InvalidSequence(msg)) = result else {
        panic!("Expected InvalidSequence error");
    };
    assert!(msg.contains("circular"));
    assert!(msg.contains("closed_ends"));
}
#[test]
fn test_trained_orphos_new() {
    // The direct constructor must store the training data it is given.
    let orphos = TrainedOrphos::new(OrphosConfig::default(), Training::default());
    assert!(orphos.training.is_some());
}
#[test]
fn test_train_single_genome_basic() {
    // Training on the repeated gene sequence succeeds and yields training data.
    let encoded_sequence = create_encoded_sequence_for_test(&create_training_sequence());
    let mut orphos = UntrainedOrphos::new();

    let result = orphos.train_single_genome(&encoded_sequence);
    assert!(result.is_ok());
    assert!(result.unwrap().training.is_some());
}
#[test]
fn test_train_single_genome_quiet_mode() {
    // Training must also succeed with progress output suppressed.
    let mut orphos = UntrainedOrphos::with_config(OrphosConfig {
        quiet: true,
        ..OrphosConfig::default()
    })
    .unwrap();
    let encoded_sequence = create_encoded_sequence_for_test(&create_training_sequence());

    assert!(orphos.train_single_genome(&encoded_sequence).is_ok());
}
#[test]
fn test_train_single_genome_force_non_sd() {
    // `force_non_sd` must leave the trained model with Shine-Dalgarno disabled.
    let mut orphos = UntrainedOrphos::with_config(OrphosConfig {
        force_non_sd: true,
        ..OrphosConfig::default()
    })
    .unwrap();
    let encoded_sequence = create_encoded_sequence_for_test(&create_training_sequence());

    let result = orphos.train_single_genome(&encoded_sequence);
    assert!(result.is_ok());

    let training = result.unwrap().training.unwrap();
    assert!(!training.uses_shine_dalgarno);
}
#[test]
fn test_trained_orphos_find_genes_single() {
    // Train on the long sequence, then predict on the tiny one; only success is checked.
    let training_input = create_encoded_sequence_for_test(&create_training_sequence());
    let trained = UntrainedOrphos::new()
        .train_single_genome(&training_input)
        .unwrap();

    let prediction_input = create_encoded_sequence_for_test(&create_test_sequence());
    let result = trained.find_genes_single(&prediction_input);
    assert!(result.is_ok());
    let _genes = result.unwrap();
}
#[test]
fn test_trained_orphos_find_genes_without_training() {
    // A trained-state value without training data must report an error, not panic.
    let orphos = TrainedOrphos {
        config: OrphosConfig::default(),
        training: None,
        _state: PhantomData,
    };
    let encoded = create_encoded_sequence_for_test(&create_test_sequence());

    let result = orphos.find_genes_single(&encoded);
    assert!(result.is_err());

    let Err(OrphosError::InvalidSequence(msg)) = result else {
        panic!("Expected InvalidSequence error");
    };
    assert!(msg.contains("not trained"));
}
#[test]
fn test_orphos_analyzer_new() {
    // The analyzer must keep the configuration it was constructed with.
    let expected = OrphosConfig::default();
    let analyzer = OrphosAnalyzer::new(expected.clone());
    assert_eq!(analyzer.config.metagenomic, expected.metagenomic);
    assert_eq!(analyzer.config.closed_ends, expected.closed_ends);
}
#[test]
fn test_analyze_sequence_basic() {
    // Single-genome analysis with no header: default header, length and GC are reported.
    let sequence =
        "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());

    let result = analyzer.analyze_sequence(&sequence, None);
    assert!(result.is_ok());

    let analysis = result.unwrap();
    let info = &analysis.sequence_info;
    assert_eq!(info.header, "Orphos_Seq_1");
    assert_eq!(info.length, sequence.len());
    assert!((0.0..=1.0).contains(&info.gc_content));
    assert!(analysis.metagenomic_model.is_none());
}
#[test]
fn test_analyze_sequence_with_header() {
    // An explicit header must be carried through to the results.
    let sequence =
        "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());

    let result = analyzer.analyze_sequence(&sequence, Some("test_sequence".to_string()));
    assert!(result.is_ok());
    assert_eq!(result.unwrap().sequence_info.header, "test_sequence");
}
#[test]
fn test_analyze_sequence_bytes() {
    // Header, description, and length passed to the byte API must appear in the results.
    let sequence =
        "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
    let seq_bytes = sequence.as_bytes();
    let header = String::from("test_sequence");
    let description = Some(String::from("Test description"));
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());

    let result =
        analyzer.analyze_sequence_bytes(seq_bytes, header.clone(), description.clone());
    assert!(result.is_ok());

    let info = result.unwrap().sequence_info;
    assert_eq!(info.header, header);
    assert_eq!(info.description, description);
    assert_eq!(info.length, seq_bytes.len());
}
#[test]
fn test_analyze_sequence_metagenomic_config() {
    // Metagenomic mode must report a `|`-separated model description.
    let sequence =
        "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig {
        metagenomic: true,
        ..OrphosConfig::default()
    });

    let result = analyzer.analyze_sequence(&sequence, None);
    assert!(result.is_ok());

    let model = result.unwrap().metagenomic_model;
    assert!(model.is_some());
    assert!(
        model.unwrap().contains('|'),
        "Expected metagenomic model description with | separator"
    );
}
#[test]
fn test_analyze_fasta_file_not_found() {
    // A missing input file must surface as an error.
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());
    assert!(analyzer.analyze_fasta_file("nonexistent_file.fa").is_err());
}
#[test]
fn test_analyze_fasta_file_metagenomic() {
    // A four-base record in metagenomic mode yields one result with no genes.
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig {
        metagenomic: true,
        ..OrphosConfig::default()
    });

    // The process id keeps concurrent test runs from sharing (and deleting) one file.
    let temp_file =
        env::temp_dir().join(format!("test_metagenomic_{}.fa", std::process::id()));
    fs::write(&temp_file, ">test_seq\nATCG\n").unwrap();

    let result = analyzer.analyze_fasta_file(&temp_file);
    // Clean up before asserting so a failing assertion cannot leave the file behind.
    let _ = fs::remove_file(&temp_file);

    assert!(result.is_ok());
    let results = result.unwrap();
    assert_eq!(results.len(), 1);
    assert!(results[0].genes.is_empty());
}
#[test]
fn test_analyze_fasta_file_single_genome() {
    // A single long record in single-genome mode yields one result with its header.
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());
    let sequence =
        "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(300);
    let fasta_content = format!(">test_seq\n{}\n", sequence);

    // The process id keeps concurrent test runs from sharing (and deleting) one file.
    let temp_file =
        env::temp_dir().join(format!("test_single_genome_{}.fa", std::process::id()));
    fs::write(&temp_file, fasta_content).unwrap();

    let result = analyzer.analyze_fasta_file(&temp_file);
    // Clean up before asserting so a failing assertion cannot leave the file behind.
    let _ = fs::remove_file(&temp_file);

    assert!(result.is_ok());
    let results = result.unwrap();
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].sequence_info.header, "test_seq");
}
#[test]
fn test_analyze_empty_sequence() {
    // An empty sequence must be rejected as too short.
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());
    let result = analyzer.analyze_sequence("", None);
    assert!(result.is_err());

    match result {
        Err(OrphosError::InvalidSequence(msg)) => assert!(msg.contains("too short")),
        Err(_) => panic!("Expected InvalidSequence error for empty sequence"),
        Ok(_) => {}
    }
}
#[test]
fn test_analyze_very_short_sequence() {
    // A three-base sequence must be rejected as too short.
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());
    let result = analyzer.analyze_sequence("ATG", None);
    assert!(result.is_err());

    match result {
        Err(OrphosError::InvalidSequence(msg)) => assert!(msg.contains("too short")),
        Err(_) => panic!("Expected InvalidSequence error for very short sequence"),
        Ok(_) => {}
    }
}
#[test]
fn test_config_cloning() {
    // A configuration cloned out of one instance must be usable to build another.
    let first = UntrainedOrphos::with_config(OrphosConfig::default().clone()).unwrap();
    let copied_config = first.config.clone();
    let _second = UntrainedOrphos::with_config(copied_config).unwrap();
}
#[test]
fn test_debug_formatting() {
    // `Debug` output must name the type and its `config` field for both public structs.
    let engine_repr = format!("{:?}", UntrainedOrphos::new());
    assert!(engine_repr.contains("Orphos"));
    assert!(engine_repr.contains("config"));

    let analyzer_repr = format!("{:?}", OrphosAnalyzer::new(OrphosConfig::default()));
    assert!(analyzer_repr.contains("OrphosAnalyzer"));
    assert!(analyzer_repr.contains("config"));
}
#[test]
fn test_type_aliases() {
    use std::any::type_name;

    // The aliases must be constructible and resolve to the expected generic types.
    let _untrained: UntrainedOrphos = UntrainedOrphos::new();
    let _trained: TrainedOrphos =
        TrainedOrphos::new(OrphosConfig::default(), Training::default());

    assert_eq!(
        type_name::<UntrainedOrphos>(),
        type_name::<Orphos<Untrained>>()
    );
    assert_eq!(type_name::<TrainedOrphos>(), type_name::<Orphos<Trained>>());
}
#[test]
fn test_training_state_phantom_data() {
    use std::mem::size_of_val;

    // The state marker field must not occupy any space in either state.
    let untrained = UntrainedOrphos::new();
    let trained = TrainedOrphos::new(OrphosConfig::default(), Training::default());
    assert_eq!(size_of_val(&untrained._state), 0);
    assert_eq!(size_of_val(&trained._state), 0);
}
#[test]
fn test_analyzer_multiple_sequences() {
    // One analyzer must handle several sequences in a row and keep their results apart.
    let sequence1 =
        "ATGAAACGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTCGTAAATAG".repeat(150);
    let sequence2 =
        "ATGCCCGGGAAATTTCCCGGGAAATTTCCCGGGAAATTTCCCGGGAAATTTCCCGGGAAATAG".repeat(200);
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());

    let result1 = analyzer.analyze_sequence(&sequence1, Some(String::from("seq1")));
    let result2 = analyzer.analyze_sequence(&sequence2, Some(String::from("seq2")));
    assert!(result1.is_ok());
    assert!(result2.is_ok());

    let info1 = result1.unwrap().sequence_info;
    let info2 = result2.unwrap().sequence_info;
    assert_eq!(info1.header, "seq1");
    assert_eq!(info2.header, "seq2");
    assert_ne!(info1.length, info2.length);
}
#[test]
fn test_error_handling_edge_cases() {
    // Only checks that non-nucleotide input does not panic; the outcome is ignored.
    let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());
    let _ = analyzer.analyze_sequence("ATCGXYZ123", None);
}
}