use std::io::Write;
use crate::{results::OrphosResults, types::OrphosError};
/// Writes one GenBank-style `CDS` feature entry per gene in `results.genes`.
///
/// Each gene produces three lines on `writer`:
///
/// 1. a `CDS` location line,
/// 2. a `/locus_tag` qualifier of the form `"<header>_<n>"`, where `<header>`
///    is `results.sequence_info.header` and `<n>` is the gene's 1-based
///    position in `results.genes`,
/// 3. a `/confidence` qualifier holding `gene.score.confidence` formatted
///    with two decimal places.
///
/// Both coordinates are printed as the stored value plus one (presumably a
/// 0-based to 1-based conversion). When `begin` is greater than `end` the
/// location is emitted as `join(begin+1..length,1..end+1)`, with `length`
/// taken from `results.sequence_info.length`; this looks like the case of a
/// gene wrapping past the end of a circular sequence.
///
/// NOTE(review): `gene.coordinates.strand` is not consulted here, so genes on
/// the reverse strand are written without a `complement(...)` wrapper.
/// Confirm that this is intended.
///
/// # Errors
///
/// Any failure while writing to `writer` is propagated through `?` and
/// returned as an [`OrphosError`]. Nothing is written when `results.genes`
/// is empty.
pub fn write_gbk_format<W: Write>(
    writer: &mut W,
    results: &OrphosResults,
) -> Result<(), OrphosError> {
    let sequence = &results.sequence_info;

    // Pair every gene with its 1-based ordinal, used for the locus tag.
    for (gene, ordinal) in results.genes.iter().zip(1usize..) {
        let span = &gene.coordinates;
        let first = span.begin + 1;
        let last = span.end + 1;

        if span.begin <= span.end {
            // Ordinary contiguous location.
            writeln!(writer, " CDS {}..{}", first, last)?;
        } else {
            // Start lies after the end: emit the two segments as a join.
            writeln!(
                writer,
                " CDS join({}..{},1..{})",
                first, sequence.length, last
            )?;
        }

        writeln!(writer, " /locus_tag=\"{}_{}\"", sequence.header, ordinal)?;
        writeln!(writer, " /confidence={:.2}", gene.score.confidence)?;
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use bio::bio_types::strand::Strand;

    use super::*;
    use crate::{
        results::{OrphosResults, SequenceInfo},
        types::{Gene, GeneCoordinates, GeneScore, Training},
    };

    /// Baseline sequence metadata shared by the tests; individual tests
    /// override single fields with struct-update syntax where they differ.
    fn sequence_info(header: &str) -> SequenceInfo {
        SequenceInfo {
            header: header.to_string(),
            length: 1000,
            description: Some("Test sequence".to_string()),
            gc_content: 12.0,
            num_genes: 25,
        }
    }

    /// Builds a gene from explicit coordinates and score, leaving every
    /// other field at its default.
    fn gene_with(coordinates: GeneCoordinates, score: GeneScore) -> Gene {
        Gene {
            coordinates,
            score,
            ..Default::default()
        }
    }

    /// Runs `write_gbk_format` over the given genes and metadata, asserts
    /// that it succeeded, and returns everything it wrote as a `String`.
    fn render(genes: Vec<Gene>, sequence_info: SequenceInfo) -> String {
        let results = OrphosResults {
            genes,
            sequence_info,
            training_used: Training::default(),
            metagenomic_model: None,
        };
        let mut sink: Vec<u8> = Vec::new();
        let outcome = write_gbk_format(&mut sink, &results);
        assert!(outcome.is_ok());
        String::from_utf8(sink).unwrap()
    }

    /// Asserts that every fragment occurs somewhere in `output`.
    fn assert_contains_all(output: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                output.contains(fragment),
                "expected fragment {:?} in output {:?}",
                fragment,
                output
            );
        }
    }

    #[test]
    fn test_write_gbk_format_empty_genes() {
        let output = render(vec![], sequence_info("test_seq"));
        assert_eq!(output, "");
    }

    #[test]
    fn test_write_gbk_format_single_gene() {
        let gene = gene_with(
            GeneCoordinates {
                begin: 99,
                end: 299,
                ..Default::default()
            },
            GeneScore {
                confidence: 85.75,
                ..Default::default()
            },
        );

        let output = render(vec![gene], sequence_info("sequence1"));

        assert_contains_all(
            &output,
            &[
                " CDS 100..300",
                "/locus_tag=\"sequence1_1\"",
                "/confidence=85.75",
            ],
        );
    }

    #[test]
    fn test_write_gbk_format_multiple_genes() {
        let first = gene_with(
            GeneCoordinates {
                begin: 0,
                end: 100,
                ..Default::default()
            },
            GeneScore {
                confidence: 90.50,
                ..Default::default()
            },
        );
        let second = gene_with(
            GeneCoordinates {
                begin: 200,
                end: 400,
                strand: Strand::Forward,
                start_index: 0,
                stop_index: 0,
            },
            GeneScore {
                confidence: 75.25,
                ..Default::default()
            },
        );
        let info = SequenceInfo {
            length: 500,
            ..sequence_info("multi_gene_seq")
        };

        let output = render(vec![first, second], info);

        assert_contains_all(
            &output,
            &[
                " CDS 1..101",
                "/locus_tag=\"multi_gene_seq_1\"",
                "/confidence=90.50",
                " CDS 201..401",
                "/locus_tag=\"multi_gene_seq_2\"",
                "/confidence=75.25",
            ],
        );
    }

    #[test]
    fn test_write_gbk_format_special_characters_in_header() {
        let gene = gene_with(
            GeneCoordinates {
                begin: 10,
                end: 50,
                ..Default::default()
            },
            GeneScore {
                confidence: 95.0,
                ..Default::default()
            },
        );
        let info = SequenceInfo {
            length: 100,
            ..sequence_info("seq_with-special.chars")
        };

        let output = render(vec![gene], info);

        assert_contains_all(&output, &["/locus_tag=\"seq_with-special.chars_1\""]);
    }

    #[test]
    fn test_write_gbk_format_confidence_formatting() {
        let gene = gene_with(
            GeneCoordinates {
                begin: 0,
                end: 10,
                strand: Strand::Forward,
                start_index: 2,
                stop_index: 10,
            },
            GeneScore {
                confidence: 99.999,
                total_score: 45.0,
                coding_score: 30.0,
                start_score: 15.0,
                ribosome_binding_score: 10.0,
                upstream_score: 5.0,
                type_score: 20.0,
            },
        );
        let info = SequenceInfo {
            length: 20,
            ..sequence_info("test")
        };

        let output = render(vec![gene], info);

        // 99.999 rounds up when printed with two decimal places.
        assert_contains_all(&output, &["/confidence=100.00"]);
    }

    #[test]
    fn test_write_gbk_format_wrapped_gene_join_notation() {
        // `begin` past `end` must be written as a two-segment join.
        let gene = gene_with(
            GeneCoordinates {
                begin: 900,
                end: 100,
                ..Default::default()
            },
            GeneScore {
                confidence: 90.0,
                ..Default::default()
            },
        );
        let info = SequenceInfo {
            description: None,
            num_genes: 1,
            ..sequence_info("circular_seq")
        };

        let output = render(vec![gene], info);

        assert_contains_all(&output, &["CDS join(901..1000,1..101)"]);
    }
}