use std::io::Write;
use bio::bio_types::strand::Strand;
use crate::{OrphosError, results::OrphosResults};
fn confidence_to_bed_score(confidence: f64) -> i32 {
if !confidence.is_finite() {
return 0;
}
(confidence * 10.0).round().clamp(0.0, 1000.0) as i32
}
pub fn write_bed_format<W: Write>(
writer: &mut W,
results: &OrphosResults,
) -> Result<(), OrphosError> {
for (i, gene) in results.genes.iter().enumerate() {
if gene.coordinates.begin > gene.coordinates.end {
continue;
}
let strand = match gene.coordinates.strand {
Strand::Forward => '+',
Strand::Reverse => '-',
Strand::Unknown => '.',
};
let name = if gene.annotation.identifier.is_empty() {
format!("{}_{}", results.sequence_info.header, i + 1)
} else {
gene.annotation.identifier.clone()
};
writeln!(
writer,
"{}\t{}\t{}\t{}\t{}\t{}",
results.sequence_info.header,
gene.coordinates.begin,
gene.coordinates.end + 1,
name,
confidence_to_bed_score(gene.score.confidence),
strand
)?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
results::{OrphosResults, SequenceInfo},
types::{Gene, GeneAnnotation, GeneCoordinates, GeneScore, StartType, Training},
};
use bio::bio_types::strand::Strand;
use std::io::Cursor;
fn create_results(genes: Vec<Gene>) -> OrphosResults {
OrphosResults {
sequence_info: SequenceInfo {
header: "test_seq".to_string(),
length: 1000,
description: Some("Test sequence".to_string()),
gc_content: 0.5,
num_genes: genes.len(),
},
genes,
training_used: Training::default(),
metagenomic_model: None,
}
}
#[test]
fn test_write_bed_format_single_gene() {
let mut buffer = Vec::new();
let mut cursor = Cursor::new(&mut buffer);
let gene = Gene {
coordinates: GeneCoordinates {
begin: 99,
end: 299,
strand: Strand::Forward,
..Default::default()
},
score: GeneScore {
confidence: 95.5,
..Default::default()
},
annotation: GeneAnnotation::new(
"gene_1".to_string(),
false,
false,
StartType::Atg,
0.45,
),
..Default::default()
};
let results = create_results(vec![gene]);
write_bed_format(&mut cursor, &results).unwrap();
let output = String::from_utf8(buffer).unwrap();
assert_eq!(output, "test_seq\t99\t300\tgene_1\t955\t+\n");
}
#[test]
fn test_write_bed_format_fallback_name() {
let mut buffer = Vec::new();
let mut cursor = Cursor::new(&mut buffer);
let gene = Gene {
coordinates: GeneCoordinates {
begin: 10,
end: 50,
strand: Strand::Reverse,
..Default::default()
},
score: GeneScore {
confidence: 50.0,
..Default::default()
},
..Default::default()
};
let results = create_results(vec![gene]);
write_bed_format(&mut cursor, &results).unwrap();
let output = String::from_utf8(buffer).unwrap();
assert_eq!(output, "test_seq\t10\t51\ttest_seq_1\t500\t-\n");
}
#[test]
fn test_write_bed_format_unknown_strand() {
let mut buffer = Vec::new();
let mut cursor = Cursor::new(&mut buffer);
let gene = Gene {
coordinates: GeneCoordinates {
begin: 0,
end: 0,
strand: Strand::Unknown,
..Default::default()
},
score: GeneScore {
confidence: 0.0,
..Default::default()
},
..Default::default()
};
let results = create_results(vec![gene]);
write_bed_format(&mut cursor, &results).unwrap();
let output = String::from_utf8(buffer).unwrap();
assert_eq!(output, "test_seq\t0\t1\ttest_seq_1\t0\t.\n");
}
#[test]
fn test_write_bed_format_score_rounding_and_clamp() {
assert_eq!(confidence_to_bed_score(99.94), 999);
assert_eq!(confidence_to_bed_score(99.95), 1000);
assert_eq!(confidence_to_bed_score(120.0), 1000);
assert_eq!(confidence_to_bed_score(-1.0), 0);
assert_eq!(confidence_to_bed_score(f64::NAN), 0);
}
#[test]
fn test_write_bed_format_no_genes() {
let mut buffer = Vec::new();
let mut cursor = Cursor::new(&mut buffer);
let results = create_results(vec![]);
write_bed_format(&mut cursor, &results).unwrap();
let output = String::from_utf8(buffer).unwrap();
assert_eq!(output, "");
}
#[test]
fn test_write_bed_format_skips_wrapped_gene() {
let mut buffer = Vec::new();
let mut cursor = Cursor::new(&mut buffer);
let gene = Gene {
coordinates: GeneCoordinates {
begin: 900,
end: 100,
strand: Strand::Forward,
..Default::default()
},
..Default::default()
};
let results = create_results(vec![gene]);
write_bed_format(&mut cursor, &results).unwrap();
let output = String::from_utf8(buffer).unwrap();
assert_eq!(output, "");
}
}