orphos_core/output/
mod.rs

1//! Output formatting for gene prediction results.
2//!
3//! This module provides writers for converting [`OrphosResults`] into various
4//! standard bioinformatics file formats.
5//!
6//! ## Supported Formats
7//!
8//! - **GenBank (GBK)**: Feature-rich annotation format with sequences
9//! - **GFF3**: General Feature Format version 3
10//! - **GCA**: Gene coordinate annotation (tabular)
11//! - **SCO**: Simple coordinate output
12//!
13//! ## Examples
14//!
15//! ### Write results to a file
16//!
17//! ```rust,no_run
18//! use orphos_core::{OrphosAnalyzer, config::{OrphosConfig, OutputFormat}};
19//! use orphos_core::output::write_results;
20//! use std::fs::File;
21//!
22//! let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());
23//! let results = analyzer.analyze_sequence("ATGCGATCG...", None)?;
24//!
25//! // Write as GenBank
26//! let mut gbk_file = File::create("output.gbk")?;
27//! write_results(&mut gbk_file, &results, OutputFormat::Genbank)?;
28//!
29//! // Write as GFF3
30//! let mut gff_file = File::create("output.gff")?;
31//! write_results(&mut gff_file, &results, OutputFormat::Gff)?;
32//! # Ok::<(), Box<dyn std::error::Error>>(())
33//! ```
34//!
35//! ### Write to stdout
36//!
37//! ```rust,no_run
38//! use orphos_core::{OrphosAnalyzer, config::{OrphosConfig, OutputFormat}};
39//! use orphos_core::output::write_results;
40//! use std::io::stdout;
41//!
42//! let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());
43//! let results = analyzer.analyze_sequence("ATGCGATCG...", None)?;
44//!
45//! write_results(&mut stdout(), &results, OutputFormat::Gff)?;
46//! # Ok::<(), Box<dyn std::error::Error>>(())
47//! ```
48
49use crate::{OrphosError, config::OutputFormat, results::OrphosResults};
50use std::io::Write;
51
52mod formats {
53    pub mod gbk;
54    pub mod gca;
55    pub mod gff;
56    pub mod sco;
57}
58
59use formats::{
60    gbk::write_gbk_format, gca::write_gca_format, gff::write_gff_format, sco::write_sco_format,
61};
62
63/// Writes gene prediction results in the specified format.
64///
65/// This is the main entry point for output formatting. It delegates to
66/// format-specific writers based on the requested output format.
67///
68/// # Arguments
69///
70/// * `writer` - Output writer (file, stdout, buffer, etc.)
71/// * `results` - Gene prediction results to write
72/// * `format` - Desired output format
73///
74/// # Errors
75///
76/// Returns [`OrphosError`] if writing fails (I/O errors, invalid data).
77///
78/// # Examples
79///
80/// ```rust,no_run
81/// use orphos_core::{OrphosAnalyzer, config::{OrphosConfig, OutputFormat}};
82/// use orphos_core::output::write_results;
83/// use std::fs::File;
84///
85/// let mut analyzer = OrphosAnalyzer::new(OrphosConfig::default());
86/// let results = analyzer.analyze_fasta_file("genome.fasta")?;
87///
88/// let mut output = File::create("genes.gff")?;
89/// for result in &results {
90///     write_results(&mut output, result, OutputFormat::Gff)?;
91/// }
92/// # Ok::<(), Box<dyn std::error::Error>>(())
93/// ```
94pub fn write_results<W: Write>(
95    writer: &mut W,
96    results: &OrphosResults,
97    format: OutputFormat,
98) -> Result<(), OrphosError> {
99    match format {
100        OutputFormat::Genbank => write_gbk_format(writer, results),
101        OutputFormat::Gff => write_gff_format(writer, results),
102        OutputFormat::Sco => write_sco_format(writer, results),
103        OutputFormat::Gca => write_gca_format(writer, results),
104    }
105}
106
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        config::OutputFormat,
        results::{OrphosResults, SequenceInfo},
        types::{Gene, GeneCoordinates, GeneScore, Training},
    };
    use bio::bio_types::strand::Strand;
    use std::io::Cursor;

    /// Every output format exercised by the cross-format tests.
    const ALL_FORMATS: [OutputFormat; 4] = [
        OutputFormat::Genbank,
        OutputFormat::Gff,
        OutputFormat::Sco,
        OutputFormat::Gca,
    ];

    /// Builds a fixture holding one forward-strand gene on a 1000 bp sequence.
    fn create_test_results() -> OrphosResults {
        // Stored as 99/299; the per-format assertions below expect these to be
        // reported as 100 and 300.
        let coordinates = GeneCoordinates {
            begin: 99,
            end: 299,
            strand: Strand::Forward,
            ..Default::default()
        };
        let score = GeneScore {
            confidence: 95.5,
            ..Default::default()
        };
        let gene = Gene {
            coordinates,
            score,
            ..Default::default()
        };
        let sequence_info = SequenceInfo {
            header: "test_seq".to_string(),
            length: 1000,
            description: Some("Test sequence".to_string()),
            gc_content: 50.0,
            num_genes: 1,
        };

        OrphosResults {
            sequence_info,
            genes: vec![gene],
            training_used: Training::default(),
            metagenomic_model: None,
        }
    }

    /// Runs `write_results` against an in-memory sink.
    ///
    /// Returns whether the call succeeded together with the raw bytes that
    /// were written, so each test decides for itself what to assert.
    fn run_writer(results: &OrphosResults, format: OutputFormat) -> (bool, Vec<u8>) {
        let mut sink = Cursor::new(Vec::new());
        let succeeded = write_results(&mut sink, results, format).is_ok();
        (succeeded, sink.into_inner())
    }

    #[test]
    fn test_write_results_genbank_format() {
        let (succeeded, bytes) = run_writer(&create_test_results(), OutputFormat::Genbank);
        assert!(succeeded);

        let text = String::from_utf8(bytes).unwrap();
        assert!(text.contains("CDS"));
        assert!(text.contains("100..300"));
        assert!(text.contains("test_seq_1"));
        assert!(text.contains("confidence=95.50"));
    }

    #[test]
    fn test_write_results_gff_format() {
        let (succeeded, bytes) = run_writer(&create_test_results(), OutputFormat::Gff);
        assert!(succeeded);

        let text = String::from_utf8(bytes).unwrap();
        assert!(text.contains("##gff-version"));
        assert!(text.contains("test_seq"));
        assert!(text.contains("CDS"));
        assert!(text.contains("conf=95.50"));
    }

    #[test]
    fn test_write_results_sco_format() {
        let (succeeded, bytes) = run_writer(&create_test_results(), OutputFormat::Sco);
        assert!(succeeded);

        // SCO output for the fixture is a single tab-separated line.
        let text = String::from_utf8(bytes).unwrap();
        assert_eq!(text, "100\t300\t1\t95.50\n");
    }

    #[test]
    fn test_write_results_gca_format() {
        let (succeeded, bytes) = run_writer(&create_test_results(), OutputFormat::Gca);
        assert!(succeeded);

        let text = String::from_utf8(bytes).unwrap();
        assert!(text.contains(">test_seq_1"));
        assert!(text.contains("100\t300\t+"));
    }

    #[test]
    fn test_write_results_format_consistency() {
        let fixture = create_test_results();

        // Each format must succeed and emit at least some output.
        for format in ALL_FORMATS {
            let (succeeded, bytes) = run_writer(&fixture, format);
            assert!(succeeded, "Failed to write format: {:?}", format);

            let text = String::from_utf8(bytes).unwrap();
            assert!(!text.is_empty(), "Empty output for format: {:?}", format);
        }
    }

    #[test]
    fn test_write_results_empty_genes() {
        let sequence_info = SequenceInfo {
            header: "empty_seq".to_string(),
            length: 100,
            description: None,
            gc_content: 40.0,
            num_genes: 0,
        };
        let no_genes = OrphosResults {
            sequence_info,
            genes: vec![],
            training_used: Training::default(),
            metagenomic_model: None,
        };

        // A result set without genes must still be writable in every format.
        for format in ALL_FORMATS {
            let (succeeded, _) = run_writer(&no_genes, format);
            assert!(
                succeeded,
                "Failed to write empty results for format: {:?}",
                format
            );
        }
    }
}