orphos_core/config.rs
/// Output format options for gene prediction results.
///
/// Orphos supports multiple output formats for compatibility with
/// different downstream analysis tools. The enum is a plain, field-less
/// selector: it is `Copy`, comparable, hashable (usable as a map/set key),
/// and has a [`Default`] of [`OutputFormat::Genbank`].
///
/// # Formats
///
/// - **GenBank**: Feature-rich annotation format with gene sequences (default)
/// - **GCA**: Gene coordinate annotation (simple tabular format)
/// - **SCO**: Simple coordinate output (minimal format)
/// - **GFF**: General Feature Format version 3 (widely supported)
/// - **BED**: Browser Extensible Data format (genome browser friendly)
///
/// # Examples
///
/// ```rust
/// use orphos_core::config::{OutputFormat, OrphosConfig};
///
/// let config = OrphosConfig {
///     output_format: OutputFormat::Gff,
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum OutputFormat {
    /// GenBank format output with full feature annotations and sequences.
    ///
    /// Includes gene coordinates, translation, product names, and sequence data.
    /// Compatible with NCBI submission tools.
    ///
    /// This is the default format.
    #[default]
    Genbank,

    /// Gene coordinate annotation format.
    ///
    /// Tab-delimited format with gene coordinates and basic metadata.
    /// Lightweight and easy to parse.
    Gca,

    /// Simple coordinate output format.
    ///
    /// Minimal format with just start/stop positions and strand.
    /// Useful for quick gene counting or position extraction.
    Sco,

    /// General Feature Format version 3.
    ///
    /// Standard genome annotation format supported by most bioinformatics tools.
    /// Includes gene coordinates, scores, and attributes.
    Gff,

    /// Browser Extensible Data format (BED6).
    ///
    /// Tab-delimited format with 0-based, half-open coordinates:
    /// chrom, chromStart, chromEnd, name, score, strand.
    Bed,
}
56
57/// Configuration settings for Orphos gene prediction analysis.
58///
59/// This struct controls all aspects of gene prediction including analysis mode,
60/// sequence handling, and output formatting.
61///
62/// # Examples
63///
64/// ## Default configuration
65///
66/// ```rust
67/// use orphos_core::config::OrphosConfig;
68///
69/// let config = OrphosConfig::default();
70/// ```
71///
72/// ## Custom configuration for closed-ended genomes
73///
74/// ```rust
75/// use orphos_core::config::{OrphosConfig, OutputFormat};
76///
77/// let config = OrphosConfig {
78/// closed_ends: true,
79/// mask_n_runs: true,
80/// output_format: OutputFormat::Gff,
81/// ..Default::default()
82/// };
83/// ```
84///
85/// ## Metagenomic mode with multiple threads
86///
87/// ```rust
88/// use orphos_core::config::OrphosConfig;
89///
90/// let config = OrphosConfig {
91/// metagenomic: true,
92/// num_threads: Some(8),
93/// quiet: true,
94/// ..Default::default()
95/// };
96/// ```
97#[derive(Debug, Clone)]
98pub struct OrphosConfig {
99 /// Enable metagenomic mode for fragmented sequences.
100 ///
101 /// When `true`, uses pre-computed models instead of training on each sequence.
102 /// Recommended for:
103 /// - Short contigs (< 100 kb)
104 /// - Mixed community samples
105 /// - Fragmented assemblies
106 ///
107 /// **Default**: `false` (single genome mode)
108 pub metagenomic: bool,
109
110 /// Treat sequences as having closed ends (complete genomes).
111 ///
112 /// When `true`, prevents genes from extending off sequence edges.
113 /// This does not enable wraparound gene detection; use `circular` for that.
114 ///
115 /// **Default**: `false` (allow edge genes)
116 pub closed_ends: bool,
117
118 /// Treat sequences as circular topology for wraparound gene detection.
119 ///
120 /// When `true`, sequence end and start are treated as adjacent so genes
121 /// can span the breakpoint. This is independent from `closed_ends`.
122 ///
123 /// **Default**: `false` (linear topology)
124 pub circular: bool,
125
126 /// Mask runs of N characters during analysis.
127 ///
128 /// When `true`, treats stretches of N's as gaps and prevents
129 /// genes from spanning them. Useful for draft genomes with gaps.
130 ///
131 /// **Default**: `false`
132 pub mask_n_runs: bool,
133
134 /// Force use of non-Shine-Dalgarno models for start recognition.
135 ///
136 /// When `true`, disables detection of ribosome binding sites.
137 /// Rarely needed except for organisms without canonical RBS.
138 ///
139 /// **Default**: `false` (auto-detect)
140 pub force_non_sd: bool,
141
142 /// Suppress informational output during processing.
143 ///
144 /// When `true`, prevents progress messages and statistics from
145 /// being printed to stderr.
146 ///
147 /// **Default**: `false`
148 pub quiet: bool,
149
150 /// Output format for gene prediction results.
151 ///
152 /// Controls the format of generated output files. See [`OutputFormat`]
153 /// for available options.
154 ///
155 /// **Default**: [`OutputFormat::Genbank`]
156 pub output_format: OutputFormat,
157
158 /// Genetic code translation table number (1-25).
159 ///
160 /// Specifies which genetic code to use for translation:
161 /// - `11`: Bacterial/Archaeal (most common, default)
162 /// - `4`: Mycoplasma/Spiroplasma
163 /// - Others: See NCBI genetic code tables
164 ///
165 /// **Default**: `None` (auto-detect, usually table 11)
166 pub translation_table: Option<u8>,
167
168 /// Number of threads to use for parallel processing.
169 ///
170 /// When set, configures Rayon thread pool for parallel analysis
171 /// of multiple sequences. Set to `None` for automatic detection.
172 ///
173 /// **Default**: `None` (use all available cores)
174 pub num_threads: Option<usize>,
175}
176
177impl Default for OrphosConfig {
178 fn default() -> Self {
179 Self {
180 metagenomic: false,
181 closed_ends: false,
182 circular: false,
183 mask_n_runs: false,
184 force_non_sd: false,
185 quiet: false,
186 output_format: OutputFormat::Genbank,
187 translation_table: None,
188 num_threads: None,
189 }
190 }
191}