// orphos_core/config.rs
/// Output format options for gene prediction results.
///
/// Orphos supports multiple output formats for compatibility with
/// different downstream analysis tools.
///
/// # Formats
///
/// - **GenBank**: Feature-rich annotation format with gene sequences
/// - **GFF**: General Feature Format version 3 (widely supported)
/// - **GCA**: Gene coordinate annotation (simple tabular format)
/// - **SCO**: Simple coordinate output (minimal format)
///
/// # Examples
///
/// ```rust
/// use orphos_core::config::{OutputFormat, OrphosConfig};
///
/// let config = OrphosConfig {
///     output_format: OutputFormat::Gff,
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// GenBank format output with full feature annotations and sequences.
    ///
    /// Includes gene coordinates, translation, product names, and sequence data.
    /// Compatible with NCBI submission tools.
    Genbank,

    /// Gene coordinate annotation format.
    ///
    /// Tab-delimited format with gene coordinates and basic metadata.
    /// Lightweight and easy to parse.
    Gca,

    /// Simple coordinate output format.
    ///
    /// Minimal format with just start/stop positions and strand.
    /// Useful for quick gene counting or position extraction.
    Sco,

    /// General Feature Format version 3.
    ///
    /// Standard genome annotation format supported by most bioinformatics tools.
    /// Includes gene coordinates, scores, and attributes.
    Gff,
}

50/// Configuration settings for Orphos gene prediction analysis.
51///
52/// This struct controls all aspects of gene prediction including analysis mode,
53/// sequence handling, and output formatting.
54///
55/// # Examples
56///
57/// ## Default configuration
58///
59/// ```rust
60/// use orphos_core::config::OrphosConfig;
61///
62/// let config = OrphosConfig::default();
63/// ```
64///
65/// ## Custom configuration for closed-ended genomes
66///
67/// ```rust
68/// use orphos_core::config::{OrphosConfig, OutputFormat};
69///
70/// let config = OrphosConfig {
71/// closed_ends: true,
72/// mask_n_runs: true,
73/// output_format: OutputFormat::Gff,
74/// ..Default::default()
75/// };
76/// ```
77///
78/// ## Metagenomic mode with multiple threads
79///
80/// ```rust
81/// use orphos_core::config::OrphosConfig;
82///
83/// let config = OrphosConfig {
84/// metagenomic: true,
85/// num_threads: Some(8),
86/// quiet: true,
87/// ..Default::default()
88/// };
89/// ```
90#[derive(Debug, Clone)]
91pub struct OrphosConfig {
92 /// Enable metagenomic mode for fragmented sequences.
93 ///
94 /// When `true`, uses pre-computed models instead of training on each sequence.
95 /// Recommended for:
96 /// - Short contigs (< 100 kb)
97 /// - Mixed community samples
98 /// - Fragmented assemblies
99 ///
100 /// **Default**: `false` (single genome mode)
101 pub metagenomic: bool,
102
103 /// Treat sequences as having closed ends (complete genomes).
104 ///
105 /// When `true`, prevents genes from extending off sequence edges.
106 /// Use for complete, circularized genomes.
107 ///
108 /// **Default**: `false` (allow edge genes)
109 pub closed_ends: bool,
110
111 /// Mask runs of N characters during analysis.
112 ///
113 /// When `true`, treats stretches of N's as gaps and prevents
114 /// genes from spanning them. Useful for draft genomes with gaps.
115 ///
116 /// **Default**: `false`
117 pub mask_n_runs: bool,
118
119 /// Force use of non-Shine-Dalgarno models for start recognition.
120 ///
121 /// When `true`, disables detection of ribosome binding sites.
122 /// Rarely needed except for organisms without canonical RBS.
123 ///
124 /// **Default**: `false` (auto-detect)
125 pub force_non_sd: bool,
126
127 /// Suppress informational output during processing.
128 ///
129 /// When `true`, prevents progress messages and statistics from
130 /// being printed to stderr.
131 ///
132 /// **Default**: `false`
133 pub quiet: bool,
134
135 /// Output format for gene prediction results.
136 ///
137 /// Controls the format of generated output files. See [`OutputFormat`]
138 /// for available options.
139 ///
140 /// **Default**: [`OutputFormat::Genbank`]
141 pub output_format: OutputFormat,
142
143 /// Genetic code translation table number (1-25).
144 ///
145 /// Specifies which genetic code to use for translation:
146 /// - `11`: Bacterial/Archaeal (most common, default)
147 /// - `4`: Mycoplasma/Spiroplasma
148 /// - Others: See NCBI genetic code tables
149 ///
150 /// **Default**: `None` (auto-detect, usually table 11)
151 pub translation_table: Option<u8>,
152
153 /// Number of threads to use for parallel processing.
154 ///
155 /// When set, configures Rayon thread pool for parallel analysis
156 /// of multiple sequences. Set to `None` for automatic detection.
157 ///
158 /// **Default**: `None` (use all available cores)
159 pub num_threads: Option<usize>,
160}
161
162impl Default for OrphosConfig {
163 fn default() -> Self {
164 Self {
165 metagenomic: false,
166 closed_ends: false,
167 mask_n_runs: false,
168 force_non_sd: false,
169 quiet: false,
170 output_format: OutputFormat::Genbank,
171 translation_table: None,
172 num_threads: None,
173 }
174 }
175}