//! Configuration types for Orphos gene prediction (`orphos_core/config.rs`).

/// Output format options for gene prediction results.
///
/// Orphos supports multiple output formats for compatibility with
/// different downstream analysis tools.
///
/// # Formats
///
/// Listed in declaration order:
///
/// - **GenBank**: Feature-rich annotation format with gene sequences (default)
/// - **GCA**: Gene coordinate annotation (simple tabular format)
/// - **SCO**: Simple coordinate output (minimal format)
/// - **GFF**: General Feature Format version 3 (widely supported)
/// - **BED**: Browser Extensible Data format (genome browser friendly)
///
/// # Examples
///
/// ```rust
/// use orphos_core::config::{OutputFormat, OrphosConfig};
///
/// let config = OrphosConfig {
///     output_format: OutputFormat::Gff,
///     ..Default::default()
/// };
/// assert_eq!(config.output_format, OutputFormat::Gff);
///
/// // The enum's own default is GenBank.
/// assert_eq!(OutputFormat::default(), OutputFormat::Genbank);
/// ```
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum OutputFormat {
    /// GenBank format output with full feature annotations and sequences.
    ///
    /// Includes gene coordinates, translation, product names, and sequence data.
    /// Compatible with NCBI submission tools.
    ///
    /// This is the default format.
    #[default]
    Genbank,

    /// Gene coordinate annotation format.
    ///
    /// Tab-delimited format with gene coordinates and basic metadata.
    /// Lightweight and easy to parse.
    Gca,

    /// Simple coordinate output format.
    ///
    /// Minimal format with just start/stop positions and strand.
    /// Useful for quick gene counting or position extraction.
    Sco,

    /// General Feature Format version 3.
    ///
    /// Standard genome annotation format supported by most bioinformatics tools.
    /// Includes gene coordinates, scores, and attributes.
    Gff,

    /// Browser Extensible Data format (BED6).
    ///
    /// Tab-delimited format with 0-based, half-open coordinates:
    /// chrom, chromStart, chromEnd, name, score, strand.
    Bed,
}

57/// Configuration settings for Orphos gene prediction analysis.
58///
59/// This struct controls all aspects of gene prediction including analysis mode,
60/// sequence handling, and output formatting.
61///
62/// # Examples
63///
64/// ## Default configuration
65///
66/// ```rust
67/// use orphos_core::config::OrphosConfig;
68///
69/// let config = OrphosConfig::default();
70/// ```
71///
72/// ## Custom configuration for closed-ended genomes
73///
74/// ```rust
75/// use orphos_core::config::{OrphosConfig, OutputFormat};
76///
77/// let config = OrphosConfig {
78///     closed_ends: true,
79///     mask_n_runs: true,
80///     output_format: OutputFormat::Gff,
81///     ..Default::default()
82/// };
83/// ```
84///
85/// ## Metagenomic mode with multiple threads
86///
87/// ```rust
88/// use orphos_core::config::OrphosConfig;
89///
90/// let config = OrphosConfig {
91///     metagenomic: true,
92///     num_threads: Some(8),
93///     quiet: true,
94///     ..Default::default()
95/// };
96/// ```
97#[derive(Debug, Clone)]
98pub struct OrphosConfig {
99    /// Enable metagenomic mode for fragmented sequences.
100    ///
101    /// When `true`, uses pre-computed models instead of training on each sequence.
102    /// Recommended for:
103    /// - Short contigs (< 100 kb)
104    /// - Mixed community samples
105    /// - Fragmented assemblies
106    ///
107    /// **Default**: `false` (single genome mode)
108    pub metagenomic: bool,
109
110    /// Treat sequences as having closed ends (complete genomes).
111    ///
112    /// When `true`, prevents genes from extending off sequence edges.
113    /// This does not enable wraparound gene detection; use `circular` for that.
114    ///
115    /// **Default**: `false` (allow edge genes)
116    pub closed_ends: bool,
117
118    /// Treat sequences as circular topology for wraparound gene detection.
119    ///
120    /// When `true`, sequence end and start are treated as adjacent so genes
121    /// can span the breakpoint. This is independent from `closed_ends`.
122    ///
123    /// **Default**: `false` (linear topology)
124    pub circular: bool,
125
126    /// Mask runs of N characters during analysis.
127    ///
128    /// When `true`, treats stretches of N's as gaps and prevents
129    /// genes from spanning them. Useful for draft genomes with gaps.
130    ///
131    /// **Default**: `false`
132    pub mask_n_runs: bool,
133
134    /// Force use of non-Shine-Dalgarno models for start recognition.
135    ///
136    /// When `true`, disables detection of ribosome binding sites.
137    /// Rarely needed except for organisms without canonical RBS.
138    ///
139    /// **Default**: `false` (auto-detect)
140    pub force_non_sd: bool,
141
142    /// Suppress informational output during processing.
143    ///
144    /// When `true`, prevents progress messages and statistics from
145    /// being printed to stderr.
146    ///
147    /// **Default**: `false`
148    pub quiet: bool,
149
150    /// Output format for gene prediction results.
151    ///
152    /// Controls the format of generated output files. See [`OutputFormat`]
153    /// for available options.
154    ///
155    /// **Default**: [`OutputFormat::Genbank`]
156    pub output_format: OutputFormat,
157
158    /// Genetic code translation table number (1-25).
159    ///
160    /// Specifies which genetic code to use for translation:
161    /// - `11`: Bacterial/Archaeal (most common, default)
162    /// - `4`: Mycoplasma/Spiroplasma
163    /// - Others: See NCBI genetic code tables
164    ///
165    /// **Default**: `None` (auto-detect, usually table 11)
166    pub translation_table: Option<u8>,
167
168    /// Number of threads to use for parallel processing.
169    ///
170    /// When set, configures Rayon thread pool for parallel analysis
171    /// of multiple sequences. Set to `None` for automatic detection.
172    ///
173    /// **Default**: `None` (use all available cores)
174    pub num_threads: Option<usize>,
175}
176
177impl Default for OrphosConfig {
178    fn default() -> Self {
179        Self {
180            metagenomic: false,
181            closed_ends: false,
182            circular: false,
183            mask_n_runs: false,
184            force_non_sd: false,
185            quiet: false,
186            output_format: OutputFormat::Genbank,
187            translation_table: None,
188            num_threads: None,
189        }
190    }
191}