// orphos_core/config.rs

/// Serialization formats available for gene prediction results.
///
/// Orphos can write its predictions in several formats so that the results
/// stay compatible with different downstream analysis tools.
///
/// # Formats
///
/// - **GenBank**: feature-rich annotation format with gene sequences
/// - **GCA**: gene coordinate annotation (simple tabular format)
/// - **SCO**: simple coordinate output (minimal format)
/// - **GFF**: General Feature Format version 3 (widely supported)
///
/// # Examples
///
/// ```rust
/// use orphos_core::config::{OrphosConfig, OutputFormat};
///
/// let config = OrphosConfig {
///     output_format: OutputFormat::Gff,
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// GenBank output with full feature annotations and sequences.
    ///
    /// Carries gene coordinates, translation, product names, and sequence
    /// data. Compatible with NCBI submission tools.
    Genbank,

    /// Gene coordinate annotation output.
    ///
    /// Tab-delimited records holding gene coordinates and basic metadata.
    /// Lightweight and easy to parse.
    Gca,

    /// Simple coordinate output.
    ///
    /// Minimal records with only start/stop positions and strand.
    /// Handy for quick gene counting or position extraction.
    Sco,

    /// General Feature Format version 3.
    ///
    /// Standard genome annotation format supported by most bioinformatics
    /// tools. Carries gene coordinates, scores, and attributes.
    Gff,
}

50/// Configuration settings for Orphos gene prediction analysis.
51///
52/// This struct controls all aspects of gene prediction including analysis mode,
53/// sequence handling, and output formatting.
54///
55/// # Examples
56///
57/// ## Default configuration
58///
59/// ```rust
60/// use orphos_core::config::OrphosConfig;
61///
62/// let config = OrphosConfig::default();
63/// ```
64///
65/// ## Custom configuration for closed-ended genomes
66///
67/// ```rust
68/// use orphos_core::config::{OrphosConfig, OutputFormat};
69///
70/// let config = OrphosConfig {
71///     closed_ends: true,
72///     mask_n_runs: true,
73///     output_format: OutputFormat::Gff,
74///     ..Default::default()
75/// };
76/// ```
77///
78/// ## Metagenomic mode with multiple threads
79///
80/// ```rust
81/// use orphos_core::config::OrphosConfig;
82///
83/// let config = OrphosConfig {
84///     metagenomic: true,
85///     num_threads: Some(8),
86///     quiet: true,
87///     ..Default::default()
88/// };
89/// ```
90#[derive(Debug, Clone)]
91pub struct OrphosConfig {
92    /// Enable metagenomic mode for fragmented sequences.
93    ///
94    /// When `true`, uses pre-computed models instead of training on each sequence.
95    /// Recommended for:
96    /// - Short contigs (< 100 kb)
97    /// - Mixed community samples
98    /// - Fragmented assemblies
99    ///
100    /// **Default**: `false` (single genome mode)
101    pub metagenomic: bool,
102
103    /// Treat sequences as having closed ends (complete genomes).
104    ///
105    /// When `true`, prevents genes from extending off sequence edges.
106    /// Use for complete, circularized genomes.
107    ///
108    /// **Default**: `false` (allow edge genes)
109    pub closed_ends: bool,
110
111    /// Mask runs of N characters during analysis.
112    ///
113    /// When `true`, treats stretches of N's as gaps and prevents
114    /// genes from spanning them. Useful for draft genomes with gaps.
115    ///
116    /// **Default**: `false`
117    pub mask_n_runs: bool,
118
119    /// Force use of non-Shine-Dalgarno models for start recognition.
120    ///
121    /// When `true`, disables detection of ribosome binding sites.
122    /// Rarely needed except for organisms without canonical RBS.
123    ///
124    /// **Default**: `false` (auto-detect)
125    pub force_non_sd: bool,
126
127    /// Suppress informational output during processing.
128    ///
129    /// When `true`, prevents progress messages and statistics from
130    /// being printed to stderr.
131    ///
132    /// **Default**: `false`
133    pub quiet: bool,
134
135    /// Output format for gene prediction results.
136    ///
137    /// Controls the format of generated output files. See [`OutputFormat`]
138    /// for available options.
139    ///
140    /// **Default**: [`OutputFormat::Genbank`]
141    pub output_format: OutputFormat,
142
143    /// Genetic code translation table number (1-25).
144    ///
145    /// Specifies which genetic code to use for translation:
146    /// - `11`: Bacterial/Archaeal (most common, default)
147    /// - `4`: Mycoplasma/Spiroplasma
148    /// - Others: See NCBI genetic code tables
149    ///
150    /// **Default**: `None` (auto-detect, usually table 11)
151    pub translation_table: Option<u8>,
152
153    /// Number of threads to use for parallel processing.
154    ///
155    /// When set, configures Rayon thread pool for parallel analysis
156    /// of multiple sequences. Set to `None` for automatic detection.
157    ///
158    /// **Default**: `None` (use all available cores)
159    pub num_threads: Option<usize>,
160}
161
162impl Default for OrphosConfig {
163    fn default() -> Self {
164        Self {
165            metagenomic: false,
166            closed_ends: false,
167            mask_n_runs: false,
168            force_non_sd: false,
169            quiet: false,
170            output_format: OutputFormat::Genbank,
171            translation_table: None,
172            num_threads: None,
173        }
174    }
175}