// fastqc_rust/config.rs

1use std::collections::HashMap;
2use std::fs;
3use std::io;
4use std::path::PathBuf;
5
6/// Report template selection.
7///
8/// Controls the HTML report layout and styling. The `Classic` template produces
9/// byte-identical output to Java FastQC. The `Modern` template uses a redesigned
10/// layout with responsive sidebar, SVG icons, and help text accordions.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, clap::ValueEnum)]
12pub enum TemplateName {
13    /// Original FastQC report layout (Java-compatible)
14    #[default]
15    Classic,
16    /// Modernised report with responsive sidebar and help text
17    Modern,
18}
19
20// Embedded default config files match the Java resource files exactly.
21// These are the same files shipped in the Java FastQC Configuration/ directory.
22const DEFAULT_LIMITS: &str = include_str!("../assets/limits.txt");
23const DEFAULT_ADAPTERS: &str = include_str!("../assets/adapter_list.txt");
24const DEFAULT_CONTAMINANTS: &str = include_str!("../assets/contaminant_list.txt");
25
26/// Configuration for a FastQC run, mirroring all fields from Java FastQCConfig.
27#[derive(Debug, Clone)]
28pub struct FastQCConfig {
29    pub nogroup: bool,
30    pub expgroup: bool,
31    pub quiet: bool,
32    pub kmer_size: u8,
33    pub threads: usize,
34    pub output_dir: Option<PathBuf>,
35    pub casava: bool,
36    pub nano: bool,
37    pub nofilter: bool,
38    pub do_unzip: Option<bool>,
39    pub delete_after_unzip: bool,
40    pub sequence_format: Option<String>,
41    pub contaminant_file: Option<PathBuf>,
42    pub adapter_file: Option<PathBuf>,
43    pub limits_file: Option<PathBuf>,
44    pub min_length: usize,
45    pub dup_length: usize,
46    pub svg_output: bool,
47    pub temp_dir: Option<PathBuf>,
48    pub template: TemplateName,
49}
50
51impl Default for FastQCConfig {
52    fn default() -> Self {
53        Self {
54            nogroup: false,
55            expgroup: false,
56            quiet: false,
57            kmer_size: 7,
58            threads: 1,
59            output_dir: None,
60            casava: false,
61            nano: false,
62            nofilter: false,
63            do_unzip: None,
64            delete_after_unzip: false,
65            sequence_format: None,
66            contaminant_file: None,
67            adapter_file: None,
68            limits_file: None,
69            min_length: 0,
70            dup_length: 0,
71            svg_output: false,
72            temp_dir: None,
73            template: TemplateName::Classic,
74        }
75    }
76}
77
/// Parsed limit entries from limits.txt.
/// The key is "{module}\t{level}" (e.g. "duplication\twarn"), value is the threshold.
///
/// The Java ModuleConfig stores limits as a nested HashMap keyed on
/// module name and then level (warn/error/ignore). We flatten into a single HashMap
/// with a composite key of "module\tlevel" to simplify lookups while keeping the
/// same data accessible.
pub type Limits = HashMap<String, f64>;

/// Extension methods for the `Limits` type to reduce boilerplate in modules.
pub trait LimitsExt {
    /// Get a threshold value for a module/level key, returning a default if not set.
    ///
    /// `key` is the full composite key, e.g. `"duplication\twarn"`.
    ///
    /// Replaces the common pattern:
    ///   `self.limits.get("module\tlevel").copied().unwrap_or(default)`
    fn threshold(&self, key: &str, default: f64) -> f64;

    /// Check whether a module is configured to be ignored (ignore value > 0).
    ///
    /// A missing `"{module}\tignore"` entry, a zero, a negative value and NaN
    /// all count as "not ignored".
    ///
    /// Replaces the common pattern:
    ///   `self.limits.get("module\tignore").copied().unwrap_or(0.0) > 0.0`
    fn is_ignored(&self, module: &str) -> bool;

    /// Check whether a module should be created (not configured to be ignored).
    ///
    /// This is always the exact negation of [`LimitsExt::is_ignored`], so a
    /// module can never be both "not ignored" and "not created".
    fn is_module_enabled(&self, module: &str) -> bool;
}

impl LimitsExt for Limits {
    fn threshold(&self, key: &str, default: f64) -> f64 {
        self.get(key).copied().unwrap_or(default)
    }

    fn is_ignored(&self, module: &str) -> bool {
        let key = format!("{module}\tignore");
        // Only a strictly positive value switches the module off.
        self.get(&key).is_some_and(|&value| value > 0.0)
    }

    fn is_module_enabled(&self, module: &str) -> bool {
        // Defined via `is_ignored` so the two predicates cannot drift apart
        // (previously a negative or NaN ignore value made both return false).
        !self.is_ignored(module)
    }
}
123
124impl FastQCConfig {
125    /// Load module limits from the configured file or the embedded default.
126    ///
127    /// Parsing matches `ModuleConfig.java` - lines starting with '#'
128    /// are comments, blank lines are skipped, and each data line has whitespace-
129    /// separated fields: module level value.
130    pub fn load_limits(&self) -> io::Result<Limits> {
131        let text = match &self.limits_file {
132            Some(path) => fs::read_to_string(path)?,
133            None => DEFAULT_LIMITS.to_string(),
134        };
135        Ok(parse_limits(&text))
136    }
137
138    /// Load adapter sequences from the configured file or the embedded default.
139    ///
140    /// Returns a list of (name, sequence) pairs.
141    ///
142    /// Parsing matches the Java adapter loading - lines starting with
143    /// '#' are comments, blank lines are skipped, and each data line is tab-
144    /// delimited with name and sequence columns.
145    pub fn load_adapters(&self) -> io::Result<Vec<(String, String)>> {
146        let text = match &self.adapter_file {
147            Some(path) => fs::read_to_string(path)?,
148            None => DEFAULT_ADAPTERS.to_string(),
149        };
150        Ok(parse_name_sequence_file(&text))
151    }
152
153    /// Load contaminant sequences from the configured file or the embedded default.
154    ///
155    /// Returns a list of (name, sequence) pairs.
156    ///
157    /// Parsing matches the Java contaminant loading - same format as
158    /// adapter files.
159    pub fn load_contaminants(&self) -> io::Result<Vec<(String, String)>> {
160        let text = match &self.contaminant_file {
161            Some(path) => fs::read_to_string(path)?,
162            None => DEFAULT_CONTAMINANTS.to_string(),
163        };
164        Ok(parse_name_sequence_file(&text))
165    }
166}
167
168/// Parse a limits.txt formatted string into a Limits map.
169///
170/// Whitespace-separated fields. The Java code splits on arbitrary
171/// whitespace (tabs/spaces), so we do the same with split_whitespace().
172fn parse_limits(text: &str) -> Limits {
173    let mut limits = HashMap::new();
174    for line in text.lines() {
175        let trimmed = line.trim();
176        if trimmed.is_empty() || trimmed.starts_with('#') {
177            continue;
178        }
179        let parts: Vec<&str> = trimmed.split_whitespace().collect();
180        if parts.len() >= 3 {
181            let module = parts[0];
182            let level = parts[1]; // "warn", "error", or "ignore"
183            if let Ok(value) = parts[2].parse::<f64>() {
184                let key = format!("{}\t{}", module, level);
185                limits.insert(key, value);
186            }
187        }
188    }
189    limits
190}
191
/// Parse a tab-delimited name/sequence file (adapters or contaminants).
///
/// Lines starting with '#' are comments, blank lines are skipped.
/// Each data line has a name and sequence separated by one or more tabs.
/// Leading/trailing whitespace on the name and the sequence is trimmed.
///
/// Only the first two non-empty tab-separated fields are used; any further
/// columns on the line are ignored rather than being folded into the sequence.
/// Lines without a second field are skipped.
fn parse_name_sequence_file(text: &str) -> Vec<(String, String)> {
    text.lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| {
            // Names may contain spaces, so split on tab only. Dropping empty
            // fields treats a run of tabs as a single separator.
            let mut fields = line
                .split('\t')
                .map(str::trim)
                .filter(|field| !field.is_empty());
            let name = fields.next()?;
            let sequence = fields.next()?;
            Some((name.to_string(), sequence.to_string()))
        })
        .collect()
}
216
#[cfg(test)]
mod tests {
    use super::*;

    // The first three tests read the embedded asset files, so they double as a
    // check that the bundled resources are present and parse as expected.

    #[test]
    fn test_parse_limits_default() {
        let config = FastQCConfig::default();
        let limits = config.load_limits().unwrap();
        // Check a few well-known entries from the default limits.txt
        assert_eq!(limits.get("duplication\twarn"), Some(&70.0));
        assert_eq!(limits.get("duplication\terror"), Some(&50.0));
        assert_eq!(limits.get("kmer\tignore"), Some(&1.0));
        assert_eq!(limits.get("adapter\twarn"), Some(&5.0));
    }

    #[test]
    fn test_parse_adapters_default() {
        let config = FastQCConfig::default();
        let adapters = config.load_adapters().unwrap();
        assert!(!adapters.is_empty());
        // First adapter in the default file
        assert_eq!(adapters[0].0, "Illumina Universal Adapter");
        assert_eq!(adapters[0].1, "AGATCGGAAGAG");
    }

    #[test]
    fn test_parse_contaminants_default() {
        let config = FastQCConfig::default();
        let contaminants = config.load_contaminants().unwrap();
        assert!(!contaminants.is_empty());
        assert_eq!(contaminants[0].0, "Illumina Single End Adapter 1");
        assert_eq!(contaminants[0].1, "GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG");
    }

    #[test]
    fn test_parse_limits_comments_and_blanks() {
        let text = "# comment\n\nduplication\twarn\t70\n";
        let limits = parse_limits(text);
        assert_eq!(limits.len(), 1);
        assert_eq!(limits.get("duplication\twarn"), Some(&70.0));
    }

    #[test]
    fn test_parse_name_sequence_inline() {
        // Runs of tabs act as one separator; lines without a tab are dropped.
        let text = "# comment\n\nSome Adapter\t\tACGT\nno tab here\n";
        let entries = parse_name_sequence_file(text);
        assert_eq!(
            entries,
            vec![("Some Adapter".to_string(), "ACGT".to_string())]
        );
    }

    #[test]
    fn test_limits_ext_helpers() {
        let limits = parse_limits("kmer\tignore\t1\nadapter\tignore\t0\nadapter\twarn\t5\n");

        assert!(limits.is_ignored("kmer"));
        assert!(!limits.is_module_enabled("kmer"));

        assert!(!limits.is_ignored("adapter"));
        assert!(limits.is_module_enabled("adapter"));

        // A module with no ignore entry is enabled.
        assert!(!limits.is_ignored("duplication"));
        assert!(limits.is_module_enabled("duplication"));

        assert_eq!(limits.threshold("adapter\twarn", 0.0), 5.0);
        assert_eq!(limits.threshold("adapter\terror", 10.0), 10.0);
    }
}
258}