1use std::io;
2use std::path::{Path, PathBuf};
3use std::sync::atomic::{AtomicBool, Ordering};
4
5use rayon::prelude::*;
6
7use crate::config::FastQCConfig;
8use crate::modules;
9use crate::report;
10use crate::sequence::casava;
11use crate::sequence::open_sequence_file;
12use crate::sequence::{SequenceFile, SequenceFileGroup};
13
/// A set of input paths that are analysed together and reported as one unit.
///
/// `Debug` is derived so a group can be printed in diagnostics and in failed
/// test assertions.
#[derive(Debug)]
struct FileGroup {
    /// Display name of the group; used in progress messages and as the basis
    /// for the report file names.
    name: String,
    /// Paths of the files that make up the group, in the order they are read.
    files: Vec<PathBuf>,
}
22
23pub fn run(config: &FastQCConfig, files: &[PathBuf]) -> Result<(), i32> {
30 let limits = config.load_limits().map_err(|e| {
31 eprintln!("Failed to load limits: {}", e);
32 1
33 })?;
34
35 let mut valid_files = Vec::new();
39 let mut something_failed = false;
40 for file_path in files {
41 let file_name = file_path.to_string_lossy();
42 if !file_name.starts_with("stdin") && !file_path.exists() {
43 eprintln!("{} doesn't exist", file_name);
44 something_failed = true;
45 } else if config.nano && file_path.is_dir() {
46 match find_fast5_files(file_path) {
49 Ok(fast5_files) => {
50 if fast5_files.is_empty() {
51 eprintln!("No .fast5 files found in {}", file_path.display());
52 something_failed = true;
53 } else {
54 valid_files.extend(fast5_files);
55 }
56 }
57 Err(e) => {
58 eprintln!("Error scanning directory {}: {}", file_path.display(), e);
59 something_failed = true;
60 }
61 }
62 } else {
63 valid_files.push(file_path.clone());
64 }
65 }
66
67 let file_groups = build_file_groups(config, &valid_files);
70
71 let pool = rayon::ThreadPoolBuilder::new()
74 .num_threads(config.threads)
75 .build()
76 .map_err(|e| {
77 eprintln!("Failed to create thread pool: {}", e);
78 1
79 })?;
80
81 let failed = AtomicBool::new(something_failed);
82
83 pool.install(|| {
84 file_groups.par_iter().for_each(|group| {
85 if !config.quiet {
86 eprintln!("Started analysis of {}", group.name);
87 }
88
89 match process_group(config, &limits, group) {
90 Ok(()) => {
91 if !config.quiet {
92 eprintln!("Analysis complete for {}", group.name);
93 }
94 }
95 Err(e) => {
96 eprintln!("Failed to process {}: {}", group.name, e);
97 failed.store(true, Ordering::Relaxed);
98 }
99 }
100 });
101 });
102
103 if failed.load(Ordering::Relaxed) {
104 Err(1)
105 } else {
106 Ok(())
107 }
108}
109
110fn build_file_groups(config: &FastQCConfig, files: &[PathBuf]) -> Vec<FileGroup> {
116 if config.casava {
117 let casava_groups = casava::get_casava_groups(files);
120 casava_groups
121 .into_iter()
122 .map(|(name, paths)| FileGroup { name, files: paths })
123 .collect()
124 } else {
125 files
128 .iter()
129 .map(|path| {
130 let name = path
131 .file_name()
132 .map(|n| n.to_string_lossy().into_owned())
133 .unwrap_or_else(|| path.to_string_lossy().into_owned());
134 FileGroup {
135 name,
136 files: vec![path.clone()],
137 }
138 })
139 .collect()
140 }
141}
142
143fn process_group(
148 config: &FastQCConfig,
149 limits: &crate::config::Limits,
150 group: &FileGroup,
151) -> io::Result<()> {
152 let mut seq_file: Box<dyn SequenceFile> = if group.files.len() == 1 {
154 open_sequence_file(config, &group.files[0])?
157 } else {
158 let mut readers: Vec<Box<dyn SequenceFile>> = Vec::new();
161 for path in &group.files {
162 readers.push(open_sequence_file(config, path)?);
163 }
164 Box::new(SequenceFileGroup::new(group.name.clone(), readers))
165 };
166
167 let file_display_name = group.name.clone();
168
169 let mut modules = modules::create_modules(config, limits);
171
172 for module in modules.iter_mut() {
174 module.set_filename(&file_display_name);
175 }
176
177 let mut sequence_count: u64 = 0;
180 let mut last_percent: i32 = -1;
181
182 loop {
183 match seq_file.next() {
184 Some(Ok(seq)) => {
185 sequence_count += 1;
186
187 for module in modules.iter_mut() {
188 if seq.is_filtered && module.ignore_filtered_sequences() {
190 continue;
191 }
192 module.process_sequence(&seq);
193 }
194
195 if !config.quiet && sequence_count.is_multiple_of(1000) {
197 let percent = seq_file.percent_complete() as i32;
198 if percent != last_percent && percent % 5 == 0 {
199 eprintln!("Approx {}% complete for {}", percent, file_display_name);
200 last_percent = percent;
201 }
202 }
203 }
204 Some(Err(e)) => {
205 return Err(io::Error::new(io::ErrorKind::InvalidData, e));
206 }
207 None => break, }
209 }
210
211 for module in modules.iter_mut() {
213 module.finalize();
214 }
215
216 let base_name = strip_extensions(&file_display_name.replace("stdin:", ""));
221
222 let output_dir = if let Some(ref dir) = config.output_dir {
225 dir.clone()
226 } else {
227 group
228 .files
229 .first()
230 .and_then(|f| f.parent())
231 .unwrap_or_else(|| Path::new("."))
232 .to_path_buf()
233 };
234
235 let html_path = output_dir.join(format!("{}_fastqc.html", base_name));
239 let zip_path = output_dir.join(format!("{}_fastqc.zip", base_name));
240
241 let html_content =
243 report::html::generate_html_report(&modules, &file_display_name, config.template)?;
244
245 std::fs::write(&html_path, &html_content)?;
248
249 report::archive::create_zip_archive(
251 &modules,
252 &file_display_name,
253 &base_name,
254 &zip_path,
255 &html_content,
256 config.svg_output,
257 config.template,
258 )?;
259
260 if config.do_unzip == Some(true) {
264 report::archive::extract_zip(&zip_path)?;
265
266 if config.delete_after_unzip {
269 std::fs::remove_file(&zip_path)?;
270 }
271 }
272
273 Ok(())
274}
275
/// Removes known sequence-file extensions from the end of `name`.
///
/// The extensions are tried once each, in a fixed order, against whatever is
/// left after the previous removal. `"sample.fastq.gz"` therefore becomes
/// `"sample"`, while a suffix that only surfaces after a later entry in the
/// list has been removed (e.g. the `.txt` in `"sample.txt.fastq"`) is kept.
fn strip_extensions(name: &str) -> String {
    const KNOWN_EXTENSIONS: [&str; 10] = [
        ".gz", ".bz2", ".txt", ".fastq", ".fq", ".csfastq", ".sam", ".bam", ".ubam", ".fast5",
    ];

    KNOWN_EXTENSIONS
        .iter()
        .fold(name, |rest, ext| rest.strip_suffix(*ext).unwrap_or(rest))
        .to_string()
}
291
292fn find_fast5_files(dir: &Path) -> io::Result<Vec<PathBuf>> {
297 let mut files = Vec::new();
298 find_fast5_files_recursive(dir, &mut files)?;
299 files.sort(); Ok(files)
301}
302
/// Appends to `files` every path under `dir` whose extension is `fast5`
/// (compared ASCII case-insensitively), descending into subdirectories.
///
/// # Errors
///
/// Returns the first error from reading a directory or one of its entries.
/// Paths pushed before the failure stay in `files`.
fn find_fast5_files_recursive(dir: &Path, files: &mut Vec<PathBuf>) -> io::Result<()> {
    for entry in std::fs::read_dir(dir)? {
        let candidate = entry?.path();

        if candidate.is_dir() {
            find_fast5_files_recursive(&candidate, files)?;
            continue;
        }

        let has_fast5_ext = candidate
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("fast5"));
        if has_fast5_ext {
            files.push(candidate);
        }
    }
    Ok(())
}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `PathBuf` inputs for a grouping test from plain names.
    fn paths(names: &[&str]) -> Vec<PathBuf> {
        names.iter().copied().map(PathBuf::from).collect()
    }

    /// Number of files in the group called `name`; panics if no such group exists.
    fn member_count(groups: &[FileGroup], name: &str) -> usize {
        groups
            .iter()
            .find(|group| group.name == name)
            .unwrap()
            .files
            .len()
    }

    #[test]
    fn test_strip_extensions() {
        let cases = [
            ("sample.fastq", "sample"),
            ("sample.fastq.gz", "sample"),
            ("sample.fq.bz2", "sample"),
            ("sample.bam", "sample"),
            ("sample.sam", "sample"),
            ("sample.txt.gz", "sample"),
            ("minimal.fastq", "minimal"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_extensions(input), expected);
        }
    }

    #[test]
    fn test_build_file_groups_default() {
        let config = FastQCConfig::default();
        let inputs = paths(&["a.fastq", "b.fastq"]);

        let groups = build_file_groups(&config, &inputs);

        // Without CASAVA grouping every input is its own single-file group.
        assert_eq!(groups.len(), 2);
        for (group, expected_name) in groups.iter().zip(["a.fastq", "b.fastq"]) {
            assert_eq!(group.name, expected_name);
            assert_eq!(group.files.len(), 1);
        }
    }

    #[test]
    fn test_build_file_groups_casava() {
        let config = FastQCConfig {
            casava: true,
            ..FastQCConfig::default()
        };
        let inputs = paths(&[
            "Sample_S1_L001_R1_001.fastq.gz",
            "Sample_S1_L001_R1_002.fastq.gz",
            "Other_S2_L001_R1_001.fastq.gz",
        ]);

        let groups = build_file_groups(&config, &inputs);

        assert_eq!(groups.len(), 2);
        assert_eq!(member_count(&groups, "Sample_S1_L001_R1.fastq.gz"), 2);
        assert_eq!(member_count(&groups, "Other_S2_L001_R1.fastq.gz"), 1);
    }

    #[test]
    fn test_build_file_groups_stdin() {
        let config = FastQCConfig::default();
        let inputs = paths(&["stdin"]);

        let groups = build_file_groups(&config, &inputs);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].name, "stdin");
    }
}