1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
use std::path::PathBuf;
use std::process;
use clap::Parser;
use fastqc_rust::config::{FastQCConfig, TemplateName};
use fastqc_rust::runner;
// Command-line interface definition, parsed by clap's derive API.
// NOTE: the `///` comments on the fields double as the per-option `--help`
// text, so editing them changes user-visible output.
/// FastQC - A high throughput sequence QC analysis tool
#[derive(Parser, Debug)]
#[command(name = "fastqc", version = fastqc_rust::VERSION_BANNER, about)]
struct Cli {
    /// Create report files in the specified output directory.
    /// The directory must already exist.
    #[arg(short, long, value_name = "DIR")]
    outdir: Option<PathBuf>,

    /// Force the file format. Valid formats: bam, sam, bam_mapped, sam_mapped, fastq.
    #[arg(short, long, value_name = "FORMAT")]
    format: Option<String>,

    /// Files were generated by an Illumina CASAVA pipeline version >= 1.8.
    /// Sequences flagged as filtered will be excluded from the analysis.
    #[arg(long)]
    casava: bool,

    /// Files come from nanopore sequences and are in fast5 format.
    /// In this mode you can pass in directories to process.
    #[arg(long)]
    nano: bool,

    /// If running with --casava, don't remove reads flagged by CASAVA as poor quality.
    #[arg(long)]
    nofilter: bool,

    /// Disable grouping of bases for reads > 50bp.
    /// All reports will show data for every base in the read.
    /// WARNING: Using this option with very long reads may cause excessive memory use.
    #[arg(long)]
    nogroup: bool,

    /// Use exponential base groups in report.
    #[arg(long)]
    expgroup: bool,

    /// Extract the zipped report file after creating it.
    /// By default files are not extracted after creation.
    #[arg(long)]
    extract: bool,

    /// Do not extract the zipped report file after creating it (default behavior).
    #[arg(long)]
    noextract: bool,

    /// Delete the zipped output file after it has been extracted.
    /// Only has an effect if --extract is also specified.
    #[arg(long)]
    delete: bool,

    /// Specifies a non-default file which contains the list of contaminants to
    /// screen overrepresented sequences against.
    #[arg(short = 'c', long, value_name = "FILE")]
    contaminants: Option<PathBuf>,

    /// Specifies a non-default file which contains the list of adapter sequences
    /// which will be explicitly searched against the library.
    #[arg(short, long, value_name = "FILE")]
    adapters: Option<PathBuf>,

    /// Specifies a non-default file which contains a set of criteria used to
    /// determine the warn/error limits for the various modules.
    #[arg(short, long, value_name = "FILE")]
    limits: Option<PathBuf>,

    /// Specifies the number of files which can be processed simultaneously.
    /// Each thread will be allocated 250MB of memory.
    // The bare `short` derives `-t` from the field name.
    #[arg(short, long, value_name = "N", default_value = "1")]
    threads: usize,

    /// Specifies the length of Kmer to look for in the Kmer content module.
    /// Specified Kmer length must be between 2 and 10. Default length is 7.
    #[arg(short, long, value_name = "N", default_value = "7")]
    kmers: u8,

    /// Suppress all progress messages on stdout and only report errors.
    #[arg(short, long)]
    quiet: bool,

    /// Selects a directory to be used for temporary files written when
    /// generating report images. Defaults to system temp dir.
    #[arg(short, long, value_name = "DIR")]
    dir: Option<PathBuf>,

    /// Sets an artificial lower limit on the length of the sequence to be shown
    /// in the report. Sequences shorter than this limit will not be shown.
    // Java uses --min_length (underscore), clap defaults to --min-length (hyphen).
    // Allow both forms.
    #[arg(
        long = "min_length",
        alias = "min-length",
        value_name = "N",
        default_value = "0"
    )]
    min_length: usize,

    /// Specifies the truncation length used for duplicate detection.
    /// Reads longer than this value will be truncated before checking for duplicates.
    // Java uses --dup_length (underscore)
    #[arg(
        long = "dup_length",
        alias = "dup-length",
        value_name = "N",
        default_value = "0"
    )]
    dup_length: usize,

    /// Save images as SVG files as well as PNG.
    #[arg(long)]
    svg: bool,

    /// Select the HTML report template.
    /// "classic" produces the original FastQC report layout.
    /// "modern" uses a redesigned layout with responsive sidebar and help text.
    // Long-only on purpose: `-t` is already taken by `--threads`, and clap
    // requires short flags to be unique within a command (it asserts on
    // duplicates in debug builds).
    #[arg(long, value_name = "NAME", default_value = "classic")]
    template: TemplateName,

    /// Input files (one or more FastQ, BAM, or SAM files).
    #[arg(required = true)]
    files: Vec<PathBuf>,
}
fn main() {
let cli = Cli::parse();
// Validate kmer range
if cli.kmers < 2 || cli.kmers > 10 {
eprintln!(
"Error: kmer size must be between 2 and 10, got {}",
cli.kmers
);
process::exit(1);
}
// Validate format if provided
if let Some(ref fmt) = cli.format {
match fmt.as_str() {
"bam" | "sam" | "bam_mapped" | "sam_mapped" | "fastq" => {}
_ => {
eprintln!(
"Error: unrecognized format '{}'. \
Valid formats: bam, sam, bam_mapped, sam_mapped, fastq",
fmt
);
process::exit(1);
}
}
}
// Validate output directory exists if specified
if let Some(ref dir) = cli.outdir {
if !dir.is_dir() {
eprintln!(
"Error: output directory '{}' does not exist or is not a directory",
dir.display()
);
process::exit(1);
}
}
// Build config
let do_unzip = if cli.extract {
Some(true)
} else if cli.noextract {
Some(false)
} else {
None
};
let config = FastQCConfig {
nogroup: cli.nogroup,
expgroup: cli.expgroup,
quiet: cli.quiet,
kmer_size: cli.kmers,
threads: cli.threads,
output_dir: cli.outdir,
casava: cli.casava,
nano: cli.nano,
nofilter: cli.nofilter,
do_unzip,
delete_after_unzip: cli.delete,
sequence_format: cli.format,
contaminant_file: cli.contaminants,
adapter_file: cli.adapters,
limits_file: cli.limits,
min_length: cli.min_length,
dup_length: cli.dup_length,
svg_output: cli.svg,
temp_dir: cli.dir,
template: cli.template,
};
if let Err(exit_code) = runner::run(&config, &cli.files) {
process::exit(exit_code);
}
}