1use crate::seqio::JunkMode;
2use anyhow::{Context, Result, bail};
3use std::collections::VecDeque;
4use std::ffi::OsString;
5use std::fs;
6use std::path::PathBuf;
7
/// Help/usage text for the command-line interface.
///
/// `parse_args` returns this text as its error message when help is requested
/// (`-h`, `--help`, `help`) or when no arguments are supplied at all.
pub const USAGE: &str = "bbnorm-rs: Rust BBNorm compatibility port\n\nUsage:\n bbnorm-rs in=<reads.fq> out=<kept.fq> outt=<tossed.fq> hist=<hist.tsv> [passes=1]\n\nThis working Rust slice supports exact k-mer counting for small inputs, automatic bounded count-min input sketches for large inputs, explicit bounded sketches via cells/matrixbits/sketchmemory, conservative atomic bits=32 sketch insertion with packed small-bit fallbacks, constrained and memory-sized prefilter sketch collision behavior, deterministic normalization, managed multipass temp-file orchestration, count-up mode with bounded kept-count sketches when requested, table-based ECC for covered paths, hist/rhist/peaks output, low/mid/high depth bins, zlib-rs gzip, BBTools-style pigz/unpigz hooks when available, bounded cardinality/loglog estimates when requested, and Rayon worker controls including threads=auto/max/all. Wrapper-sampling requests fall back to the supported engine with notes.";
9
/// Bucket count used by `CardinalitySettings::default()`.
pub const CARDINALITY_DEFAULT_BUCKETS: usize = 2048;
/// Largest bucket count constant (2^26 = 67,108,864).
///
/// NOTE(review): presumably the upper bound enforced when a bucket count is
/// parsed or validated; the code that uses it is not visible in this part of
/// the file — confirm.
pub const CARDINALITY_MAX_BUCKETS: usize = 1 << 26;
12
/// Optional count-min sketch parameters stored in `Config::count_min`.
///
/// Every field is an `Option`, and the derived `Default` leaves all of them
/// `None`.
///
/// NOTE(review): the code that sets and reads these fields is outside this
/// part of the file; the per-field notes below are inferred from the field
/// names and should be confirmed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CountMinSettings {
    /// Presumably the number of sketch cells — TODO confirm.
    pub cells: Option<usize>,
    /// Presumably the number of hash functions — TODO confirm.
    pub hashes: Option<usize>,
    /// Presumably the bit width of each cell — TODO confirm.
    pub bits: Option<u8>,
    /// Presumably a memory budget in bytes — TODO confirm.
    pub memory_bytes: Option<usize>,
}
20
/// Prefilter parameters stored in `Config::prefilter`.
///
/// The derived `Default` yields `false` for both flags and `None` for every
/// optional value.
///
/// NOTE(review): the code that sets and reads these fields is outside this
/// part of the file; the per-field notes below are inferred from the field
/// names and should be confirmed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PrefilterSettings {
    /// Presumably set when a prefilter is requested — TODO confirm.
    pub enabled: bool,
    /// Presumably set when the prefilter is explicitly switched off — TODO confirm.
    pub force_disabled: bool,
    /// Presumably the number of prefilter cells — TODO confirm.
    pub cells: Option<usize>,
    /// Presumably the number of hash functions — TODO confirm.
    pub hashes: Option<usize>,
    /// Presumably the bit width of each cell — TODO confirm.
    pub bits: Option<u8>,
    /// Presumably a memory budget in bytes — TODO confirm.
    pub memory_bytes: Option<usize>,
    /// Presumably a memory fraction expressed in millionths — TODO confirm the unit.
    pub memory_fraction_micros: Option<u32>,
}
31
/// Cardinality-estimation parameters stored in `Config::cardinality`.
///
/// Only `PartialEq` is derived (not `Eq`) because `min_probability` is an
/// `f64`. Defaults come from the hand-written `Default` impl, not a derive.
///
/// NOTE(review): the code that sets and reads these fields is outside this
/// part of the file; the per-field notes below are inferred from the field
/// names and should be confirmed.
#[derive(Debug, Clone, PartialEq)]
pub struct CardinalitySettings {
    /// Presumably enables estimation over the input reads — TODO confirm.
    pub input: bool,
    /// Presumably enables estimation over the output reads — TODO confirm.
    pub output: bool,
    /// Number of buckets; defaults to `CARDINALITY_DEFAULT_BUCKETS`.
    pub buckets: usize,
    /// Optional k-mer length override; `None` by default.
    pub k: Option<usize>,
    /// Seed value; defaults to 0.
    pub seed: u64,
    /// Minimum probability threshold; defaults to 0.0.
    pub min_probability: f64,
}
41
42impl Default for CardinalitySettings {
43 fn default() -> Self {
44 Self {
45 input: false,
46 output: false,
47 buckets: CARDINALITY_DEFAULT_BUCKETS,
48 k: None,
49 seed: 0,
50 min_probability: 0.0,
51 }
52 }
53}
54
/// Complete run configuration produced by `parse_args`.
///
/// `parse_args` starts from `Config::default()`, applies every `key=value`
/// argument through `handle_key_value`, fills `in1`/`in2` from positional
/// arguments when they were not set explicitly, and then runs validation.
///
/// Field comments below only describe what the visible parser code
/// establishes; fields without a comment are set or consumed by code outside
/// this part of the file.
#[derive(Debug, Clone)]
pub struct Config {
    /// Primary input; set by `in`/`input`/`in1`/`input1`, else the first positional argument.
    pub in1: Option<PathBuf>,
    /// Second input; set by `in2`/`input2`, else the second positional argument.
    pub in2: Option<PathBuf>,
    /// Additional paths appended by each `extra=` argument.
    pub extra: Vec<PathBuf>,
    /// Set by `out`/`output`/`out1`/`output1`/`outk`/`outkeep`/`outgood`.
    pub out1: Option<PathBuf>,
    /// Set by `out2`/`output2`/`outk2`/`outkeep2`/`outgood2`.
    pub out2: Option<PathBuf>,
    /// Set by `outt`/`outt1`/`outtoss`/`outoss`/`outbad`.
    pub out_toss1: Option<PathBuf>,
    /// Set by `outt2`/`outtoss2`/`outoss2`/`outbad2`.
    pub out_toss2: Option<PathBuf>,
    /// Set by `outl`/`outl1`/`outlow`/`outlow1`.
    pub out_low1: Option<PathBuf>,
    /// Set by `outl2`/`outlow2`.
    pub out_low2: Option<PathBuf>,
    /// Set by `outm`/`outm1`/`outmid`/`outmid1`/`outmiddle`.
    pub out_mid1: Option<PathBuf>,
    /// Set by `outm2`/`outmid2`/`outmiddle2`.
    pub out_mid2: Option<PathBuf>,
    /// Set by `outh`/`outh1`/`outhigh`/`outhigh1`.
    pub out_high1: Option<PathBuf>,
    /// Set by `outh2`/`outhigh2`.
    pub out_high2: Option<PathBuf>,
    /// Set by `outu`/`outu1`/`outuncorrected`.
    pub out_uncorrected1: Option<PathBuf>,
    /// Set by `outu2`/`outuncorrected2`.
    pub out_uncorrected2: Option<PathBuf>,
    /// Set by `hist`/`histin`/`inhist`/`khist`.
    pub hist_in: Option<PathBuf>,
    /// Set by `histout`/`outhist`/`hist2`/`khistout`.
    pub hist_out: Option<PathBuf>,
    /// Set by `rhist`.
    pub rhist_in: Option<PathBuf>,
    /// Set by `rhistout`.
    pub rhist_out: Option<PathBuf>,
    /// Set by `peaks`.
    pub peaks_in: Option<PathBuf>,
    /// Set by `peaksout`.
    pub peaks_out: Option<PathBuf>,
    pub match_hist_out: Option<PathBuf>,
    pub insert_hist_out: Option<PathBuf>,
    pub quality_accuracy_hist_out: Option<PathBuf>,
    pub indel_hist_out: Option<PathBuf>,
    pub error_hist_out: Option<PathBuf>,
    pub quality_hist_out: Option<PathBuf>,
    pub base_quality_hist_out: Option<PathBuf>,
    pub quality_count_hist_out: Option<PathBuf>,
    pub average_quality_hist_out: Option<PathBuf>,
    pub overall_base_quality_hist_out: Option<PathBuf>,
    pub length_hist_out: Option<PathBuf>,
    pub gc_hist_out: Option<PathBuf>,
    pub base_hist_out: Option<PathBuf>,
    pub entropy_hist_out: Option<PathBuf>,
    pub identity_hist_out: Option<PathBuf>,
    pub barcode_stats_out: Option<PathBuf>,
    /// Set by `k`/`kmer`.
    pub k: usize,
    /// Set by `minq`/`minqual`.
    pub min_quality: u8,
    /// Set by `qin` and its aliases, or together with the output offset by `ascii`/`quality`/`qual`.
    pub quality_in_offset: u8,
    /// Set by `qout` and its aliases, or together with the input offset by `ascii`/`quality`/`qual`.
    pub quality_out_offset: u8,
    /// Set by `changequality`/`cq`; forced to `false` by a true `ignorebadquality`/`ibq`.
    pub change_quality: bool,
    /// Set by `mincalledquality`, clamped to 0..=93.
    pub min_called_quality: u8,
    /// Set by `maxcalledquality`, clamped to 1..=93.
    pub max_called_quality: u8,
    /// Set by `fakequality`/`qfake`, clamped to 0..=93.
    pub fake_quality: u8,
    /// Set by `fastawrap`/`wrap`.
    pub fasta_wrap: usize,
    /// Set by `utot`.
    pub u_to_t: bool,
    /// Set by `tuc`/`touppercase`.
    pub to_upper_case: bool,
    /// Set by `lctn`/`lowercaseton`.
    pub lower_case_to_n: bool,
    /// Set by `dotdashxton`.
    pub dot_dash_x_to_n: bool,
    /// Set by `undefinedton`/`iupacton`/`itn`.
    pub iupac_to_n: bool,
    pub fix_junk_and_iupac: bool,
    /// Adjusted by `fixjunk`, `ignorejunk`, `flagjunk`, `tossjunk`, `crashjunk`/`failjunk` and `junk`.
    pub junk_mode: JunkMode,
    /// Set by `minprob`.
    pub min_prob: f64,
    /// Set by `reads`/`maxreads`.
    pub max_reads: Option<u64>,
    /// Set by `tablereads`/`buildreads`.
    pub table_reads: Option<u64>,
    /// Set by `ml`/`minlen`/`minlength`.
    pub min_length: usize,
    /// Set by `trimleft`/`qtrimleft`.
    pub trim_left: bool,
    /// Set by `trimright`/`qtrimright`.
    pub trim_right: bool,
    /// Set by `trimq`/`trimquality`/`trimq2`.
    pub trim_quality: f64,
    pub trim_optimal: bool,
    pub trim_optimal_bias: Option<f64>,
    pub trim_window: bool,
    pub trim_window_length: usize,
    /// Set by `trimgoodinterval`.
    pub trim_min_good_interval: usize,
    pub interleaved: bool,
    pub test_interleaved: bool,
    /// Set by `keepall`.
    pub keep_all: bool,
    /// Set by `zerobin`.
    pub zero_bin: bool,
    /// Set by `deterministic`/`dr`/`det`; a note is recorded when it is turned off.
    pub deterministic: bool,
    /// Set by `rn`/`rename`/`renamereads`.
    pub rename_reads: bool,
    /// Set by `canonical`.
    pub canonical: bool,
    /// Set by `removeduplicatekmers`/`rdk`.
    pub remove_duplicate_kmers: bool,
    /// Set by `fixspikes`/`fs`.
    pub fix_spikes: bool,
    /// Set by `target`/`targetdepth`/`tgt`.
    pub target_depth: u64,
    pub target_depth_first: Option<u64>,
    pub target_bad_percent_low: f64,
    pub target_bad_percent_high: f64,
    /// Set by `max`/`maxdepth`.
    pub max_depth: Option<u64>,
    /// Set by `min`/`mindepth`.
    pub min_depth: u64,
    /// Set by `minkmers` and its aliases; the parser raises values below 1 to 1.
    pub min_kmers_over_min_depth: usize,
    /// Set by `percentile`/`depthpercentile`/`dp`.
    pub depth_percentile: f64,
    /// Set by `highdepthpercentile`/`highpercentile`/`hdp`.
    pub high_percentile: f64,
    /// Set by `lowdepthpercentile`/`lowpercentile`/`ldp`.
    pub low_percentile: f64,
    /// Set by `errordetectratio`/`edr`.
    pub error_detect_ratio: u64,
    /// Set by `highthresh`/`hthresh`/`ht`.
    pub high_thresh: u64,
    /// Set by `lowthresh`/`lthresh`/`lt`.
    pub low_thresh: u64,
    /// Set by `tossbadreads` and its aliases (which also set the `_first` flag), or alone by the `2`/`f` variants.
    pub toss_error_reads: bool,
    /// Set by `tossbadreads` and its aliases, or alone by the `1` variants.
    pub toss_error_reads_first: bool,
    /// Set by `requirebothbad`/`rbb`; `removeifeitherbad`/`rieb` stores the negation.
    pub require_both_bad: bool,
    /// Set by `saverarereads`/`srr`.
    pub save_rare_reads: bool,
    /// Set by `discardbadonly`/`dbo` and their `f`/`2` variants (which also set the `_first` flag).
    pub discard_bad_only: bool,
    /// Set by `discardbadonly`/`dbo` and variants, or alone by `discardbadonly1`/`dbo1`.
    pub discard_bad_only_first: bool,
    pub error_correct: bool,
    pub error_correct_first: bool,
    pub error_correct_final: bool,
    pub overlap_error_correct: bool,
    pub overlap_error_correct_auto: bool,
    pub mark_errors_only: bool,
    pub mark_uncorrectable_errors: bool,
    pub trim_after_marking: bool,
    pub mark_with_one: bool,
    pub error_correct_ratio: u64,
    pub error_correct_high_thresh: u64,
    pub error_correct_low_thresh: u64,
    pub max_errors_to_correct: usize,
    pub max_quality_to_correct: u8,
    pub correct_from_left: bool,
    pub correct_from_right: bool,
    pub suffix_len: usize,
    pub prefix_len: usize,
    pub count_up: bool,
    pub add_bad_reads_countup: bool,
    /// Set by `uselowerdepth`/`uld`.
    pub use_lower_depth: bool,
    pub toss_by_low_true_depth: bool,
    /// Set by `lbd`/`lowbindepth`/`lowerlimit`.
    pub low_bin_depth: i64,
    /// Set by `hbd`/`highbindepth`/`upperlimit`.
    pub high_bin_depth: i64,
    /// Set by `histlen`/`histogramlen`; the parser stores the given value plus one (saturating).
    pub hist_len: usize,
    pub side_hist_len: Option<usize>,
    pub gc_bins: Option<usize>,
    pub entropy_bins: usize,
    pub entropy_k: usize,
    pub entropy_window: usize,
    pub allow_entropy_ns: bool,
    pub identity_bins: usize,
    pub cardinality: CardinalitySettings,
    /// Set by `histcol`/`histcolumns`/`histogramcolumns`.
    pub hist_columns: u8,
    /// Set by `printzerocoverage`/`pzc`.
    pub print_zero_coverage: bool,
    /// Set by `minheight`/`h`.
    pub peak_min_height: u64,
    /// Set by `minvolume`/`v`.
    pub peak_min_volume: u64,
    /// Set by `minwidth`/`w`.
    pub peak_min_width: usize,
    /// Set by `minpeak`/`minp`.
    pub peak_min_peak: usize,
    /// Set by `maxpeak`/`maxp`.
    pub peak_max_peak: usize,
    /// Set by `maxpeakcount`/`maxpc`/`maxpeaks`; the parser raises values below 1 to 1.
    pub peak_max_count: usize,
    /// Set by `ploidy`.
    pub peak_ploidy: i32,
    /// Set by `overwrite`/`ow`.
    pub overwrite: bool,
    pub append: bool,
    /// Set by `passes`/`p`, or by the `1pass`/`1p`/`2pass`/`2p` shortcuts.
    pub passes: usize,
    /// Worker count from `threads`/`t`; stays `None` for `threads=auto` and for values below 1.
    pub threads: Option<usize>,
    /// When unset after parsing, copied from `threads` if `threads` is greater than 1.
    pub gzip_threads: Option<usize>,
    pub temp_dir: Option<PathBuf>,
    pub use_temp_dir: bool,
    pub max_countup_spill_initial_runs: Option<usize>,
    pub max_countup_spill_merge_runs: Option<usize>,
    pub max_countup_spill_final_runs: Option<usize>,
    pub max_countup_spill_live_bytes: Option<u64>,
    pub max_countup_spill_final_live_bytes: Option<u64>,
    pub max_countup_spill_write_bytes: Option<u64>,
    pub table_initial_size: Option<usize>,
    pub table_prealloc_fraction: Option<f64>,
    pub build_passes: usize,
    pub auto_count_min: bool,
    pub force_exact_counts: bool,
    pub auto_count_min_input_bytes: usize,
    pub auto_count_min_read_threshold: u64,
    pub auto_count_min_memory_bytes: Option<usize>,
    pub count_min: CountMinSettings,
    pub count_min_bits_first: Option<u8>,
    pub prefilter: PrefilterSettings,
    pub locked_increment: Option<bool>,
    pub gpu_counting: bool,
    pub gpu_helper: Option<PathBuf>,
    pub gpu_persistent: bool,
    /// Human-readable notes appended by the parser (accepted no-op options, expansions, and so on).
    pub notes: Vec<String>,
}
222
impl Default for Config {
    /// Returns the configuration `parse_args` starts from before any
    /// argument is applied: no paths set, no notes, and the numeric and
    /// boolean defaults listed below.
    fn default() -> Self {
        Self {
            in1: None,
            in2: None,
            extra: Vec::new(),
            out1: None,
            out2: None,
            out_toss1: None,
            out_toss2: None,
            out_low1: None,
            out_low2: None,
            out_mid1: None,
            out_mid2: None,
            out_high1: None,
            out_high2: None,
            out_uncorrected1: None,
            out_uncorrected2: None,
            hist_in: None,
            hist_out: None,
            rhist_in: None,
            rhist_out: None,
            peaks_in: None,
            peaks_out: None,
            match_hist_out: None,
            insert_hist_out: None,
            quality_accuracy_hist_out: None,
            indel_hist_out: None,
            error_hist_out: None,
            quality_hist_out: None,
            base_quality_hist_out: None,
            quality_count_hist_out: None,
            average_quality_hist_out: None,
            overall_base_quality_hist_out: None,
            length_hist_out: None,
            gc_hist_out: None,
            base_hist_out: None,
            entropy_hist_out: None,
            identity_hist_out: None,
            barcode_stats_out: None,
            k: 31,
            min_quality: 5,
            // Same fallback (33) the quality-offset parser arms use when no offset is resolved.
            quality_in_offset: 33,
            quality_out_offset: 33,
            change_quality: true,
            min_called_quality: 2,
            max_called_quality: 50,
            fake_quality: 30,
            fasta_wrap: 70,
            u_to_t: false,
            to_upper_case: false,
            lower_case_to_n: false,
            dot_dash_x_to_n: false,
            iupac_to_n: false,
            fix_junk_and_iupac: false,
            junk_mode: JunkMode::Crash,
            min_prob: 0.5,
            max_reads: None,
            table_reads: None,
            min_length: 1,
            trim_left: false,
            trim_right: false,
            trim_quality: 5.0,
            trim_optimal: true,
            trim_optimal_bias: None,
            trim_window: false,
            trim_window_length: 4,
            trim_min_good_interval: 2,
            interleaved: false,
            test_interleaved: true,
            keep_all: false,
            zero_bin: false,
            deterministic: true,
            rename_reads: false,
            canonical: true,
            remove_duplicate_kmers: true,
            fix_spikes: false,
            target_depth: 100,
            target_depth_first: None,
            target_bad_percent_low: 0.85,
            target_bad_percent_high: 1.5,
            max_depth: None,
            min_depth: 5,
            min_kmers_over_min_depth: 15,
            depth_percentile: 0.54,
            high_percentile: 0.90,
            low_percentile: 0.25,
            error_detect_ratio: 125,
            high_thresh: 12,
            low_thresh: 3,
            toss_error_reads: false,
            toss_error_reads_first: false,
            require_both_bad: false,
            save_rare_reads: false,
            discard_bad_only: false,
            discard_bad_only_first: false,
            error_correct: false,
            error_correct_first: false,
            error_correct_final: false,
            overlap_error_correct: false,
            overlap_error_correct_auto: false,
            mark_errors_only: false,
            mark_uncorrectable_errors: false,
            trim_after_marking: false,
            mark_with_one: false,
            error_correct_ratio: 140,
            error_correct_high_thresh: 22,
            error_correct_low_thresh: 2,
            max_errors_to_correct: 3,
            max_quality_to_correct: 127,
            correct_from_left: true,
            correct_from_right: true,
            suffix_len: 3,
            prefix_len: 3,
            count_up: false,
            add_bad_reads_countup: false,
            use_lower_depth: true,
            toss_by_low_true_depth: true,
            low_bin_depth: 10,
            high_bin_depth: 80,
            // One more than 2^20; the `histlen=` parser arm likewise stores its value plus one.
            hist_len: (1 << 20) + 1,
            side_hist_len: None,
            gc_bins: None,
            entropy_bins: 1000,
            entropy_k: 5,
            entropy_window: 50,
            allow_entropy_ns: true,
            identity_bins: 750,
            cardinality: CardinalitySettings::default(),
            hist_columns: 3,
            print_zero_coverage: false,
            peak_min_height: 2,
            peak_min_volume: 5,
            peak_min_width: 3,
            peak_min_peak: 2,
            // Effectively "no upper limit": the largest 32-bit signed value.
            peak_max_peak: i32::MAX as usize,
            peak_max_count: 10,
            peak_ploidy: -1,
            overwrite: false,
            append: false,
            // Two passes unless `passes=`, `1pass`/`1p` or `2pass`/`2p` says otherwise.
            passes: 2,
            threads: None,
            gzip_threads: None,
            temp_dir: None,
            use_temp_dir: false,
            max_countup_spill_initial_runs: None,
            max_countup_spill_merge_runs: None,
            max_countup_spill_final_runs: None,
            max_countup_spill_live_bytes: None,
            max_countup_spill_final_live_bytes: None,
            max_countup_spill_write_bytes: None,
            table_initial_size: None,
            table_prealloc_fraction: None,
            build_passes: 1,
            auto_count_min: true,
            force_exact_counts: false,
            // 32 MiB.
            auto_count_min_input_bytes: 32 * 1024 * 1024,
            auto_count_min_read_threshold: 250_000,
            auto_count_min_memory_bytes: None,
            count_min: CountMinSettings::default(),
            count_min_bits_first: None,
            prefilter: PrefilterSettings::default(),
            locked_increment: None,
            gpu_counting: false,
            gpu_helper: None,
            gpu_persistent: false,
            notes: Vec::new(),
        }
    }
}
393
394pub fn parse_args<I>(args: I) -> Result<Config>
395where
396 I: IntoIterator<Item = OsString>,
397{
398 let mut config = Config::default();
399 let mut positional = Vec::new();
400 let mut saw_arg = false;
401 let mut pending: VecDeque<OsString> = args.into_iter().collect();
402
403 while let Some(raw) = pending.pop_front() {
404 saw_arg = true;
405 let arg = raw.to_string_lossy().into_owned();
406 if arg == "-h" || arg == "--help" || arg.eq_ignore_ascii_case("help") {
407 bail!(USAGE);
408 }
409 if let Some((key, value)) = arg.split_once('=') {
410 let key = key.to_ascii_lowercase();
411 if key == "config" {
412 let expanded = read_config_args(value)?;
413 config.notes.push(format!(
414 "config={value} expanded into {} BBTools-style argument line(s)",
415 expanded.len()
416 ));
417 for item in expanded.into_iter().rev() {
418 pending.push_front(OsString::from(item));
419 }
420 } else {
421 handle_key_value(&mut config, &key, value)?;
422 }
423 } else if arg.eq_ignore_ascii_case("null") {
424 } else if arg.eq_ignore_ascii_case("1pass") || arg.eq_ignore_ascii_case("1p") {
426 config.passes = 1;
427 config.notes.push("single-pass mode selected".to_string());
428 } else if arg.eq_ignore_ascii_case("2pass") || arg.eq_ignore_ascii_case("2p") {
429 config.passes = 2;
430 } else {
431 let key = arg.to_ascii_lowercase();
432 if is_bare_boolean_key(&key) {
433 handle_key_value(&mut config, &key, "t")?;
434 } else {
435 positional.push(PathBuf::from(arg));
436 }
437 }
438 }
439
440 if !saw_arg {
441 bail!(USAGE);
442 }
443
444 if positional.len() > 2 {
445 bail!(
446 "expected at most two positional inputs; use in=<file> and in2=<file> for paired input"
447 );
448 }
449 if config.in1.is_none() {
450 config.in1 = positional.first().cloned();
451 }
452 if config.in2.is_none() {
453 config.in2 = positional.get(1).cloned();
454 }
455
456 fill_default_gzip_threads(&mut config);
457 validate(&mut config)?;
458 Ok(config)
459}
460
/// Reports whether `key` is a flag that may appear on the command line
/// without `=value`.
///
/// `parse_args` lower-cases a bare argument, asks this function, and on
/// `true` handles it as if `key=t` had been written. The comparison is exact
/// and case-sensitive, so callers must pass the lower-cased form.
fn is_bare_boolean_key(key: &str) -> bool {
    // Kept as a flat table so adding or removing a flag is a one-word change.
    const BARE_BOOLEAN_KEYS: &[&str] = &[
        "keepall", "zerobin", "deterministic", "dr", "det",
        "rn", "rename", "renamereads",
        "canonical",
        "removeduplicatekmers", "rdk",
        "fixspikes", "fs",
        "tossbadreads", "tosserrorreads", "tbr", "ter",
        "requirebothbad", "rbb", "removeifeitherbad", "rieb",
        "saverarereads", "srr",
        "discardbadonly", "dbo",
        "uselowerdepth", "uld",
        "printzerocoverage", "pzc",
        "overwrite", "ow",
        "ignorebadquality", "ibq", "changequality", "cq",
        "utot", "tuc", "touppercase", "lctn", "lowercaseton",
        "dotdashxton", "undefinedton", "iupacton", "itn",
        "fixjunk", "ignorejunk",
        "usebgzip", "bgzip", "usepigz", "pigz",
        "usegunzip", "gunzip", "ungzip",
        "useunpigz", "unpigz", "useunbgzip", "unbgzip",
        "usegzip", "gzip", "usebgzf", "bgzf",
        "ordered", "ord",
        "verbose", "printcoverage",
        "append", "app",
        "interleaved", "int", "testinterleaved", "forceinterleaved",
        "prefilter",
        "autocountmin", "autosketch", "autosketchtable", "autosketchtables",
        "exact", "exactcount", "exactcounts", "useexact", "sketchexact",
        "auto", "automatic",
        "countup", "abrc", "addbadreadscountup",
        "markerrors", "markonly", "meo",
        "markuncorrectableerrors", "markuncorrectable", "mue",
        "tam", "trimaftermarking",
        "markwith1", "markwithone", "mw1",
        "aec", "aecc", "aggressiveerrorcorrection",
        "cec", "cecc", "conservativeerrorcorrection",
        "ecc", "ecc1", "ecc2", "eccf", "eccbyoverlap", "ecco", "overlap",
        "cfl", "cfr",
        "cardinality", "loglog", "loglogin", "cardinalityout", "loglogout",
    ];
    BARE_BOOLEAN_KEYS.contains(&key)
}
583
584fn fill_default_gzip_threads(config: &mut Config) {
585 if config.gzip_threads.is_some() {
586 return;
587 }
588 let Some(threads) = config.threads.filter(|threads| *threads > 1) else {
589 return;
590 };
591 config.gzip_threads = Some(threads);
592 config.notes.push(format!(
593 "threads={threads} also enables gzip input/output workers up to {threads}; use zipthreads=1 to force single-thread gzip I/O"
594 ));
595}
596
597fn read_config_args(value: &str) -> Result<Vec<String>> {
598 let mut args = Vec::new();
599 for file in value.split(',').filter(|part| !part.trim().is_empty()) {
600 let path = PathBuf::from(file.trim());
601 let text = fs::read_to_string(&path)
602 .with_context(|| format!("could not process config file {}", path.display()))?;
603 for line in text.lines() {
604 let trimmed = line.trim();
605 if !trimmed.is_empty() && !trimmed.starts_with('#') {
606 args.push(trimmed.to_string());
607 }
608 }
609 }
610 Ok(args)
611}
612
613fn handle_key_value(config: &mut Config, key: &str, value: &str) -> Result<()> {
614 match key {
615 "in" | "input" | "in1" | "input1" => config.in1 = Some(path(value)),
616 "in2" | "input2" => config.in2 = Some(path(value)),
617 "extra" => config.extra.extend(extra_paths(value)),
618 "out" | "output" | "out1" | "output1" | "outk" | "outkeep" | "outgood" => {
619 config.out1 = Some(path(value))
620 }
621 "out2" | "output2" | "outk2" | "outkeep2" | "outgood2" => config.out2 = Some(path(value)),
622 "outt" | "outt1" | "outtoss" | "outoss" | "outbad" => {
623 config.out_toss1 = Some(path(value));
624 }
625 "outt2" | "outtoss2" | "outoss2" | "outbad2" => config.out_toss2 = Some(path(value)),
626 "outl" | "outl1" | "outlow" | "outlow1" => config.out_low1 = Some(path(value)),
627 "outl2" | "outlow2" => config.out_low2 = Some(path(value)),
628 "outm" | "outm1" | "outmid" | "outmid1" | "outmiddle" => {
629 config.out_mid1 = Some(path(value));
630 }
631 "outm2" | "outmid2" | "outmiddle2" => config.out_mid2 = Some(path(value)),
632 "outh" | "outh1" | "outhigh" | "outhigh1" => config.out_high1 = Some(path(value)),
633 "outh2" | "outhigh2" => config.out_high2 = Some(path(value)),
634 "outu" | "outu1" | "outuncorrected" => config.out_uncorrected1 = Some(path(value)),
635 "outu2" | "outuncorrected2" => config.out_uncorrected2 = Some(path(value)),
636 "hist" | "histin" | "inhist" | "khist" => config.hist_in = Some(path(value)),
637 "histout" | "outhist" | "hist2" | "khistout" => config.hist_out = Some(path(value)),
638 "rhist" => config.rhist_in = Some(path(value)),
639 "rhistout" => config.rhist_out = Some(path(value)),
640 "peaks" => config.peaks_in = Some(path(value)),
641 "peaksout" => config.peaks_out = Some(path(value)),
642 "extin" | "extout" => {
643 config.notes.push(format!(
644 "{key}={value} is a BBTools file-extension hint; covered Rust paths infer FASTA/FASTQ format from explicit filenames"
645 ));
646 }
647 "k" | "kmer" => config.k = parse_usize(value, key)?,
648 "minq" | "minqual" => config.min_quality = parse_u8(value, key)?,
649 "minprob" => config.min_prob = parse_f64(value, key)?,
650 "reads" | "maxreads" => config.max_reads = parse_limit(value, key)?,
651 "tablereads" | "buildreads" => config.table_reads = parse_limit(value, key)?,
652 "ml" | "minlen" | "minlength" => config.min_length = parse_kmg_usize(value, key)?,
653 "maxlength" | "maxreadlength" | "maxreadlen" | "maxlen" => {
654 let _ = parse_kmg_usize(value, key)?;
655 config.notes.push(format!(
656 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
657 ));
658 }
659 "mingc" | "maxgc" | "mlf" | "minlenfrac" | "minlenfraction" | "minlengthfraction" => {
660 let _ = parse_f64(value, key)?;
661 config.notes.push(format!(
662 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
663 ));
664 }
665 "usepairgc"
666 | "pairgc"
667 | "trimbadsequence"
668 | "chastityfilter"
669 | "cf"
670 | "failnobarcode"
671 | "averagequalitybyprobability"
672 | "aqbp"
673 | "untrim" => {
674 let _ = parse_bool(value, key)?;
675 config.notes.push(format!(
676 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
677 ));
678 }
679 "badbarcodes" | "barcodefilter" => {
680 if !value.eq_ignore_ascii_case("crash") && !value.eq_ignore_ascii_case("fail") {
681 let _ = parse_bool(value, key)?;
682 }
683 config.notes.push(format!(
684 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
685 ));
686 }
687 "barcodes" | "barcode" => {
688 config.notes.push(format!(
689 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
690 ));
691 }
692 "maxns" => {
693 let _ = parse_i64(value, key)?;
694 config.notes.push(format!(
695 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
696 ));
697 }
698 "minconsecutivebases"
699 | "mcb"
700 | "minavgqualitybases"
701 | "maqb"
702 | "mintl"
703 | "mintrimlen"
704 | "mintrimlength" => {
705 let _ = parse_usize(value, key)?;
706 config.notes.push(format!(
707 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
708 ));
709 }
710 "minavgquality" | "minaveragequality" | "maq" => {
711 parse_min_average_quality(value, key)?;
712 config.notes.push(format!(
713 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
714 ));
715 }
716 "minbasequality" | "mbq" => {
717 let _ = parse_i8(value, key)?;
718 config.notes.push(format!(
719 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
720 ));
721 }
722 "build" | "genome" => {
723 let _ = parse_i32(value, key)?;
724 config.notes.push(format!(
725 "{key}={value} is a BBTools genome-build context control; covered Rust FASTA/FASTQ normalization does not use reference build metadata"
726 ));
727 }
728 "qtrim" | "qtrim1" | "qtrim2" => parse_qtrim(config, value, key)?,
729 "trimq" | "trimquality" | "trimq2" => {
730 config.trim_quality = parse_trim_quality(config, value, key)?
731 }
732 "trimleft" | "qtrimleft" => config.trim_left = parse_bool(value, key)?,
733 "trimright" | "qtrimright" => config.trim_right = parse_bool(value, key)?,
734 "optitrim" | "otf" | "otm" => parse_optitrim(config, value, key)?,
735 "trimgoodinterval" => config.trim_min_good_interval = parse_usize(value, key)?,
736 "trimclip" => {
737 let _ = parse_bool(value, key)?;
738 config.notes.push(format!(
739 "{key}={value} is parsed by BBNorm but not used by its trimFast call"
740 ));
741 }
742 "trimpolya" | "trimpolyg" | "trimpolygleft" | "trimpolygright" | "filterpolyg"
743 | "trimpolyc" | "trimpolycleft" | "trimpolycright" | "filterpolyc" | "maxnonpoly" => {
744 let _ = parse_poly(value, key)?;
745 config.notes.push(format!(
746 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
747 ));
748 }
749 "forcetrimmod" | "forcemrimmodulo" | "ftm" | "ftl" | "forcetrimleft" | "ftr"
750 | "forcetrimright" | "ftr2" | "forcetrimright2" => {
751 let _ = parse_i64(value, key)?;
752 config.notes.push(format!(
753 "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
754 ));
755 }
756 "keepall" => config.keep_all = parse_bool(value, key)?,
757 "zerobin" => config.zero_bin = parse_bool(value, key)?,
758 "deterministic" | "dr" | "det" => {
759 config.deterministic = parse_bool(value, key)?;
760 if !config.deterministic {
761 config.notes.push(format!(
762 "{key}={value} enables nondeterministic read selection and faster parallel replay for bounded approximate sketches"
763 ));
764 }
765 }
766 "rn" | "rename" | "renamereads" => config.rename_reads = parse_bool(value, key)?,
767 "canonical" => config.canonical = parse_bool(value, key)?,
768 "removeduplicatekmers" | "rdk" => config.remove_duplicate_kmers = parse_bool(value, key)?,
769 "fixspikes" | "fs" => config.fix_spikes = parse_bool(value, key)?,
770 "target" | "targetdepth" | "tgt" => config.target_depth = parse_u64(value, key)?,
771 "max" | "maxdepth" => config.max_depth = Some(parse_u64(value, key)?),
772 "min" | "mindepth" => config.min_depth = parse_u64(value, key)?,
773 "minkmers" | "minkmersovermindepth" | "mingoodkmersperread" | "mgkpr" => {
774 config.min_kmers_over_min_depth = parse_usize(value, key)?.max(1);
775 }
776 "percentile" | "depthpercentile" | "dp" => {
777 config.depth_percentile = parse_percent(value, key)?;
778 }
779 "highdepthpercentile" | "highpercentile" | "hdp" => {
780 config.high_percentile = parse_percent(value, key)?;
781 }
782 "lowdepthpercentile" | "lowpercentile" | "ldp" => {
783 config.low_percentile = parse_percent(value, key)?;
784 }
785 "errordetectratio" | "edr" => config.error_detect_ratio = parse_u64(value, key)?,
786 "highthresh" | "hthresh" | "ht" => config.high_thresh = parse_u64(value, key)?,
787 "lowthresh" | "lthresh" | "lt" => config.low_thresh = parse_u64(value, key)?,
788 "tossbadreads" | "tosserrorreads" | "tbr" | "ter" => {
789 let enabled = parse_bool(value, key)?;
790 config.toss_error_reads = enabled;
791 config.toss_error_reads_first = enabled;
792 }
793 "tossbadreads2" | "tosserrorreads2" | "tbr2" | "ter2" | "tossbadreadsf"
794 | "tosserrorreadsf" | "tbrf" | "terf" => {
795 config.toss_error_reads = parse_bool(value, key)?;
796 }
797 "tossbadreads1" | "tosserrorreads1" | "tbr1" | "ter1" => {
798 config.toss_error_reads_first = parse_bool(value, key)?;
799 }
800 "requirebothbad" | "rbb" => config.require_both_bad = parse_bool(value, key)?,
801 "removeifeitherbad" | "rieb" => config.require_both_bad = !parse_bool(value, key)?,
802 "saverarereads" | "srr" => config.save_rare_reads = parse_bool(value, key)?,
803 "discardbadonly" | "dbo" | "discardbadonlyf" | "dbof" | "discardbadonly2" | "dbo2" => {
804 let enabled = parse_bool(value, key)?;
805 config.discard_bad_only = enabled;
806 config.discard_bad_only_first = enabled;
807 }
808 "discardbadonly1" | "dbo1" => {
809 config.discard_bad_only_first = parse_bool(value, key)?;
810 }
811 "uselowerdepth" | "uld" => config.use_lower_depth = parse_bool(value, key)?,
812 "lbd" | "lowbindepth" | "lowerlimit" => config.low_bin_depth = parse_i64(value, key)?,
813 "hbd" | "highbindepth" | "upperlimit" => config.high_bin_depth = parse_i64(value, key)?,
814 "histlen" | "histogramlen" => config.hist_len = parse_usize(value, key)?.saturating_add(1),
815 "histcol" | "histcolumns" | "histogramcolumns" => {
816 config.hist_columns = parse_u8(value, key)?
817 }
818 "printzerocoverage" | "pzc" => config.print_zero_coverage = parse_bool(value, key)?,
819 "minheight" | "h" => config.peak_min_height = parse_u64(value, key)?,
820 "minvolume" | "v" => config.peak_min_volume = parse_u64(value, key)?,
821 "minwidth" | "w" => config.peak_min_width = parse_usize(value, key)?,
822 "minpeak" | "minp" => config.peak_min_peak = parse_usize(value, key)?,
823 "maxpeak" | "maxp" => config.peak_max_peak = parse_usize(value, key)?,
824 "ploidy" => config.peak_ploidy = parse_i32(value, key)?,
825 "maxpeakcount" | "maxpc" | "maxpeaks" => {
826 config.peak_max_count = parse_usize(value, key)?.max(1)
827 }
828 "overwrite" | "ow" => config.overwrite = parse_bool(value, key)?,
829 "passes" | "p" => {
830 config.passes = parse_usize(value, key)?;
831 }
832 "1pass" | "1p" => {
833 config.passes = 1;
834 config.notes.push("single-pass mode selected".to_string());
835 }
836 "2pass" | "2p" => {
837 config.passes = 2;
838 }
839 "ascii" | "asciioffset" | "quality" | "qual" => {
840 let offset = parse_quality_offset(value, key)?.unwrap_or(33);
841 config.quality_in_offset = offset;
842 config.quality_out_offset = offset;
843 }
844 "qin" | "asciiin" | "qualityin" | "qualin" => {
845 config.quality_in_offset = parse_quality_offset(value, key)?.unwrap_or(33);
846 }
847 "qout" | "asciiout" | "qualityout" | "qualout" => {
848 config.quality_out_offset = parse_quality_offset(value, key)?.unwrap_or(33);
849 }
850 "qauto" => config
851 .notes
852 .push("qauto accepted for BBTools-compatible quality alias handling".into()),
853 key if matches!(
854 quality_recal_base_key(key),
855 "recalibrate" | "recalibratequality" | "recal"
856 ) =>
857 {
858 if parse_bool(value, key)? {
859 bail!(
860 "{key}={value} enables BBTools quality recalibration; Rust does not implement output-affecting recalibration yet"
861 );
862 }
863 config.notes.push(format!(
864 "{key}={value} keeps BBTools quality recalibration disabled in the supported Rust path"
865 ));
866 }
867 key if is_quality_recal_bool_key(key) => {
868 let _ = parse_java_bool(value);
869 config.notes.push(format!(
870 "{key}={value} is a BBTools quality-recalibration control; covered Rust output is unchanged"
871 ));
872 }
873 key if quality_recal_base_key(key) == "observationcutoff" => {
874 let _ = parse_kmg_i64(value, key)?;
875 config.notes.push(format!(
876 "{key}={value} is a BBTools quality-recalibration control; covered Rust output is unchanged"
877 ));
878 }
879 key if matches!(
880 quality_recal_base_key(key),
881 "recalpasses" | "recalqmax" | "recalqmin"
882 ) =>
883 {
884 let _ = parse_i32(value, key)?;
885 config.notes.push(format!(
886 "{key}={value} is a BBTools quality-recalibration control; covered Rust output is unchanged"
887 ));
888 }
889 key if quality_recal_base_key(key) == "qmatrixmode" => {
890 config.notes.push(format!(
891 "{key}={value} is a BBTools quality-recalibration matrix mode; covered Rust output is unchanged"
892 ));
893 }
894 "ignorebadquality" | "ibq" => {
895 if parse_bool(value, key)? {
896 config.change_quality = false;
897 }
898 }
899 "changequality" | "cq" => config.change_quality = parse_bool(value, key)?,
900 "mincalledquality" => {
901 config.min_called_quality = parse_i32_clamped(value, key, 0, 93)? as u8
902 }
903 "maxcalledquality" => {
904 config.max_called_quality = parse_i32_clamped(value, key, 1, 93)? as u8
905 }
906 "fakequality" | "qfake" => {
907 config.fake_quality = parse_i32_clamped(value, key, 0, 93)? as u8
908 }
909 "fakefastaqual" | "fakefastaquality" | "ffq" => parse_fake_fasta_quality(config, value)?,
910 "fastawrap" | "wrap" => config.fasta_wrap = parse_fasta_wrap(value, key)?,
911 "trd" | "trc" | "trimreaddescription" | "trimreaddescriptions" => {
912 let _ = parse_java_bool(value);
913 config.notes.push(format!(
914 "{key}={value} is accepted for KmerNormalize compatibility; covered FASTA/FASTQ read output keeps full headers like Java"
915 ));
916 }
917 "trimrefdescription" | "trimrefdescriptions" | "trimrname" => {
918 let _ = parse_java_bool(value);
919 config.notes.push(format!(
920 "{key}={value} is a BBTools reference-name trimming control; covered FASTA/FASTQ read output is unchanged"
921 ));
922 }
923 "utot" => config.u_to_t = parse_bool(value, key)?,
924 "tuc" | "touppercase" => config.to_upper_case = parse_bool(value, key)?,
925 "lctn" | "lowercaseton" => config.lower_case_to_n = parse_bool(value, key)?,
926 "dotdashxton" => config.dot_dash_x_to_n = parse_bool(value, key)?,
927 "undefinedton" | "iupacton" | "itn" => config.iupac_to_n = parse_bool(value, key)?,
928 "fixjunk" => {
929 if parse_bool(value, key)? {
930 config.junk_mode = JunkMode::Fix;
931 } else if config.junk_mode == JunkMode::Fix {
932 config.junk_mode = JunkMode::Crash;
933 }
934 }
935 "ignorejunk" => {
936 if parse_bool(value, key)? {
937 config.junk_mode = JunkMode::Ignore;
938 } else if config.junk_mode == JunkMode::Ignore {
939 config.junk_mode = JunkMode::Crash;
940 }
941 }
942 "flagjunk" => {
943 if parse_bool(value, key)? {
944 config.junk_mode = JunkMode::Flag;
945 } else if config.junk_mode == JunkMode::Flag {
946 config.junk_mode = JunkMode::Crash;
947 }
948 }
949 "tossjunk" => {
950 if parse_bool(value, key)? {
951 config.junk_mode = JunkMode::Flag;
952 }
953 }
954 "crashjunk" | "failjunk" => {
955 if parse_bool(value, key)? {
956 config.junk_mode = JunkMode::Crash;
957 } else if config.junk_mode == JunkMode::Crash {
958 config.junk_mode = JunkMode::Ignore;
959 }
960 }
961 "junk" => parse_junk_mode(config, value)?,
962 "threads" | "t" => {
963 let threads = value.to_ascii_lowercase();
964 if threads == "auto" {
965 config
966 .notes
967 .push("threads=auto accepted; Rayon will use its default worker count".into());
968 } else if matches!(threads.as_str(), "max" | "all") {
969 let workers = std::thread::available_parallelism()
970 .map(|threads| threads.get())
971 .unwrap_or(1);
972 config.threads = Some(workers);
973 config.notes.push(format!(
974 "threads={threads} accepted; Rayon worker count will use all {workers} available workers"
975 ));
976 } else {
977 let threads = parse_i64(value, key)?;
978 if threads > 1 {
979 config.threads = Some(threads as usize);
980 config.notes.push(format!(
981 "threads={threads} accepted; Rayon worker count will be capped to {threads}"
982 ));
983 } else if threads == 1 {
984 config.threads = Some(1);
985 }
986 }
987 }
988 "null" => {}
989 "monitor" | "killswitch" => {
990 parse_monitor(value, key)?;
991 config.notes.push(format!(
992 "{key}={value} is a BBTools watchdog runtime control; the Rust CLI accepts it as a no-op"
993 ));
994 }
995 "outstream" | "proxyhost" | "proxyport" | "metadatafile" => {
996 config.notes.push(format!(
997 "{key}={value} is a BBTools preparser runtime control; covered Rust output records are unchanged"
998 ));
999 }
1000 "json" | "silent" | "printexecuting" | "bufferbf" | "bufferbf1" => {
1001 let _ = parse_java_bool(value);
1002 config.notes.push(format!(
1003 "{key}={value} is a BBTools preparser runtime control; covered Rust output records are unchanged"
1004 ));
1005 }
1006 "testsize" => {
1007 let _ = parse_java_bool(value);
1008 config.notes.push(format!(
1009 "{key}={value} is a BBTools diagnostic sizing control; covered Rust output records are unchanged"
1010 ));
1011 }
1012 "breaklen" | "breaklength" => {
1013 let break_len = parse_i32(value, key)?;
1014 if break_len > 0 {
1015 bail!(
1016 "{key}={value} enables BBTools read breaking; Rust does not implement output-affecting read splitting yet"
1017 );
1018 }
1019 config.notes.push(format!(
1020 "{key}={value} keeps BBTools read breaking disabled in the supported Rust path"
1021 ));
1022 }
1023 "usejni" | "jni" | "skipvalidation" | "validate" | "validateinconstructor" | "vic" => {
1024 let _ = parse_java_bool(value);
1025 config.notes.push(format!(
1026 "{key}={value} is a BBTools shared runtime/validation control; covered Rust output is unchanged"
1027 ));
1028 }
1029 "usempi" | "mpi" => {
1030 let enabled = parse_mpi_enabled(value, key)?;
1031 config.notes.push(format!(
1032 "{key}={value} is a BBTools MPI execution control; Rust runs locally and ignores MPI mode{}",
1033 if enabled { " for ASAP output" } else { "" }
1034 ));
1035 }
1036 "crismpi" | "mpikeepall" => {
1037 let enabled = parse_java_bool(value);
1038 config.notes.push(format!(
1039 "{key}={value} is a BBTools MPI stream control; Rust runs locally and ignores MPI stream mode{}",
1040 if enabled { " for ASAP output" } else { "" }
1041 ));
1042 }
1043 "bf1" | "bytefile1" | "bf2" | "bytefile2" | "bf3" | "bytefile3" | "bf4" | "bytefile4" => {
1044 let _ = parse_java_bool(value);
1045 config.notes.push(format!(
1046 "{key}={value} is a BBTools byte-file runtime control; covered Rust output is unchanged"
1047 ));
1048 }
1049 "bf1bufferlen" | "readbufferlength" | "readbufferlen" | "readbufferdata" => {
1050 let _ = parse_kmg_i64(value, key)?;
1051 config.notes.push(format!(
1052 "{key}={value} is a BBTools buffer-sizing control; covered Rust output is unchanged"
1053 ));
1054 }
1055 "bf4threads" | "bfthreads" | "readbuffers" => {
1056 let _ = parse_i32(value, key)?;
1057 config.notes.push(format!(
1058 "{key}={value} is a BBTools I/O threading control; current Rust engine manages I/O internally"
1059 ));
1060 }
1061 "workers" | "workerthreads" | "wt" | "threadsin" | "tin" | "threadsout" | "tout" => {
1062 parse_auto_or_i32(value, key)?;
1063 config.notes.push(format!(
1064 "{key}={value} is a BBTools I/O worker control; current Rust engine manages I/O internally"
1065 ));
1066 }
1067 "zipthreads" | "bgzfthreadsin" | "bgzftin" | "bgzfreadthreads" | "bgzfthreadsout"
1068 | "bgzftout" | "bgzfwritethreads" => {
1069 let threads = parse_i32(value, key)?;
1070 if threads > 0 {
1071 config.gzip_threads = Some(threads as usize);
1072 }
1073 config.notes.push(format!(
1074 "{key}={value} is a BBTools compression/threading control; Rust uses gzip input/output worker settings for .gz files when threads > 1"
1075 ));
1076 }
1077 "ziplevel" | "zl" | "bziplevel" | "bzl" | "blocksize" | "pigziterations" | "pigziters" => {
1078 let _ = parse_i32(value, key)?;
1079 config.notes.push(format!(
1080 "{key}={value} is a BBTools compression/threading control; covered Rust output records are unchanged"
1081 ));
1082 }
1083 "zipthreaddivisor" | "ztd" => {
1084 let _ = parse_f64(value, key)?;
1085 config.notes.push(format!(
1086 "{key}={value} is a BBTools compression/threading control; covered Rust output records are unchanged"
1087 ));
1088 }
1089 "usebgzip" | "bgzip" | "usepigz" | "pigz" => {
1090 if value
1091 .as_bytes()
1092 .first()
1093 .is_some_and(|byte| byte.is_ascii_digit())
1094 {
1095 let threads = parse_i32(value, key)?;
1096 if threads > 0 {
1097 config.gzip_threads = Some(threads as usize);
1098 }
1099 } else if parse_java_bool(value) {
1100 let workers = config
1101 .threads
1102 .unwrap_or_else(|| std::thread::available_parallelism().map_or(1, |n| n.get()));
1103 if workers > 1 {
1104 config.gzip_threads = Some(workers);
1105 }
1106 } else {
1107 config.gzip_threads = Some(1);
1108 }
1109 config.notes.push(format!(
1110 "{key}={value} is a BBTools compression control; Rust uses zlib-rs gzip plus pigz/unpigz hooks for .gz input/output when enabled and available"
1111 ));
1112 }
1113 "usegunzip" | "gunzip" | "ungzip" | "useunpigz" | "unpigz" | "useunbgzip" | "unbgzip" => {
1114 if value
1115 .as_bytes()
1116 .first()
1117 .is_some_and(|byte| byte.is_ascii_digit())
1118 {
1119 let threads = parse_i32(value, key)?;
1120 if threads > 0 {
1121 config.gzip_threads = Some(threads as usize);
1122 }
1123 } else if parse_java_bool(value) {
1124 let workers = config
1125 .threads
1126 .unwrap_or_else(|| std::thread::available_parallelism().map_or(1, |n| n.get()));
1127 if workers > 1 {
1128 config.gzip_threads = Some(workers);
1129 }
1130 } else {
1131 config.gzip_threads = Some(1);
1132 }
1133 config.notes.push(format!(
1134 "{key}={value} is a BBTools gzip-input control; Rust uses zlib-rs and tries pigz/unpigz for .gz input when worker count is >1"
1135 ));
1136 }
1137 "allowziplevelchange"
1138 | "usegzip"
1139 | "gzip"
1140 | "usebgzf"
1141 | "bgzf"
1142 | "forcepigz"
1143 | "forcebgzip"
1144 | "preferbgzip"
1145 | "nativebgzip"
1146 | "nativebgzf"
1147 | "usenativebgzip"
1148 | "usenativebgzf"
1149 | "allownativebgzip"
1150 | "allownativebgzf"
1151 | "nativebgzipin"
1152 | "nativebgzfin"
1153 | "nativebgzipout"
1154 | "nativebgzfout"
1155 | "prefernativebgzip"
1156 | "prefernativebgzf"
1157 | "nativebgzipmt"
1158 | "nativebgzfmt"
1159 | "multithreadedbgzf"
1160 | "bgzfosmt2"
1161 | "filteredbgzf"
1162 | "preferunbgzip"
1163 | "usebzip2"
1164 | "bzip2"
1165 | "usepbzip2"
1166 | "pbzip2"
1167 | "uselbzip2"
1168 | "lbzip2" => {
1169 let _ = parse_java_bool(value);
1170 config.notes.push(format!(
1171 "{key}={value} is a BBTools compression/runtime control; covered Rust output records are unchanged"
1172 ));
1173 }
1174 "samversion" | "samv" | "sam" => {
1175 let _ = parse_f64(value, key)?;
1176 config.notes.push(format!(
1177 "{key}={value} is a BBTools SAM-version control; covered Rust FASTA/FASTQ output is unchanged"
1178 ));
1179 }
1180 "streamerthreads"
1181 | "ssthreads"
1182 | "bsthreads"
1183 | "fastqstreamerthreads"
1184 | "fqsthreads"
1185 | "fastastreamerthreads"
1186 | "fasthreads"
1187 | "samwriterthreads"
1188 | "swthreads"
1189 | "bamwriterthreads"
1190 | "bwthreads"
1191 | "fastqwriterthreads"
1192 | "fqwthreads"
1193 | "intronlen"
1194 | "intronlength" => {
1195 let _ = parse_i32(value, key)?;
1196 config.notes.push(format!(
1197 "{key}={value} is a BBTools SAM/streamer threading control; current Rust engine manages FASTA/FASTQ I/O internally"
1198 ));
1199 }
1200 "sambamba"
1201 | "samtools"
1202 | "printheaderwait"
1203 | "nativebam"
1204 | "usenativebam"
1205 | "allownativebam"
1206 | "nativebamout"
1207 | "usenativebamout"
1208 | "nativebamin"
1209 | "usenativebamin"
1210 | "prefernativebamout"
1211 | "prefernativebamin"
1212 | "prefernativebam"
1213 | "userssw"
1214 | "attachedsamline"
1215 | "useattachedsamline"
1216 | "fastastreamer2"
1217 | "prefermd"
1218 | "prefermdtag"
1219 | "notags"
1220 | "mdtag"
1221 | "md"
1222 | "idtag"
1223 | "mateqtag"
1224 | "xmtag"
1225 | "xm"
1226 | "smtag"
1227 | "amtag"
1228 | "nmtag"
1229 | "xttag"
1230 | "stoptag"
1231 | "lengthtag"
1232 | "boundstag"
1233 | "scoretag"
1234 | "sortscaffolds"
1235 | "customtag"
1236 | "nhtag"
1237 | "keepnames"
1238 | "saa"
1239 | "secondaryalignmentasterisks"
1240 | "inserttag"
1241 | "correctnesstag"
1242 | "suppressheader"
1243 | "noheader"
1244 | "noheadersequences"
1245 | "nhs"
1246 | "suppressheadersequences"
1247 | "tophat"
1248 | "flipsam" => {
1249 let _ = parse_java_bool(value);
1250 config.notes.push(format!(
1251 "{key}={value} is a BBTools SAM/BAM runtime control; covered Rust FASTA/FASTQ output is unchanged"
1252 ));
1253 }
1254 "xstag" | "xs" => {
1255 let lower = value.to_ascii_lowercase();
1256 if !matches!(
1257 lower.strip_prefix("fr-").unwrap_or(&lower),
1258 "ss" | "secondstrand" | "fs" | "firststrand" | "us" | "unstranded"
1259 ) {
1260 let _ = parse_java_bool(value);
1261 }
1262 config.notes.push(format!(
1263 "{key}={value} is a BBTools SAM XS-tag control; covered Rust FASTA/FASTQ output is unchanged"
1264 ));
1265 }
1266 "readgroup" | "readgroupid" | "rgid" | "readgroupcn" | "rgcn" | "readgroupds" | "rgds"
1267 | "readgroupdt" | "rgdt" | "readgroupfo" | "rgfo" | "readgroupks" | "rgks"
1268 | "readgrouplb" | "rglb" | "readgrouppg" | "rgpg" | "readgrouppi" | "rgpi"
1269 | "readgrouppl" | "rgpl" | "readgrouppu" | "rgpu" | "readgroupsm" | "rgsm" => {
1270 config.notes.push(format!(
1271 "{key}={value} is a BBTools read-group metadata control; covered Rust FASTA/FASTQ output is unchanged"
1272 ));
1273 }
1274 "tossbrokenreads"
1275 | "nullifybrokenquality"
1276 | "nbq"
1277 | "rbm"
1278 | "renamebymapping"
1279 | "don"
1280 | "deleteoldname"
1281 | "assertcigar"
1282 | "verbosesamline"
1283 | "parsecustom"
1284 | "fastqparsecustom"
1285 | "shrinkheaders"
1286 | "fixheader"
1287 | "fixheaders"
1288 | "allownullheader"
1289 | "allownullheaders"
1290 | "recalpairnum"
1291 | "recalibratepairnum" => {
1292 let _ = parse_java_bool(value);
1293 config.notes.push(format!(
1294 "{key}={value} is a BBTools shared read/header runtime control; covered Rust FASTA/FASTQ output is unchanged"
1295 ));
1296 }
1297 "pairreads" | "flipr2" => {
1298 let enabled = parse_java_bool(value);
1299 config.notes.push(format!(
1300 "{key}={value} is a BBTools global pairing behavior control; Rust pairing uses explicit in2=, interleaved=, and # routing{}",
1301 if enabled { " for ASAP output" } else { "" }
1302 ));
1303 }
1304 "aminoin" | "amino" | "amino8" => {
1305 if parse_java_bool(value) {
1306 bail!(
1307 "{key}={value} enables BBTools amino-acid kmer mode; the Rust engine currently supports nucleotide BBNorm only"
1308 );
1309 }
1310 config.notes.push(format!(
1311 "{key}={value} keeps BBTools amino-acid kmer mode disabled in the supported Rust path"
1312 ));
1313 }
1314 "validatebranchless"
1315 | "fairqueues"
1316 | "fixextensions"
1317 | "fixextension"
1318 | "tryallextensions"
1319 | "2passresize"
1320 | "twopassresize"
1321 | "parallelsort"
1322 | "paralellsort"
1323 | "gcbeforemem"
1324 | "warnifnosequence"
1325 | "warnfirsttimeonly"
1326 | "kmg"
1327 | "outputkmg"
1328 | "forcejavaparsedouble"
1329 | "simdsparse"
1330 | "simdmultsparse"
1331 | "simdfmasparse"
1332 | "simdcopy"
1333 | "awsservers"
1334 | "aws"
1335 | "nerscservers"
1336 | "nersc"
1337 | "lowmem"
1338 | "lowram"
1339 | "lowmemory"
1340 | "buffer"
1341 | "buffered"
1342 | "sidechannelstats"
1343 | "comment"
1344 | "taxpath"
1345 | "silva"
1346 | "unite"
1347 | "imghq"
1348 | "callins"
1349 | "callinss"
1350 | "calldel"
1351 | "calldels"
1352 | "callsub"
1353 | "callsubs"
1354 | "callsnp"
1355 | "callsnps"
1356 | "callindel"
1357 | "callindels"
1358 | "calljunct"
1359 | "calljunction"
1360 | "calljunctions"
1361 | "callnocall"
1362 | "callnocalls"
1363 | "protfull" => {
1364 let _ = parse_java_bool(value);
1365 config.notes.push(format!(
1366 "{key}={value} is a BBTools shared environment/performance control; covered Rust output is unchanged"
1367 ));
1368 }
1369 "lockedincrement" | "symmetricwrite" | "symmetric" | "sw" => {
1370 if value.eq_ignore_ascii_case("auto") {
1371 config.locked_increment = None;
1372 } else {
1373 config.locked_increment = Some(parse_java_bool(value));
1374 }
1375 config.notes.push(format!(
1376 "{key}={value} is a BBTools KCountArray write-symmetry control; bounded Rust sketches use the matching locked/conservative update mode when applicable"
1377 ));
1378 }
1379 "gpucounting" | "gpu_counting" | "usegpu" => {
1380 config.gpu_counting = parse_bool(value, key)?;
1381 config.notes.push(format!(
1382 "{key}={value} toggles experimental CUDA sort/reduce-assisted input counting; defaults remain CPU-only"
1383 ));
1384 }
1385 "gpuhelper" | "cudahelper" | "gpucountinghelper" => {
1386 config.gpu_helper = Some(PathBuf::from(value));
1387 config.notes.push(format!(
1388 "{key}={value} selects the experimental CUDA k-mer reduce helper"
1389 ));
1390 }
1391 "gpupersistent" | "gpucountingpersistent" | "persistentgpuhelper" => {
1392 config.gpu_persistent = parse_bool(value, key)?;
1393 config.notes.push(format!(
1394 "{key}={value} toggles the experimental persistent CUDA helper protocol"
1395 ));
1396 }
1397 "simd" => {
1398 if !value.eq_ignore_ascii_case("auto") {
1399 let _ = parse_java_bool(value);
1400 }
1401 config.notes.push(format!(
1402 "{key}={value} is a BBTools SIMD runtime control; covered Rust output is unchanged"
1403 ));
1404 }
1405 "entropyk" | "ek" | "entropywindow" | "ew" => {
1406 let parsed = parse_i32(value, key)?;
1407 if parsed <= 0 {
1408 bail!("{key} expects a positive integer, got {value}");
1409 }
1410 if matches!(key, "entropyk" | "ek") {
1411 config.entropy_k = parsed as usize;
1412 } else {
1413 config.entropy_window = parsed as usize;
1414 }
1415 config.notes.push(format!(
1416 "{key}={value} is a BBTools entropy-stat runtime control; Rust applies it to emitted entropy histograms"
1417 ));
1418 }
1419 "barcodestats" | "barcodecounts" => {
1420 config.barcode_stats_out = Some(path(value));
1421 config.notes.push(format!(
1422 "{key}={value} is a BBTools side-output barcode stats file; Rust emits a covered barcode-count fallback from read headers"
1423 ));
1424 }
1425 "timehistogram" | "thist" => {
1426 config.notes.push(format!(
1427 "{key}={value} is a BBTools side-output mapper time histogram; Rust does not emit this auxiliary file yet and keeps the supported normalization path"
1428 ));
1429 }
1430 "matchhistogram" | "matchhist" | "mhist" => {
1431 config.match_hist_out = Some(path(value));
1432 config.notes.push(format!(
1433 "{key}={value} is a BBTools side-output match histogram; Rust emits a covered no-alignment sequence-match fallback histogram"
1434 ));
1435 }
1436 "inserthistogram" | "inserthist" | "ihist" => {
1437 config.insert_hist_out = Some(path(value));
1438 config.notes.push(format!(
1439 "{key}={value} is a BBTools side-output insert histogram; Rust emits a covered no-alignment insert-size fallback histogram"
1440 ));
1441 }
1442 "qualityaccuracyhistogram" | "qahist" => {
1443 config.quality_accuracy_hist_out = Some(path(value));
1444 config.notes.push(format!(
1445 "{key}={value} is a BBTools side-output quality-accuracy histogram; Rust emits a covered no-alignment quality-accuracy fallback histogram"
1446 ));
1447 }
1448 "indelhistogram" | "indelhist" => {
1449 config.indel_hist_out = Some(path(value));
1450 config.notes.push(format!(
1451 "{key}={value} is a BBTools side-output indel histogram; Rust emits a covered no-alignment indel fallback histogram"
1452 ));
1453 }
1454 "errorhistogram" | "ehist" => {
1455 config.error_hist_out = Some(path(value));
1456 config.notes.push(format!(
1457 "{key}={value} is a BBTools side-output error histogram; Rust emits a covered no-alignment error-count fallback histogram"
1458 ));
1459 }
1460 "gchistogram" | "gchist" => {
1461 config.gc_hist_out = Some(path(value));
1462 config.notes.push(format!(
1463 "{key}={value} is a BBTools side-output GC histogram; Rust emits a covered primary input GC-bin histogram"
1464 ));
1465 }
1466 "qualityhistogram" | "qualityhist" | "qhist" => {
1467 config.quality_hist_out = Some(path(value));
1468 config.notes.push(format!(
1469 "{key}={value} is a BBTools side-output quality histogram; Rust emits a covered primary input quality histogram"
1470 ));
1471 }
1472 "basequalityhistogram" | "basequalityhist" | "bqhist" => {
1473 config.base_quality_hist_out = Some(path(value));
1474 config.notes.push(format!(
1475 "{key}={value} is a BBTools side-output base-quality histogram; Rust emits a covered primary input base-quality histogram"
1476 ));
1477 }
1478 "qualitycounthistogram" | "qualitycounthist" | "qchist" | "qdhist" | "qfhist" => {
1479 config.quality_count_hist_out = Some(path(value));
1480 config.notes.push(format!(
1481 "{key}={value} is a BBTools side-output quality-count histogram; Rust emits a covered primary input quality-count histogram"
1482 ));
1483 }
1484 "averagequalityhistogram" | "aqhist" => {
1485 config.average_quality_hist_out = Some(path(value));
1486 config.notes.push(format!(
1487 "{key}={value} is a BBTools side-output average-quality histogram; Rust emits a covered primary input average-quality histogram"
1488 ));
1489 }
1490 "overallbasequalityhistogram" | "overallbasequalityhist" | "obqhist" => {
1491 config.overall_base_quality_hist_out = Some(path(value));
1492 config.notes.push(format!(
1493 "{key}={value} is a BBTools side-output overall base-quality histogram; Rust emits a covered primary input overall base-quality histogram"
1494 ));
1495 }
1496 "lengthhistogram" | "lhist" => {
1497 config.length_hist_out = Some(path(value));
1498 config.notes.push(format!(
1499 "{key}={value} is a BBTools side-output length histogram; Rust emits a covered read-length histogram for the primary input"
1500 ));
1501 }
1502 "basehistogram" | "basehist" | "bhist" => {
1503 config.base_hist_out = Some(path(value));
1504 config.notes.push(format!(
1505 "{key}={value} is a BBTools side-output base-content histogram; Rust emits a covered primary input base-content histogram"
1506 ));
1507 }
1508 "entropyhistogram" | "entropyhist" | "enhist" | "enthist" => {
1509 config.entropy_hist_out = Some(path(value));
1510 config.notes.push(format!(
1511 "{key}={value} is a BBTools side-output entropy histogram; Rust emits a covered primary input entropy histogram"
1512 ));
1513 }
1514 "identityhistogram" | "idhist" => {
1515 config.identity_hist_out = Some(path(value));
1516 config.notes.push(format!(
1517 "{key}={value} is a BBTools side-output identity histogram; Rust emits a covered sequence-input identity fallback histogram because this BBNorm path has no aligner"
1518 ));
1519 }
1520 "gcbins" | "gchistbins" => {
1521 if !value.eq_ignore_ascii_case("auto") {
1522 let bins = parse_i32(value, key)?;
1523 if bins <= 0 {
1524 bail!("{key} expects a positive integer or auto, got {value}");
1525 }
1526 config.gc_bins = Some(bins as usize);
1527 }
1528 config.notes.push(format!(
1529 "{key}={value} is a BBTools side-output GC histogram sizing control; Rust applies it to emitted GC histograms"
1530 ));
1531 }
1532 "entropybins" | "entropyhistbins" | "entbins" | "enthistbins" => {
1533 if !value.eq_ignore_ascii_case("auto") {
1534 let bins = parse_i32(value, key)?;
1535 if bins <= 0 {
1536 bail!("{key} expects a positive integer or auto, got {value}");
1537 }
1538 config.entropy_bins = bins as usize;
1539 }
1540 config.notes.push(format!(
1541 "{key}={value} is a BBTools side-output entropy histogram sizing control; Rust applies it to emitted entropy histograms"
1542 ));
1543 }
1544 "idhistlen" | "idhistlength" | "idhistbins" | "idbins" => {
1545 if !value.eq_ignore_ascii_case("auto") {
1546 let bins = parse_i32(value, key)?;
1547 if bins <= 0 {
1548 bail!("{key} expects a positive integer or auto, got {value}");
1549 }
1550 config.identity_bins = bins as usize;
1551 }
1552 config.notes.push(format!(
1553 "{key}={value} is a BBTools side-output identity histogram sizing control; Rust applies it to emitted identity histograms"
1554 ));
1555 }
1556 "entropyns" | "entropyhistns" => {
1557 config.allow_entropy_ns = parse_java_bool(value);
1558 config.notes.push(format!(
1559 "{key}={value} is a BBTools side-output entropy control; Rust applies it to emitted entropy histograms"
1560 ));
1561 }
1562 "gcchart" | "gcplot" | "fixindels" | "ignorevcfindels" => {
1563 let _ = parse_java_bool(value);
1564 config.notes.push(format!(
1565 "{key}={value} is a BBTools side-output stats control; covered Rust FASTA/FASTQ output is unchanged"
1566 ));
1567 }
1568 "maxhistlen" => {
1569 let len = parse_kmg_i64(value, key)?;
1570 if len <= 0 {
1571 bail!("{key} expects a positive KMG value, got {value}");
1572 }
1573 config.side_hist_len = Some(
1574 usize::try_from(len)
1575 .map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))?,
1576 );
1577 config.notes.push(format!(
1578 "{key}={value} is a BBTools side-output histogram length control; Rust applies it to emitted side histograms"
1579 ));
1580 }
1581 "cardinality" | "loglog" => {
1582 match parse_cardinality_bool_or_int(value, key)? {
1583 CardinalityToggle::Bool(enabled) => config.cardinality.input = enabled,
1584 CardinalityToggle::Int(k) => {
1585 config.cardinality.input = true;
1586 config.cardinality.k = Some(k);
1587 }
1588 }
1589 config.notes.push(format!(
1590 "{key}={value} is a BBTools cardinality/loglog control; Rust emits a bounded input estimate when enabled"
1591 ));
1592 }
1593 "loglogin" => {
1594 match parse_cardinality_bool_or_int(value, key)? {
1595 CardinalityToggle::Bool(enabled) => config.cardinality.input = enabled,
1596 CardinalityToggle::Int(k) => {
1597 config.cardinality.input = true;
1598 config.cardinality.k = Some(k);
1599 }
1600 }
1601 config.notes.push(format!(
1602 "{key}={value} is a BBTools cardinality/loglog input control; Rust emits a bounded input estimate when enabled"
1603 ));
1604 }
1605 "cardinalityout" | "loglogout" => {
1606 match parse_cardinality_bool_or_int(value, key)? {
1607 CardinalityToggle::Bool(enabled) => config.cardinality.output = enabled,
1608 CardinalityToggle::Int(k) => {
1609 config.cardinality.output = true;
1610 config.cardinality.k = Some(k);
1611 }
1612 }
1613 config.notes.push(format!(
1614 "{key}={value} is a BBTools cardinality/loglog control; Rust emits a bounded output estimate when enabled"
1615 ));
1616 }
1617 "buckets" | "loglogbuckets" => {
1618 let buckets = parse_cardinality_buckets(value, key)?;
1619 config.cardinality.buckets = buckets;
1620 config.notes.push(format!(
1621 "{key}={value} is a BBTools cardinality/loglog bucket control; Rust applies it to bounded cardinality estimates"
1622 ));
1623 }
1624 "loglogk" | "cardinalityk" | "kcardinality" => {
1625 config.cardinality.k = Some(parse_cardinality_k(value, key)?);
1626 config.notes.push(format!(
1627 "{key}={value} is a BBTools cardinality/loglog numeric control; Rust applies it to bounded cardinality estimates"
1628 ));
1629 }
1630 "loglogbits" | "loglogmantissa" => {
1631 let _ = parse_i32(value, key)?;
1632 config.notes.push(format!(
1633 "{key}={value} is a BBTools cardinality/loglog numeric control; Rust accepts it while using compact byte registers"
1634 ));
1635 }
1636 "loglogklist" => {
1637 let mut first_k = None;
1638 for part in value.split(',') {
1639 let trimmed = part.trim();
1640 if trimmed.is_empty() {
1641 bail!("{key} expects a comma-separated integer list, got {value}");
1642 }
1643 let k = parse_cardinality_k(trimmed, key)?;
1644 first_k.get_or_insert(k);
1645 }
1646 config.cardinality.k = first_k;
1647 config.notes.push(format!(
1648 "{key}={value} is a BBTools cardinality/loglog k-list; Rust uses the first k for bounded cardinality estimates"
1649 ));
1650 }
1651 "loglogseed" => {
1652 config.cardinality.seed = parse_cardinality_seed(value, key)?;
1653 config.notes.push(format!(
1654 "{key}={value} is a BBTools cardinality/loglog seed; Rust applies it to bounded cardinality estimates"
1655 ));
1656 }
1657 "loglogminprob" => {
1658 let min_probability = parse_f64(value, key)?;
1659 if !(0.0..=1.0).contains(&min_probability) {
1660 bail!("{key} expects a probability between 0 and 1, got {value}");
1661 }
1662 config.cardinality.min_probability = min_probability;
1663 config.notes.push(format!(
1664 "{key}={value} is a BBTools cardinality/loglog probability threshold; Rust records it for bounded cardinality estimates"
1665 ));
1666 }
1667 "loglogtype" => {
1668 config.notes.push(format!(
1669 "{key}={value} is a BBTools cardinality/loglog estimator type; Rust uses its compact bounded estimator"
1670 ));
1671 }
1672 "loglogcorrection" | "loglogcf" | "loglogmean" | "loglogmedian" | "loglogmwa"
1673 | "logloghmean" | "logloggmean" | "loglogcounts" | "loglogcount" => {
1674 let _ = parse_java_bool(value);
1675 config.notes.push(format!(
1676 "{key}={value} is a BBTools cardinality/loglog output-control toggle; Rust emits compact summary estimates"
1677 ));
1678 }
1679 "countup" => {
1680 config.count_up = parse_bool(value, key)?;
1681 if !config.count_up {
1682 config.notes.push(
1683 "countup=f selected; standard single-pass normalization remains active"
1684 .to_string(),
1685 );
1686 }
1687 }
1688 "bits" | "cbits" | "cellbits" => {
1689 let bits = parse_kcount_cell_bits(value, key)?;
1690 config.count_min.bits = Some(bits);
1691 config.notes.push(format!(
1692 "{key}={bits} is a BBTools count-min cell-width control; constrained Rust count-min tables use it for saturation"
1693 ));
1694 }
1695 "bits1" | "cbits1" | "cellbits1" => {
1696 let bits = parse_kcount_cell_bits(value, key)?;
1697 config.count_min_bits_first = Some(bits);
1698 config.notes.push(format!(
1699 "{key}={bits} is a BBTools first/intermediate-pass sketch width control; Rust uses it for multipass bounded sketches"
1700 ));
1701 }
1702 "hashes" => {
1703 let hashes = parse_kcount_hashes(value, key)?;
1704 config.count_min.hashes = Some(hashes);
1705 config.notes.push(format!(
1706 "hashes={hashes} is a BBTools count-min hashing control; constrained Rust count-min tables use it for collision estimates"
1707 ));
1708 }
1709 "cells" | "matrixbits" => {
1710 let cells = if key == "matrixbits" {
1711 parse_matrixbits_cells(value, key)?
1712 } else {
1713 parse_positive_kmg_usize(value, key)?
1714 };
1715 config.count_min.cells = Some(cells.max(1));
1716 config.notes.push(format!(
1717 "{key}={value} is a BBTools count-min table-sizing control; Rust treats it as a total-cell budget and builds a fixed-memory count-min input sketch"
1718 ));
1719 }
1720 "sketchmemory" | "sketchmem" | "countminmemory" | "countminmem" | "cmem" => {
1721 let bytes = parse_positive_kmg_usize(value, key)?;
1722 config.count_min.memory_bytes = Some(bytes);
1723 config.notes.push(format!(
1724 "{key}={value} is a Rust count-min memory budget; Rust sizes the fixed-memory input sketch from this budget when cells/matrixbits are not set"
1725 ));
1726 }
1727 "maxcountupspillbytes"
1728 | "maxcountupspilllivebytes"
1729 | "countupspillbytes"
1730 | "countupspilllimit" => {
1731 let bytes = parse_kmg_usize(value, key)?;
1732 config.max_countup_spill_live_bytes = Some(bytes as u64);
1733 config.notes.push(format!(
1734 "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if peak live spill bytes exceed {bytes}"
1735 ));
1736 }
1737 "maxcountupspillfinallivebytes"
1738 | "maxcountupspillfinalbytes"
1739 | "countupspillfinallivebytes" => {
1740 let bytes = parse_kmg_usize(value, key)?;
1741 config.max_countup_spill_final_live_bytes = Some(bytes as u64);
1742 config.notes.push(format!(
1743 "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if current/final live spill bytes exceed {bytes}"
1744 ));
1745 }
1746 "maxcountupspillinitialruns" | "countupspillinitialruns" => {
1747 let runs = parse_kmg_usize(value, key)?;
1748 config.max_countup_spill_initial_runs = Some(runs);
1749 config.notes.push(format!(
1750 "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if initial spill run count exceeds {runs}"
1751 ));
1752 }
1753 "maxcountupspillmergeruns" | "countupspillmergeruns" => {
1754 let runs = parse_kmg_usize(value, key)?;
1755 config.max_countup_spill_merge_runs = Some(runs);
1756 config.notes.push(format!(
1757 "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if merge spill run count exceeds {runs}"
1758 ));
1759 }
1760 "maxcountupspillfinalruns" | "maxcountupspillruns" | "countupspillfinalruns" => {
1761 let runs = parse_kmg_usize(value, key)?;
1762 config.max_countup_spill_final_runs = Some(runs);
1763 config.notes.push(format!(
1764 "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if live/final spill run count exceeds {runs}"
1765 ));
1766 }
1767 "maxcountupspillwritebytes" | "maxcountupspillwrittenbytes" | "countupspillwritebytes" => {
1768 let bytes = parse_kmg_usize(value, key)?;
1769 config.max_countup_spill_write_bytes = Some(bytes as u64);
1770 config.notes.push(format!(
1771 "{key}={value} is a Rust count-up temp-spill I/O safety cap; Rust aborts count-up if cumulative spill bytes written exceed {bytes}"
1772 ));
1773 }
1774 "memory" | "mem" | "ram" | "maxmemory" | "maxmem" | "xmx" => {
1775 let bytes = parse_positive_kmg_usize(value, key)?;
1776 config.auto_count_min_memory_bytes = Some(bytes);
1777 config.auto_count_min = true;
1778 config.notes.push(format!(
1779 "{key}={value} is a BBTools-style memory budget; automatic Rust count-min sizing uses it for large inputs"
1780 ));
1781 }
1782 "autocountmin" | "autosketch" | "autosketchtable" | "autosketchtables" => {
1783 config.auto_count_min = parse_bool(value, key)?;
1784 config.notes.push(format!(
1785 "{key}={} controls Rust's large-input automatic bounded count-min table selection",
1786 config.auto_count_min
1787 ));
1788 }
1789 "exact" | "exactcount" | "exactcounts" | "useexact" | "sketchexact" => {
1790 config.force_exact_counts = parse_bool(value, key)?;
1791 config.notes.push(format!(
1792 "{key}={} forces Rust exact-count maps and disables automatic/explicit count-min sketches",
1793 config.force_exact_counts
1794 ));
1795 }
1796 "autosketchbytes" | "autosketchminbytes" | "autocountminbytes" | "autocountminminbytes" => {
1797 config.auto_count_min_input_bytes = parse_positive_kmg_usize(value, key)?;
1798 config.notes.push(format!(
1799 "{key}={value} sets the compressed/uncompressed input-size trigger for automatic Rust count-min tables"
1800 ));
1801 }
1802 "autosketchtablereads" | "autocountminreads" | "autosketchtablereadthreshold" => {
1803 config.auto_count_min_read_threshold = parse_u64(value, key)?.max(1);
1804 config.notes.push(format!(
1805 "{key}={value} sets the read-limit trigger for automatic Rust count-min tables"
1806 ));
1807 }
1808 "precells" | "prefiltercells" => {
1809 let cells = parse_kmg_usize(value, key)?;
1810 config.prefilter.cells = (cells > 0).then_some(cells);
1811 if cells == 0 {
1812 config.notes.push(format!(
1813 "{key}=0 is a BBTools prefilter sketch control; Rust leaves prefilter cells unset unless prefiltering is otherwise requested"
1814 ));
1815 } else {
1816 config.prefilter.enabled = true;
1817 config.prefilter.force_disabled = false;
1818 config.notes.push(format!(
1819 "{key}={value} is a BBTools prefilter sketch control; Rust applies deterministic prefilter collision estimates when prefilter cells are constrained"
1820 ));
1821 }
1822 }
1823 "prefiltersize" | "prefilterfraction" => {
1824 let fraction = parse_fraction_micros(value, key)?;
1825 config.prefilter.memory_fraction_micros = (fraction > 0).then_some(fraction);
1826 config.prefilter.enabled = fraction > 0;
1827 config.prefilter.force_disabled = fraction == 0;
1828 if fraction == 0 {
1829 config.notes.push(format!(
1830 "{key}=0 is a BBTools prefilter sketch control; Rust disables fraction-derived prefilter sizing"
1831 ));
1832 } else {
1833 config.notes.push(format!(
1834 "{key}={value} is a BBTools prefilter sketch control; Rust derives deterministic prefilter collision memory from the configured table memory budget"
1835 ));
1836 }
1837 }
1838 "prefilterbits" | "prebits" | "pbits" => {
1839 let bits = parse_kcount_cell_bits(value, key)?;
1840 config.prefilter.bits = Some(bits);
1841 config.notes.push(format!(
1842 "{key}={value} is a BBTools prefilter sketch control; Rust uses it with constrained prefilter cells"
1843 ));
1844 }
1845 "prehashes" | "prefilterhashes" => {
1846 let hashes = parse_prefilter_hashes(value, key)?;
1847 config.prefilter.hashes = (hashes > 0).then_some(hashes);
1848 if hashes == 0 {
1849 config.notes.push(format!(
1850 "{key}=0 is a BBTools prefilter sketch control; Rust leaves prefilter hashes unset unless prefiltering is otherwise requested"
1851 ));
1852 } else {
1853 config.prefilter.enabled = true;
1854 config.prefilter.force_disabled = false;
1855 config.notes.push(format!(
1856 "{key}={value} is a BBTools prefilter sketch control; Rust applies deterministic prefilter collision estimates with explicit or implicit prefilter cells"
1857 ));
1858 }
1859 }
1860 "buildpasses" => {
1861 let build_passes = parse_i64(value, key)?;
1862 if build_passes <= 0 {
1863 bail!("{key} expects a positive integer, got {value}");
1864 }
1865 config.build_passes = usize::try_from(build_passes)
1866 .map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))?;
1867 config.notes.push(format!(
1868 "{key}={build_passes} is a BBTools table-construction pass control; Rust applies deterministic trusted-kmer filtering when buildpasses is greater than 1"
1869 ));
1870 }
1871 "initialsize" => {
1872 let initial_size = parse_positive_kmg_usize(value, key)?;
1873 config.table_initial_size = Some(initial_size);
1874 config.notes.push(format!(
1875 "{key}={value} is a BBTools kmer-table runtime sizing control; Rust pre-reserves exact-count table capacity when practical"
1876 ));
1877 }
1878 "ways" => {
1879 let _ = parse_kmg_i64(value, key)?;
1880 config.notes.push(format!(
1881 "{key}={value} is a BBTools kmer-table runtime sizing control; exact Rust counting keeps native map sharding"
1882 ));
1883 }
1884 "buflen" | "bufflen" | "bufferlength" => {
1885 let _ = parse_kmg_i64(value, key)?;
1886 config.notes.push(format!(
1887 "{key}={value} is a BBTools kmer-table buffer-length control; covered Rust output records are unchanged"
1888 ));
1889 }
1890 "tabletype" => {
1891 let _ = parse_i32(value, key)?;
1892 config.notes.push(format!(
1893 "{key}={value} is a BBTools kmer-table implementation control; exact Rust counting uses its native map"
1894 ));
1895 }
1896 "rcomp" | "maskmiddle" => {
1897 let _ = parse_java_bool(value);
1898 config.notes.push(format!(
1899 "{key}={value} is a BBTools kmer-table matching control; covered Rust BBNorm canonical/exact-count behavior is unchanged"
1900 ));
1901 }
1902 "showstats" | "stats" | "showspeed" | "ss" | "verbose2" => {
1903 let _ = parse_java_bool(value);
1904 config.notes.push(format!(
1905 "{key}={value} is a BBTools kmer-table reporting control; covered Rust output records are unchanged"
1906 ));
1907 }
1908 "prealloc" | "preallocate" => {
1909 config.table_prealloc_fraction = parse_preallocation_fraction(value, key)?;
1910 config.notes.push(format!(
1911 "{key}={value} is a BBTools kmer-table preallocation control; Rust pre-reserves exact-count table capacity when practical"
1912 ));
1913 }
1914 "filtermemory" | "prefiltermemory" | "filtermem" | "filtermemoryoverride" => {
1915 let bytes = parse_positive_kmg_usize(value, key)?;
1916 config.prefilter.memory_bytes = Some(bytes);
1917 config.prefilter.enabled = true;
1918 config.prefilter.force_disabled = false;
1919 config.notes.push(format!(
1920 "{key}={value} is a BBTools prefilter memory-sizing control; Rust sizes deterministic prefilter collision estimates from this budget when prefilter cells are not set"
1921 ));
1922 }
1923 "minprobprefilter" | "mpp" | "minprobmain" | "mpm" => {
1924 let _ = parse_java_bool(value);
1925 config.notes.push(format!(
1926 "{key}={value} is a BBTools kmer-table minprob routing control; covered Rust minprob behavior is unchanged"
1927 ));
1928 }
1929 "prefilterpasses" | "prepasses" => {
1930 parse_auto_or_kmg_i64(value, key)?;
1931 config.notes.push(format!(
1932 "{key}={value} is a BBTools prefilter pass-count control; exact Rust counting uses one deterministic table build"
1933 ));
1934 }
1935 "onepass" => {
1936 let _ = parse_java_bool(value);
1937 config.notes.push(format!(
1938 "{key}={value} is a BBTools kmer-table construction-mode control; covered Rust output remains single-pass"
1939 ));
1940 }
1941 "stepsize" | "buildstepsize" => {
1942 let _ = parse_i32(value, key)?;
1943 config.notes.push(format!(
1944 "{key}={value} is a BBTools trusted-kmer sampling control; the covered no-ECC single-pass path ignores it"
1945 ));
1946 }
1947 "prefilter" => {
1948 config.prefilter.enabled = parse_bool(value, key)?;
1949 if config.prefilter.enabled {
1950 config.prefilter.force_disabled = false;
1951 config.notes.push(
1952 "prefilter=t requested; Rust applies BBTools-style default prefilter partitioning when bounded count-min counting is selected"
1953 .to_string(),
1954 );
1955 } else {
1956 config.prefilter.force_disabled = true;
1957 config.notes.push(
1958 "prefilter=f requested; Rust disables prefilter sketch construction unless a later prefilter control re-enables it"
1959 .to_string(),
1960 );
1961 }
1962 }
1963 "auto" | "automatic" => {
1964 let enabled = parse_bool(value, key)?;
1965 config.auto_count_min = enabled;
1966 config.notes.push(format!(
1967 "{key}={enabled} is a BBTools automatic count-table sizing control; Rust uses it to select bounded count-min tables for large inputs"
1968 ));
1969 }
1970 "tmpdir" => {
1971 config.temp_dir = Some(PathBuf::from(value));
1972 config.use_temp_dir = true;
1973 config.notes.push(format!(
1974 "{key}={value} is a BBTools temporary-directory control; covered Rust multipass and stdin paths use managed temp files there when enabled"
1975 ));
1976 }
1977 "usetmpdir" | "usetempdir" => {
1978 config.use_temp_dir = parse_java_bool(value);
1979 config.notes.push(format!(
1980 "{key}={value} is a BBTools temporary-directory control; covered Rust multipass and stdin paths use managed temp files there when enabled"
1981 ));
1982 }
1983 "ordered" | "ord" | "verbose" | "printcoverage" => {
1984 config.notes.push(format!(
1985 "{key}={value} is accepted as a no-op in this Rust parity slice"
1986 ));
1987 }
1988 "append" | "app" => {
1989 config.append = parse_bool(value, key)?;
1990 }
1991 "interleaved" | "int" => {
1992 let lower = value.to_ascii_lowercase();
1993 if lower == "auto" {
1994 config.interleaved = false;
1995 config.test_interleaved = true;
1996 } else {
1997 config.interleaved = parse_bool(value, key)?;
1998 config.test_interleaved = false;
1999 }
2000 }
2001 "testinterleaved" => {
2002 config.test_interleaved = parse_bool(value, key)?;
2003 }
2004 "forceinterleaved" => {
2005 config.interleaved = parse_bool(value, key)?;
2006 config.test_interleaved = false;
2007 }
2008 "overrideinterleaved" => {
2009 let _ = parse_bool(value, key)?;
2010 config.notes.push(format!(
2011 "{key}={value} is a BBTools paired-output assertion override; covered Rust paired output is unchanged"
2012 ));
2013 }
2014 "fastareadlen" | "fastareadlength" => {
2015 if parse_u64(value, key)? != u64::MAX && value != "2147483647" {
2016 config.notes.push(
2017 "fastareadlen is accepted for KmerNormalize parity; covered FASTA records are processed as-is".to_string(),
2018 );
2019 }
2020 }
2021 "fastaminread" | "fastaminlen" | "fastaminlength" => {
2022 let _ = parse_i32(value, key)?;
2023 config.notes.push(format!(
2024 "{key}={value} is a BBTools FASTA parser control; covered KmerNormalize FASTA records are processed as-is"
2025 ));
2026 }
2027 "forcesectionname" | "fastadump" => {
2028 let _ = parse_java_bool(value);
2029 config.notes.push(format!(
2030 "{key}={value} is a BBTools FASTA parser control; covered Rust output is unchanged"
2031 ));
2032 }
2033 "sampleoutput" | "readsample" | "kmersample" => {
2034 config.notes.push(format!(
2035 "{key}={value} is advertised in bbnorm.sh but rejected by vendored KmerNormalize; Rust ignores it and keeps the supported normalization path"
2036 ));
2037 }
2038 "samplerate" | "sample" | "sampleseed" | "seed" => {
2039 config.notes.push(format!(
2040 "{key}={value} is a BBTools stream-wrapper sampling option; Rust ignores it and keeps the supported normalization path"
2041 ));
2042 }
2043 "markerrors" | "markonly" | "meo" => {
2044 config.mark_errors_only = parse_bool(value, key)?;
2045 if config.mark_errors_only {
2046 enable_error_correction_if_unset(config);
2047 }
2048 }
2049 "markuncorrectableerrors" | "markuncorrectable" | "mue" => {
2050 config.mark_uncorrectable_errors = parse_bool(value, key)?;
2051 }
2052 "tam" | "trimaftermarking" => {
2053 config.trim_after_marking = parse_bool(value, key)?;
2054 }
2055 "markwith1" | "markwithone" | "mw1" => {
2056 config.mark_with_one = parse_bool(value, key)?;
2057 }
2058 "aec" | "aecc" | "aggressiveerrorcorrection" => {
2059 let enabled = parse_bool(value, key)?;
2060 if enabled {
2061 config.error_correct = true;
2062 config.error_correct_first = true;
2063 config.error_correct_final = true;
2064 config.error_correct_high_thresh = config.error_correct_high_thresh.min(16);
2065 config.error_correct_low_thresh = config.error_correct_low_thresh.max(3);
2066 config.error_correct_ratio = config.error_correct_ratio.min(100);
2067 config.max_errors_to_correct = config.max_errors_to_correct.max(7);
2068 config.suffix_len = config.suffix_len.min(3);
2069 config.prefix_len = config.prefix_len.min(2);
2070 }
2071 }
2072 "cec" | "cecc" | "conservativeerrorcorrection" => {
2073 let enabled = parse_bool(value, key)?;
2074 if enabled {
2075 config.error_correct = true;
2076 config.error_correct_first = true;
2077 config.error_correct_final = true;
2078 config.error_correct_high_thresh = config.error_correct_high_thresh.max(30);
2079 config.error_correct_low_thresh = config.error_correct_low_thresh.min(1);
2080 config.error_correct_ratio = config.error_correct_ratio.max(170);
2081 config.max_errors_to_correct = config.max_errors_to_correct.min(2);
2082 config.max_quality_to_correct = config.max_quality_to_correct.min(25);
2083 config.suffix_len = config.suffix_len.max(4);
2084 config.prefix_len = config.prefix_len.max(4);
2085 }
2086 }
2087 "ecc" => {
2088 let enabled = parse_bool(value, key)?;
2089 config.error_correct = enabled;
2090 config.error_correct_first = enabled;
2091 config.error_correct_final = enabled;
2092 config.overlap_error_correct &= enabled;
2093 config.overlap_error_correct_auto &= enabled;
2094 }
2095 "ecc1" => {
2096 config.error_correct_first = parse_bool(value, key)?;
2097 config.error_correct = config.error_correct_first || config.error_correct_final;
2098 }
2099 "ecc2" | "eccf" => {
2100 config.error_correct_final = parse_bool(value, key)?;
2101 config.error_correct = config.error_correct_first || config.error_correct_final;
2102 }
2103 "eccbyoverlap" | "ecco" | "overlap" => {
2104 if value.eq_ignore_ascii_case("auto") {
2105 config.notes.push(format!(
2106 "{key}=auto requests automatic overlap-based error correction; Rust samples paired reads and enables paired overlap repair when the overlap fraction is high"
2107 ));
2108 config.error_correct = true;
2109 config.error_correct_first = true;
2110 config.error_correct_final = true;
2111 config.overlap_error_correct = false;
2112 config.overlap_error_correct_auto = true;
2113 } else if parse_bool(value, key)? {
2114 config.notes.push(format!(
2115 "{key}={value} requests overlap-based error correction; Rust uses paired overlap repair before the table-based ECC path"
2116 ));
2117 config.error_correct = true;
2118 config.error_correct_first = true;
2119 config.error_correct_final = true;
2120 config.overlap_error_correct = true;
2121 config.overlap_error_correct_auto = false;
2122 } else {
2123 config.overlap_error_correct = false;
2124 config.overlap_error_correct_auto = false;
2125 }
2126 }
2127 "ecclimit" => config.max_errors_to_correct = parse_usize(value, key)?,
2128 "eccmaxqual" => config.max_quality_to_correct = parse_u8(value, key)?,
2129 "errorcorrectratio" | "ecr" => config.error_correct_ratio = parse_u64(value, key)?,
2130 "echighthresh" | "echthresh" | "echt" => {
2131 config.error_correct_high_thresh = parse_u64(value, key)?
2132 }
2133 "eclowthresh" | "eclthresh" | "eclt" => {
2134 config.error_correct_low_thresh = parse_u64(value, key)?
2135 }
2136 "sl" | "suflen" | "suffixlen" => config.suffix_len = parse_usize(value, key)?,
2137 "pl" | "prelen" | "prefixlen" => config.prefix_len = parse_usize(value, key)?,
2138 "cfl" => config.correct_from_left = parse_bool(value, key)?,
2139 "cfr" => config.correct_from_right = parse_bool(value, key)?,
2140 "target1" | "targetdepth1" | "tgt1" => {
2141 config.target_depth_first = Some(parse_u64(value, key)?);
2142 }
2143 "targetbadpercentilelow" | "tbpl" => {
2144 let value = parse_percent(value, key)?;
2145 config.target_bad_percent_low = value;
2146 config.target_bad_percent_high = config.target_bad_percent_high.max(value);
2147 }
2148 "targetbadpercentilehigh" | "tbph" => {
2149 let value = parse_percent(value, key)?;
2150 config.target_bad_percent_high = value;
2151 config.target_bad_percent_low = config.target_bad_percent_low.min(value);
2152 }
2153 "abrc" | "addbadreadscountup" => {
2154 config.add_bad_reads_countup = parse_bool(value, key)?;
2155 }
2156 _ => bail!("unknown or unsupported BBNorm option: {key}={value}"),
2157 }
2158 Ok(())
2159}
2160
2161fn validate(config: &mut Config) -> Result<()> {
2162 if config.in1.is_none() {
2163 bail!("missing input: provide in=<reads.fq>");
2164 }
2165 if !(1..=4).contains(&config.passes) {
2166 bail!("passes should be in range 1 through 4");
2167 }
2168 expand_hash_paired_input(config);
2169 validate_extra_inputs(config)?;
2170 if config.k == 0 {
2171 bail!("k must be greater than zero");
2172 }
2173 if !(0.0..1.0).contains(&config.min_prob) && (config.min_prob - 1.0).abs() > f64::EPSILON {
2174 bail!("minprob must be between 0 and 1");
2175 }
2176 if config.target_depth == 0 {
2177 bail!("target depth must be greater than zero");
2178 }
2179 if config.passes == 1 {
2180 config.target_bad_percent_low = 1.0;
2181 config.target_bad_percent_high = 1.0;
2182 }
2183 config.max_depth = Some(
2184 config
2185 .max_depth
2186 .unwrap_or(config.target_depth)
2187 .max(config.target_depth),
2188 );
2189 if config.error_detect_ratio == 0 {
2190 bail!("errordetectratio must be greater than zero");
2191 }
2192 if config.hist_columns == 0 || config.hist_columns > 3 {
2193 bail!("histcol must be 1, 2, or 3");
2194 }
2195 if config.hist_len < 2 {
2196 bail!("histlen must be at least 1");
2197 }
2198 if config.in2.is_some() {
2199 if config.out2.is_some() && config.out1.is_none() {
2200 bail!("out2 requires out=<file> for paired input");
2201 }
2202 if config.out_toss2.is_some() && config.out_toss1.is_none() {
2203 bail!("outt2 requires outt=<file> for paired input");
2204 }
2205 if config.out_low2.is_some() && config.out_low1.is_none() {
2206 bail!("outlow2 requires outlow=<file> for paired input");
2207 }
2208 if config.out_mid2.is_some() && config.out_mid1.is_none() {
2209 bail!("outmid2 requires outmid=<file> for paired input");
2210 }
2211 if config.out_high2.is_some() && config.out_high1.is_none() {
2212 bail!("outhigh2 requires outhigh=<file> for paired input");
2213 }
2214 if config.out_uncorrected2.is_some() && config.out_uncorrected1.is_none() {
2215 bail!("outuncorrected2 requires outuncorrected=<file> for paired input");
2216 }
2217 } else if config.interleaved {
2218 if config.out2.is_some() && config.out1.is_none() {
2219 bail!("out2 requires out=<file> for interleaved input");
2220 }
2221 if config.out_toss2.is_some() && config.out_toss1.is_none() {
2222 bail!("outt2 requires outt=<file> for interleaved input");
2223 }
2224 if config.out_low2.is_some() && config.out_low1.is_none() {
2225 bail!("outlow2 requires outlow=<file> for interleaved input");
2226 }
2227 if config.out_mid2.is_some() && config.out_mid1.is_none() {
2228 bail!("outmid2 requires outmid=<file> for interleaved input");
2229 }
2230 if config.out_high2.is_some() && config.out_high1.is_none() {
2231 bail!("outhigh2 requires outhigh=<file> for interleaved input");
2232 }
2233 if config.out_uncorrected2.is_some() && config.out_uncorrected1.is_none() {
2234 bail!("outuncorrected2 requires outuncorrected=<file> for interleaved input");
2235 }
2236 } else if !config.test_interleaved && (config.out2.is_some() || config.out_toss2.is_some()) {
2237 bail!("out2/outt2 require paired input with in2=<file> or interleaved=t");
2238 } else if !config.test_interleaved
2239 && (config.out_low2.is_some()
2240 || config.out_mid2.is_some()
2241 || config.out_high2.is_some()
2242 || config.out_uncorrected2.is_some())
2243 {
2244 bail!(
2245 "outlow2/outmid2/outhigh2/outuncorrected2 require paired input with in2=<file> or interleaved=t"
2246 );
2247 }
2248 Ok(())
2249}
2250
2251fn validate_extra_inputs(config: &Config) -> Result<()> {
2252 for extra in &config.extra {
2253 if !extra.exists() || !extra.is_file() {
2254 bail!("extra input {} does not exist", extra.display());
2255 }
2256 }
2257 Ok(())
2258}
2259
2260fn expand_hash_paired_input(config: &mut Config) {
2261 let Some(input) = config.in1.as_ref() else {
2262 return;
2263 };
2264 if input.exists() {
2265 return;
2266 }
2267 let text = input.to_string_lossy().into_owned();
2268 if !text.contains('#') {
2269 return;
2270 }
2271
2272 config.in1 = Some(PathBuf::from(text.replacen('#', "1", 1)));
2273 config.in2 = Some(PathBuf::from(text.replacen('#', "2", 1)));
2274}
2275
/// Converts an option value into an owned filesystem path.
fn path(value: &str) -> PathBuf {
    value.into()
}
2279
/// Splits a comma-separated list into paths.
///
/// Each entry is trimmed of surrounding whitespace; entries that are empty
/// after trimming are dropped.
fn split_paths(value: &str) -> Vec<PathBuf> {
    let mut paths = Vec::new();
    for entry in value.split(',') {
        let entry = entry.trim();
        if !entry.is_empty() {
            paths.push(PathBuf::from(entry));
        }
    }
    paths
}
2287
2288fn extra_paths(value: &str) -> Vec<PathBuf> {
2289 let trimmed = value.trim();
2290 if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("null") {
2291 return Vec::new();
2292 }
2293 let literal = PathBuf::from(trimmed);
2294 if literal.exists() {
2295 vec![literal]
2296 } else {
2297 split_paths(trimmed)
2298 }
2299}
2300
2301fn parse_bool(value: &str, key: &str) -> Result<bool> {
2302 match value.to_ascii_lowercase().as_str() {
2303 "t" | "true" | "1" | "y" | "yes" => Ok(true),
2304 "f" | "false" | "0" | "n" | "no" => Ok(false),
2305 _ => bail!("{key} expects a boolean value, got {value}"),
2306 }
2307}
2308
/// Strips a single trailing `_p1` or `_p2` suffix from `key`, if present.
///
/// Keys without either suffix are returned unchanged.
fn quality_recal_base_key(key: &str) -> &str {
    for suffix in ["_p1", "_p2"] {
        if let Some(base) = key.strip_suffix(suffix) {
            return base;
        }
    }
    key
}
2314
2315fn is_quality_recal_bool_key(key: &str) -> bool {
2316 matches!(
2317 quality_recal_base_key(key),
2318 "trackall"
2319 | "clearmatrices"
2320 | "loadq102"
2321 | "loadqap"
2322 | "loadqbp"
2323 | "loadqpt"
2324 | "loadqbt"
2325 | "loadq10"
2326 | "loadq12"
2327 | "loadqb12"
2328 | "loadqb012"
2329 | "loadqb123"
2330 | "loadqb234"
2331 | "loadq12b12"
2332 | "loadqp"
2333 | "loadq"
2334 | "recalwithposition"
2335 | "recalwithpos"
2336 | "recalusepos"
2337 | "recaltile"
2338 | "recaltiles"
2339 | "usetiles"
2340 )
2341}
2342
/// Lenient boolean parsing that never fails.
///
/// - an empty value is `true`;
/// - a one-byte value is `true` only for `t`/`T`/`1`;
/// - any longer value is `true` only when it equals `true` ignoring ASCII
///   case (so `null`, `none`, `yes`, ... are all `false`).
fn parse_java_bool(value: &str) -> bool {
    match value.as_bytes() {
        [] => true,
        [only] => matches!(only.to_ascii_lowercase(), b't' | b'1'),
        _ => value.eq_ignore_ascii_case("true"),
    }
}
2356
2357fn parse_mpi_enabled(value: &str, key: &str) -> Result<bool> {
2358 if value
2359 .as_bytes()
2360 .first()
2361 .is_some_and(|byte| byte.is_ascii_digit())
2362 {
2363 Ok(parse_i32(value, key)? > 0)
2364 } else {
2365 Ok(parse_java_bool(value))
2366 }
2367}
2368
/// Result of parsing a cardinality option that accepts either a boolean or
/// an integer (see `parse_cardinality_bool_or_int`).
enum CardinalityToggle {
    /// The value was a boolean accepted by `parse_bool`.
    Bool(bool),
    /// The value started with a digit and parsed as a positive integer via
    /// `parse_cardinality_k`.
    Int(usize),
}
2373
2374fn parse_cardinality_bool_or_int(value: &str, key: &str) -> Result<CardinalityToggle> {
2375 if value
2376 .as_bytes()
2377 .first()
2378 .is_some_and(|byte| byte.is_ascii_digit())
2379 {
2380 Ok(CardinalityToggle::Int(parse_cardinality_k(value, key)?))
2381 } else {
2382 Ok(CardinalityToggle::Bool(parse_bool(value, key)?))
2383 }
2384}
2385
2386fn parse_cardinality_k(value: &str, key: &str) -> Result<usize> {
2387 let parsed = parse_i32(value, key)?;
2388 if parsed <= 0 {
2389 bail!("{key} expects a positive integer, got {value}");
2390 }
2391 usize::try_from(parsed).map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))
2392}
2393
2394fn parse_cardinality_buckets(value: &str, key: &str) -> Result<usize> {
2395 let buckets = parse_kmg_i64(value, key)?;
2396 if buckets <= 0 {
2397 bail!("{key} expects a positive KMG value, got {value}");
2398 }
2399 let buckets = usize::try_from(buckets)
2400 .map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))?;
2401 if buckets > CARDINALITY_MAX_BUCKETS {
2402 bail!(
2403 "{key} requests {buckets} cardinality buckets, above the Rust safety cap of {CARDINALITY_MAX_BUCKETS}"
2404 );
2405 }
2406 Ok(buckets)
2407}
2408
2409fn parse_cardinality_seed(value: &str, key: &str) -> Result<u64> {
2410 let parsed = parse_i64(value, key)?;
2411 if parsed < 0 {
2412 Ok(parsed as u64)
2413 } else {
2414 Ok(u64::try_from(parsed)
2415 .map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))?)
2416 }
2417}
2418
2419fn parse_kcount_cell_bits(value: &str, key: &str) -> Result<u8> {
2420 let bits = parse_i64(value, key)?;
2421 if bits <= 0 || bits > 32 || !(bits as u64).is_power_of_two() {
2422 bail!("{key} expects a power-of-two integer from 1 to 32, got {value}");
2423 }
2424 Ok(bits as u8)
2425}
2426
2427fn parse_kcount_hashes(value: &str, key: &str) -> Result<usize> {
2428 let hashes = parse_i64(value, key)?;
2429 if !(1..=8).contains(&hashes) {
2430 bail!("{key} expects an integer from 1 to 8, got {value}");
2431 }
2432 Ok(hashes as usize)
2433}
2434
2435fn parse_prefilter_hashes(value: &str, key: &str) -> Result<usize> {
2436 let hashes = parse_i64(value, key)?;
2437 if !(0..=8).contains(&hashes) {
2438 bail!("{key} expects an integer from 0 to 8, got {value}");
2439 }
2440 Ok(hashes as usize)
2441}
2442
2443fn parse_matrixbits_cells(value: &str, key: &str) -> Result<usize> {
2444 let bits = parse_i64(value, key)?;
2445 if !(1..63).contains(&bits) {
2446 bail!("{key} expects an integer exponent from 1 to 62, got {value}");
2447 }
2448 1usize
2449 .checked_shl(bits as u32)
2450 .with_context(|| format!("{key} exponent is too large for this platform: {value}"))
2451}
2452
2453fn parse_auto_or_i32(value: &str, key: &str) -> Result<()> {
2454 if !value.eq_ignore_ascii_case("auto") {
2455 let _ = parse_i32(value, key)?;
2456 }
2457 Ok(())
2458}
2459
2460fn parse_auto_or_kmg_i64(value: &str, key: &str) -> Result<()> {
2461 if !value.eq_ignore_ascii_case("auto") {
2462 let _ = parse_kmg_i64(value, key)?;
2463 }
2464 Ok(())
2465}
2466
2467fn parse_preallocation_fraction(value: &str, key: &str) -> Result<Option<f64>> {
2468 if value
2469 .as_bytes()
2470 .first()
2471 .is_some_and(|byte| byte.is_ascii_digit() || *byte == b'.')
2472 {
2473 let fraction = parse_f64(value, key)?;
2474 if !(0.0..=1.0).contains(&fraction) {
2475 bail!("{key} expects a fraction between 0 and 1 or a boolean value, got {value}");
2476 }
2477 Ok((fraction > 0.0).then_some(fraction))
2478 } else if parse_java_bool(value) {
2479 Ok(Some(1.0))
2480 } else {
2481 Ok(None)
2482 }
2483}
2484
2485fn parse_fraction_micros(value: &str, key: &str) -> Result<u32> {
2486 let fraction = parse_f64(value, key)?;
2487 if !(0.0..=1.0).contains(&fraction) {
2488 bail!("{key} expects a fraction between 0 and 1, got {value}");
2489 }
2490 Ok((fraction * 1_000_000.0).round() as u32)
2491}
2492
2493fn parse_monitor(value: &str, key: &str) -> Result<()> {
2494 if value
2495 .as_bytes()
2496 .first()
2497 .is_some_and(|byte| byte.is_ascii_digit() || *byte == b'.')
2498 {
2499 let mut parts = value.split(',');
2500 let first = parts.next().unwrap_or_default();
2501 parse_f64(first, key)?;
2502 if let Some(second) = parts.next() {
2503 parse_f64(second, key)?;
2504 }
2505 if parts.next().is_some() {
2506 bail!("{key} expects one or two numeric watchdog values, got {value}");
2507 }
2508 } else {
2509 let _ = parse_java_bool(value);
2510 }
2511 Ok(())
2512}
2513
2514fn parse_qtrim(config: &mut Config, value: &str, key: &str) -> Result<()> {
2515 let lower = value.to_ascii_lowercase();
2516 match lower.as_str() {
2517 "" => {
2518 config.trim_left = true;
2519 config.trim_right = true;
2520 }
2521 "left" | "l" => {
2522 config.trim_left = true;
2523 config.trim_right = false;
2524 }
2525 "right" | "r" => {
2526 config.trim_left = false;
2527 config.trim_right = true;
2528 }
2529 "both" | "rl" | "lr" => {
2530 config.trim_left = true;
2531 config.trim_right = true;
2532 }
2533 "window" | "w" => {
2534 config.trim_left = false;
2535 config.trim_right = true;
2536 config.trim_window = true;
2537 config.trim_optimal = false;
2538 config.trim_optimal_bias = None;
2539 }
2540 _ if lower.starts_with("window,") || lower.starts_with("w,") => {
2541 let Some((_, length)) = value.split_once(',') else {
2542 unreachable!("guard requires a comma");
2543 };
2544 config.trim_window_length = parse_usize(length, key)?;
2545 config.trim_left = false;
2546 config.trim_right = true;
2547 config.trim_window = true;
2548 config.trim_optimal = false;
2549 config.trim_optimal_bias = None;
2550 }
2551 _ if value
2552 .as_bytes()
2553 .first()
2554 .is_some_and(|byte| byte.is_ascii_digit()) =>
2555 {
2556 config.trim_quality = parse_trim_quality(config, value, key)?;
2557 config.trim_right = true;
2558 }
2559 _ => {
2560 let enabled = parse_bool(value, key)?;
2561 config.trim_left = enabled;
2562 config.trim_right = enabled;
2563 }
2564 }
2565 Ok(())
2566}
2567
2568fn parse_trim_quality(config: &mut Config, value: &str, key: &str) -> Result<f64> {
2569 if value.contains(',') {
2570 let mut parts = value.split(',');
2571 let first = parts.next().unwrap_or_default();
2572 let trim_quality = parse_f64(first, key)?;
2573 for part in parts {
2574 parse_f64(part, key)?;
2575 }
2576 config.notes.push(format!(
2577 "{key}={value} requests position-specific trim qualities; Rust uses the first threshold {trim_quality} for the supported trimming path"
2578 ));
2579 return Ok(trim_quality);
2580 }
2581 parse_f64(value, key)
2582}
2583
2584fn parse_poly(value: &str, key: &str) -> Result<usize> {
2585 if value.is_empty() {
2586 bail!("{key} expects a polymer threshold or boolean value");
2587 }
2588 if value
2589 .as_bytes()
2590 .first()
2591 .is_some_and(|byte| byte.is_ascii_digit())
2592 {
2593 parse_usize(value, key)
2594 } else {
2595 Ok(if parse_bool(value, key)? { 2 } else { 0 })
2596 }
2597}
2598
2599fn parse_optitrim(config: &mut Config, value: &str, key: &str) -> Result<()> {
2600 if value
2601 .as_bytes()
2602 .first()
2603 .is_some_and(|byte| *byte == b'.' || byte.is_ascii_digit())
2604 {
2605 let bias = parse_f64(value, key)?;
2606 if !(0.0..1.0).contains(&bias) {
2607 bail!("{key} bias must be greater than or equal to 0 and less than 1");
2608 }
2609 config.trim_optimal = true;
2610 config.trim_optimal_bias = Some(bias);
2611 } else {
2612 config.trim_optimal = parse_bool(value, key)?;
2613 config.trim_optimal_bias = None;
2614 }
2615 Ok(())
2616}
2617
2618fn enable_error_correction_if_unset(config: &mut Config) {
2619 if !config.error_correct_first && !config.error_correct_final {
2620 config.error_correct_first = true;
2621 }
2622 config.error_correct = config.error_correct_first || config.error_correct_final;
2623}
2624
2625fn parse_u8(value: &str, key: &str) -> Result<u8> {
2626 value
2627 .parse::<u8>()
2628 .map_err(|_| anyhow::anyhow!("{key} expects an integer, got {value}"))
2629}
2630
2631fn parse_i8(value: &str, key: &str) -> Result<i8> {
2632 value
2633 .parse::<i8>()
2634 .map_err(|_| anyhow::anyhow!("{key} expects a byte integer, got {value}"))
2635}
2636
2637fn parse_usize(value: &str, key: &str) -> Result<usize> {
2638 value
2639 .parse::<usize>()
2640 .map_err(|_| anyhow::anyhow!("{key} expects a non-negative integer, got {value}"))
2641}
2642
2643fn parse_u64(value: &str, key: &str) -> Result<u64> {
2644 value
2645 .parse::<u64>()
2646 .map_err(|_| anyhow::anyhow!("{key} expects a non-negative integer, got {value}"))
2647}
2648
2649fn parse_i64(value: &str, key: &str) -> Result<i64> {
2650 value
2651 .parse::<i64>()
2652 .map_err(|_| anyhow::anyhow!("{key} expects an integer, got {value}"))
2653}
2654
2655fn parse_i32(value: &str, key: &str) -> Result<i32> {
2656 value
2657 .parse::<i32>()
2658 .map_err(|_| anyhow::anyhow!("{key} expects an integer, got {value}"))
2659}
2660
2661fn parse_i32_clamped(value: &str, key: &str, min: i32, max: i32) -> Result<i32> {
2662 Ok(parse_i32(value, key)?.clamp(min, max))
2663}
2664
2665fn parse_f64(value: &str, key: &str) -> Result<f64> {
2666 value
2667 .parse::<f64>()
2668 .map_err(|_| anyhow::anyhow!("{key} expects a number, got {value}"))
2669}
2670
2671fn parse_min_average_quality(value: &str, key: &str) -> Result<()> {
2672 let mut parts = value.split(',');
2673 let quality = parts.next().unwrap_or_default();
2674 parse_f64(quality, key)?;
2675 if let Some(bases) = parts.next() {
2676 parse_usize(bases, key)?;
2677 }
2678 if parts.next().is_some() {
2679 bail!("{key} expects quality or quality,bases, got {value}");
2680 }
2681 Ok(())
2682}
2683
2684fn parse_quality_offset(value: &str, key: &str) -> Result<Option<u8>> {
2685 match value.to_ascii_lowercase().as_str() {
2686 "auto" => Ok(None),
2687 "sanger" => Ok(Some(33)),
2688 "illumina" => Ok(Some(64)),
2689 "33" => Ok(Some(33)),
2690 "64" => Ok(Some(64)),
2691 _ => bail!("{key} expects auto, sanger, illumina, 33, or 64, got {value}"),
2692 }
2693}
2694
2695fn parse_fake_fasta_quality(config: &mut Config, value: &str) -> Result<()> {
2696 if value.is_empty() {
2697 return Ok(());
2698 }
2699 if value.as_bytes()[0].is_ascii_alphabetic() {
2700 let _ = parse_bool(value, "fakefastaquality")?;
2701 return Ok(());
2702 }
2703
2704 let parsed = parse_i32(value, "fakefastaquality")?;
2705 if parsed > 0 {
2706 config.fake_quality = parsed.min(50) as u8;
2707 }
2708 Ok(())
2709}
2710
2711fn parse_fasta_wrap(value: &str, key: &str) -> Result<usize> {
2712 let parsed = parse_kmg_i64(value, key)?;
2713 if parsed < 1 {
2714 Ok(0)
2715 } else {
2716 usize::try_from(parsed).map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))
2717 }
2718}
2719
2720fn parse_junk_mode(config: &mut Config, value: &str) -> Result<()> {
2721 match value.to_ascii_lowercase().as_str() {
2722 "ignore" => {
2723 config.fix_junk_and_iupac = false;
2724 config.junk_mode = JunkMode::Ignore;
2725 }
2726 "crash" | "fail" => {
2727 config.fix_junk_and_iupac = false;
2728 config.junk_mode = JunkMode::Crash;
2729 }
2730 "fix" => {
2731 config.fix_junk_and_iupac = false;
2732 config.junk_mode = JunkMode::Fix;
2733 }
2734 "flag" | "discard" => {
2735 config.fix_junk_and_iupac = false;
2736 config.junk_mode = JunkMode::Flag;
2737 }
2738 "iupacton" => {
2739 config.fix_junk_and_iupac = true;
2740 config.junk_mode = JunkMode::Fix;
2741 }
2742 _ => {
2743 bail!("junk expects ignore, crash, fail, fix, flag, discard, or iupacton, got {value}")
2744 }
2745 }
2746 Ok(())
2747}
2748
2749fn parse_percent(value: &str, key: &str) -> Result<f64> {
2750 let mut parsed = parse_f64(value, key)?;
2751 if parsed > 1.0 && parsed <= 100.0 {
2752 parsed /= 100.0;
2753 }
2754 if !(0.0..=1.0).contains(&parsed) {
2755 bail!("{key} must be between 0 and 100");
2756 }
2757 Ok(parsed)
2758}
2759
2760fn parse_limit(value: &str, key: &str) -> Result<Option<u64>> {
2761 let parsed = parse_kmg_i64(value, key)?;
2762 if parsed < 0 {
2763 Ok(None)
2764 } else {
2765 Ok(Some(parsed as u64))
2766 }
2767}
2768
2769fn parse_kmg_i64(value: &str, key: &str) -> Result<i64> {
2770 let lower = value.to_ascii_lowercase();
2771 if matches!(lower.as_str(), "big" | "inf" | "infinity" | "max" | "huge") {
2772 return Ok(i64::MAX);
2773 }
2774
2775 let Some(last) = lower.chars().last() else {
2776 bail!("{key} expects an integer or KMG value, got {value}");
2777 };
2778 let (number, multiplier) = match last {
2779 'k' => (&value[..value.len() - 1], 1_000_f64),
2780 'm' => (&value[..value.len() - 1], 1_000_000_f64),
2781 'g' | 'b' => (&value[..value.len() - 1], 1_000_000_000_f64),
2782 't' => (&value[..value.len() - 1], 1_000_000_000_000_f64),
2783 'p' | 'q' => (&value[..value.len() - 1], 1_000_000_000_000_000_f64),
2784 'e' => (&value[..value.len() - 1], 1_000_000_000_000_000_000_f64),
2785 'c' | 'h' => (&value[..value.len() - 1], 100_f64),
2786 'd' => (&value[..value.len() - 1], 10_f64),
2787 _ if last.is_ascii_alphabetic() => {
2788 bail!("{key} has an unsupported KMG suffix in {value}");
2789 }
2790 _ => (value, 1_f64),
2791 };
2792
2793 if number
2794 .chars()
2795 .last()
2796 .is_some_and(|char| char.is_ascii_alphabetic())
2797 {
2798 bail!("{key} has too many suffix letters in {value}");
2799 }
2800
2801 let parsed = if number.contains('.') || multiplier != 1.0 {
2802 let scaled = number
2803 .parse::<f64>()
2804 .map_err(|_| anyhow::anyhow!("{key} expects an integer or KMG value, got {value}"))?
2805 * multiplier;
2806 if scaled > i64::MAX as f64 || scaled < i64::MIN as f64 {
2807 bail!("{key} value is out of range: {value}");
2808 }
2809 scaled as i64
2810 } else {
2811 number
2812 .parse::<i64>()
2813 .map_err(|_| anyhow::anyhow!("{key} expects an integer or KMG value, got {value}"))?
2814 };
2815 Ok(parsed)
2816}
2817
2818fn parse_kmg_usize(value: &str, key: &str) -> Result<usize> {
2819 let parsed = parse_kmg_i64(value, key)?;
2820 if parsed < 0 {
2821 bail!("{key} expects a non-negative KMG value, got {value}");
2822 }
2823 usize::try_from(parsed).map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))
2824}
2825
2826fn parse_positive_kmg_usize(value: &str, key: &str) -> Result<usize> {
2827 let parsed = parse_kmg_usize(value, key)?;
2828 if parsed == 0 {
2829 bail!("{key} expects a positive KMG value, got {value}");
2830 }
2831 Ok(parsed)
2832}
2833
2834#[cfg(test)]
2835mod tests {
2836 use super::*;
2837
2838 fn parse(values: &[&str]) -> Config {
2839 let mut args: Vec<OsString> = values.iter().map(OsString::from).collect();
2840 if !values.iter().any(|value| is_pass_selector(value)) {
2841 args.push(OsString::from("passes=1"));
2842 }
2843 parse_args(args).unwrap()
2844 }
2845
2846 fn is_pass_selector(value: &str) -> bool {
2847 let lower = value.to_ascii_lowercase();
2848 matches!(lower.as_str(), "1pass" | "1p" | "2pass" | "2p")
2849 || lower.split_once('=').is_some_and(|(key, _)| {
2850 matches!(key, "passes" | "p" | "1pass" | "1p" | "2pass" | "2p")
2851 })
2852 }
2853
2854 #[test]
2855 fn implicit_bbnorm_default_keeps_two_pass_mode() {
2856 let cfg = parse_args(["in=reads.fq"].into_iter().map(OsString::from)).unwrap();
2857 assert_eq!(cfg.passes, 2);
2858 }
2859
2860 #[test]
2861 fn one_pass_aliases_select_supported_single_pass_like_bbnorm() {
2862 let cfg = parse_args(["in=reads.fq", "1pass"].into_iter().map(OsString::from)).unwrap();
2863 assert_eq!(cfg.passes, 1);
2864
2865 let cfg = parse_args(["in=reads.fq", "1pass=f"].into_iter().map(OsString::from)).unwrap();
2866 assert_eq!(cfg.passes, 1);
2867 }
2868
2869 #[test]
2870 fn two_pass_aliases_select_multipass_like_bbnorm() {
2871 let cfg = parse_args(["in=reads.fq", "2pass=f"].into_iter().map(OsString::from)).unwrap();
2872 assert_eq!(cfg.passes, 2);
2873 }
2874
2875 #[test]
2876 fn parses_core_aliases() {
2877 let cfg = parse(&[
2878 "reads.fq",
2879 "out=keep.fq",
2880 "outt=toss.fq",
2881 "hist=hist.tsv",
2882 "k=21",
2883 "min=3",
2884 "max=9",
2885 "minkmers=2",
2886 "ml=42",
2887 "dp=60",
2888 "tbr=t",
2889 "rbb=t",
2890 "srr=t",
2891 "overwrite=t",
2892 "append=t",
2893 ]);
2894 assert_eq!(cfg.in1.unwrap(), PathBuf::from("reads.fq"));
2895 assert_eq!(cfg.out1.unwrap(), PathBuf::from("keep.fq"));
2896 assert_eq!(cfg.out_toss1.unwrap(), PathBuf::from("toss.fq"));
2897 assert_eq!(cfg.hist_in.unwrap(), PathBuf::from("hist.tsv"));
2898 assert_eq!(cfg.k, 21);
2899 assert_eq!(cfg.min_depth, 3);
2900 assert_eq!(cfg.max_depth, Some(100));
2901 assert_eq!(cfg.min_kmers_over_min_depth, 2);
2902 assert_eq!(cfg.min_length, 42);
2903 assert!((cfg.depth_percentile - 0.60).abs() < f64::EPSILON);
2904 assert!(cfg.toss_error_reads);
2905 assert!(cfg.require_both_bad);
2906 assert!(cfg.save_rare_reads);
2907 assert!(cfg.overwrite);
2908 assert!(cfg.append);
2909 }
2910
2911 #[test]
2912 fn accepts_shared_input_output_file_aliases() {
2913 let cfg = parse(&[
2914 "input=reads1.fq",
2915 "input2=reads2.fq",
2916 "output=keep1.fq",
2917 "output2=keep2.fq",
2918 ]);
2919
2920 assert_eq!(cfg.in1.unwrap(), PathBuf::from("reads1.fq"));
2921 assert_eq!(cfg.in2.unwrap(), PathBuf::from("reads2.fq"));
2922 assert_eq!(cfg.out1.unwrap(), PathBuf::from("keep1.fq"));
2923 assert_eq!(cfg.out2.unwrap(), PathBuf::from("keep2.fq"));
2924 }
2925
2926 #[test]
2927 fn parses_bare_boolean_flags_like_bbnorm() {
2928 let cfg = parse_args(
2929 [
2930 "reads.fq",
2931 "prefilter",
2932 "countup",
2933 "keepall",
2934 "ecc",
2935 "ecco",
2936 "ow",
2937 ]
2938 .into_iter()
2939 .map(OsString::from),
2940 )
2941 .unwrap();
2942 assert_eq!(cfg.in1.unwrap(), PathBuf::from("reads.fq"));
2943 assert!(cfg.in2.is_none());
2944 assert!(cfg.prefilter.enabled);
2945 assert!(!cfg.prefilter.force_disabled);
2946 assert!(cfg.count_up);
2947 assert!(cfg.keep_all);
2948 assert!(cfg.error_correct);
2949 assert!(cfg.overlap_error_correct);
2950 assert!(cfg.overwrite);
2951
2952 let cfg = parse_args(
2953 ["in=x.fq", "prefilter", "prefilter=f"]
2954 .into_iter()
2955 .map(OsString::from),
2956 )
2957 .unwrap();
2958 assert!(!cfg.prefilter.enabled);
2959 assert!(cfg.prefilter.force_disabled);
2960
2961 let cfg = parse_args(
2962 ["in=x.fq", "prefilter=f", "prefilter"]
2963 .into_iter()
2964 .map(OsString::from),
2965 )
2966 .unwrap();
2967 assert!(cfg.prefilter.enabled);
2968 assert!(!cfg.prefilter.force_disabled);
2969 }
2970
2971 #[test]
2972 fn clamps_max_depth_and_minkmers_like_bbnorm() {
2973 let cfg = parse(&["in=reads.fq", "target=100", "max=50", "minkmers=0"]);
2974 assert_eq!(cfg.target_depth, 100);
2975 assert_eq!(cfg.max_depth, Some(100));
2976 assert_eq!(cfg.min_kmers_over_min_depth, 1);
2977
2978 let cfg = parse(&["in=reads.fq", "max=150", "target=100"]);
2979 assert_eq!(cfg.max_depth, Some(150));
2980 }
2981
2982 #[test]
2983 fn parses_fixspikes_aliases() {
2984 let cfg = parse(&["in=reads.fq", "fixspikes=t"]);
2985 assert!(cfg.fix_spikes);
2986
2987 let cfg = parse(&["in=reads.fq", "fs=f"]);
2988 assert!(!cfg.fix_spikes);
2989 }
2990
2991 #[test]
2992 fn parses_kmg_read_limits_like_bbnorm() {
2993 let cfg = parse(&["in=reads.fq", "reads=0.01k", "tablereads=1d"]);
2994 assert_eq!(cfg.max_reads, Some(10));
2995 assert_eq!(cfg.table_reads, Some(10));
2996
2997 let cfg = parse(&["in=reads.fq", "reads=-1", "tablereads=max"]);
2998 assert_eq!(cfg.max_reads, None);
2999 assert_eq!(cfg.table_reads, Some(i64::MAX as u64));
3000 }
3001
3002 #[test]
3003 fn parses_kmg_min_length_like_bbnorm() {
3004 let cfg = parse(&["in=reads.fq", "minlen=0.101k"]);
3005 assert_eq!(cfg.min_length, 101);
3006 }
3007
3008 #[test]
3009 fn parses_quality_trimming_like_bbnorm() {
3010 let cfg = parse(&["in=reads.fq", "qtrim=r", "trimq=10"]);
3011 assert!(!cfg.trim_left);
3012 assert!(cfg.trim_right);
3013 assert!((cfg.trim_quality - 10.0).abs() < f64::EPSILON);
3014
3015 let cfg = parse(&["in=reads.fq", "qtrim=12"]);
3016 assert!(!cfg.trim_left);
3017 assert!(cfg.trim_right);
3018 assert!((cfg.trim_quality - 12.0).abs() < f64::EPSILON);
3019
3020 let cfg = parse(&["in=reads.fq", "qtrim=r", "trimq=10,20"]);
3021 assert!(!cfg.trim_left);
3022 assert!(cfg.trim_right);
3023 assert!((cfg.trim_quality - 10.0).abs() < f64::EPSILON);
3024 assert!(cfg.notes.iter().any(|note| note.contains("trimq=10,20")));
3025
3026 let cfg = parse(&["in=reads.fq", "qtrim=12,20"]);
3027 assert!(!cfg.trim_left);
3028 assert!(cfg.trim_right);
3029 assert!((cfg.trim_quality - 12.0).abs() < f64::EPSILON);
3030 assert!(cfg.notes.iter().any(|note| note.contains("qtrim=12,20")));
3031
3032 let cfg = parse(&["in=reads.fq", "qtrim=t", "optitrim=f", "trimgoodinterval=3"]);
3033 assert!(cfg.trim_left);
3034 assert!(cfg.trim_right);
3035 assert!(!cfg.trim_optimal);
3036 assert_eq!(cfg.trim_min_good_interval, 3);
3037
3038 let cfg = parse(&["in=reads.fq", "qtrim=w,5"]);
3039 assert!(!cfg.trim_left);
3040 assert!(cfg.trim_right);
3041 assert!(cfg.trim_window);
3042 assert!(!cfg.trim_optimal);
3043 assert_eq!(cfg.trim_window_length, 5);
3044 }
3045
3046 #[test]
3047 fn parses_quality_output_offset_like_bbnorm() {
3048 let cfg = parse(&["in=reads.fq", "qin=64", "qout=64"]);
3049 assert_eq!(cfg.quality_in_offset, 64);
3050 assert_eq!(cfg.quality_out_offset, 64);
3051
3052 let cfg = parse(&["in=reads.fq", "qout=auto", "qin=sanger"]);
3053 assert_eq!(cfg.quality_in_offset, 33);
3054 assert_eq!(cfg.quality_out_offset, 33);
3055
3056 let cfg = parse(&["in=reads.fq", "qual=illumina"]);
3057 assert_eq!(cfg.quality_in_offset, 64);
3058 assert_eq!(cfg.quality_out_offset, 64);
3059
3060 let cfg = parse(&["in=reads.fq", "asciiin=64", "qualityout=64"]);
3061 assert_eq!(cfg.quality_in_offset, 64);
3062 assert_eq!(cfg.quality_out_offset, 64);
3063
3064 let cfg = parse(&["in=reads.fq", "qauto=t"]);
3065 assert_eq!(cfg.quality_in_offset, 33);
3066 assert_eq!(cfg.quality_out_offset, 33);
3067 assert!(cfg.notes.iter().any(|note| note.contains("qauto")));
3068
3069 let cfg = parse(&["in=reads.fq", "qin=64", "qauto=f", "qout=64"]);
3070 assert_eq!(cfg.quality_in_offset, 64);
3071 assert_eq!(cfg.quality_out_offset, 64);
3072 }
3073
3074 #[test]
3075 fn parses_quality_change_controls_like_bbnorm() {
3076 let cfg = parse(&[
3077 "in=reads.fq",
3078 "changequality=f",
3079 "mincalledquality=5",
3080 "maxcalledquality=30",
3081 ]);
3082 assert!(!cfg.change_quality);
3083 assert_eq!(cfg.min_called_quality, 5);
3084 assert_eq!(cfg.max_called_quality, 30);
3085
3086 let cfg = parse(&[
3087 "in=reads.fq",
3088 "cq=t",
3089 "mincalledquality=-5",
3090 "maxcalledquality=200",
3091 ]);
3092 assert!(cfg.change_quality);
3093 assert_eq!(cfg.min_called_quality, 0);
3094 assert_eq!(cfg.max_called_quality, 93);
3095
3096 let cfg = parse(&["in=reads.fq", "ignorebadquality=t"]);
3097 assert!(!cfg.change_quality);
3098
3099 let cfg = parse(&["in=reads.fq", "ibq=t"]);
3100 assert!(!cfg.change_quality);
3101
3102 let cfg = parse(&["in=reads.fq", "changequality=f", "ignorebadquality=f"]);
3103 assert!(!cfg.change_quality);
3104
3105 let cfg = parse(&["in=reads.fq", "ignorebadquality=t", "changequality=t"]);
3106 assert!(cfg.change_quality);
3107 }
3108
3109 #[test]
3110 fn parses_fake_quality_controls_like_bbnorm() {
3111 let cfg = parse(&["in=reads.fa", "fakequality=20"]);
3112 assert_eq!(cfg.fake_quality, 20);
3113
3114 let cfg = parse(&["in=reads.fa", "qfake=15"]);
3115 assert_eq!(cfg.fake_quality, 15);
3116
3117 let cfg = parse(&["in=reads.fa", "fakefastaquality=80"]);
3118 assert_eq!(cfg.fake_quality, 50);
3119
3120 let cfg = parse(&["in=reads.fa", "fakefastaquality=0"]);
3121 assert_eq!(cfg.fake_quality, 30);
3122
3123 let cfg = parse(&["in=reads.fa", "ffq=t"]);
3124 assert_eq!(cfg.fake_quality, 30);
3125 }
3126
3127 #[test]
3128 fn parses_fasta_wrap_like_bbnorm() {
3129 let cfg = parse(&["in=reads.fq"]);
3130 assert_eq!(cfg.fasta_wrap, 70);
3131
3132 let cfg = parse(&["in=reads.fq", "fastawrap=20"]);
3133 assert_eq!(cfg.fasta_wrap, 20);
3134
3135 let cfg = parse(&["in=reads.fq", "wrap=0"]);
3136 assert_eq!(cfg.fasta_wrap, 0);
3137
3138 let cfg = parse(&["in=reads.fq", "wrap=-1"]);
3139 assert_eq!(cfg.fasta_wrap, 0);
3140 }
3141
3142 #[test]
3143 fn accepts_thread_counts_like_bbnorm_as_rayon_controls() {
3144 let cfg = parse(&["in=reads.fq", "threads=2"]);
3145 assert_eq!(cfg.threads, Some(2));
3146 assert_eq!(cfg.gzip_threads, Some(2));
3147 assert!(
3148 cfg.notes
3149 .iter()
3150 .any(|note| note.contains("threads=2 accepted"))
3151 );
3152 assert!(
3153 cfg.notes
3154 .iter()
3155 .any(|note| note.contains("also enables gzip input/output workers"))
3156 );
3157
3158 let cfg = parse(&["in=reads.fq", "threads=2", "zipthreads=1"]);
3159 assert_eq!(cfg.threads, Some(2));
3160 assert_eq!(cfg.gzip_threads, Some(1));
3161
3162 let cfg = parse(&["in=reads.fq", "threads=2", "useunpigz=t"]);
3163 assert_eq!(cfg.gzip_threads, Some(2));
3164
3165 let cfg = parse(&["in=reads.fq", "t=-1"]);
3166 assert_eq!(cfg.threads, None);
3167 assert!(cfg.notes.is_empty());
3168
3169 let cfg = parse(&["in=reads.fq", "threads=auto"]);
3170 assert_eq!(cfg.threads, None);
3171 assert!(
3172 cfg.notes
3173 .iter()
3174 .any(|note| note.contains("threads=auto accepted"))
3175 );
3176
3177 let cfg = parse(&["in=reads.fq", "threads=max"]);
3178 assert_eq!(
3179 cfg.threads,
3180 Some(
3181 std::thread::available_parallelism()
3182 .map(|threads| threads.get())
3183 .unwrap_or(1)
3184 )
3185 );
3186 assert!(
3187 cfg.notes
3188 .iter()
3189 .any(|note| note.contains("threads=max accepted"))
3190 );
3191 }
3192
3193 #[test]
3194 fn accepts_build_step_size_controls_as_covered_noops() {
3195 for case in ["stepsize=2", "buildstepsize=4"] {
3196 let cfg = parse(&["in=reads.fq", case]);
3197 assert!(
3198 cfg.notes
3199 .iter()
3200 .any(|note| note.contains("trusted-kmer sampling control")),
3201 "missing trusted-kmer note for {case}: {:?}",
3202 cfg.notes
3203 );
3204 }
3205
3206 for case in ["stepsize=abc", "buildstepsize=abc"] {
3207 let err = parse_args(
3208 ["in=reads.fq", "passes=1", case]
3209 .into_iter()
3210 .map(OsString::from),
3211 )
3212 .unwrap_err()
3213 .to_string();
3214 assert!(
3215 err.contains("expects"),
3216 "unexpected error for malformed {case}: {err}"
3217 );
3218 }
3219 }
3220
3221 #[test]
3222 fn accepts_default_equivalent_sketch_controls_as_noops() {
3223 for case in [
3224 "bits=32",
3225 "bits1=16",
3226 "cbits1=16",
3227 "cellbits1=16",
3228 "hashes=3",
3229 "buildpasses=1",
3230 "prefilter=t",
3231 ] {
3232 let cfg = parse(&["in=reads.fq", case]);
3233 assert!(
3234 !cfg.notes.is_empty(),
3235 "expected an explanatory no-op note for {case}"
3236 );
3237 if case.contains("bits1") || case.contains("cbits1") || case.contains("cellbits1") {
3238 assert_eq!(
3239 cfg.count_min_bits_first,
3240 Some(16),
3241 "expected first-pass bit width for {case}"
3242 );
3243 }
3244 }
3245
3246 for case in ["bits1=abc", "cbits1=abc", "cellbits1=abc"] {
3247 let err = parse_args(
3248 ["in=reads.fq", "passes=1", case]
3249 .into_iter()
3250 .map(OsString::from),
3251 )
3252 .unwrap_err()
3253 .to_string();
3254 assert!(
3255 err.contains("expects"),
3256 "unexpected error for malformed {case}: {err}"
3257 );
3258 }
3259 }
3260
3261 #[test]
3262 fn accepts_prefilter_controls_with_constrained_sketch_settings() {
3263 let cfg = parse_args(
3264 [
3265 "in=x.fq",
3266 "passes=1",
3267 "prefiltercells=1k",
3268 "prehashes=2",
3269 "pbits=8",
3270 ]
3271 .into_iter()
3272 .map(OsString::from),
3273 )
3274 .unwrap();
3275 assert_eq!(cfg.prefilter.cells, Some(1000));
3276 assert_eq!(cfg.prefilter.hashes, Some(2));
3277 assert_eq!(cfg.prefilter.bits, Some(8));
3278 assert_eq!(cfg.prefilter.memory_bytes, None);
3279 assert_eq!(cfg.prefilter.memory_fraction_micros, None);
3280 assert!(cfg.prefilter.enabled);
3281 assert!(!cfg.prefilter.force_disabled);
3282 assert!(
3283 cfg.notes
3284 .iter()
3285 .any(|note| note.contains("prefilter collision estimates")),
3286 "expected constrained prefilter note: {:?}",
3287 cfg.notes
3288 );
3289
3290 let cfg = parse_args(
3291 ["in=x.fq", "passes=1", "precells=1k"]
3292 .into_iter()
3293 .map(OsString::from),
3294 )
3295 .unwrap();
3296 assert_eq!(cfg.prefilter.cells, Some(1000));
3297 assert!(cfg.prefilter.enabled);
3298 assert!(!cfg.prefilter.force_disabled);
3299
3300 let cfg = parse_args(
3301 ["in=x.fq", "passes=1", "precells=0"]
3302 .into_iter()
3303 .map(OsString::from),
3304 )
3305 .unwrap();
3306 assert_eq!(cfg.prefilter.cells, None);
3307 assert!(!cfg.prefilter.enabled);
3308 assert!(!cfg.prefilter.force_disabled);
3309
3310 let cfg = parse_args(
3311 ["in=x.fq", "passes=1", "prefilter=t", "prefiltercells=0"]
3312 .into_iter()
3313 .map(OsString::from),
3314 )
3315 .unwrap();
3316 assert_eq!(cfg.prefilter.cells, None);
3317 assert!(cfg.prefilter.enabled);
3318 assert!(!cfg.prefilter.force_disabled);
3319
3320 let cfg = parse_args(
3321 ["in=x.fq", "passes=1", "prefilterhashes=1"]
3322 .into_iter()
3323 .map(OsString::from),
3324 )
3325 .unwrap();
3326 assert_eq!(cfg.prefilter.hashes, Some(1));
3327 assert!(cfg.prefilter.enabled);
3328 assert!(!cfg.prefilter.force_disabled);
3329 assert!(
3330 cfg.notes
3331 .iter()
3332 .any(|note| note.contains("prefilter collision estimates")),
3333 "expected implicit prefilter note: {:?}",
3334 cfg.notes
3335 );
3336
3337 let cfg = parse_args(
3338 ["in=x.fq", "passes=1", "prehashes=0"]
3339 .into_iter()
3340 .map(OsString::from),
3341 )
3342 .unwrap();
3343 assert_eq!(cfg.prefilter.hashes, None);
3344 assert!(!cfg.prefilter.enabled);
3345 assert!(!cfg.prefilter.force_disabled);
3346
3347 let cfg = parse_args(
3348 ["in=x.fq", "passes=1", "prefilter=t", "prehashes=0"]
3349 .into_iter()
3350 .map(OsString::from),
3351 )
3352 .unwrap();
3353 assert_eq!(cfg.prefilter.hashes, None);
3354 assert!(cfg.prefilter.enabled);
3355 assert!(!cfg.prefilter.force_disabled);
3356
3357 let cfg = parse_args(
3358 ["in=x.fq", "passes=1", "prefiltermemory=1k"]
3359 .into_iter()
3360 .map(OsString::from),
3361 )
3362 .unwrap();
3363 assert_eq!(cfg.prefilter.memory_bytes, Some(1000));
3364 assert!(cfg.prefilter.enabled);
3365 assert!(!cfg.prefilter.force_disabled);
3366 assert!(
3367 cfg.notes
3368 .iter()
3369 .any(|note| note.contains("prefilter memory-sizing")),
3370 "expected memory-backed prefilter note: {:?}",
3371 cfg.notes
3372 );
3373
3374 for case in ["prefiltersize=0.1", "prefilterfraction=0.1"] {
3375 let cfg = parse_args(
3376 ["in=x.fq", "passes=1", case]
3377 .into_iter()
3378 .map(OsString::from),
3379 )
3380 .unwrap();
3381 assert!(cfg.prefilter.enabled);
3382 assert!(!cfg.prefilter.force_disabled);
3383 assert_eq!(cfg.prefilter.memory_fraction_micros, Some(100_000));
3384 assert!(
3385 cfg.notes
3386 .iter()
3387 .any(|note| note.contains("prefilter collision memory")),
3388 "expected prefilter fraction note for {case}"
3389 );
3390 }
3391
3392 for case in ["prefiltersize=0", "prefilterfraction=0"] {
3393 let cfg = parse_args(
3394 ["in=x.fq", "passes=1", case]
3395 .into_iter()
3396 .map(OsString::from),
3397 )
3398 .unwrap();
3399 assert!(!cfg.prefilter.enabled);
3400 assert!(cfg.prefilter.force_disabled);
3401 assert_eq!(cfg.prefilter.memory_fraction_micros, None);
3402 assert!(
3403 cfg.notes
3404 .iter()
3405 .any(|note| note.contains("disables fraction-derived")),
3406 "expected zero-fraction note for {case}"
3407 );
3408 }
3409
3410 let cfg = parse_args(
3411 ["in=x.fq", "passes=1", "prefilter=t"]
3412 .into_iter()
3413 .map(OsString::from),
3414 )
3415 .unwrap();
3416 assert!(cfg.prefilter.enabled);
3417 assert!(!cfg.prefilter.force_disabled);
3418 assert!(
3419 cfg.notes
3420 .iter()
3421 .any(|note| note.contains("default prefilter partitioning")),
3422 "expected enabled prefilter note: {:?}",
3423 cfg.notes
3424 );
3425
3426 let cfg = parse_args(
3427 ["in=x.fq", "passes=1", "prehashes=1", "prefilter=f"]
3428 .into_iter()
3429 .map(OsString::from),
3430 )
3431 .unwrap();
3432 assert_eq!(cfg.prefilter.hashes, Some(1));
3433 assert!(!cfg.prefilter.enabled);
3434 assert!(cfg.prefilter.force_disabled);
3435
3436 let cfg = parse_args(
3437 [
3438 "in=x.fq",
3439 "passes=1",
3440 "prehashes=1",
3441 "prefilter=f",
3442 "prefilter=t",
3443 ]
3444 .into_iter()
3445 .map(OsString::from),
3446 )
3447 .unwrap();
3448 assert_eq!(cfg.prefilter.hashes, Some(1));
3449 assert!(cfg.prefilter.enabled);
3450 assert!(!cfg.prefilter.force_disabled);
3451
3452 let cfg = parse_args(
3453 ["in=x.fq", "passes=1", "prefilter=f", "prehashes=1"]
3454 .into_iter()
3455 .map(OsString::from),
3456 )
3457 .unwrap();
3458 assert_eq!(cfg.prefilter.hashes, Some(1));
3459 assert!(cfg.prefilter.enabled);
3460 assert!(!cfg.prefilter.force_disabled);
3461
3462 let cfg = parse_args(
3463 ["in=x.fq", "passes=1", "prefiltercells=1k", "prefilter=f"]
3464 .into_iter()
3465 .map(OsString::from),
3466 )
3467 .unwrap();
3468 assert_eq!(cfg.prefilter.cells, Some(1000));
3469 assert!(!cfg.prefilter.enabled);
3470 assert!(cfg.prefilter.force_disabled);
3471
3472 let cfg = parse_args(
3473 [
3474 "in=x.fq",
3475 "passes=1",
3476 "prefilterfraction=0.1",
3477 "prefilter=f",
3478 ]
3479 .into_iter()
3480 .map(OsString::from),
3481 )
3482 .unwrap();
3483 assert_eq!(cfg.prefilter.memory_fraction_micros, Some(100_000));
3484 assert!(!cfg.prefilter.enabled);
3485 assert!(cfg.prefilter.force_disabled);
3486
3487 let cfg = parse_args(
3488 [
3489 "in=x.fq",
3490 "passes=1",
3491 "prefilter=f",
3492 "prefilterfraction=0.1",
3493 ]
3494 .into_iter()
3495 .map(OsString::from),
3496 )
3497 .unwrap();
3498 assert_eq!(cfg.prefilter.memory_fraction_micros, Some(100_000));
3499 assert!(cfg.prefilter.enabled);
3500 assert!(!cfg.prefilter.force_disabled);
3501
3502 let cfg = parse_args(
3503 ["in=x.fq", "passes=1", "buildpasses=2"]
3504 .into_iter()
3505 .map(OsString::from),
3506 )
3507 .unwrap();
3508 assert_eq!(cfg.build_passes, 2);
3509 assert!(
3510 cfg.notes
3511 .iter()
3512 .any(|note| note.contains("trusted-kmer filtering")),
3513 "expected build-pass trusted-filter note: {:?}",
3514 cfg.notes
3515 );
3516 }
3517
3518 #[test]
3519 fn accepts_constrained_count_min_controls_as_real_sketch_settings() {
3520 let cfg = parse_args(
3521 ["in=x.fq", "passes=1", "bits=16", "hashes=2", "cells=1k"]
3522 .into_iter()
3523 .map(OsString::from),
3524 )
3525 .unwrap();
3526 assert_eq!(cfg.count_min.bits, Some(16));
3527 assert_eq!(cfg.count_min.hashes, Some(2));
3528 assert_eq!(cfg.count_min.cells, Some(1000));
3529 assert!(
3530 cfg.notes
3531 .iter()
3532 .any(|note| note.contains("fixed-memory count-min input sketch")),
3533 "expected fixed-memory count-min sketch note: {:?}",
3534 cfg.notes
3535 );
3536
3537 let cfg = parse_args(
3538 ["in=x.fq", "passes=1", "matrixbits=10"]
3539 .into_iter()
3540 .map(OsString::from),
3541 )
3542 .unwrap();
3543 assert_eq!(cfg.count_min.cells, Some(1024));
3544
3545 let cfg = parse_args(
3546 [
3547 "in=x.fq",
3548 "passes=1",
3549 "bits=8",
3550 "hashes=2",
3551 "sketchmemory=1k",
3552 ]
3553 .into_iter()
3554 .map(OsString::from),
3555 )
3556 .unwrap();
3557 assert_eq!(cfg.count_min.memory_bytes, Some(1000));
3558 assert!(
3559 cfg.notes
3560 .iter()
3561 .any(|note| note.contains("count-min memory budget")),
3562 "expected count-min memory-budget note: {:?}",
3563 cfg.notes
3564 );
3565
3566 let cfg = parse_args(
3567 [
3568 "in=x.fq",
3569 "passes=1",
3570 "maxcountupspillbytes=64m",
3571 "maxcountupspillfinallivebytes=96m",
3572 "maxcountupspillwritebytes=128m",
3573 "maxcountupspillinitialruns=10",
3574 "maxcountupspillmergeruns=2",
3575 "maxcountupspillfinalruns=4",
3576 ]
3577 .into_iter()
3578 .map(OsString::from),
3579 )
3580 .unwrap();
3581 assert_eq!(cfg.max_countup_spill_live_bytes, Some(64_000_000));
3582 assert_eq!(cfg.max_countup_spill_final_live_bytes, Some(96_000_000));
3583 assert_eq!(cfg.max_countup_spill_write_bytes, Some(128_000_000));
3584 assert_eq!(cfg.max_countup_spill_initial_runs, Some(10));
3585 assert_eq!(cfg.max_countup_spill_merge_runs, Some(2));
3586 assert_eq!(cfg.max_countup_spill_final_runs, Some(4));
3587 assert!(
3588 cfg.notes
3589 .iter()
3590 .any(|note| note.contains("count-up temp-spill safety cap")),
3591 "expected count-up spill live cap note: {:?}",
3592 cfg.notes
3593 );
3594 assert!(
3595 cfg.notes
3596 .iter()
3597 .any(|note| note.contains("count-up temp-spill I/O safety cap")),
3598 "expected count-up spill write cap note: {:?}",
3599 cfg.notes
3600 );
3601
3602 let cfg = parse_args(
3603 [
3604 "in=x.fq",
3605 "passes=1",
3606 "mem=2g",
3607 "autocountmin=f",
3608 "exact=t",
3609 "autosketchbytes=4m",
3610 "autocountminreads=500",
3611 ]
3612 .into_iter()
3613 .map(OsString::from),
3614 )
3615 .unwrap();
3616 assert_eq!(cfg.auto_count_min_memory_bytes, Some(2_000_000_000));
3617 assert!(!cfg.auto_count_min);
3618 assert!(cfg.force_exact_counts);
3619 assert_eq!(cfg.auto_count_min_input_bytes, 4_000_000);
3620 assert_eq!(cfg.auto_count_min_read_threshold, 500);
3621
3622 for case in [
3623 "bits=abc",
3624 "bits=0",
3625 "bits=3",
3626 "bits=64",
3627 "hashes=abc",
3628 "hashes=0",
3629 "hashes=9",
3630 "cells=abc",
3631 "cells=0",
3632 "matrixbits=abc",
3633 "matrixbits=0",
3634 "matrixbits=64",
3635 "sketchmemory=abc",
3636 "sketchmemory=0",
3637 "maxcountupspillbytes=abc",
3638 "maxcountupspillfinallivebytes=abc",
3639 "maxcountupspillwritebytes=-1",
3640 "maxcountupspillinitialruns=abc",
3641 "maxcountupspillmergeruns=-1",
3642 "maxcountupspillfinalruns=abc",
3643 "mem=abc",
3644 "autosketchbytes=0",
3645 "autocountminreads=0x",
3646 "buildpasses=abc",
3647 "prehashes=abc",
3648 "prefilterhashes=abc",
3649 "prefiltercells=abc",
3650 "precells=abc",
3651 "prefiltersize=abc",
3652 "prefilterfraction=abc",
3653 "prefilterbits=abc",
3654 "prefilterbits=64",
3655 "prebits=abc",
3656 "prebits=3",
3657 "pbits=3",
3658 "prehashes=9",
3659 ] {
3660 let err = parse_args(
3661 ["in=x.fq", "passes=1", case]
3662 .into_iter()
3663 .map(OsString::from),
3664 )
3665 .unwrap_err()
3666 .to_string();
3667 assert!(
3668 err.contains("expects")
3669 || err.contains("unsupported KMG suffix")
3670 || err.contains("too many suffix letters"),
3671 "unexpected error for malformed {case}: {err}"
3672 );
3673 }
3674 }
3675
3676 #[test]
3677 fn accepts_kmer_table_runtime_controls_as_working_fallbacks() {
3678 for case in [
3679 "initialsize=1k",
3680 "ways=31",
3681 "buflen=64k",
3682 "bufflen=64k",
3683 "bufferlength=64k",
3684 "tabletype=2",
3685 "rcomp=t",
3686 "maskmiddle=f",
3687 "showstats=t",
3688 "stats=f",
3689 "showspeed=f",
3690 "ss=t",
3691 "verbose2=t",
3692 "prealloc=0.25",
3693 "preallocate=f",
3694 "filtermemory=1k",
3695 "prefiltermemory=1k",
3696 "filtermem=1k",
3697 "filtermemoryoverride=1k",
3698 "minprobprefilter=f",
3699 "mpp=t",
3700 "minprobmain=t",
3701 "mpm=f",
3702 "prefilterpasses=auto",
3703 "prepasses=1",
3704 "onepass=t",
3705 ] {
3706 let cfg = parse_args(
3707 ["in=x.fq", "passes=1", case]
3708 .into_iter()
3709 .map(OsString::from),
3710 )
3711 .unwrap();
3712 assert!(
3713 cfg.notes.iter().any(|note| {
3714 note.contains("kmer-table")
3715 || note.contains("prefilter memory-sizing")
3716 || note.contains("prefilter pass-count")
3717 }),
3718 "expected kmer-table fallback note for {case}: {:?}",
3719 cfg.notes
3720 );
3721 }
3722
3723 let cfg = parse(&["in=x.fq", "passes=1", "initialsize=1k", "prealloc=0.25"]);
3724 assert_eq!(cfg.table_initial_size, Some(1000));
3725 assert_eq!(cfg.table_prealloc_fraction, Some(0.25));
3726
3727 let cfg = parse(&["in=x.fq", "passes=1", "preallocate=t"]);
3728 assert_eq!(cfg.table_prealloc_fraction, Some(1.0));
3729
3730 let cfg = parse(&["in=x.fq", "passes=1", "preallocate=f"]);
3731 assert_eq!(cfg.table_prealloc_fraction, None);
3732
3733 for case in [
3734 "initialsize=abc",
3735 "ways=abc",
3736 "buflen=abc",
3737 "tabletype=abc",
3738 "prealloc=0.abc",
3739 "prealloc=1.5",
3740 "filtermemory=abc",
3741 "prepasses=abc",
3742 ] {
3743 let err = parse_args(
3744 ["in=x.fq", "passes=1", case]
3745 .into_iter()
3746 .map(OsString::from),
3747 )
3748 .unwrap_err()
3749 .to_string();
3750 assert!(
3751 err.contains("expects")
3752 || err.contains("unsupported KMG suffix")
3753 || err.contains("too many suffix letters"),
3754 "unexpected error for malformed {case}: {err}"
3755 );
3756 }
3757 }
3758
3759 #[test]
3760 fn accepts_covered_runtime_noops_and_manual_auto_sizing_fallback() {
3761 for case in [
3762 "auto=t",
3763 "auto=f",
3764 "ordered=f",
3765 "verbose=t",
3766 "printcoverage=t",
3767 "tmpdir=/tmp",
3768 "usetmpdir=t",
3769 "usetmpdir=f",
3770 "usetempdir=f",
3771 "fastareadlen=4",
3772 "fastareadlength=4",
3773 "fastaminread=1",
3774 "fastaminlen=1",
3775 "fastaminlength=1",
3776 "forcesectionname=t",
3777 "fastadump=f",
3778 ] {
3779 let cfg = parse(&["in=reads.fq", case]);
3780 assert!(
3781 !cfg.notes.is_empty(),
3782 "expected an explanatory no-op note for {case}"
3783 );
3784 }
3785
3786 for case in ["fastaminread=abc", "fastaminlen=abc", "fastaminlength=abc"] {
3787 let err = parse_args(
3788 ["in=reads.fq", "passes=1", case]
3789 .into_iter()
3790 .map(OsString::from),
3791 )
3792 .unwrap_err()
3793 .to_string();
3794 assert!(
3795 err.contains("expects"),
3796 "unexpected error for malformed {case}: {err}"
3797 );
3798 }
3799 }
3800
3801 #[test]
3802 fn accepts_temporary_directory_controls_for_managed_temp_paths() {
3803 for case in [
3804 "tmpdir=/tmp/bbnorm",
3805 "usetmpdir=t",
3806 "usetmpdir=f",
3807 "usetempdir=t",
3808 ] {
3809 let cfg = parse(&["in=reads.fq", "passes=1", case]);
3810 assert!(
3811 cfg.notes
3812 .iter()
3813 .any(|note| note.contains("temporary-directory control")),
3814 "expected temporary-directory note for {case}: {:?}",
3815 cfg.notes
3816 );
3817 }
3818 let enabled = parse(&["in=reads.fq", "tmpdir=/tmp/bbnorm"]);
3819 assert_eq!(enabled.temp_dir, Some(PathBuf::from("/tmp/bbnorm")));
3820 assert!(enabled.use_temp_dir);
3821
3822 let disabled = parse(&["in=reads.fq", "tmpdir=/tmp/bbnorm", "usetmpdir=f"]);
3823 assert_eq!(disabled.temp_dir, Some(PathBuf::from("/tmp/bbnorm")));
3824 assert!(!disabled.use_temp_dir);
3825 }
3826
3827 #[test]
3828 fn parses_header_trimming_controls_like_bbnorm() {
3829 for case in ["trd=t", "trc=t", "trimreaddescriptions=f", "trimrname=t"] {
3830 let cfg = parse(&["in=reads.fq", case]);
3831 assert!(
3832 !cfg.notes.is_empty(),
3833 "expected an explanatory no-op note for {case}"
3834 );
3835 }
3836 }
3837
/// Feeds the parser a long list of shared I/O and runtime switches in one go,
/// then checks the note count and `gzip_threads`. Afterwards it verifies that
/// malformed values are rejected and that MPI / pairing switches leave a
/// matching fallback note.
#[test]
fn accepts_shared_io_runtime_controls_as_noops_and_validates_values() {
    let accepted = [
        "in=reads.fq",
        "null",
        "monitor=f",
        "killswitch=600,0.002",
        "json=t",
        "silent=t",
        "printexecuting=f",
        "proxyhost=localhost",
        "proxyport=8080",
        "metadatafile=metadata.json",
        "testsize=t",
        "extin=.fq.gz",
        "extout=.fq",
        "bufferbf=f",
        "bufferbf1=f",
        "usejni=f",
        "bytefile1=t",
        "bytefile2=maybe",
        "bf1bufferlen=64k",
        "bfthreads=1",
        "readbufferlength=64k",
        "readbufferdata=1m",
        "readbuffers=1",
        "workers=auto",
        "workerthreads=1",
        "wt=auto",
        "threadsin=1",
        "tin=auto",
        "threadsout=1",
        "tout=auto",
        "ziplevel=2",
        "pigz=2",
        "bgzip=f",
        "zipthreads=1",
        "ztd=2.0",
        "blocksize=128",
        "nativebgzip=f",
        "usebzip2=f",
        "skipvalidation=t",
        "validate=maybe",
        "vic=f",
        "usempi=f",
        "mpi=0",
        "crismpi=f",
        "mpikeepall=f",
        "tossbrokenreads=f",
        "nullifybrokenquality=f",
        "deleteoldname=f",
        "renamebymapping=f",
        "assertcigar=f",
        "parsecustom=f",
        "shrinkheaders=f",
        "fixheader=f",
        "allownullheader=f",
        "recalpairnum=f",
        "pairreads=f",
        "flipr2=f",
        "int=f",
        "testinterleaved=f",
        "forceinterleaved=f",
        "overrideinterleaved=t",
    ];
    let parsed = parse(&accepted);
    assert_eq!(parsed.notes.len(), 56);
    assert_eq!(parsed.gzip_threads, Some(1));

    // Renders the parse failure for a single extra argument as text.
    let failure_text = |arg: &'static str| {
        parse_args(
            ["in=reads.fq", "passes=1", arg]
                .into_iter()
                .map(OsString::from),
        )
        .unwrap_err()
        .to_string()
    };

    let malformed = [
        "monitor=1,2,3",
        "bf1bufferlen=abc",
        "bfthreads=abc",
        "readbufferlength=abc",
        "readbuffers=abc",
        "workers=abc",
        "threadsin=abc",
        "threadsout=abc",
        "mpi=2k",
        "ziplevel=abc",
        "pigz=2k",
        "zipthreads=abc",
        "ztd=abc",
        "blocksize=abc",
    ];
    for case in malformed {
        let err = failure_text(case);
        let recognised = err.contains("expects") || err.contains("suffix");
        assert!(recognised, "unexpected error for malformed {case}: {err}");
    }

    let mpi_cases = ["usempi=t", "mpi=2", "crismpi=t", "mpikeepall=t"];
    for case in mpi_cases {
        let with_mpi = parse(&["in=reads.fq", "passes=1", case]);
        let mentions_mpi = with_mpi.notes.iter().any(|note| note.contains("MPI"));
        assert!(
            mentions_mpi,
            "missing MPI fallback note for {case}: {:?}",
            with_mpi.notes
        );
    }

    let pairing_cases = ["pairreads=t", "flipr2=t"];
    for case in pairing_cases {
        let with_pairing = parse(&["in=reads.fq", "passes=1", case]);
        let mentions_pairing = with_pairing
            .notes
            .iter()
            .any(|note| note.contains("pairing"));
        assert!(
            mentions_pairing,
            "missing pairing fallback note for {case}: {:?}",
            with_pairing.notes
        );
    }
}
3953
/// Each SAM-related switch must parse and leave a note mentioning "SAM" or
/// "read-group"; malformed numeric values must fail with an "expects" or
/// "invalid float" message.
#[test]
fn accepts_shared_sam_runtime_controls_as_fastq_noops_and_validates_values() {
    // Runs the parser on the standard two-argument prefix plus one extra switch.
    let run = |arg: &'static str| {
        parse_args(
            ["in=reads.fq", "passes=1", arg]
                .into_iter()
                .map(OsString::from),
        )
    };

    let accepted = [
        "sam=1.4",
        "samv=1.6",
        "samtools=f",
        "sambamba=f",
        "printHeaderWait=f",
        "nativebam=f",
        "prefernativebam=f",
        "userssw=f",
        "attachedsamline=f",
        "streamerthreads=1",
        "fastqstreamerthreads=1",
        "fastastreamerthreads=1",
        "samwriterthreads=1",
        "bamwriterthreads=1",
        "fastqwriterthreads=1",
        "fastastreamer2=f",
        "prefermd=f",
        "notags=f",
        "mdtag=f",
        "idtag=f",
        "mateqtag=f",
        "xmtag=f",
        "smtag=f",
        "amtag=f",
        "nmtag=f",
        "xttag=f",
        "stoptag=f",
        "lengthtag=f",
        "boundstag=f",
        "scoretag=f",
        "sortscaffolds=f",
        "customtag=f",
        "nhtag=f",
        "keepnames=f",
        "saa=f",
        "inserttag=f",
        "correctnesstag=f",
        "intronlen=10",
        "suppressheader=f",
        "noheadersequences=f",
        "tophat=f",
        "xs=us",
        "xstag=fr-ss",
        "flipsam=f",
        "readgroupid=rg1",
        "rgsm=sample",
    ];
    for case in accepted {
        let parsed = run(case).unwrap();
        let noted = parsed
            .notes
            .iter()
            .any(|note| note.contains("SAM") || note.contains("read-group"));
        assert!(
            noted,
            "expected SAM/read-group no-op note for {case}: {:?}",
            parsed.notes
        );
    }

    let malformed = [
        "sam=abc",
        "streamerthreads=abc",
        "fastqwriterthreads=abc",
        "intronlen=abc",
    ];
    for case in malformed {
        let err = run(case).unwrap_err().to_string();
        let recognised = err.contains("expects") || err.contains("invalid float");
        assert!(recognised, "unexpected error for malformed {case}: {err}");
    }
}
4038
/// Covers the side-output histogram switches: each one alone must leave a
/// "side-output" note, a combined invocation must populate the corresponding
/// config fields, `barcodestats=` is stored on its own, and malformed bin or
/// length values must be rejected.
#[test]
fn accepts_side_output_stats_histograms_and_emits_quality_length_gc_and_base_histograms() {
    // Runs the parser on the standard two-argument prefix plus one extra switch.
    let run = |arg: &'static str| {
        parse_args(
            ["in=reads.fq", "passes=1", arg]
                .into_iter()
                .map(OsString::from),
        )
    };
    // Shorthand for the expected value of an optional output path field.
    let path = |name: &str| Some(PathBuf::from(name));

    let single_switches = [
        "qhist=qual.tsv",
        "bqhist=basequal.tsv",
        "qchist=qcount.tsv",
        "aqhist=avg.tsv",
        "obqhist=overall.tsv",
        "mhist=match.tsv",
        "ihist=insert.tsv",
        "bhist=base.tsv",
        "qahist=qacc.tsv",
        "indelhist=indel.tsv",
        "ehist=error.tsv",
        "lhist=length.tsv",
        "gchist=gc.tsv",
        "enthist=entropy.tsv",
        "barcodestats=barcode.tsv",
        "thist=time.tsv",
        "idhist=id.tsv",
        "gcbins=auto",
        "gchistbins=100",
        "entropybins=auto",
        "enthistbins=100",
        "idhistbins=auto",
        "idbins=100",
        "gcplot=f",
        "entropyns=t",
        "maxhistlen=1k",
        "fixindels=f",
    ];
    for case in single_switches {
        let parsed = run(case).unwrap();
        let noted = parsed.notes.iter().any(|note| note.contains("side-output"));
        assert!(
            noted,
            "expected side-output fallback note for {case}: {:?}",
            parsed.notes
        );
    }

    let combined_args = [
        "in=reads.fq",
        "passes=1",
        "qhist=quality.tsv",
        "bqhist=basequal.tsv",
        "qchist=qcount.tsv",
        "aqhist=avg.tsv",
        "obqhist=overall.tsv",
        "mhist=match.tsv",
        "ihist=insert.tsv",
        "qahist=qacc.tsv",
        "indelhist=indel.tsv",
        "ehist=error.tsv",
        "lhist=length.tsv",
        "gchist=gc.tsv",
        "bhist=base.tsv",
        "enthist=entropy.tsv",
        "idhist=id.tsv",
        "gcbins=100",
        "entropybins=100",
        "idbins=100",
        "maxhistlen=1k",
    ];
    let combined = parse_args(combined_args.into_iter().map(OsString::from)).unwrap();
    assert_eq!(combined.quality_hist_out, path("quality.tsv"));
    assert_eq!(combined.match_hist_out, path("match.tsv"));
    assert_eq!(combined.insert_hist_out, path("insert.tsv"));
    assert_eq!(combined.quality_accuracy_hist_out, path("qacc.tsv"));
    assert_eq!(combined.indel_hist_out, path("indel.tsv"));
    assert_eq!(combined.error_hist_out, path("error.tsv"));
    assert_eq!(combined.base_quality_hist_out, path("basequal.tsv"));
    assert_eq!(combined.quality_count_hist_out, path("qcount.tsv"));
    assert_eq!(combined.average_quality_hist_out, path("avg.tsv"));
    assert_eq!(combined.overall_base_quality_hist_out, path("overall.tsv"));
    assert_eq!(combined.length_hist_out, path("length.tsv"));
    assert_eq!(combined.gc_hist_out, path("gc.tsv"));
    assert_eq!(combined.base_hist_out, path("base.tsv"));
    assert_eq!(combined.entropy_hist_out, path("entropy.tsv"));
    assert_eq!(combined.identity_hist_out, path("id.tsv"));
    // barcodestats was not part of the combined invocation.
    assert_eq!(combined.barcode_stats_out, None);
    assert_eq!(combined.gc_bins, Some(100));
    assert_eq!(combined.entropy_bins, 100);
    assert_eq!(combined.identity_bins, 100);
    assert_eq!(combined.side_hist_len, Some(1000));

    let barcode = parse(&["in=reads.fq", "barcodestats=barcode.tsv"]);
    assert_eq!(barcode.barcode_stats_out, path("barcode.tsv"));

    let malformed = [
        "gcbins=abc",
        "entropybins=abc",
        "idhistbins=abc",
        "maxhistlen=abc",
        "maxhistlen=0",
    ];
    for case in malformed {
        let err = run(case).unwrap_err().to_string();
        let recognised = err.contains("expects") || err.contains("suffix");
        assert!(recognised, "unexpected error for malformed {case}: {err}");
    }
}
4167
/// Cardinality / loglog switches: each must leave a "cardinality/loglog"
/// note, explicit values must land in `cfg.cardinality`, later boolean
/// switches override earlier ones, and out-of-range or malformed values must
/// be rejected.
#[test]
fn accepts_cardinality_loglog_controls_as_bounded_estimates_and_validates_values() {
    // Runs the parser on the standard two-argument prefix plus one extra switch.
    let run = |arg: &'static str| {
        parse_args(
            ["in=reads.fq", "passes=1", arg]
                .into_iter()
                .map(OsString::from),
        )
    };

    let accepted = [
        "cardinality=t",
        "cardinality=31",
        "loglog=f",
        "loglogin=t",
        "cardinalityout=t",
        "loglogout=f",
        "buckets=1k",
        "loglogbuckets=100",
        "loglogcorrection=t",
        "loglogcf=f",
        "loglogbits=16",
        "loglogk=31",
        "cardinalityk=31",
        "kcardinality=31",
        "loglogklist=21,31",
        "loglogseed=42",
        "loglogminprob=0.5",
        "loglogtype=loglog2",
        "loglogmean=t",
        "loglogmedian=t",
        "loglogmwa=t",
        "logloghmean=t",
        "logloggmean=t",
        "loglogmantissa=8",
        "loglogcounts=t",
        "loglogcount=f",
    ];
    for case in accepted {
        let parsed = run(case).unwrap();
        let noted = parsed
            .notes
            .iter()
            .any(|note| note.contains("cardinality/loglog"));
        assert!(
            noted,
            "expected cardinality/loglog fallback note for {case}: {:?}",
            parsed.notes
        );
    }

    let tuned = parse(&[
        "in=reads.fq",
        "passes=1",
        "cardinality=t",
        "cardinalityout=t",
        "buckets=1k",
        "loglogseed=42",
        "loglogk=25",
        "loglogminprob=0.25",
    ]);
    assert!(tuned.cardinality.input);
    assert!(tuned.cardinality.output);
    assert_eq!(tuned.cardinality.buckets, 1000);
    assert_eq!(tuned.cardinality.seed, 42);
    assert_eq!(tuned.cardinality.k, Some(25));
    assert_eq!(tuned.cardinality.min_probability, 0.25);

    // The later `=f` switches must win over the earlier `=t` ones.
    let overridden = parse(&[
        "in=reads.fq",
        "passes=1",
        "cardinality=t",
        "cardinality=f",
        "cardinalityout=t",
        "loglogout=f",
    ]);
    assert!(!overridden.cardinality.input);
    assert!(!overridden.cardinality.output);

    let rejected = [
        "cardinality=maybe",
        "buckets=0",
        "buckets=100g",
        "loglogbits=abc",
        "loglogklist=21,abc",
        "loglogseed=abc",
        "loglogminprob=abc",
        "loglogminprob=2",
    ];
    for case in rejected {
        let err = run(case).unwrap_err().to_string();
        let recognised = err.contains("expects") || err.contains("above the Rust safety cap");
        assert!(recognised, "unexpected error for malformed {case}: {err}");
    }
}
4264
/// Quality-recalibration tuning switches, both plain and with `_p1`/`_p2`
/// suffixes, must each contribute one note; malformed numeric values must be
/// rejected.
#[test]
fn accepts_quality_recalibration_controls_as_noops_and_validates_values() {
    let plain_args = [
        "in=reads.fq",
        "trackall=f",
        "clearmatrices=f",
        "loadq=f",
        "loadq102=f",
        "loadqap=f",
        "loadqbp=f",
        "loadqpt=f",
        "loadqbt=f",
        "loadq10=f",
        "loadq12=f",
        "loadqb12=f",
        "loadqb012=f",
        "loadqb123=f",
        "loadqb234=f",
        "loadq12b12=f",
        "loadqp=f",
        "observationcutoff=1k",
        "recalpasses=1",
        "recalqmax=50",
        "recalqmin=2",
        "recalwithposition=t",
        "qmatrixmode=max",
        "recaltile=f",
    ];
    let plain = parse(&plain_args);
    assert_eq!(plain.notes.len(), 23);

    let suffixed_args = [
        "in=reads.fq",
        "loadq102_p1=f",
        "loadq_p2=t",
        "observationcutoff_p1=1k",
        "recalpasses_p2=1",
        "recalqmax_p1=50",
        "recalqmin_p2=2",
        "recalwithposition_p1=t",
        "qmatrixmode_p2=max",
        "recaltile_p1=f",
    ];
    let suffixed = parse(&suffixed_args);
    assert_eq!(suffixed.notes.len(), 9);

    let malformed = [
        "observationcutoff=abc",
        "recalpasses=abc",
        "recalqmax=abc",
        "observationcutoff_p1=abc",
        "recalpasses_p2=abc",
        "recalqmax_p1=abc",
    ];
    malformed.into_iter().for_each(|case| {
        let argv = ["in=reads.fq", "passes=1", case];
        let err = parse_args(argv.into_iter().map(OsString::from))
            .unwrap_err()
            .to_string();
        let recognised = err.contains("expects") || err.contains("suffix");
        assert!(recognised, "unexpected error for malformed {case}: {err}");
    });
}
4330
/// `recalibrate`-family switches set to false are accepted with a note each;
/// setting them to true, or to a non-boolean, must fail with a specific
/// message.
#[test]
fn accepts_disabled_recalibrate_controls_and_rejects_enabled_recalibration() {
    // Renders the parse failure for a single extra argument as text.
    let failure_text = |arg: &'static str| {
        parse_args(
            ["in=reads.fq", "passes=1", arg]
                .into_iter()
                .map(OsString::from),
        )
        .unwrap_err()
        .to_string()
    };

    let disabled = parse(&[
        "in=reads.fq",
        "recalibrate=f",
        "recalibratequality=f",
        "recal=f",
        "recalibrate_p1=f",
    ]);
    assert_eq!(disabled.notes.len(), 4);
    let all_explain = disabled
        .notes
        .iter()
        .all(|note| note.contains("keeps BBTools quality recalibration disabled"));
    assert!(all_explain);

    for case in ["recalibrate=t", "recalibratequality=t", "recal=t"] {
        let err = failure_text(case);
        assert!(
            err.contains("enables BBTools quality recalibration"),
            "unexpected error for enabled {case}: {err}"
        );
    }

    let err = failure_text("recalibrate=maybe");
    assert!(
        err.contains("recalibrate expects a boolean value"),
        "unexpected error for malformed recalibrate: {err}"
    );
}
4373
/// Non-positive break lengths are accepted with a note each; positive values
/// and non-numeric values must fail with a specific message.
#[test]
fn accepts_disabled_break_length_controls_and_rejects_read_splitting() {
    // Renders the parse failure for a single extra argument as text.
    let failure_text = |arg: &'static str| {
        parse_args(
            ["in=reads.fq", "passes=1", arg]
                .into_iter()
                .map(OsString::from),
        )
        .unwrap_err()
        .to_string()
    };

    let disabled = parse(&["in=reads.fq", "breaklen=0", "breaklength=-1"]);
    assert_eq!(disabled.notes.len(), 2);
    let all_explain = disabled
        .notes
        .iter()
        .all(|note| note.contains("keeps BBTools read breaking disabled"));
    assert!(all_explain);

    for case in ["breaklen=50", "breaklength=1"] {
        let err = failure_text(case);
        assert!(
            err.contains("enables BBTools read breaking"),
            "unexpected error for enabled {case}: {err}"
        );
    }

    let err = failure_text("breaklen=abc");
    assert!(
        err.contains("breaklen expects"),
        "unexpected error for malformed breaklen: {err}"
    );
}
4410
/// Shared environment switches parsed together must yield 37 notes and
/// `locked_increment == Some(false)`; malformed entropy values and enabled
/// amino modes must be rejected.
#[test]
fn accepts_shared_environment_runtime_controls_as_noops_and_validates_values() {
    let accepted = [
        "in=reads.fq",
        "amino=f",
        "amino8=f",
        "validatebranchless=maybe",
        "fairqueues=t",
        "fixextensions=f",
        "2passresize=f",
        "parallelsort=f",
        "gcbeforemem=t",
        "warnifnosequence=f",
        "warnfirsttimeonly=f",
        "kmg=t",
        "forceJavaParseDouble=f",
        "simd=auto",
        "simdsparse=f",
        "simdmultsparse=f",
        "simdfmasparse=f",
        "simdcopy=f",
        "aws=f",
        "nersc=t",
        "lowmem=f",
        "lockedincrement=auto",
        "symmetricwrite=f",
        "buffer=10",
        "buffered=f",
        "sidechannelstats=f",
        "silva=f",
        "unite=f",
        "imghq=f",
        "callins=f",
        "calldel=f",
        "callsub=f",
        "callindel=f",
        "calljunct=f",
        "callnocall=f",
        "protFull=t",
        "entropyk=3",
        "entropywindow=50",
    ];
    let parsed = parse(&accepted);
    assert_eq!(parsed.notes.len(), 37);
    assert_eq!(parsed.locked_increment, Some(false));

    // Renders the parse failure for a single extra argument as text.
    let failure_text = |arg: &'static str| {
        parse_args(
            ["in=reads.fq", "passes=1", arg]
                .into_iter()
                .map(OsString::from),
        )
        .unwrap_err()
        .to_string()
    };

    for case in ["entropyk=abc", "entropywindow=abc"] {
        let err = failure_text(case);
        assert!(
            err.contains("expects"),
            "unexpected error for malformed {case}: {err}"
        );
    }

    for case in ["amino=t", "amino8=t"] {
        let err = failure_text(case);
        assert!(
            err.contains("amino-acid kmer mode"),
            "unexpected error for enabled {case}: {err}"
        );
    }
}
4484
/// Base-cleanup flags set the matching booleans, and each junk-handling
/// spelling resolves to the `JunkMode` asserted below.
#[test]
fn parses_base_cleanup_controls_like_bbnorm() {
    let cleanup = parse(&[
        "in=reads.fq",
        "utot=t",
        "tuc=t",
        "lctn=t",
        "dotdashxton=t",
        "itn=t",
        "fixjunk=t",
    ]);
    assert!(cleanup.u_to_t);
    assert!(cleanup.to_upper_case);
    assert!(cleanup.lower_case_to_n);
    assert!(cleanup.dot_dash_x_to_n);
    assert!(cleanup.iupac_to_n);
    assert_eq!(cleanup.junk_mode, JunkMode::Fix);

    // Single-switch spellings, checked in the same order as before.
    assert_eq!(
        parse(&["in=reads.fq", "ignorejunk=t"]).junk_mode,
        JunkMode::Ignore
    );
    assert_eq!(
        parse(&["in=reads.fq", "flagjunk=t"]).junk_mode,
        JunkMode::Flag
    );
    assert_eq!(
        parse(&["in=reads.fq", "tossjunk=t"]).junk_mode,
        JunkMode::Flag
    );
    assert_eq!(
        parse(&["in=reads.fq", "junk=discard"]).junk_mode,
        JunkMode::Flag
    );
    assert_eq!(
        parse(&["in=reads.fq", "crashjunk=f"]).junk_mode,
        JunkMode::Ignore
    );
    assert_eq!(
        parse(&["in=reads.fq", "failjunk=f"]).junk_mode,
        JunkMode::Ignore
    );

    // A later crashjunk=t overrides the earlier ignorejunk=t.
    assert_eq!(
        parse(&["in=reads.fq", "ignorejunk=t", "crashjunk=t"]).junk_mode,
        JunkMode::Crash
    );
    assert_eq!(
        parse(&["in=reads.fq", "junk=fail"]).junk_mode,
        JunkMode::Crash
    );

    let iupac = parse(&["in=reads.fq", "junk=iupacton"]);
    assert!(iupac.fix_junk_and_iupac);
    assert_eq!(iupac.junk_mode, JunkMode::Fix);
}
4531
/// `flagjunk=t` followed by `flagjunk=f` ends in `Crash`, whereas the same
/// sequence with `tossjunk` is expected to stay at `Flag`.
#[test]
fn false_flagjunk_alias_resets_to_crash_like_bbnorm() {
    let after_flagjunk = parse(&["in=reads.fq", "flagjunk=t", "flagjunk=f"]).junk_mode;
    assert_eq!(after_flagjunk, JunkMode::Crash);

    let after_tossjunk = parse(&["in=reads.fq", "tossjunk=t", "tossjunk=f"]).junk_mode;
    assert_eq!(after_tossjunk, JunkMode::Flag);
}
4540
/// Ten trim-related switches parsed together must yield exactly ten notes.
#[test]
fn accepts_bbnorm_inactive_trim_parser_options_as_noops() {
    let trim_args = [
        "in=reads.fq",
        "trimclip=t",
        "trimpolya=t",
        "trimpolyg=10",
        "trimpolygleft=f",
        "trimpolycright=2",
        "maxnonpoly=3",
        "ftr=10",
        "ftl=2",
        "ftm=4",
        "ftr2=7",
    ];
    let parsed = parse(&trim_args);
    assert_eq!(parsed.notes.len(), 10);
}
4558
/// Nineteen read-filter switches parsed together must yield nineteen notes;
/// malformed `mintrimlen` / `badbarcodes` values must fail with "expects".
#[test]
fn accepts_bbnorm_inactive_read_filter_parser_options_as_noops() {
    let filter_args = [
        "in=reads.fq",
        "maxlen=50",
        "minlenfraction=0.8",
        "maxns=0",
        "mingc=0.9",
        "maxgc=0.1",
        "usepairgc=t",
        "minconsecutivebases=200",
        "maq=40,20",
        "maqb=20",
        "mbq=30",
        "chastityfilter=t",
        "trimbadsequence=t",
        "failnobarcode=f",
        "badbarcodes=fail",
        "barcodefilter=f",
        "barcodes=ACGT,TGCA",
        "aqbp=t",
        "mintrimlen=10",
        "untrim=f",
    ];
    let parsed = parse(&filter_args);
    assert_eq!(parsed.notes.len(), 19);

    ["mintrimlen=abc", "badbarcodes=maybe"]
        .into_iter()
        .for_each(|case| {
            let argv = ["in=reads.fq", "passes=1", case];
            let err = parse_args(argv.into_iter().map(OsString::from))
                .unwrap_err()
                .to_string();
            assert!(
                err.contains("expects"),
                "unexpected error for malformed {case}: {err}"
            );
        });
}
4599
/// `build=` / `genome=` with an integer are accepted with a "genome-build
/// context" note; a non-numeric genome and the `idfilter` / `subfilter`
/// switches must be rejected.
#[test]
fn accepts_genome_build_context_controls_as_normalization_noops() {
    // Runs the parser on the standard two-argument prefix plus one extra switch.
    let run = |arg: &'static str| {
        parse_args(
            ["in=reads.fq", "passes=1", arg]
                .into_iter()
                .map(OsString::from),
        )
    };

    for case in ["build=1", "genome=1"] {
        let parsed = run(case).unwrap();
        let noted = parsed
            .notes
            .iter()
            .any(|note| note.contains("genome-build context"));
        assert!(
            noted,
            "expected genome-build context no-op note for {case}: {:?}",
            parsed.notes
        );
    }

    for case in ["genome=abc", "idfilter=0.9", "subfilter=1"] {
        let err = run(case).unwrap_err().to_string();
        let recognised = err.contains("expects") || err.contains("unknown or unsupported");
        assert!(recognised, "unexpected error for malformed {case}: {err}");
    }
}
4632
/// Explicit interleaving switches: `interleaved=t` keeps single-stream
/// outputs, `int=t` and `forceinterleaved=t` enable interleaving without
/// auto-detection, `testinterleaved=f` disables both, and
/// `overrideinterleaved=t` leaves a note.
#[test]
fn parses_explicit_interleaved_single_stream_outputs() {
    let explicit = parse(&[
        "in=reads.fq",
        "interleaved=t",
        "out=keep.fq",
        "outt=toss.fq",
    ]);
    assert!(explicit.interleaved);
    let input = explicit.in1.unwrap();
    assert_eq!(input, PathBuf::from("reads.fq"));
    let kept = explicit.out1.unwrap();
    assert_eq!(kept, PathBuf::from("keep.fq"));
    let tossed = explicit.out_toss1.unwrap();
    assert_eq!(tossed, PathBuf::from("toss.fq"));
    assert!(explicit.out2.is_none());
    assert!(explicit.out_toss2.is_none());

    let short_alias = parse(&["in=reads.fq", "int=t"]);
    assert!(short_alias.interleaved);
    assert!(!short_alias.test_interleaved);

    let forced = parse(&["in=reads.fq", "forceinterleaved=t"]);
    assert!(forced.interleaved);
    assert!(!forced.test_interleaved);

    let detection_off = parse(&["in=reads.fq", "testinterleaved=f"]);
    assert!(!detection_off.interleaved);
    assert!(!detection_off.test_interleaved);

    let overridden = parse(&["in=reads.fq", "overrideinterleaved=t"]);
    assert!(!overridden.notes.is_empty());
}
4663
/// Without any interleaving switch, `interleaved` stays off and
/// `test_interleaved` stays on, even when second outputs are requested.
#[test]
fn defaults_to_auto_interleaved_detection() {
    let args = [
        "in=reads.fq",
        "out=keep1.fq",
        "out2=keep2.fq",
        "outt=toss1.fq",
        "outt2=toss2.fq",
    ];
    let parsed = parse(&args);
    assert!(!parsed.interleaved);
    assert!(parsed.test_interleaved);
}
4676
/// With two inputs, a `#` output pattern and a single toss output are stored
/// verbatim and no second output paths are derived at parse time.
#[test]
fn paired_input_allows_bbnorm_single_stream_or_hash_pattern_outputs() {
    let args = [
        "in=reads1.fq",
        "in2=reads2.fq",
        "out=keep#.fq",
        "outt=toss.fq",
    ];
    let parsed = parse(&args);
    let kept = parsed.out1.unwrap();
    assert_eq!(kept, PathBuf::from("keep#.fq"));
    assert!(parsed.out2.is_none());
    let tossed = parsed.out_toss1.unwrap();
    assert_eq!(tossed, PathBuf::from("toss.fq"));
    assert!(parsed.out_toss2.is_none());
}
4690
/// `interleaved=t` alongside `in2=` keeps both input paths and the flag.
#[test]
fn interleaved_true_with_in2_remains_two_file_paired_like_bbnorm() {
    let parsed = parse(&["in=reads1.fq", "in2=reads2.fq", "interleaved=t"]);
    assert!(parsed.interleaved);
    let first = parsed.in1.unwrap();
    assert_eq!(first, PathBuf::from("reads1.fq"));
    let second = parsed.in2.unwrap();
    assert_eq!(second, PathBuf::from("reads2.fq"));
}
4698
/// An `in=` value containing `#` expands to the `1` and `2` file names.
#[test]
fn expands_missing_hash_input_pattern_like_bbnorm() {
    let parsed = parse(&["in=reads#.fq"]);
    let first = parsed.in1.unwrap();
    assert_eq!(first, PathBuf::from("reads1.fq"));
    let second = parsed.in2.unwrap();
    assert_eq!(second, PathBuf::from("reads2.fq"));
}
4705
/// When a file whose name literally contains `#` exists on disk, the input
/// path is kept as-is and no second input is produced.
#[test]
fn keeps_literal_hash_input_when_file_exists_like_bbnorm() {
    let scratch = tempfile::tempdir().unwrap();
    let literal = scratch.path().join("reads#.fq");
    std::fs::write(&literal, b"@r1\nACGT\n+\nIIII\n").unwrap();

    let input_arg = format!("in={}", literal.display());
    let argv = [input_arg, "passes=1".to_string()];
    let parsed = parse_args(argv.into_iter().map(OsString::from)).unwrap();

    assert_eq!(parsed.in1.unwrap(), literal);
    assert!(parsed.in2.is_none());
}
4721
/// An existing `extra=` file whose name contains commas is kept as a single
/// entry; the trailing `extra=null` does not change the expected list.
#[test]
fn keeps_literal_comma_extra_when_file_exists_like_bbnorm() {
    let scratch = tempfile::tempdir().unwrap();
    let main_input = scratch.path().join("main.fq");
    let literal = scratch.path().join("extra,with,commas.fq");
    std::fs::write(&main_input, b"@r1\nACGT\n+\nIIII\n").unwrap();
    std::fs::write(&literal, b"@r2\nACGT\n+\nIIII\n").unwrap();

    let input_arg = format!("in={}", main_input.display());
    let extra_arg = format!("extra={}", literal.display());
    let argv = [
        input_arg,
        extra_arg,
        "extra=null".to_string(),
        "passes=1".to_string(),
    ];
    let parsed = parse_args(argv.into_iter().map(OsString::from)).unwrap();

    assert_eq!(parsed.extra, vec![literal]);
}
4743
/// Two comma-separated config files are expanded into arguments, a later
/// command-line `target=` overrides the one from the file, and a note reports
/// the number of expanded lines.
#[test]
fn expands_config_files_like_bbnorm() {
    let scratch = tempfile::tempdir().unwrap();
    let first_file = scratch.path().join("a.config");
    let second_file = scratch.path().join("b.config");
    // The first file also carries a blank line and a comment line.
    let first_body = "\n# comment\nin=reads.fq\npasses=1\nkeepall=t\nk=21\n";
    std::fs::write(&first_file, first_body).unwrap();
    std::fs::write(&second_file, "target=7\nout=keep.fq\n").unwrap();

    let config_arg = format!("config={},{}", first_file.display(), second_file.display());
    let argv = [config_arg, "target=9".to_string()];
    let parsed = parse_args(argv.into_iter().map(OsString::from)).unwrap();

    assert_eq!(parsed.in1.unwrap(), PathBuf::from("reads.fq"));
    assert_eq!(parsed.k, 21);
    assert_eq!(parsed.target_depth, 9);
    assert_eq!(parsed.out1.unwrap(), PathBuf::from("keep.fq"));
    assert!(parsed.keep_all);
    let reported = parsed
        .notes
        .iter()
        .any(|note| note.contains("expanded into 6 BBTools-style argument line"));
    assert!(reported);
}
4777
/// Pointing `config=` at a non-existent file must fail with a message that
/// mentions the config file could not be processed.
#[test]
fn reports_missing_config_files_like_bbnorm() {
    let scratch = tempfile::tempdir().unwrap();
    let absent = scratch.path().join("missing.config");
    let argv = [format!("config={}", absent.display())];

    let outcome = parse_args(argv.into_iter().map(OsString::from));
    let message = outcome.unwrap_err().to_string();

    assert!(message.contains("could not process config file"));
}
4792
/// An `extra=` path that is not resolved must be reported by name in the error.
#[test]
fn rejects_missing_extra_inputs_like_bbnorm() {
    let argv = ["in=reads.fq", "extra=missing#.fq", "passes=1"];
    let failure = parse_args(argv.into_iter().map(OsString::from)).unwrap_err();
    let message = failure.to_string();
    assert!(message.contains("extra input missing#.fq"));
}
4803
/// The short and long aliases for toss/discard switches set both the
/// final-stage and first-stage flags.
#[test]
fn parses_single_pass_final_stage_aliases() {
    let aliases = [
        "in=reads.fq",
        "tbrf=t",
        "dbo2=t",
        "tossbadreads1=t",
        "dbo1=t",
    ];
    let parsed = parse(&aliases);
    assert!(parsed.toss_error_reads);
    assert!(parsed.toss_error_reads_first);
    assert!(parsed.discard_bad_only);
    assert!(parsed.discard_bad_only_first);
}
4818
/// Multipass targets and bad-percentile switches populate their fields
/// (with `targetbadpercentilelow=20` read back as `0.2`); malformed values
/// must fail with "expects".
#[test]
fn parses_multipass_and_countup_controls() {
    let multipass_args = [
        "in=reads.fq",
        "passes=2",
        "target1=7",
        "targetbadpercentilelow=20",
        "tbph=0.8",
        "abrc=t",
    ];
    let parsed = parse(&multipass_args);
    assert_eq!(parsed.target_depth_first, Some(7));
    assert_eq!(parsed.target_bad_percent_low, 0.2);
    assert_eq!(parsed.target_bad_percent_high, 0.8);
    assert!(parsed.add_bad_reads_countup);

    ["target1=abc", "targetbadpercentilelow=abc", "tbph=abc"]
        .into_iter()
        .for_each(|case| {
            let argv = ["in=reads.fq", "passes=1", case];
            let err = parse_args(argv.into_iter().map(OsString::from))
                .unwrap_err()
                .to_string();
            assert!(
                err.contains("expects"),
                "unexpected error for malformed {case}: {err}"
            );
        });
}
4848
/// `outuncorrected` / `outuncorrected2` are accepted together with
/// `passes=2` and stored as given.
#[test]
fn allows_outuncorrected_in_multipass_runs() {
    let args = [
        "in=reads_1.fq",
        "in2=reads_2.fq",
        "passes=2",
        "out=keep_1.fq",
        "out2=keep_2.fq",
        "outuncorrected=unc_1.fq",
        "outuncorrected2=unc_2.fq",
    ];
    let parsed = parse(&args);
    assert_eq!(parsed.passes, 2);

    let expected_first = std::path::Path::new("unc_1.fq");
    assert_eq!(parsed.out_uncorrected1.as_deref(), Some(expected_first));

    let expected_second = std::path::Path::new("unc_2.fq");
    assert_eq!(parsed.out_uncorrected2.as_deref(), Some(expected_second));
}
4870
/// A later `tossbadreadsf=f` clears what an earlier `tossbadreads=t` set.
#[test]
fn final_stage_alias_can_override_conflated_alias() {
    let parsed = parse(&["in=reads.fq", "tossbadreads=t", "tossbadreadsf=f"]);
    let still_tossing = parsed.toss_error_reads;
    assert!(!still_tossing);
}
4876
/// `removeifeitherbad` / `rieb` is the logical inverse of `requirebothbad`.
#[test]
fn remove_if_either_bad_alias_inverts_require_both_bad() {
    let either_bad = parse(&["in=reads.fq", "requirebothbad=t", "removeifeitherbad=t"]);
    assert!(!either_bad.require_both_bad);

    let both_bad = parse(&["in=reads.fq", "rieb=f"]);
    assert!(both_bad.require_both_bad);
}
4885
/// With `interleaved=f` and no `in2=`, requesting `out2=` must fail with an
/// error that names `out2`.
#[test]
fn explicit_interleaved_false_rejects_second_outputs_without_in2() {
    let argv = [
        "in=reads.fq",
        "interleaved=f",
        "out=keep1.fq",
        "out2=keep2.fq",
        "passes=1",
    ];
    let failure = parse_args(argv.into_iter().map(OsString::from)).unwrap_err();
    let message = failure.to_string();
    assert!(message.contains("out2"));
}
4902
/// Walks through the ECC switch combinations and checks which of the
/// `error_correct*` / `overlap_error_correct*` fields each one sets, plus the
/// pass count and notes where the test pins them.
#[test]
fn enabled_ecc_sets_real_correction_fields() {
    // Plain ecc=t: two passes, correction on in both stages, no notes.
    let plain_args = ["in=x.fq", "ecc=t"];
    let plain = parse_args(plain_args.into_iter().map(OsString::from)).unwrap();
    assert_eq!(plain.passes, 2);
    assert!(plain.error_correct);
    assert!(plain.error_correct_first);
    assert!(plain.error_correct_final);
    assert!(!plain.overlap_error_correct);
    assert!(!plain.mark_errors_only);
    assert!(plain.notes.is_empty());

    // ecc=f: everything off.
    let off = parse(&["in=x.fq", "ecc=f"]);
    assert!(!off.error_correct);
    assert!(!off.error_correct_first);
    assert!(!off.error_correct_final);
    assert!(!off.overlap_error_correct);

    // First stage only.
    let first_only = parse(&["in=x.fq", "ecc1=t", "ecc2=f"]);
    assert!(first_only.error_correct);
    assert!(first_only.error_correct_first);
    assert!(!first_only.error_correct_final);

    // Final stage only.
    let final_only = parse(&["in=x.fq", "ecc1=f", "eccf=t"]);
    assert!(final_only.error_correct);
    assert!(!final_only.error_correct_first);
    assert!(final_only.error_correct_final);

    // markerrors=t enables the first stage but not the final one.
    let marking = parse(&["in=x.fq", "markerrors=t"]);
    assert!(marking.error_correct);
    assert!(marking.error_correct_first);
    assert!(!marking.error_correct_final);

    // ecco=t: explicit overlap correction with a leading note.
    let overlap = parse(&["in=x.fq", "ecco=t"]);
    assert!(overlap.error_correct);
    assert!(overlap.error_correct_first);
    assert!(overlap.error_correct_final);
    assert!(overlap.overlap_error_correct);
    assert!(!overlap.overlap_error_correct_auto);
    assert!(overlap.notes[0].contains("paired overlap repair"));

    // ecco=auto: automatic overlap mode with a leading note.
    let overlap_auto = parse(&["in=x.fq", "ecco=auto"]);
    assert!(overlap_auto.error_correct);
    assert!(overlap_auto.error_correct_first);
    assert!(overlap_auto.error_correct_final);
    assert!(!overlap_auto.overlap_error_correct);
    assert!(overlap_auto.overlap_error_correct_auto);
    assert!(overlap_auto.notes[0].contains("automatic overlap"));

    // A later ecco=f clears the overlap flags but leaves correction on.
    let overlap_cleared = parse(&["in=x.fq", "ecco=t", "ecco=f"]);
    assert!(overlap_cleared.error_correct);
    assert!(!overlap_cleared.overlap_error_correct);
    assert!(!overlap_cleared.overlap_error_correct_auto);
}
4956
/// ECC tuning switches populate their config fields without producing notes;
/// non-numeric values for the tuning switches and their aliases must fail
/// with "expects".
#[test]
fn accepts_ecc_tuning_controls_and_validates_integers() {
    let tuning_args = [
        "in=reads.fq",
        "ecclimit=3",
        "eccmaxqual=127",
        "errorcorrectratio=140",
        "echighthresh=22",
        "eclowthresh=2",
        "suflen=3",
        "prefixlen=3",
        "cfl=t",
        "cfr=f",
    ];
    let parsed = parse(&tuning_args);
    assert_eq!(parsed.max_errors_to_correct, 3);
    assert_eq!(parsed.max_quality_to_correct, 127);
    assert_eq!(parsed.error_correct_ratio, 140);
    assert_eq!(parsed.error_correct_high_thresh, 22);
    assert_eq!(parsed.error_correct_low_thresh, 2);
    assert_eq!(parsed.suffix_len, 3);
    assert_eq!(parsed.prefix_len, 3);
    assert!(parsed.correct_from_left);
    assert!(!parsed.correct_from_right);
    assert!(parsed.notes.is_empty());

    let malformed = [
        "ecclimit=abc",
        "eccmaxqual=abc",
        "ecr=abc",
        "echthresh=abc",
        "eclt=abc",
        "suflen=abc",
        "prelen=abc",
    ];
    malformed.into_iter().for_each(|case| {
        let argv = ["in=reads.fq", "passes=1", case];
        let err = parse_args(argv.into_iter().map(OsString::from))
            .unwrap_err()
            .to_string();
        assert!(
            err.contains("expects"),
            "unexpected error for malformed {case}: {err}"
        );
    });
}
5004
/// `countup=t` sets the flag; `countup=f` clears it and leaves a note that
/// mentions `countup=f`.
#[test]
fn parses_countup_mode() {
    let enabled_args = ["in=x.fq", "countup=t"];
    let enabled = parse_args(enabled_args.into_iter().map(OsString::from)).unwrap();
    assert!(enabled.count_up);

    let disabled = parse(&["in=x.fq", "countup=f"]);
    assert!(!disabled.count_up);
    let explained = disabled.notes.iter().any(|note| note.contains("countup=f"));
    assert!(explained);
}
5014
/// GPU counting switches set their fields and leave a note mentioning
/// "experimental CUDA".
#[test]
fn parses_experimental_gpu_counting_controls() {
    let gpu_args = [
        "in=reads.fq",
        "gpucounting=t",
        "gpuhelper=tmp/cuda_kmer_reduce_runs",
        "gpupersistent=t",
    ];
    let parsed = parse(&gpu_args);
    assert!(parsed.gpu_counting);

    let expected_helper = Some(PathBuf::from("tmp/cuda_kmer_reduce_runs"));
    assert_eq!(parsed.gpu_helper, expected_helper);
    assert!(parsed.gpu_persistent);

    let flagged = parsed
        .notes
        .iter()
        .any(|note| note.contains("experimental CUDA"));
    assert!(flagged);
}
5035
/// Each wrapper-sampling switch must parse successfully and leave a note
/// containing "Rust ignores it".
#[test]
fn wrapper_sampling_options_fall_back_to_supported_normalization() {
    let sampling_cases = [
        "sampleoutput=1",
        "readsample=1",
        "kmersample=1",
        "samplerate=0.5",
        "sample=0.5",
        "sampleseed=1",
        "seed=1",
    ];
    sampling_cases.into_iter().for_each(|case| {
        let argv = ["in=x.fq", "passes=1", case];
        let parsed = parse_args(argv.into_iter().map(OsString::from)).unwrap();
        let ignored = parsed
            .notes
            .iter()
            .any(|note| note.contains("Rust ignores it"));
        assert!(ignored, "expected sampling fallback note for {case}");
    });
}
5061
/// The deterministic aliases turn the flag on; `deterministic=f` turns it
/// off, must not emit the "not implemented yet" note, and must emit a note
/// mentioning "faster parallel replay".
#[test]
fn nondeterministic_mode_stays_enabled_for_random_selection() {
    ["deterministic=t", "dr=t", "det=t"]
        .into_iter()
        .for_each(|case| {
            let parsed = parse(&["in=reads.fq", case]);
            assert!(parsed.deterministic, "expected deterministic mode for {case}");
        });

    let argv = ["in=reads.fq", "passes=1", "deterministic=f"];
    let relaxed = parse_args(argv.into_iter().map(OsString::from)).unwrap();
    assert!(!relaxed.deterministic);

    let no_unimplemented_note = relaxed
        .notes
        .iter()
        .all(|note| !note.contains("deterministic=f is not implemented yet"));
    assert!(no_unimplemented_note);

    let mentions_replay = relaxed
        .notes
        .iter()
        .any(|note| note.contains("faster parallel replay"));
    assert!(mentions_replay);
}
5087}