// bbnorm_rs/cli.rs

1use crate::seqio::JunkMode;
2use anyhow::{Context, Result, bail};
3use std::collections::VecDeque;
4use std::ffi::OsString;
5use std::fs;
6use std::path::PathBuf;
7
/// Help text for the `bbnorm-rs` command line.
///
/// `parse_args` returns this string as its error message when help is
/// requested (`-h`, `--help`, `help`) or when no arguments are supplied.
pub const USAGE: &str = "bbnorm-rs: Rust BBNorm compatibility port\n\nUsage:\n  bbnorm-rs in=<reads.fq> out=<kept.fq> outt=<tossed.fq> hist=<hist.tsv> [passes=1]\n\nThis working Rust slice supports exact k-mer counting for small inputs, automatic bounded count-min input sketches for large inputs, explicit bounded sketches via cells/matrixbits/sketchmemory, conservative atomic bits=32 sketch insertion with packed small-bit fallbacks, constrained and memory-sized prefilter sketch collision behavior, deterministic normalization, managed multipass temp-file orchestration, count-up mode with bounded kept-count sketches when requested, table-based ECC for covered paths, hist/rhist/peaks output, low/mid/high depth bins, zlib-rs gzip, BBTools-style pigz/unpigz hooks when available, bounded cardinality/loglog estimates when requested, and Rayon worker controls including threads=auto/max/all. Wrapper-sampling requests fall back to the supported engine with notes.";
9
/// Bucket count that `CardinalitySettings::default()` starts with.
pub const CARDINALITY_DEFAULT_BUCKETS: usize = 2048;
/// Largest bucket count constant for cardinality estimation (2^26).
///
/// NOTE(review): presumably the upper bound enforced on a user-supplied
/// bucket count; confirm against the code that validates the setting.
pub const CARDINALITY_MAX_BUCKETS: usize = 1 << 26;
12
/// Optional overrides for the count-min sketch.
///
/// Every field is an `Option`; the derived `Default` leaves all of them
/// `None`.
///
/// NOTE(review): the per-field descriptions below follow the field names and
/// the usage text ("cells/matrixbits/sketchmemory"); confirm exact semantics
/// against the code that consumes them.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CountMinSettings {
    /// Requested number of sketch cells, if any.
    pub cells: Option<usize>,
    /// Requested number of hash functions, if any.
    pub hashes: Option<usize>,
    /// Requested bits per cell, if any.
    pub bits: Option<u8>,
    /// Requested sketch memory in bytes, if any.
    pub memory_bytes: Option<usize>,
}
20
/// Settings for the optional prefilter sketch.
///
/// The derived `Default` yields a prefilter that is neither enabled nor
/// force-disabled, with every sizing override left as `None`.
///
/// NOTE(review): the per-field descriptions below follow the field names;
/// confirm exact semantics against the code that consumes them.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PrefilterSettings {
    /// Whether the prefilter was requested.
    pub enabled: bool,
    /// Whether the prefilter was explicitly turned off.
    pub force_disabled: bool,
    /// Requested number of prefilter cells, if any.
    pub cells: Option<usize>,
    /// Requested number of prefilter hash functions, if any.
    pub hashes: Option<usize>,
    /// Requested bits per prefilter cell, if any.
    pub bits: Option<u8>,
    /// Requested prefilter memory in bytes, if any.
    pub memory_bytes: Option<usize>,
    /// Requested prefilter memory share, if any; presumably expressed in
    /// millionths of the total (per the `_micros` suffix) — TODO confirm.
    pub memory_fraction_micros: Option<u32>,
}
31
/// Settings for cardinality (loglog-style) estimation.
///
/// `Eq` is not derived because `min_probability` is an `f64`.
///
/// NOTE(review): the per-field descriptions below follow the field names;
/// confirm exact semantics against the code that consumes them.
#[derive(Debug, Clone, PartialEq)]
pub struct CardinalitySettings {
    /// Whether to estimate cardinality for the input.
    pub input: bool,
    /// Whether to estimate cardinality for the output.
    pub output: bool,
    /// Number of estimator buckets.
    pub buckets: usize,
    /// K-mer length override for the estimator, if any.
    pub k: Option<usize>,
    /// Seed for the estimator.
    pub seed: u64,
    /// Minimum probability threshold for the estimator.
    pub min_probability: f64,
}
41
42impl Default for CardinalitySettings {
43    fn default() -> Self {
44        Self {
45            input: false,
46            output: false,
47            buckets: CARDINALITY_DEFAULT_BUCKETS,
48            k: None,
49            seed: 0,
50            min_probability: 0.0,
51        }
52    }
53}
54
55#[derive(Debug, Clone)]
56pub struct Config {
57    pub in1: Option<PathBuf>,
58    pub in2: Option<PathBuf>,
59    pub extra: Vec<PathBuf>,
60    pub out1: Option<PathBuf>,
61    pub out2: Option<PathBuf>,
62    pub out_toss1: Option<PathBuf>,
63    pub out_toss2: Option<PathBuf>,
64    pub out_low1: Option<PathBuf>,
65    pub out_low2: Option<PathBuf>,
66    pub out_mid1: Option<PathBuf>,
67    pub out_mid2: Option<PathBuf>,
68    pub out_high1: Option<PathBuf>,
69    pub out_high2: Option<PathBuf>,
70    pub out_uncorrected1: Option<PathBuf>,
71    pub out_uncorrected2: Option<PathBuf>,
72    pub hist_in: Option<PathBuf>,
73    pub hist_out: Option<PathBuf>,
74    pub rhist_in: Option<PathBuf>,
75    pub rhist_out: Option<PathBuf>,
76    pub peaks_in: Option<PathBuf>,
77    pub peaks_out: Option<PathBuf>,
78    pub match_hist_out: Option<PathBuf>,
79    pub insert_hist_out: Option<PathBuf>,
80    pub quality_accuracy_hist_out: Option<PathBuf>,
81    pub indel_hist_out: Option<PathBuf>,
82    pub error_hist_out: Option<PathBuf>,
83    pub quality_hist_out: Option<PathBuf>,
84    pub base_quality_hist_out: Option<PathBuf>,
85    pub quality_count_hist_out: Option<PathBuf>,
86    pub average_quality_hist_out: Option<PathBuf>,
87    pub overall_base_quality_hist_out: Option<PathBuf>,
88    pub length_hist_out: Option<PathBuf>,
89    pub gc_hist_out: Option<PathBuf>,
90    pub base_hist_out: Option<PathBuf>,
91    pub entropy_hist_out: Option<PathBuf>,
92    pub identity_hist_out: Option<PathBuf>,
93    pub barcode_stats_out: Option<PathBuf>,
94    pub k: usize,
95    pub min_quality: u8,
96    pub quality_in_offset: u8,
97    pub quality_out_offset: u8,
98    pub change_quality: bool,
99    pub min_called_quality: u8,
100    pub max_called_quality: u8,
101    pub fake_quality: u8,
102    pub fasta_wrap: usize,
103    pub u_to_t: bool,
104    pub to_upper_case: bool,
105    pub lower_case_to_n: bool,
106    pub dot_dash_x_to_n: bool,
107    pub iupac_to_n: bool,
108    pub fix_junk_and_iupac: bool,
109    pub junk_mode: JunkMode,
110    pub min_prob: f64,
111    pub max_reads: Option<u64>,
112    pub table_reads: Option<u64>,
113    pub min_length: usize,
114    pub trim_left: bool,
115    pub trim_right: bool,
116    pub trim_quality: f64,
117    pub trim_optimal: bool,
118    pub trim_optimal_bias: Option<f64>,
119    pub trim_window: bool,
120    pub trim_window_length: usize,
121    pub trim_min_good_interval: usize,
122    pub interleaved: bool,
123    pub test_interleaved: bool,
124    pub keep_all: bool,
125    pub zero_bin: bool,
126    pub deterministic: bool,
127    pub rename_reads: bool,
128    pub canonical: bool,
129    pub remove_duplicate_kmers: bool,
130    pub fix_spikes: bool,
131    pub target_depth: u64,
132    pub target_depth_first: Option<u64>,
133    pub target_bad_percent_low: f64,
134    pub target_bad_percent_high: f64,
135    pub max_depth: Option<u64>,
136    pub min_depth: u64,
137    pub min_kmers_over_min_depth: usize,
138    pub depth_percentile: f64,
139    pub high_percentile: f64,
140    pub low_percentile: f64,
141    pub error_detect_ratio: u64,
142    pub high_thresh: u64,
143    pub low_thresh: u64,
144    pub toss_error_reads: bool,
145    pub toss_error_reads_first: bool,
146    pub require_both_bad: bool,
147    pub save_rare_reads: bool,
148    pub discard_bad_only: bool,
149    pub discard_bad_only_first: bool,
150    pub error_correct: bool,
151    pub error_correct_first: bool,
152    pub error_correct_final: bool,
153    pub overlap_error_correct: bool,
154    pub overlap_error_correct_auto: bool,
155    pub mark_errors_only: bool,
156    pub mark_uncorrectable_errors: bool,
157    pub trim_after_marking: bool,
158    pub mark_with_one: bool,
159    pub error_correct_ratio: u64,
160    pub error_correct_high_thresh: u64,
161    pub error_correct_low_thresh: u64,
162    pub max_errors_to_correct: usize,
163    pub max_quality_to_correct: u8,
164    pub correct_from_left: bool,
165    pub correct_from_right: bool,
166    pub suffix_len: usize,
167    pub prefix_len: usize,
168    pub count_up: bool,
169    pub add_bad_reads_countup: bool,
170    pub use_lower_depth: bool,
171    pub toss_by_low_true_depth: bool,
172    pub low_bin_depth: i64,
173    pub high_bin_depth: i64,
174    pub hist_len: usize,
175    pub side_hist_len: Option<usize>,
176    pub gc_bins: Option<usize>,
177    pub entropy_bins: usize,
178    pub entropy_k: usize,
179    pub entropy_window: usize,
180    pub allow_entropy_ns: bool,
181    pub identity_bins: usize,
182    pub cardinality: CardinalitySettings,
183    pub hist_columns: u8,
184    pub print_zero_coverage: bool,
185    pub peak_min_height: u64,
186    pub peak_min_volume: u64,
187    pub peak_min_width: usize,
188    pub peak_min_peak: usize,
189    pub peak_max_peak: usize,
190    pub peak_max_count: usize,
191    pub peak_ploidy: i32,
192    pub overwrite: bool,
193    pub append: bool,
194    pub passes: usize,
195    pub threads: Option<usize>,
196    pub gzip_threads: Option<usize>,
197    pub temp_dir: Option<PathBuf>,
198    pub use_temp_dir: bool,
199    pub max_countup_spill_initial_runs: Option<usize>,
200    pub max_countup_spill_merge_runs: Option<usize>,
201    pub max_countup_spill_final_runs: Option<usize>,
202    pub max_countup_spill_live_bytes: Option<u64>,
203    pub max_countup_spill_final_live_bytes: Option<u64>,
204    pub max_countup_spill_write_bytes: Option<u64>,
205    pub table_initial_size: Option<usize>,
206    pub table_prealloc_fraction: Option<f64>,
207    pub build_passes: usize,
208    pub auto_count_min: bool,
209    pub force_exact_counts: bool,
210    pub auto_count_min_input_bytes: usize,
211    pub auto_count_min_read_threshold: u64,
212    pub auto_count_min_memory_bytes: Option<usize>,
213    pub count_min: CountMinSettings,
214    pub count_min_bits_first: Option<u8>,
215    pub prefilter: PrefilterSettings,
216    pub locked_increment: Option<bool>,
217    pub gpu_counting: bool,
218    pub gpu_helper: Option<PathBuf>,
219    pub gpu_persistent: bool,
220    pub notes: Vec<String>,
221}
222
223impl Default for Config {
224    fn default() -> Self {
225        Self {
226            in1: None,
227            in2: None,
228            extra: Vec::new(),
229            out1: None,
230            out2: None,
231            out_toss1: None,
232            out_toss2: None,
233            out_low1: None,
234            out_low2: None,
235            out_mid1: None,
236            out_mid2: None,
237            out_high1: None,
238            out_high2: None,
239            out_uncorrected1: None,
240            out_uncorrected2: None,
241            hist_in: None,
242            hist_out: None,
243            rhist_in: None,
244            rhist_out: None,
245            peaks_in: None,
246            peaks_out: None,
247            match_hist_out: None,
248            insert_hist_out: None,
249            quality_accuracy_hist_out: None,
250            indel_hist_out: None,
251            error_hist_out: None,
252            quality_hist_out: None,
253            base_quality_hist_out: None,
254            quality_count_hist_out: None,
255            average_quality_hist_out: None,
256            overall_base_quality_hist_out: None,
257            length_hist_out: None,
258            gc_hist_out: None,
259            base_hist_out: None,
260            entropy_hist_out: None,
261            identity_hist_out: None,
262            barcode_stats_out: None,
263            k: 31,
264            min_quality: 5,
265            quality_in_offset: 33,
266            quality_out_offset: 33,
267            change_quality: true,
268            min_called_quality: 2,
269            max_called_quality: 50,
270            fake_quality: 30,
271            fasta_wrap: 70,
272            u_to_t: false,
273            to_upper_case: false,
274            lower_case_to_n: false,
275            dot_dash_x_to_n: false,
276            iupac_to_n: false,
277            fix_junk_and_iupac: false,
278            junk_mode: JunkMode::Crash,
279            min_prob: 0.5,
280            max_reads: None,
281            table_reads: None,
282            min_length: 1,
283            trim_left: false,
284            trim_right: false,
285            trim_quality: 5.0,
286            trim_optimal: true,
287            trim_optimal_bias: None,
288            trim_window: false,
289            trim_window_length: 4,
290            trim_min_good_interval: 2,
291            interleaved: false,
292            test_interleaved: true,
293            keep_all: false,
294            zero_bin: false,
295            deterministic: true,
296            rename_reads: false,
297            canonical: true,
298            remove_duplicate_kmers: true,
299            fix_spikes: false,
300            target_depth: 100,
301            target_depth_first: None,
302            target_bad_percent_low: 0.85,
303            target_bad_percent_high: 1.5,
304            max_depth: None,
305            min_depth: 5,
306            min_kmers_over_min_depth: 15,
307            depth_percentile: 0.54,
308            high_percentile: 0.90,
309            low_percentile: 0.25,
310            error_detect_ratio: 125,
311            high_thresh: 12,
312            low_thresh: 3,
313            toss_error_reads: false,
314            toss_error_reads_first: false,
315            require_both_bad: false,
316            save_rare_reads: false,
317            discard_bad_only: false,
318            discard_bad_only_first: false,
319            error_correct: false,
320            error_correct_first: false,
321            error_correct_final: false,
322            overlap_error_correct: false,
323            overlap_error_correct_auto: false,
324            mark_errors_only: false,
325            mark_uncorrectable_errors: false,
326            trim_after_marking: false,
327            mark_with_one: false,
328            error_correct_ratio: 140,
329            error_correct_high_thresh: 22,
330            error_correct_low_thresh: 2,
331            max_errors_to_correct: 3,
332            max_quality_to_correct: 127,
333            correct_from_left: true,
334            correct_from_right: true,
335            suffix_len: 3,
336            prefix_len: 3,
337            count_up: false,
338            add_bad_reads_countup: false,
339            use_lower_depth: true,
340            toss_by_low_true_depth: true,
341            low_bin_depth: 10,
342            high_bin_depth: 80,
343            hist_len: (1 << 20) + 1,
344            side_hist_len: None,
345            gc_bins: None,
346            entropy_bins: 1000,
347            entropy_k: 5,
348            entropy_window: 50,
349            allow_entropy_ns: true,
350            identity_bins: 750,
351            cardinality: CardinalitySettings::default(),
352            hist_columns: 3,
353            print_zero_coverage: false,
354            peak_min_height: 2,
355            peak_min_volume: 5,
356            peak_min_width: 3,
357            peak_min_peak: 2,
358            peak_max_peak: i32::MAX as usize,
359            peak_max_count: 10,
360            peak_ploidy: -1,
361            overwrite: false,
362            append: false,
363            passes: 2,
364            threads: None,
365            gzip_threads: None,
366            temp_dir: None,
367            use_temp_dir: false,
368            max_countup_spill_initial_runs: None,
369            max_countup_spill_merge_runs: None,
370            max_countup_spill_final_runs: None,
371            max_countup_spill_live_bytes: None,
372            max_countup_spill_final_live_bytes: None,
373            max_countup_spill_write_bytes: None,
374            table_initial_size: None,
375            table_prealloc_fraction: None,
376            build_passes: 1,
377            auto_count_min: true,
378            force_exact_counts: false,
379            auto_count_min_input_bytes: 32 * 1024 * 1024,
380            auto_count_min_read_threshold: 250_000,
381            auto_count_min_memory_bytes: None,
382            count_min: CountMinSettings::default(),
383            count_min_bits_first: None,
384            prefilter: PrefilterSettings::default(),
385            locked_increment: None,
386            gpu_counting: false,
387            gpu_helper: None,
388            gpu_persistent: false,
389            notes: Vec::new(),
390        }
391    }
392}
393
394pub fn parse_args<I>(args: I) -> Result<Config>
395where
396    I: IntoIterator<Item = OsString>,
397{
398    let mut config = Config::default();
399    let mut positional = Vec::new();
400    let mut saw_arg = false;
401    let mut pending: VecDeque<OsString> = args.into_iter().collect();
402
403    while let Some(raw) = pending.pop_front() {
404        saw_arg = true;
405        let arg = raw.to_string_lossy().into_owned();
406        if arg == "-h" || arg == "--help" || arg.eq_ignore_ascii_case("help") {
407            bail!(USAGE);
408        }
409        if let Some((key, value)) = arg.split_once('=') {
410            let key = key.to_ascii_lowercase();
411            if key == "config" {
412                let expanded = read_config_args(value)?;
413                config.notes.push(format!(
414                    "config={value} expanded into {} BBTools-style argument line(s)",
415                    expanded.len()
416                ));
417                for item in expanded.into_iter().rev() {
418                    pending.push_front(OsString::from(item));
419                }
420            } else {
421                handle_key_value(&mut config, &key, value)?;
422            }
423        } else if arg.eq_ignore_ascii_case("null") {
424            // BBTools treats a literal "null" argument as an inert placeholder.
425        } else if arg.eq_ignore_ascii_case("1pass") || arg.eq_ignore_ascii_case("1p") {
426            config.passes = 1;
427            config.notes.push("single-pass mode selected".to_string());
428        } else if arg.eq_ignore_ascii_case("2pass") || arg.eq_ignore_ascii_case("2p") {
429            config.passes = 2;
430        } else {
431            let key = arg.to_ascii_lowercase();
432            if is_bare_boolean_key(&key) {
433                handle_key_value(&mut config, &key, "t")?;
434            } else {
435                positional.push(PathBuf::from(arg));
436            }
437        }
438    }
439
440    if !saw_arg {
441        bail!(USAGE);
442    }
443
444    if positional.len() > 2 {
445        bail!(
446            "expected at most two positional inputs; use in=<file> and in2=<file> for paired input"
447        );
448    }
449    if config.in1.is_none() {
450        config.in1 = positional.first().cloned();
451    }
452    if config.in2.is_none() {
453        config.in2 = positional.get(1).cloned();
454    }
455
456    fill_default_gzip_threads(&mut config);
457    validate(&mut config)?;
458    Ok(config)
459}
460
/// Returns `true` when `key` is a flag that may appear on the command line
/// without `=value`.
///
/// `parse_args` lower-cases a bare argument before calling this and, on a
/// match, handles it as `key=t`. The comparison is exact, so `key` must
/// already be lower-case.
fn is_bare_boolean_key(key: &str) -> bool {
    matches!(
        key,
        "keepall"
            | "zerobin"
            | "deterministic"
            | "dr"
            | "det"
            | "rn"
            | "rename"
            | "renamereads"
            | "canonical"
            | "removeduplicatekmers"
            | "rdk"
            | "fixspikes"
            | "fs"
            | "tossbadreads"
            | "tosserrorreads"
            | "tbr"
            | "ter"
            | "requirebothbad"
            | "rbb"
            | "removeifeitherbad"
            | "rieb"
            | "saverarereads"
            | "srr"
            | "discardbadonly"
            | "dbo"
            | "uselowerdepth"
            | "uld"
            | "printzerocoverage"
            | "pzc"
            | "overwrite"
            | "ow"
            | "ignorebadquality"
            | "ibq"
            | "changequality"
            | "cq"
            | "utot"
            | "tuc"
            | "touppercase"
            | "lctn"
            | "lowercaseton"
            | "dotdashxton"
            | "undefinedton"
            | "iupacton"
            | "itn"
            | "fixjunk"
            | "ignorejunk"
            | "usebgzip"
            | "bgzip"
            | "usepigz"
            | "pigz"
            | "usegunzip"
            | "gunzip"
            | "ungzip"
            | "useunpigz"
            | "unpigz"
            | "useunbgzip"
            | "unbgzip"
            | "usegzip"
            | "gzip"
            | "usebgzf"
            | "bgzf"
            | "ordered"
            | "ord"
            | "verbose"
            | "printcoverage"
            | "append"
            | "app"
            | "interleaved"
            | "int"
            | "testinterleaved"
            | "forceinterleaved"
            | "prefilter"
            | "autocountmin"
            | "autosketch"
            | "autosketchtable"
            | "autosketchtables"
            | "exact"
            | "exactcount"
            | "exactcounts"
            | "useexact"
            | "sketchexact"
            | "auto"
            | "automatic"
            | "countup"
            | "abrc"
            | "addbadreadscountup"
            | "markerrors"
            | "markonly"
            | "meo"
            | "markuncorrectableerrors"
            | "markuncorrectable"
            | "mue"
            | "tam"
            | "trimaftermarking"
            | "markwith1"
            | "markwithone"
            | "mw1"
            | "aec"
            | "aecc"
            | "aggressiveerrorcorrection"
            | "cec"
            | "cecc"
            | "conservativeerrorcorrection"
            | "ecc"
            | "ecc1"
            | "ecc2"
            | "eccf"
            | "eccbyoverlap"
            | "ecco"
            | "overlap"
            | "cfl"
            | "cfr"
            | "cardinality"
            | "loglog"
            | "loglogin"
            | "cardinalityout"
            | "loglogout"
    )
}
583
584fn fill_default_gzip_threads(config: &mut Config) {
585    if config.gzip_threads.is_some() {
586        return;
587    }
588    let Some(threads) = config.threads.filter(|threads| *threads > 1) else {
589        return;
590    };
591    config.gzip_threads = Some(threads);
592    config.notes.push(format!(
593        "threads={threads} also enables gzip input/output workers up to {threads}; use zipthreads=1 to force single-thread gzip I/O"
594    ));
595}
596
597fn read_config_args(value: &str) -> Result<Vec<String>> {
598    let mut args = Vec::new();
599    for file in value.split(',').filter(|part| !part.trim().is_empty()) {
600        let path = PathBuf::from(file.trim());
601        let text = fs::read_to_string(&path)
602            .with_context(|| format!("could not process config file {}", path.display()))?;
603        for line in text.lines() {
604            let trimmed = line.trim();
605            if !trimmed.is_empty() && !trimmed.starts_with('#') {
606                args.push(trimmed.to_string());
607            }
608        }
609    }
610    Ok(args)
611}
612
613fn handle_key_value(config: &mut Config, key: &str, value: &str) -> Result<()> {
614    match key {
615        "in" | "input" | "in1" | "input1" => config.in1 = Some(path(value)),
616        "in2" | "input2" => config.in2 = Some(path(value)),
617        "extra" => config.extra.extend(extra_paths(value)),
618        "out" | "output" | "out1" | "output1" | "outk" | "outkeep" | "outgood" => {
619            config.out1 = Some(path(value))
620        }
621        "out2" | "output2" | "outk2" | "outkeep2" | "outgood2" => config.out2 = Some(path(value)),
622        "outt" | "outt1" | "outtoss" | "outoss" | "outbad" => {
623            config.out_toss1 = Some(path(value));
624        }
625        "outt2" | "outtoss2" | "outoss2" | "outbad2" => config.out_toss2 = Some(path(value)),
626        "outl" | "outl1" | "outlow" | "outlow1" => config.out_low1 = Some(path(value)),
627        "outl2" | "outlow2" => config.out_low2 = Some(path(value)),
628        "outm" | "outm1" | "outmid" | "outmid1" | "outmiddle" => {
629            config.out_mid1 = Some(path(value));
630        }
631        "outm2" | "outmid2" | "outmiddle2" => config.out_mid2 = Some(path(value)),
632        "outh" | "outh1" | "outhigh" | "outhigh1" => config.out_high1 = Some(path(value)),
633        "outh2" | "outhigh2" => config.out_high2 = Some(path(value)),
634        "outu" | "outu1" | "outuncorrected" => config.out_uncorrected1 = Some(path(value)),
635        "outu2" | "outuncorrected2" => config.out_uncorrected2 = Some(path(value)),
636        "hist" | "histin" | "inhist" | "khist" => config.hist_in = Some(path(value)),
637        "histout" | "outhist" | "hist2" | "khistout" => config.hist_out = Some(path(value)),
638        "rhist" => config.rhist_in = Some(path(value)),
639        "rhistout" => config.rhist_out = Some(path(value)),
640        "peaks" => config.peaks_in = Some(path(value)),
641        "peaksout" => config.peaks_out = Some(path(value)),
642        "extin" | "extout" => {
643            config.notes.push(format!(
644                "{key}={value} is a BBTools file-extension hint; covered Rust paths infer FASTA/FASTQ format from explicit filenames"
645            ));
646        }
647        "k" | "kmer" => config.k = parse_usize(value, key)?,
648        "minq" | "minqual" => config.min_quality = parse_u8(value, key)?,
649        "minprob" => config.min_prob = parse_f64(value, key)?,
650        "reads" | "maxreads" => config.max_reads = parse_limit(value, key)?,
651        "tablereads" | "buildreads" => config.table_reads = parse_limit(value, key)?,
652        "ml" | "minlen" | "minlength" => config.min_length = parse_kmg_usize(value, key)?,
653        "maxlength" | "maxreadlength" | "maxreadlen" | "maxlen" => {
654            let _ = parse_kmg_usize(value, key)?;
655            config.notes.push(format!(
656                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
657            ));
658        }
659        "mingc" | "maxgc" | "mlf" | "minlenfrac" | "minlenfraction" | "minlengthfraction" => {
660            let _ = parse_f64(value, key)?;
661            config.notes.push(format!(
662                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
663            ));
664        }
665        "usepairgc"
666        | "pairgc"
667        | "trimbadsequence"
668        | "chastityfilter"
669        | "cf"
670        | "failnobarcode"
671        | "averagequalitybyprobability"
672        | "aqbp"
673        | "untrim" => {
674            let _ = parse_bool(value, key)?;
675            config.notes.push(format!(
676                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
677            ));
678        }
679        "badbarcodes" | "barcodefilter" => {
680            if !value.eq_ignore_ascii_case("crash") && !value.eq_ignore_ascii_case("fail") {
681                let _ = parse_bool(value, key)?;
682            }
683            config.notes.push(format!(
684                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
685            ));
686        }
687        "barcodes" | "barcode" => {
688            config.notes.push(format!(
689                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
690            ));
691        }
692        "maxns" => {
693            let _ = parse_i64(value, key)?;
694            config.notes.push(format!(
695                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
696            ));
697        }
698        "minconsecutivebases"
699        | "mcb"
700        | "minavgqualitybases"
701        | "maqb"
702        | "mintl"
703        | "mintrimlen"
704        | "mintrimlength" => {
705            let _ = parse_usize(value, key)?;
706            config.notes.push(format!(
707                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
708            ));
709        }
710        "minavgquality" | "minaveragequality" | "maq" => {
711            parse_min_average_quality(value, key)?;
712            config.notes.push(format!(
713                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
714            ));
715        }
716        "minbasequality" | "mbq" => {
717            let _ = parse_i8(value, key)?;
718            config.notes.push(format!(
719                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
720            ));
721        }
722        "build" | "genome" => {
723            let _ = parse_i32(value, key)?;
724            config.notes.push(format!(
725                "{key}={value} is a BBTools genome-build context control; covered Rust FASTA/FASTQ normalization does not use reference build metadata"
726            ));
727        }
728        "qtrim" | "qtrim1" | "qtrim2" => parse_qtrim(config, value, key)?,
729        "trimq" | "trimquality" | "trimq2" => {
730            config.trim_quality = parse_trim_quality(config, value, key)?
731        }
732        "trimleft" | "qtrimleft" => config.trim_left = parse_bool(value, key)?,
733        "trimright" | "qtrimright" => config.trim_right = parse_bool(value, key)?,
734        "optitrim" | "otf" | "otm" => parse_optitrim(config, value, key)?,
735        "trimgoodinterval" => config.trim_min_good_interval = parse_usize(value, key)?,
736        "trimclip" => {
737            let _ = parse_bool(value, key)?;
738            config.notes.push(format!(
739                "{key}={value} is parsed by BBNorm but not used by its trimFast call"
740            ));
741        }
742        "trimpolya" | "trimpolyg" | "trimpolygleft" | "trimpolygright" | "filterpolyg"
743        | "trimpolyc" | "trimpolycleft" | "trimpolycright" | "filterpolyc" | "maxnonpoly" => {
744            let _ = parse_poly(value, key)?;
745            config.notes.push(format!(
746                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
747            ));
748        }
749        "forcetrimmod" | "forcemrimmodulo" | "ftm" | "ftl" | "forcetrimleft" | "ftr"
750        | "forcetrimright" | "ftr2" | "forcetrimright2" => {
751            let _ = parse_i64(value, key)?;
752            config.notes.push(format!(
753                "{key}={value} is parsed by BBNorm but not used by KmerNormalize"
754            ));
755        }
756        "keepall" => config.keep_all = parse_bool(value, key)?,
757        "zerobin" => config.zero_bin = parse_bool(value, key)?,
758        "deterministic" | "dr" | "det" => {
759            config.deterministic = parse_bool(value, key)?;
760            if !config.deterministic {
761                config.notes.push(format!(
762                    "{key}={value} enables nondeterministic read selection and faster parallel replay for bounded approximate sketches"
763                ));
764            }
765        }
766        "rn" | "rename" | "renamereads" => config.rename_reads = parse_bool(value, key)?,
767        "canonical" => config.canonical = parse_bool(value, key)?,
768        "removeduplicatekmers" | "rdk" => config.remove_duplicate_kmers = parse_bool(value, key)?,
769        "fixspikes" | "fs" => config.fix_spikes = parse_bool(value, key)?,
770        "target" | "targetdepth" | "tgt" => config.target_depth = parse_u64(value, key)?,
771        "max" | "maxdepth" => config.max_depth = Some(parse_u64(value, key)?),
772        "min" | "mindepth" => config.min_depth = parse_u64(value, key)?,
773        "minkmers" | "minkmersovermindepth" | "mingoodkmersperread" | "mgkpr" => {
774            config.min_kmers_over_min_depth = parse_usize(value, key)?.max(1);
775        }
776        "percentile" | "depthpercentile" | "dp" => {
777            config.depth_percentile = parse_percent(value, key)?;
778        }
779        "highdepthpercentile" | "highpercentile" | "hdp" => {
780            config.high_percentile = parse_percent(value, key)?;
781        }
782        "lowdepthpercentile" | "lowpercentile" | "ldp" => {
783            config.low_percentile = parse_percent(value, key)?;
784        }
785        "errordetectratio" | "edr" => config.error_detect_ratio = parse_u64(value, key)?,
786        "highthresh" | "hthresh" | "ht" => config.high_thresh = parse_u64(value, key)?,
787        "lowthresh" | "lthresh" | "lt" => config.low_thresh = parse_u64(value, key)?,
788        "tossbadreads" | "tosserrorreads" | "tbr" | "ter" => {
789            let enabled = parse_bool(value, key)?;
790            config.toss_error_reads = enabled;
791            config.toss_error_reads_first = enabled;
792        }
793        "tossbadreads2" | "tosserrorreads2" | "tbr2" | "ter2" | "tossbadreadsf"
794        | "tosserrorreadsf" | "tbrf" | "terf" => {
795            config.toss_error_reads = parse_bool(value, key)?;
796        }
797        "tossbadreads1" | "tosserrorreads1" | "tbr1" | "ter1" => {
798            config.toss_error_reads_first = parse_bool(value, key)?;
799        }
800        "requirebothbad" | "rbb" => config.require_both_bad = parse_bool(value, key)?,
801        "removeifeitherbad" | "rieb" => config.require_both_bad = !parse_bool(value, key)?,
802        "saverarereads" | "srr" => config.save_rare_reads = parse_bool(value, key)?,
803        "discardbadonly" | "dbo" | "discardbadonlyf" | "dbof" | "discardbadonly2" | "dbo2" => {
804            let enabled = parse_bool(value, key)?;
805            config.discard_bad_only = enabled;
806            config.discard_bad_only_first = enabled;
807        }
808        "discardbadonly1" | "dbo1" => {
809            config.discard_bad_only_first = parse_bool(value, key)?;
810        }
811        "uselowerdepth" | "uld" => config.use_lower_depth = parse_bool(value, key)?,
812        "lbd" | "lowbindepth" | "lowerlimit" => config.low_bin_depth = parse_i64(value, key)?,
813        "hbd" | "highbindepth" | "upperlimit" => config.high_bin_depth = parse_i64(value, key)?,
814        "histlen" | "histogramlen" => config.hist_len = parse_usize(value, key)?.saturating_add(1),
815        "histcol" | "histcolumns" | "histogramcolumns" => {
816            config.hist_columns = parse_u8(value, key)?
817        }
818        "printzerocoverage" | "pzc" => config.print_zero_coverage = parse_bool(value, key)?,
819        "minheight" | "h" => config.peak_min_height = parse_u64(value, key)?,
820        "minvolume" | "v" => config.peak_min_volume = parse_u64(value, key)?,
821        "minwidth" | "w" => config.peak_min_width = parse_usize(value, key)?,
822        "minpeak" | "minp" => config.peak_min_peak = parse_usize(value, key)?,
823        "maxpeak" | "maxp" => config.peak_max_peak = parse_usize(value, key)?,
824        "ploidy" => config.peak_ploidy = parse_i32(value, key)?,
825        "maxpeakcount" | "maxpc" | "maxpeaks" => {
826            config.peak_max_count = parse_usize(value, key)?.max(1)
827        }
828        "overwrite" | "ow" => config.overwrite = parse_bool(value, key)?,
829        "passes" | "p" => {
830            config.passes = parse_usize(value, key)?;
831        }
832        "1pass" | "1p" => {
833            config.passes = 1;
834            config.notes.push("single-pass mode selected".to_string());
835        }
836        "2pass" | "2p" => {
837            config.passes = 2;
838        }
839        "ascii" | "asciioffset" | "quality" | "qual" => {
840            let offset = parse_quality_offset(value, key)?.unwrap_or(33);
841            config.quality_in_offset = offset;
842            config.quality_out_offset = offset;
843        }
844        "qin" | "asciiin" | "qualityin" | "qualin" => {
845            config.quality_in_offset = parse_quality_offset(value, key)?.unwrap_or(33);
846        }
847        "qout" | "asciiout" | "qualityout" | "qualout" => {
848            config.quality_out_offset = parse_quality_offset(value, key)?.unwrap_or(33);
849        }
850        "qauto" => config
851            .notes
852            .push("qauto accepted for BBTools-compatible quality alias handling".into()),
853        key if matches!(
854            quality_recal_base_key(key),
855            "recalibrate" | "recalibratequality" | "recal"
856        ) =>
857        {
858            if parse_bool(value, key)? {
859                bail!(
860                    "{key}={value} enables BBTools quality recalibration; Rust does not implement output-affecting recalibration yet"
861                );
862            }
863            config.notes.push(format!(
864                "{key}={value} keeps BBTools quality recalibration disabled in the supported Rust path"
865            ));
866        }
867        key if is_quality_recal_bool_key(key) => {
868            let _ = parse_java_bool(value);
869            config.notes.push(format!(
870                "{key}={value} is a BBTools quality-recalibration control; covered Rust output is unchanged"
871            ));
872        }
873        key if quality_recal_base_key(key) == "observationcutoff" => {
874            let _ = parse_kmg_i64(value, key)?;
875            config.notes.push(format!(
876                "{key}={value} is a BBTools quality-recalibration control; covered Rust output is unchanged"
877            ));
878        }
879        key if matches!(
880            quality_recal_base_key(key),
881            "recalpasses" | "recalqmax" | "recalqmin"
882        ) =>
883        {
884            let _ = parse_i32(value, key)?;
885            config.notes.push(format!(
886                "{key}={value} is a BBTools quality-recalibration control; covered Rust output is unchanged"
887            ));
888        }
889        key if quality_recal_base_key(key) == "qmatrixmode" => {
890            config.notes.push(format!(
891                "{key}={value} is a BBTools quality-recalibration matrix mode; covered Rust output is unchanged"
892            ));
893        }
894        "ignorebadquality" | "ibq" => {
895            if parse_bool(value, key)? {
896                config.change_quality = false;
897            }
898        }
899        "changequality" | "cq" => config.change_quality = parse_bool(value, key)?,
900        "mincalledquality" => {
901            config.min_called_quality = parse_i32_clamped(value, key, 0, 93)? as u8
902        }
903        "maxcalledquality" => {
904            config.max_called_quality = parse_i32_clamped(value, key, 1, 93)? as u8
905        }
906        "fakequality" | "qfake" => {
907            config.fake_quality = parse_i32_clamped(value, key, 0, 93)? as u8
908        }
909        "fakefastaqual" | "fakefastaquality" | "ffq" => parse_fake_fasta_quality(config, value)?,
910        "fastawrap" | "wrap" => config.fasta_wrap = parse_fasta_wrap(value, key)?,
911        "trd" | "trc" | "trimreaddescription" | "trimreaddescriptions" => {
912            let _ = parse_java_bool(value);
913            config.notes.push(format!(
914                "{key}={value} is accepted for KmerNormalize compatibility; covered FASTA/FASTQ read output keeps full headers like Java"
915            ));
916        }
917        "trimrefdescription" | "trimrefdescriptions" | "trimrname" => {
918            let _ = parse_java_bool(value);
919            config.notes.push(format!(
920                "{key}={value} is a BBTools reference-name trimming control; covered FASTA/FASTQ read output is unchanged"
921            ));
922        }
923        "utot" => config.u_to_t = parse_bool(value, key)?,
924        "tuc" | "touppercase" => config.to_upper_case = parse_bool(value, key)?,
925        "lctn" | "lowercaseton" => config.lower_case_to_n = parse_bool(value, key)?,
926        "dotdashxton" => config.dot_dash_x_to_n = parse_bool(value, key)?,
927        "undefinedton" | "iupacton" | "itn" => config.iupac_to_n = parse_bool(value, key)?,
928        "fixjunk" => {
929            if parse_bool(value, key)? {
930                config.junk_mode = JunkMode::Fix;
931            } else if config.junk_mode == JunkMode::Fix {
932                config.junk_mode = JunkMode::Crash;
933            }
934        }
935        "ignorejunk" => {
936            if parse_bool(value, key)? {
937                config.junk_mode = JunkMode::Ignore;
938            } else if config.junk_mode == JunkMode::Ignore {
939                config.junk_mode = JunkMode::Crash;
940            }
941        }
942        "flagjunk" => {
943            if parse_bool(value, key)? {
944                config.junk_mode = JunkMode::Flag;
945            } else if config.junk_mode == JunkMode::Flag {
946                config.junk_mode = JunkMode::Crash;
947            }
948        }
949        "tossjunk" => {
950            if parse_bool(value, key)? {
951                config.junk_mode = JunkMode::Flag;
952            }
953        }
954        "crashjunk" | "failjunk" => {
955            if parse_bool(value, key)? {
956                config.junk_mode = JunkMode::Crash;
957            } else if config.junk_mode == JunkMode::Crash {
958                config.junk_mode = JunkMode::Ignore;
959            }
960        }
961        "junk" => parse_junk_mode(config, value)?,
962        "threads" | "t" => {
963            let threads = value.to_ascii_lowercase();
964            if threads == "auto" {
965                config
966                    .notes
967                    .push("threads=auto accepted; Rayon will use its default worker count".into());
968            } else if matches!(threads.as_str(), "max" | "all") {
969                let workers = std::thread::available_parallelism()
970                    .map(|threads| threads.get())
971                    .unwrap_or(1);
972                config.threads = Some(workers);
973                config.notes.push(format!(
974                    "threads={threads} accepted; Rayon worker count will use all {workers} available workers"
975                ));
976            } else {
977                let threads = parse_i64(value, key)?;
978                if threads > 1 {
979                    config.threads = Some(threads as usize);
980                    config.notes.push(format!(
981                        "threads={threads} accepted; Rayon worker count will be capped to {threads}"
982                    ));
983                } else if threads == 1 {
984                    config.threads = Some(1);
985                }
986            }
987        }
988        "null" => {}
989        "monitor" | "killswitch" => {
990            parse_monitor(value, key)?;
991            config.notes.push(format!(
992                "{key}={value} is a BBTools watchdog runtime control; the Rust CLI accepts it as a no-op"
993            ));
994        }
995        "outstream" | "proxyhost" | "proxyport" | "metadatafile" => {
996            config.notes.push(format!(
997                "{key}={value} is a BBTools preparser runtime control; covered Rust output records are unchanged"
998            ));
999        }
1000        "json" | "silent" | "printexecuting" | "bufferbf" | "bufferbf1" => {
1001            let _ = parse_java_bool(value);
1002            config.notes.push(format!(
1003                "{key}={value} is a BBTools preparser runtime control; covered Rust output records are unchanged"
1004            ));
1005        }
1006        "testsize" => {
1007            let _ = parse_java_bool(value);
1008            config.notes.push(format!(
1009                "{key}={value} is a BBTools diagnostic sizing control; covered Rust output records are unchanged"
1010            ));
1011        }
1012        "breaklen" | "breaklength" => {
1013            let break_len = parse_i32(value, key)?;
1014            if break_len > 0 {
1015                bail!(
1016                    "{key}={value} enables BBTools read breaking; Rust does not implement output-affecting read splitting yet"
1017                );
1018            }
1019            config.notes.push(format!(
1020                "{key}={value} keeps BBTools read breaking disabled in the supported Rust path"
1021            ));
1022        }
1023        "usejni" | "jni" | "skipvalidation" | "validate" | "validateinconstructor" | "vic" => {
1024            let _ = parse_java_bool(value);
1025            config.notes.push(format!(
1026                "{key}={value} is a BBTools shared runtime/validation control; covered Rust output is unchanged"
1027            ));
1028        }
1029        "usempi" | "mpi" => {
1030            let enabled = parse_mpi_enabled(value, key)?;
1031            config.notes.push(format!(
1032                "{key}={value} is a BBTools MPI execution control; Rust runs locally and ignores MPI mode{}",
1033                if enabled { " for ASAP output" } else { "" }
1034            ));
1035        }
1036        "crismpi" | "mpikeepall" => {
1037            let enabled = parse_java_bool(value);
1038            config.notes.push(format!(
1039                "{key}={value} is a BBTools MPI stream control; Rust runs locally and ignores MPI stream mode{}",
1040                if enabled { " for ASAP output" } else { "" }
1041            ));
1042        }
1043        "bf1" | "bytefile1" | "bf2" | "bytefile2" | "bf3" | "bytefile3" | "bf4" | "bytefile4" => {
1044            let _ = parse_java_bool(value);
1045            config.notes.push(format!(
1046                "{key}={value} is a BBTools byte-file runtime control; covered Rust output is unchanged"
1047            ));
1048        }
1049        "bf1bufferlen" | "readbufferlength" | "readbufferlen" | "readbufferdata" => {
1050            let _ = parse_kmg_i64(value, key)?;
1051            config.notes.push(format!(
1052                "{key}={value} is a BBTools buffer-sizing control; covered Rust output is unchanged"
1053            ));
1054        }
1055        "bf4threads" | "bfthreads" | "readbuffers" => {
1056            let _ = parse_i32(value, key)?;
1057            config.notes.push(format!(
1058                "{key}={value} is a BBTools I/O threading control; current Rust engine manages I/O internally"
1059            ));
1060        }
1061        "workers" | "workerthreads" | "wt" | "threadsin" | "tin" | "threadsout" | "tout" => {
1062            parse_auto_or_i32(value, key)?;
1063            config.notes.push(format!(
1064                "{key}={value} is a BBTools I/O worker control; current Rust engine manages I/O internally"
1065            ));
1066        }
1067        "zipthreads" | "bgzfthreadsin" | "bgzftin" | "bgzfreadthreads" | "bgzfthreadsout"
1068        | "bgzftout" | "bgzfwritethreads" => {
1069            let threads = parse_i32(value, key)?;
1070            if threads > 0 {
1071                config.gzip_threads = Some(threads as usize);
1072            }
1073            config.notes.push(format!(
1074                "{key}={value} is a BBTools compression/threading control; Rust uses gzip input/output worker settings for .gz files when threads > 1"
1075            ));
1076        }
1077        "ziplevel" | "zl" | "bziplevel" | "bzl" | "blocksize" | "pigziterations" | "pigziters" => {
1078            let _ = parse_i32(value, key)?;
1079            config.notes.push(format!(
1080                "{key}={value} is a BBTools compression/threading control; covered Rust output records are unchanged"
1081            ));
1082        }
1083        "zipthreaddivisor" | "ztd" => {
1084            let _ = parse_f64(value, key)?;
1085            config.notes.push(format!(
1086                "{key}={value} is a BBTools compression/threading control; covered Rust output records are unchanged"
1087            ));
1088        }
1089        "usebgzip" | "bgzip" | "usepigz" | "pigz" => {
1090            if value
1091                .as_bytes()
1092                .first()
1093                .is_some_and(|byte| byte.is_ascii_digit())
1094            {
1095                let threads = parse_i32(value, key)?;
1096                if threads > 0 {
1097                    config.gzip_threads = Some(threads as usize);
1098                }
1099            } else if parse_java_bool(value) {
1100                let workers = config
1101                    .threads
1102                    .unwrap_or_else(|| std::thread::available_parallelism().map_or(1, |n| n.get()));
1103                if workers > 1 {
1104                    config.gzip_threads = Some(workers);
1105                }
1106            } else {
1107                config.gzip_threads = Some(1);
1108            }
1109            config.notes.push(format!(
1110                "{key}={value} is a BBTools compression control; Rust uses zlib-rs gzip plus pigz/unpigz hooks for .gz input/output when enabled and available"
1111            ));
1112        }
1113        "usegunzip" | "gunzip" | "ungzip" | "useunpigz" | "unpigz" | "useunbgzip" | "unbgzip" => {
1114            if value
1115                .as_bytes()
1116                .first()
1117                .is_some_and(|byte| byte.is_ascii_digit())
1118            {
1119                let threads = parse_i32(value, key)?;
1120                if threads > 0 {
1121                    config.gzip_threads = Some(threads as usize);
1122                }
1123            } else if parse_java_bool(value) {
1124                let workers = config
1125                    .threads
1126                    .unwrap_or_else(|| std::thread::available_parallelism().map_or(1, |n| n.get()));
1127                if workers > 1 {
1128                    config.gzip_threads = Some(workers);
1129                }
1130            } else {
1131                config.gzip_threads = Some(1);
1132            }
1133            config.notes.push(format!(
1134                "{key}={value} is a BBTools gzip-input control; Rust uses zlib-rs and tries pigz/unpigz for .gz input when worker count is >1"
1135            ));
1136        }
1137        "allowziplevelchange"
1138        | "usegzip"
1139        | "gzip"
1140        | "usebgzf"
1141        | "bgzf"
1142        | "forcepigz"
1143        | "forcebgzip"
1144        | "preferbgzip"
1145        | "nativebgzip"
1146        | "nativebgzf"
1147        | "usenativebgzip"
1148        | "usenativebgzf"
1149        | "allownativebgzip"
1150        | "allownativebgzf"
1151        | "nativebgzipin"
1152        | "nativebgzfin"
1153        | "nativebgzipout"
1154        | "nativebgzfout"
1155        | "prefernativebgzip"
1156        | "prefernativebgzf"
1157        | "nativebgzipmt"
1158        | "nativebgzfmt"
1159        | "multithreadedbgzf"
1160        | "bgzfosmt2"
1161        | "filteredbgzf"
1162        | "preferunbgzip"
1163        | "usebzip2"
1164        | "bzip2"
1165        | "usepbzip2"
1166        | "pbzip2"
1167        | "uselbzip2"
1168        | "lbzip2" => {
1169            let _ = parse_java_bool(value);
1170            config.notes.push(format!(
1171                "{key}={value} is a BBTools compression/runtime control; covered Rust output records are unchanged"
1172            ));
1173        }
1174        "samversion" | "samv" | "sam" => {
1175            let _ = parse_f64(value, key)?;
1176            config.notes.push(format!(
1177                "{key}={value} is a BBTools SAM-version control; covered Rust FASTA/FASTQ output is unchanged"
1178            ));
1179        }
1180        "streamerthreads"
1181        | "ssthreads"
1182        | "bsthreads"
1183        | "fastqstreamerthreads"
1184        | "fqsthreads"
1185        | "fastastreamerthreads"
1186        | "fasthreads"
1187        | "samwriterthreads"
1188        | "swthreads"
1189        | "bamwriterthreads"
1190        | "bwthreads"
1191        | "fastqwriterthreads"
1192        | "fqwthreads"
1193        | "intronlen"
1194        | "intronlength" => {
1195            let _ = parse_i32(value, key)?;
1196            config.notes.push(format!(
1197                "{key}={value} is a BBTools SAM/streamer threading control; current Rust engine manages FASTA/FASTQ I/O internally"
1198            ));
1199        }
1200        "sambamba"
1201        | "samtools"
1202        | "printheaderwait"
1203        | "nativebam"
1204        | "usenativebam"
1205        | "allownativebam"
1206        | "nativebamout"
1207        | "usenativebamout"
1208        | "nativebamin"
1209        | "usenativebamin"
1210        | "prefernativebamout"
1211        | "prefernativebamin"
1212        | "prefernativebam"
1213        | "userssw"
1214        | "attachedsamline"
1215        | "useattachedsamline"
1216        | "fastastreamer2"
1217        | "prefermd"
1218        | "prefermdtag"
1219        | "notags"
1220        | "mdtag"
1221        | "md"
1222        | "idtag"
1223        | "mateqtag"
1224        | "xmtag"
1225        | "xm"
1226        | "smtag"
1227        | "amtag"
1228        | "nmtag"
1229        | "xttag"
1230        | "stoptag"
1231        | "lengthtag"
1232        | "boundstag"
1233        | "scoretag"
1234        | "sortscaffolds"
1235        | "customtag"
1236        | "nhtag"
1237        | "keepnames"
1238        | "saa"
1239        | "secondaryalignmentasterisks"
1240        | "inserttag"
1241        | "correctnesstag"
1242        | "suppressheader"
1243        | "noheader"
1244        | "noheadersequences"
1245        | "nhs"
1246        | "suppressheadersequences"
1247        | "tophat"
1248        | "flipsam" => {
1249            let _ = parse_java_bool(value);
1250            config.notes.push(format!(
1251                "{key}={value} is a BBTools SAM/BAM runtime control; covered Rust FASTA/FASTQ output is unchanged"
1252            ));
1253        }
1254        "xstag" | "xs" => {
1255            let lower = value.to_ascii_lowercase();
1256            if !matches!(
1257                lower.strip_prefix("fr-").unwrap_or(&lower),
1258                "ss" | "secondstrand" | "fs" | "firststrand" | "us" | "unstranded"
1259            ) {
1260                let _ = parse_java_bool(value);
1261            }
1262            config.notes.push(format!(
1263                "{key}={value} is a BBTools SAM XS-tag control; covered Rust FASTA/FASTQ output is unchanged"
1264            ));
1265        }
1266        "readgroup" | "readgroupid" | "rgid" | "readgroupcn" | "rgcn" | "readgroupds" | "rgds"
1267        | "readgroupdt" | "rgdt" | "readgroupfo" | "rgfo" | "readgroupks" | "rgks"
1268        | "readgrouplb" | "rglb" | "readgrouppg" | "rgpg" | "readgrouppi" | "rgpi"
1269        | "readgrouppl" | "rgpl" | "readgrouppu" | "rgpu" | "readgroupsm" | "rgsm" => {
1270            config.notes.push(format!(
1271                "{key}={value} is a BBTools read-group metadata control; covered Rust FASTA/FASTQ output is unchanged"
1272            ));
1273        }
1274        "tossbrokenreads"
1275        | "nullifybrokenquality"
1276        | "nbq"
1277        | "rbm"
1278        | "renamebymapping"
1279        | "don"
1280        | "deleteoldname"
1281        | "assertcigar"
1282        | "verbosesamline"
1283        | "parsecustom"
1284        | "fastqparsecustom"
1285        | "shrinkheaders"
1286        | "fixheader"
1287        | "fixheaders"
1288        | "allownullheader"
1289        | "allownullheaders"
1290        | "recalpairnum"
1291        | "recalibratepairnum" => {
1292            let _ = parse_java_bool(value);
1293            config.notes.push(format!(
1294                "{key}={value} is a BBTools shared read/header runtime control; covered Rust FASTA/FASTQ output is unchanged"
1295            ));
1296        }
1297        "pairreads" | "flipr2" => {
1298            let enabled = parse_java_bool(value);
1299            config.notes.push(format!(
1300                "{key}={value} is a BBTools global pairing behavior control; Rust pairing uses explicit in2=, interleaved=, and # routing{}",
1301                if enabled { " for ASAP output" } else { "" }
1302            ));
1303        }
1304        "aminoin" | "amino" | "amino8" => {
1305            if parse_java_bool(value) {
1306                bail!(
1307                    "{key}={value} enables BBTools amino-acid kmer mode; the Rust engine currently supports nucleotide BBNorm only"
1308                );
1309            }
1310            config.notes.push(format!(
1311                "{key}={value} keeps BBTools amino-acid kmer mode disabled in the supported Rust path"
1312            ));
1313        }
1314        "validatebranchless"
1315        | "fairqueues"
1316        | "fixextensions"
1317        | "fixextension"
1318        | "tryallextensions"
1319        | "2passresize"
1320        | "twopassresize"
1321        | "parallelsort"
1322        | "paralellsort"
1323        | "gcbeforemem"
1324        | "warnifnosequence"
1325        | "warnfirsttimeonly"
1326        | "kmg"
1327        | "outputkmg"
1328        | "forcejavaparsedouble"
1329        | "simdsparse"
1330        | "simdmultsparse"
1331        | "simdfmasparse"
1332        | "simdcopy"
1333        | "awsservers"
1334        | "aws"
1335        | "nerscservers"
1336        | "nersc"
1337        | "lowmem"
1338        | "lowram"
1339        | "lowmemory"
1340        | "buffer"
1341        | "buffered"
1342        | "sidechannelstats"
1343        | "comment"
1344        | "taxpath"
1345        | "silva"
1346        | "unite"
1347        | "imghq"
1348        | "callins"
1349        | "callinss"
1350        | "calldel"
1351        | "calldels"
1352        | "callsub"
1353        | "callsubs"
1354        | "callsnp"
1355        | "callsnps"
1356        | "callindel"
1357        | "callindels"
1358        | "calljunct"
1359        | "calljunction"
1360        | "calljunctions"
1361        | "callnocall"
1362        | "callnocalls"
1363        | "protfull" => {
1364            let _ = parse_java_bool(value);
1365            config.notes.push(format!(
1366                "{key}={value} is a BBTools shared environment/performance control; covered Rust output is unchanged"
1367            ));
1368        }
1369        "lockedincrement" | "symmetricwrite" | "symmetric" | "sw" => {
1370            if value.eq_ignore_ascii_case("auto") {
1371                config.locked_increment = None;
1372            } else {
1373                config.locked_increment = Some(parse_java_bool(value));
1374            }
1375            config.notes.push(format!(
1376                "{key}={value} is a BBTools KCountArray write-symmetry control; bounded Rust sketches use the matching locked/conservative update mode when applicable"
1377            ));
1378        }
1379        "gpucounting" | "gpu_counting" | "usegpu" => {
1380            config.gpu_counting = parse_bool(value, key)?;
1381            config.notes.push(format!(
1382                "{key}={value} toggles experimental CUDA sort/reduce-assisted input counting; defaults remain CPU-only"
1383            ));
1384        }
1385        "gpuhelper" | "cudahelper" | "gpucountinghelper" => {
1386            config.gpu_helper = Some(PathBuf::from(value));
1387            config.notes.push(format!(
1388                "{key}={value} selects the experimental CUDA k-mer reduce helper"
1389            ));
1390        }
1391        "gpupersistent" | "gpucountingpersistent" | "persistentgpuhelper" => {
1392            config.gpu_persistent = parse_bool(value, key)?;
1393            config.notes.push(format!(
1394                "{key}={value} toggles the experimental persistent CUDA helper protocol"
1395            ));
1396        }
1397        "simd" => {
1398            if !value.eq_ignore_ascii_case("auto") {
1399                let _ = parse_java_bool(value);
1400            }
1401            config.notes.push(format!(
1402                "{key}={value} is a BBTools SIMD runtime control; covered Rust output is unchanged"
1403            ));
1404        }
1405        "entropyk" | "ek" | "entropywindow" | "ew" => {
1406            let parsed = parse_i32(value, key)?;
1407            if parsed <= 0 {
1408                bail!("{key} expects a positive integer, got {value}");
1409            }
1410            if matches!(key, "entropyk" | "ek") {
1411                config.entropy_k = parsed as usize;
1412            } else {
1413                config.entropy_window = parsed as usize;
1414            }
1415            config.notes.push(format!(
1416                "{key}={value} is a BBTools entropy-stat runtime control; Rust applies it to emitted entropy histograms"
1417            ));
1418        }
1419        "barcodestats" | "barcodecounts" => {
1420            config.barcode_stats_out = Some(path(value));
1421            config.notes.push(format!(
1422                "{key}={value} is a BBTools side-output barcode stats file; Rust emits a covered barcode-count fallback from read headers"
1423            ));
1424        }
1425        "timehistogram" | "thist" => {
1426            config.notes.push(format!(
1427                "{key}={value} is a BBTools side-output mapper time histogram; Rust does not emit this auxiliary file yet and keeps the supported normalization path"
1428            ));
1429        }
1430        "matchhistogram" | "matchhist" | "mhist" => {
1431            config.match_hist_out = Some(path(value));
1432            config.notes.push(format!(
1433                "{key}={value} is a BBTools side-output match histogram; Rust emits a covered no-alignment sequence-match fallback histogram"
1434            ));
1435        }
1436        "inserthistogram" | "inserthist" | "ihist" => {
1437            config.insert_hist_out = Some(path(value));
1438            config.notes.push(format!(
1439                "{key}={value} is a BBTools side-output insert histogram; Rust emits a covered no-alignment insert-size fallback histogram"
1440            ));
1441        }
1442        "qualityaccuracyhistogram" | "qahist" => {
1443            config.quality_accuracy_hist_out = Some(path(value));
1444            config.notes.push(format!(
1445                "{key}={value} is a BBTools side-output quality-accuracy histogram; Rust emits a covered no-alignment quality-accuracy fallback histogram"
1446            ));
1447        }
1448        "indelhistogram" | "indelhist" => {
1449            config.indel_hist_out = Some(path(value));
1450            config.notes.push(format!(
1451                "{key}={value} is a BBTools side-output indel histogram; Rust emits a covered no-alignment indel fallback histogram"
1452            ));
1453        }
1454        "errorhistogram" | "ehist" => {
1455            config.error_hist_out = Some(path(value));
1456            config.notes.push(format!(
1457                "{key}={value} is a BBTools side-output error histogram; Rust emits a covered no-alignment error-count fallback histogram"
1458            ));
1459        }
1460        "gchistogram" | "gchist" => {
1461            config.gc_hist_out = Some(path(value));
1462            config.notes.push(format!(
1463                "{key}={value} is a BBTools side-output GC histogram; Rust emits a covered primary input GC-bin histogram"
1464            ));
1465        }
1466        "qualityhistogram" | "qualityhist" | "qhist" => {
1467            config.quality_hist_out = Some(path(value));
1468            config.notes.push(format!(
1469                "{key}={value} is a BBTools side-output quality histogram; Rust emits a covered primary input quality histogram"
1470            ));
1471        }
1472        "basequalityhistogram" | "basequalityhist" | "bqhist" => {
1473            config.base_quality_hist_out = Some(path(value));
1474            config.notes.push(format!(
1475                "{key}={value} is a BBTools side-output base-quality histogram; Rust emits a covered primary input base-quality histogram"
1476            ));
1477        }
1478        "qualitycounthistogram" | "qualitycounthist" | "qchist" | "qdhist" | "qfhist" => {
1479            config.quality_count_hist_out = Some(path(value));
1480            config.notes.push(format!(
1481                "{key}={value} is a BBTools side-output quality-count histogram; Rust emits a covered primary input quality-count histogram"
1482            ));
1483        }
1484        "averagequalityhistogram" | "aqhist" => {
1485            config.average_quality_hist_out = Some(path(value));
1486            config.notes.push(format!(
1487                "{key}={value} is a BBTools side-output average-quality histogram; Rust emits a covered primary input average-quality histogram"
1488            ));
1489        }
1490        "overallbasequalityhistogram" | "overallbasequalityhist" | "obqhist" => {
1491            config.overall_base_quality_hist_out = Some(path(value));
1492            config.notes.push(format!(
1493                "{key}={value} is a BBTools side-output overall base-quality histogram; Rust emits a covered primary input overall base-quality histogram"
1494            ));
1495        }
1496        "lengthhistogram" | "lhist" => {
1497            config.length_hist_out = Some(path(value));
1498            config.notes.push(format!(
1499                "{key}={value} is a BBTools side-output length histogram; Rust emits a covered read-length histogram for the primary input"
1500            ));
1501        }
1502        "basehistogram" | "basehist" | "bhist" => {
1503            config.base_hist_out = Some(path(value));
1504            config.notes.push(format!(
1505                "{key}={value} is a BBTools side-output base-content histogram; Rust emits a covered primary input base-content histogram"
1506            ));
1507        }
1508        "entropyhistogram" | "entropyhist" | "enhist" | "enthist" => {
1509            config.entropy_hist_out = Some(path(value));
1510            config.notes.push(format!(
1511                "{key}={value} is a BBTools side-output entropy histogram; Rust emits a covered primary input entropy histogram"
1512            ));
1513        }
1514        "identityhistogram" | "idhist" => {
1515            config.identity_hist_out = Some(path(value));
1516            config.notes.push(format!(
1517                "{key}={value} is a BBTools side-output identity histogram; Rust emits a covered sequence-input identity fallback histogram because this BBNorm path has no aligner"
1518            ));
1519        }
1520        "gcbins" | "gchistbins" => {
1521            if !value.eq_ignore_ascii_case("auto") {
1522                let bins = parse_i32(value, key)?;
1523                if bins <= 0 {
1524                    bail!("{key} expects a positive integer or auto, got {value}");
1525                }
1526                config.gc_bins = Some(bins as usize);
1527            }
1528            config.notes.push(format!(
1529                "{key}={value} is a BBTools side-output GC histogram sizing control; Rust applies it to emitted GC histograms"
1530            ));
1531        }
1532        "entropybins" | "entropyhistbins" | "entbins" | "enthistbins" => {
1533            if !value.eq_ignore_ascii_case("auto") {
1534                let bins = parse_i32(value, key)?;
1535                if bins <= 0 {
1536                    bail!("{key} expects a positive integer or auto, got {value}");
1537                }
1538                config.entropy_bins = bins as usize;
1539            }
1540            config.notes.push(format!(
1541                "{key}={value} is a BBTools side-output entropy histogram sizing control; Rust applies it to emitted entropy histograms"
1542            ));
1543        }
1544        "idhistlen" | "idhistlength" | "idhistbins" | "idbins" => {
1545            if !value.eq_ignore_ascii_case("auto") {
1546                let bins = parse_i32(value, key)?;
1547                if bins <= 0 {
1548                    bail!("{key} expects a positive integer or auto, got {value}");
1549                }
1550                config.identity_bins = bins as usize;
1551            }
1552            config.notes.push(format!(
1553                "{key}={value} is a BBTools side-output identity histogram sizing control; Rust applies it to emitted identity histograms"
1554            ));
1555        }
1556        "entropyns" | "entropyhistns" => {
1557            config.allow_entropy_ns = parse_java_bool(value);
1558            config.notes.push(format!(
1559                "{key}={value} is a BBTools side-output entropy control; Rust applies it to emitted entropy histograms"
1560            ));
1561        }
1562        "gcchart" | "gcplot" | "fixindels" | "ignorevcfindels" => {
1563            let _ = parse_java_bool(value);
1564            config.notes.push(format!(
1565                "{key}={value} is a BBTools side-output stats control; covered Rust FASTA/FASTQ output is unchanged"
1566            ));
1567        }
1568        "maxhistlen" => {
1569            let len = parse_kmg_i64(value, key)?;
1570            if len <= 0 {
1571                bail!("{key} expects a positive KMG value, got {value}");
1572            }
1573            config.side_hist_len = Some(
1574                usize::try_from(len)
1575                    .map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))?,
1576            );
1577            config.notes.push(format!(
1578                "{key}={value} is a BBTools side-output histogram length control; Rust applies it to emitted side histograms"
1579            ));
1580        }
1581        "cardinality" | "loglog" => {
1582            match parse_cardinality_bool_or_int(value, key)? {
1583                CardinalityToggle::Bool(enabled) => config.cardinality.input = enabled,
1584                CardinalityToggle::Int(k) => {
1585                    config.cardinality.input = true;
1586                    config.cardinality.k = Some(k);
1587                }
1588            }
1589            config.notes.push(format!(
1590                "{key}={value} is a BBTools cardinality/loglog control; Rust emits a bounded input estimate when enabled"
1591            ));
1592        }
1593        "loglogin" => {
1594            match parse_cardinality_bool_or_int(value, key)? {
1595                CardinalityToggle::Bool(enabled) => config.cardinality.input = enabled,
1596                CardinalityToggle::Int(k) => {
1597                    config.cardinality.input = true;
1598                    config.cardinality.k = Some(k);
1599                }
1600            }
1601            config.notes.push(format!(
1602                "{key}={value} is a BBTools cardinality/loglog input control; Rust emits a bounded input estimate when enabled"
1603            ));
1604        }
1605        "cardinalityout" | "loglogout" => {
1606            match parse_cardinality_bool_or_int(value, key)? {
1607                CardinalityToggle::Bool(enabled) => config.cardinality.output = enabled,
1608                CardinalityToggle::Int(k) => {
1609                    config.cardinality.output = true;
1610                    config.cardinality.k = Some(k);
1611                }
1612            }
1613            config.notes.push(format!(
1614                "{key}={value} is a BBTools cardinality/loglog control; Rust emits a bounded output estimate when enabled"
1615            ));
1616        }
1617        "buckets" | "loglogbuckets" => {
1618            let buckets = parse_cardinality_buckets(value, key)?;
1619            config.cardinality.buckets = buckets;
1620            config.notes.push(format!(
1621                "{key}={value} is a BBTools cardinality/loglog bucket control; Rust applies it to bounded cardinality estimates"
1622            ));
1623        }
1624        "loglogk" | "cardinalityk" | "kcardinality" => {
1625            config.cardinality.k = Some(parse_cardinality_k(value, key)?);
1626            config.notes.push(format!(
1627                "{key}={value} is a BBTools cardinality/loglog numeric control; Rust applies it to bounded cardinality estimates"
1628            ));
1629        }
1630        "loglogbits" | "loglogmantissa" => {
1631            let _ = parse_i32(value, key)?;
1632            config.notes.push(format!(
1633                "{key}={value} is a BBTools cardinality/loglog numeric control; Rust accepts it while using compact byte registers"
1634            ));
1635        }
1636        "loglogklist" => {
1637            let mut first_k = None;
1638            for part in value.split(',') {
1639                let trimmed = part.trim();
1640                if trimmed.is_empty() {
1641                    bail!("{key} expects a comma-separated integer list, got {value}");
1642                }
1643                let k = parse_cardinality_k(trimmed, key)?;
1644                first_k.get_or_insert(k);
1645            }
1646            config.cardinality.k = first_k;
1647            config.notes.push(format!(
1648                "{key}={value} is a BBTools cardinality/loglog k-list; Rust uses the first k for bounded cardinality estimates"
1649            ));
1650        }
1651        "loglogseed" => {
1652            config.cardinality.seed = parse_cardinality_seed(value, key)?;
1653            config.notes.push(format!(
1654                "{key}={value} is a BBTools cardinality/loglog seed; Rust applies it to bounded cardinality estimates"
1655            ));
1656        }
1657        "loglogminprob" => {
1658            let min_probability = parse_f64(value, key)?;
1659            if !(0.0..=1.0).contains(&min_probability) {
1660                bail!("{key} expects a probability between 0 and 1, got {value}");
1661            }
1662            config.cardinality.min_probability = min_probability;
1663            config.notes.push(format!(
1664                "{key}={value} is a BBTools cardinality/loglog probability threshold; Rust records it for bounded cardinality estimates"
1665            ));
1666        }
1667        "loglogtype" => {
1668            config.notes.push(format!(
1669                "{key}={value} is a BBTools cardinality/loglog estimator type; Rust uses its compact bounded estimator"
1670            ));
1671        }
1672        "loglogcorrection" | "loglogcf" | "loglogmean" | "loglogmedian" | "loglogmwa"
1673        | "logloghmean" | "logloggmean" | "loglogcounts" | "loglogcount" => {
1674            let _ = parse_java_bool(value);
1675            config.notes.push(format!(
1676                "{key}={value} is a BBTools cardinality/loglog output-control toggle; Rust emits compact summary estimates"
1677            ));
1678        }
1679        "countup" => {
1680            config.count_up = parse_bool(value, key)?;
1681            if !config.count_up {
1682                config.notes.push(
1683                    "countup=f selected; standard single-pass normalization remains active"
1684                        .to_string(),
1685                );
1686            }
1687        }
1688        "bits" | "cbits" | "cellbits" => {
1689            let bits = parse_kcount_cell_bits(value, key)?;
1690            config.count_min.bits = Some(bits);
1691            config.notes.push(format!(
1692                "{key}={bits} is a BBTools count-min cell-width control; constrained Rust count-min tables use it for saturation"
1693            ));
1694        }
1695        "bits1" | "cbits1" | "cellbits1" => {
1696            let bits = parse_kcount_cell_bits(value, key)?;
1697            config.count_min_bits_first = Some(bits);
1698            config.notes.push(format!(
1699                "{key}={bits} is a BBTools first/intermediate-pass sketch width control; Rust uses it for multipass bounded sketches"
1700            ));
1701        }
1702        "hashes" => {
1703            let hashes = parse_kcount_hashes(value, key)?;
1704            config.count_min.hashes = Some(hashes);
1705            config.notes.push(format!(
1706                "hashes={hashes} is a BBTools count-min hashing control; constrained Rust count-min tables use it for collision estimates"
1707            ));
1708        }
1709        "cells" | "matrixbits" => {
1710            let cells = if key == "matrixbits" {
1711                parse_matrixbits_cells(value, key)?
1712            } else {
1713                parse_positive_kmg_usize(value, key)?
1714            };
1715            config.count_min.cells = Some(cells.max(1));
1716            config.notes.push(format!(
1717                "{key}={value} is a BBTools count-min table-sizing control; Rust treats it as a total-cell budget and builds a fixed-memory count-min input sketch"
1718            ));
1719        }
1720        "sketchmemory" | "sketchmem" | "countminmemory" | "countminmem" | "cmem" => {
1721            let bytes = parse_positive_kmg_usize(value, key)?;
1722            config.count_min.memory_bytes = Some(bytes);
1723            config.notes.push(format!(
1724                "{key}={value} is a Rust count-min memory budget; Rust sizes the fixed-memory input sketch from this budget when cells/matrixbits are not set"
1725            ));
1726        }
1727        "maxcountupspillbytes"
1728        | "maxcountupspilllivebytes"
1729        | "countupspillbytes"
1730        | "countupspilllimit" => {
1731            let bytes = parse_kmg_usize(value, key)?;
1732            config.max_countup_spill_live_bytes = Some(bytes as u64);
1733            config.notes.push(format!(
1734                "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if peak live spill bytes exceed {bytes}"
1735            ));
1736        }
1737        "maxcountupspillfinallivebytes"
1738        | "maxcountupspillfinalbytes"
1739        | "countupspillfinallivebytes" => {
1740            let bytes = parse_kmg_usize(value, key)?;
1741            config.max_countup_spill_final_live_bytes = Some(bytes as u64);
1742            config.notes.push(format!(
1743                "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if current/final live spill bytes exceed {bytes}"
1744            ));
1745        }
1746        "maxcountupspillinitialruns" | "countupspillinitialruns" => {
1747            let runs = parse_kmg_usize(value, key)?;
1748            config.max_countup_spill_initial_runs = Some(runs);
1749            config.notes.push(format!(
1750                "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if initial spill run count exceeds {runs}"
1751            ));
1752        }
1753        "maxcountupspillmergeruns" | "countupspillmergeruns" => {
1754            let runs = parse_kmg_usize(value, key)?;
1755            config.max_countup_spill_merge_runs = Some(runs);
1756            config.notes.push(format!(
1757                "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if merge spill run count exceeds {runs}"
1758            ));
1759        }
1760        "maxcountupspillfinalruns" | "maxcountupspillruns" | "countupspillfinalruns" => {
1761            let runs = parse_kmg_usize(value, key)?;
1762            config.max_countup_spill_final_runs = Some(runs);
1763            config.notes.push(format!(
1764                "{key}={value} is a Rust count-up temp-spill safety cap; Rust aborts count-up if live/final spill run count exceeds {runs}"
1765            ));
1766        }
1767        "maxcountupspillwritebytes" | "maxcountupspillwrittenbytes" | "countupspillwritebytes" => {
1768            let bytes = parse_kmg_usize(value, key)?;
1769            config.max_countup_spill_write_bytes = Some(bytes as u64);
1770            config.notes.push(format!(
1771                "{key}={value} is a Rust count-up temp-spill I/O safety cap; Rust aborts count-up if cumulative spill bytes written exceed {bytes}"
1772            ));
1773        }
1774        "memory" | "mem" | "ram" | "maxmemory" | "maxmem" | "xmx" => {
1775            let bytes = parse_positive_kmg_usize(value, key)?;
1776            config.auto_count_min_memory_bytes = Some(bytes);
1777            config.auto_count_min = true;
1778            config.notes.push(format!(
1779                "{key}={value} is a BBTools-style memory budget; automatic Rust count-min sizing uses it for large inputs"
1780            ));
1781        }
1782        "autocountmin" | "autosketch" | "autosketchtable" | "autosketchtables" => {
1783            config.auto_count_min = parse_bool(value, key)?;
1784            config.notes.push(format!(
1785                "{key}={} controls Rust's large-input automatic bounded count-min table selection",
1786                config.auto_count_min
1787            ));
1788        }
1789        "exact" | "exactcount" | "exactcounts" | "useexact" | "sketchexact" => {
1790            config.force_exact_counts = parse_bool(value, key)?;
1791            config.notes.push(format!(
1792                "{key}={} forces Rust exact-count maps and disables automatic/explicit count-min sketches",
1793                config.force_exact_counts
1794            ));
1795        }
1796        "autosketchbytes" | "autosketchminbytes" | "autocountminbytes" | "autocountminminbytes" => {
1797            config.auto_count_min_input_bytes = parse_positive_kmg_usize(value, key)?;
1798            config.notes.push(format!(
1799                "{key}={value} sets the compressed/uncompressed input-size trigger for automatic Rust count-min tables"
1800            ));
1801        }
1802        "autosketchtablereads" | "autocountminreads" | "autosketchtablereadthreshold" => {
1803            config.auto_count_min_read_threshold = parse_u64(value, key)?.max(1);
1804            config.notes.push(format!(
1805                "{key}={value} sets the read-limit trigger for automatic Rust count-min tables"
1806            ));
1807        }
1808        "precells" | "prefiltercells" => {
1809            let cells = parse_kmg_usize(value, key)?;
1810            config.prefilter.cells = (cells > 0).then_some(cells);
1811            if cells == 0 {
1812                config.notes.push(format!(
1813                    "{key}=0 is a BBTools prefilter sketch control; Rust leaves prefilter cells unset unless prefiltering is otherwise requested"
1814                ));
1815            } else {
1816                config.prefilter.enabled = true;
1817                config.prefilter.force_disabled = false;
1818                config.notes.push(format!(
1819                    "{key}={value} is a BBTools prefilter sketch control; Rust applies deterministic prefilter collision estimates when prefilter cells are constrained"
1820                ));
1821            }
1822        }
1823        "prefiltersize" | "prefilterfraction" => {
1824            let fraction = parse_fraction_micros(value, key)?;
1825            config.prefilter.memory_fraction_micros = (fraction > 0).then_some(fraction);
1826            config.prefilter.enabled = fraction > 0;
1827            config.prefilter.force_disabled = fraction == 0;
1828            if fraction == 0 {
1829                config.notes.push(format!(
1830                    "{key}=0 is a BBTools prefilter sketch control; Rust disables fraction-derived prefilter sizing"
1831                ));
1832            } else {
1833                config.notes.push(format!(
1834                    "{key}={value} is a BBTools prefilter sketch control; Rust derives deterministic prefilter collision memory from the configured table memory budget"
1835                ));
1836            }
1837        }
1838        "prefilterbits" | "prebits" | "pbits" => {
1839            let bits = parse_kcount_cell_bits(value, key)?;
1840            config.prefilter.bits = Some(bits);
1841            config.notes.push(format!(
1842                "{key}={value} is a BBTools prefilter sketch control; Rust uses it with constrained prefilter cells"
1843            ));
1844        }
1845        "prehashes" | "prefilterhashes" => {
1846            let hashes = parse_prefilter_hashes(value, key)?;
1847            config.prefilter.hashes = (hashes > 0).then_some(hashes);
1848            if hashes == 0 {
1849                config.notes.push(format!(
1850                    "{key}=0 is a BBTools prefilter sketch control; Rust leaves prefilter hashes unset unless prefiltering is otherwise requested"
1851                ));
1852            } else {
1853                config.prefilter.enabled = true;
1854                config.prefilter.force_disabled = false;
1855                config.notes.push(format!(
1856                    "{key}={value} is a BBTools prefilter sketch control; Rust applies deterministic prefilter collision estimates with explicit or implicit prefilter cells"
1857                ));
1858            }
1859        }
1860        "buildpasses" => {
1861            let build_passes = parse_i64(value, key)?;
1862            if build_passes <= 0 {
1863                bail!("{key} expects a positive integer, got {value}");
1864            }
1865            config.build_passes = usize::try_from(build_passes)
1866                .map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))?;
1867            config.notes.push(format!(
1868                "{key}={build_passes} is a BBTools table-construction pass control; Rust applies deterministic trusted-kmer filtering when buildpasses is greater than 1"
1869            ));
1870        }
1871        "initialsize" => {
1872            let initial_size = parse_positive_kmg_usize(value, key)?;
1873            config.table_initial_size = Some(initial_size);
1874            config.notes.push(format!(
1875                "{key}={value} is a BBTools kmer-table runtime sizing control; Rust pre-reserves exact-count table capacity when practical"
1876            ));
1877        }
1878        "ways" => {
1879            let _ = parse_kmg_i64(value, key)?;
1880            config.notes.push(format!(
1881                "{key}={value} is a BBTools kmer-table runtime sizing control; exact Rust counting keeps native map sharding"
1882            ));
1883        }
1884        "buflen" | "bufflen" | "bufferlength" => {
1885            let _ = parse_kmg_i64(value, key)?;
1886            config.notes.push(format!(
1887                "{key}={value} is a BBTools kmer-table buffer-length control; covered Rust output records are unchanged"
1888            ));
1889        }
1890        "tabletype" => {
1891            let _ = parse_i32(value, key)?;
1892            config.notes.push(format!(
1893                "{key}={value} is a BBTools kmer-table implementation control; exact Rust counting uses its native map"
1894            ));
1895        }
1896        "rcomp" | "maskmiddle" => {
1897            let _ = parse_java_bool(value);
1898            config.notes.push(format!(
1899                "{key}={value} is a BBTools kmer-table matching control; covered Rust BBNorm canonical/exact-count behavior is unchanged"
1900            ));
1901        }
1902        "showstats" | "stats" | "showspeed" | "ss" | "verbose2" => {
1903            let _ = parse_java_bool(value);
1904            config.notes.push(format!(
1905                "{key}={value} is a BBTools kmer-table reporting control; covered Rust output records are unchanged"
1906            ));
1907        }
1908        "prealloc" | "preallocate" => {
1909            config.table_prealloc_fraction = parse_preallocation_fraction(value, key)?;
1910            config.notes.push(format!(
1911                "{key}={value} is a BBTools kmer-table preallocation control; Rust pre-reserves exact-count table capacity when practical"
1912            ));
1913        }
1914        "filtermemory" | "prefiltermemory" | "filtermem" | "filtermemoryoverride" => {
1915            let bytes = parse_positive_kmg_usize(value, key)?;
1916            config.prefilter.memory_bytes = Some(bytes);
1917            config.prefilter.enabled = true;
1918            config.prefilter.force_disabled = false;
1919            config.notes.push(format!(
1920                "{key}={value} is a BBTools prefilter memory-sizing control; Rust sizes deterministic prefilter collision estimates from this budget when prefilter cells are not set"
1921            ));
1922        }
1923        "minprobprefilter" | "mpp" | "minprobmain" | "mpm" => {
1924            let _ = parse_java_bool(value);
1925            config.notes.push(format!(
1926                "{key}={value} is a BBTools kmer-table minprob routing control; covered Rust minprob behavior is unchanged"
1927            ));
1928        }
1929        "prefilterpasses" | "prepasses" => {
1930            parse_auto_or_kmg_i64(value, key)?;
1931            config.notes.push(format!(
1932                "{key}={value} is a BBTools prefilter pass-count control; exact Rust counting uses one deterministic table build"
1933            ));
1934        }
1935        "onepass" => {
1936            let _ = parse_java_bool(value);
1937            config.notes.push(format!(
1938                "{key}={value} is a BBTools kmer-table construction-mode control; covered Rust output remains single-pass"
1939            ));
1940        }
1941        "stepsize" | "buildstepsize" => {
1942            let _ = parse_i32(value, key)?;
1943            config.notes.push(format!(
1944                "{key}={value} is a BBTools trusted-kmer sampling control; the covered no-ECC single-pass path ignores it"
1945            ));
1946        }
1947        "prefilter" => {
1948            config.prefilter.enabled = parse_bool(value, key)?;
1949            if config.prefilter.enabled {
1950                config.prefilter.force_disabled = false;
1951                config.notes.push(
1952                    "prefilter=t requested; Rust applies BBTools-style default prefilter partitioning when bounded count-min counting is selected"
1953                        .to_string(),
1954                );
1955            } else {
1956                config.prefilter.force_disabled = true;
1957                config.notes.push(
1958                    "prefilter=f requested; Rust disables prefilter sketch construction unless a later prefilter control re-enables it"
1959                        .to_string(),
1960                );
1961            }
1962        }
1963        "auto" | "automatic" => {
1964            let enabled = parse_bool(value, key)?;
1965            config.auto_count_min = enabled;
1966            config.notes.push(format!(
1967                "{key}={enabled} is a BBTools automatic count-table sizing control; Rust uses it to select bounded count-min tables for large inputs"
1968            ));
1969        }
1970        "tmpdir" => {
1971            config.temp_dir = Some(PathBuf::from(value));
1972            config.use_temp_dir = true;
1973            config.notes.push(format!(
1974                "{key}={value} is a BBTools temporary-directory control; covered Rust multipass and stdin paths use managed temp files there when enabled"
1975            ));
1976        }
1977        "usetmpdir" | "usetempdir" => {
1978            config.use_temp_dir = parse_java_bool(value);
1979            config.notes.push(format!(
1980                "{key}={value} is a BBTools temporary-directory control; covered Rust multipass and stdin paths use managed temp files there when enabled"
1981            ));
1982        }
1983        "ordered" | "ord" | "verbose" | "printcoverage" => {
1984            config.notes.push(format!(
1985                "{key}={value} is accepted as a no-op in this Rust parity slice"
1986            ));
1987        }
1988        "append" | "app" => {
1989            config.append = parse_bool(value, key)?;
1990        }
1991        "interleaved" | "int" => {
1992            let lower = value.to_ascii_lowercase();
1993            if lower == "auto" {
1994                config.interleaved = false;
1995                config.test_interleaved = true;
1996            } else {
1997                config.interleaved = parse_bool(value, key)?;
1998                config.test_interleaved = false;
1999            }
2000        }
2001        "testinterleaved" => {
2002            config.test_interleaved = parse_bool(value, key)?;
2003        }
2004        "forceinterleaved" => {
2005            config.interleaved = parse_bool(value, key)?;
2006            config.test_interleaved = false;
2007        }
2008        "overrideinterleaved" => {
2009            let _ = parse_bool(value, key)?;
2010            config.notes.push(format!(
2011                "{key}={value} is a BBTools paired-output assertion override; covered Rust paired output is unchanged"
2012            ));
2013        }
2014        "fastareadlen" | "fastareadlength" => {
2015            if parse_u64(value, key)? != u64::MAX && value != "2147483647" {
2016                config.notes.push(
2017                    "fastareadlen is accepted for KmerNormalize parity; covered FASTA records are processed as-is".to_string(),
2018                );
2019            }
2020        }
2021        "fastaminread" | "fastaminlen" | "fastaminlength" => {
2022            let _ = parse_i32(value, key)?;
2023            config.notes.push(format!(
2024                "{key}={value} is a BBTools FASTA parser control; covered KmerNormalize FASTA records are processed as-is"
2025            ));
2026        }
2027        "forcesectionname" | "fastadump" => {
2028            let _ = parse_java_bool(value);
2029            config.notes.push(format!(
2030                "{key}={value} is a BBTools FASTA parser control; covered Rust output is unchanged"
2031            ));
2032        }
2033        "sampleoutput" | "readsample" | "kmersample" => {
2034            config.notes.push(format!(
2035                "{key}={value} is advertised in bbnorm.sh but rejected by vendored KmerNormalize; Rust ignores it and keeps the supported normalization path"
2036            ));
2037        }
2038        "samplerate" | "sample" | "sampleseed" | "seed" => {
2039            config.notes.push(format!(
2040                "{key}={value} is a BBTools stream-wrapper sampling option; Rust ignores it and keeps the supported normalization path"
2041            ));
2042        }
2043        "markerrors" | "markonly" | "meo" => {
2044            config.mark_errors_only = parse_bool(value, key)?;
2045            if config.mark_errors_only {
2046                enable_error_correction_if_unset(config);
2047            }
2048        }
2049        "markuncorrectableerrors" | "markuncorrectable" | "mue" => {
2050            config.mark_uncorrectable_errors = parse_bool(value, key)?;
2051        }
2052        "tam" | "trimaftermarking" => {
2053            config.trim_after_marking = parse_bool(value, key)?;
2054        }
2055        "markwith1" | "markwithone" | "mw1" => {
2056            config.mark_with_one = parse_bool(value, key)?;
2057        }
2058        "aec" | "aecc" | "aggressiveerrorcorrection" => {
2059            let enabled = parse_bool(value, key)?;
2060            if enabled {
2061                config.error_correct = true;
2062                config.error_correct_first = true;
2063                config.error_correct_final = true;
2064                config.error_correct_high_thresh = config.error_correct_high_thresh.min(16);
2065                config.error_correct_low_thresh = config.error_correct_low_thresh.max(3);
2066                config.error_correct_ratio = config.error_correct_ratio.min(100);
2067                config.max_errors_to_correct = config.max_errors_to_correct.max(7);
2068                config.suffix_len = config.suffix_len.min(3);
2069                config.prefix_len = config.prefix_len.min(2);
2070            }
2071        }
2072        "cec" | "cecc" | "conservativeerrorcorrection" => {
2073            let enabled = parse_bool(value, key)?;
2074            if enabled {
2075                config.error_correct = true;
2076                config.error_correct_first = true;
2077                config.error_correct_final = true;
2078                config.error_correct_high_thresh = config.error_correct_high_thresh.max(30);
2079                config.error_correct_low_thresh = config.error_correct_low_thresh.min(1);
2080                config.error_correct_ratio = config.error_correct_ratio.max(170);
2081                config.max_errors_to_correct = config.max_errors_to_correct.min(2);
2082                config.max_quality_to_correct = config.max_quality_to_correct.min(25);
2083                config.suffix_len = config.suffix_len.max(4);
2084                config.prefix_len = config.prefix_len.max(4);
2085            }
2086        }
2087        "ecc" => {
2088            let enabled = parse_bool(value, key)?;
2089            config.error_correct = enabled;
2090            config.error_correct_first = enabled;
2091            config.error_correct_final = enabled;
2092            config.overlap_error_correct &= enabled;
2093            config.overlap_error_correct_auto &= enabled;
2094        }
2095        "ecc1" => {
2096            config.error_correct_first = parse_bool(value, key)?;
2097            config.error_correct = config.error_correct_first || config.error_correct_final;
2098        }
2099        "ecc2" | "eccf" => {
2100            config.error_correct_final = parse_bool(value, key)?;
2101            config.error_correct = config.error_correct_first || config.error_correct_final;
2102        }
2103        "eccbyoverlap" | "ecco" | "overlap" => {
2104            if value.eq_ignore_ascii_case("auto") {
2105                config.notes.push(format!(
2106                    "{key}=auto requests automatic overlap-based error correction; Rust samples paired reads and enables paired overlap repair when the overlap fraction is high"
2107                ));
2108                config.error_correct = true;
2109                config.error_correct_first = true;
2110                config.error_correct_final = true;
2111                config.overlap_error_correct = false;
2112                config.overlap_error_correct_auto = true;
2113            } else if parse_bool(value, key)? {
2114                config.notes.push(format!(
2115                    "{key}={value} requests overlap-based error correction; Rust uses paired overlap repair before the table-based ECC path"
2116                ));
2117                config.error_correct = true;
2118                config.error_correct_first = true;
2119                config.error_correct_final = true;
2120                config.overlap_error_correct = true;
2121                config.overlap_error_correct_auto = false;
2122            } else {
2123                config.overlap_error_correct = false;
2124                config.overlap_error_correct_auto = false;
2125            }
2126        }
2127        "ecclimit" => config.max_errors_to_correct = parse_usize(value, key)?,
2128        "eccmaxqual" => config.max_quality_to_correct = parse_u8(value, key)?,
2129        "errorcorrectratio" | "ecr" => config.error_correct_ratio = parse_u64(value, key)?,
2130        "echighthresh" | "echthresh" | "echt" => {
2131            config.error_correct_high_thresh = parse_u64(value, key)?
2132        }
2133        "eclowthresh" | "eclthresh" | "eclt" => {
2134            config.error_correct_low_thresh = parse_u64(value, key)?
2135        }
2136        "sl" | "suflen" | "suffixlen" => config.suffix_len = parse_usize(value, key)?,
2137        "pl" | "prelen" | "prefixlen" => config.prefix_len = parse_usize(value, key)?,
2138        "cfl" => config.correct_from_left = parse_bool(value, key)?,
2139        "cfr" => config.correct_from_right = parse_bool(value, key)?,
2140        "target1" | "targetdepth1" | "tgt1" => {
2141            config.target_depth_first = Some(parse_u64(value, key)?);
2142        }
2143        "targetbadpercentilelow" | "tbpl" => {
2144            let value = parse_percent(value, key)?;
2145            config.target_bad_percent_low = value;
2146            config.target_bad_percent_high = config.target_bad_percent_high.max(value);
2147        }
2148        "targetbadpercentilehigh" | "tbph" => {
2149            let value = parse_percent(value, key)?;
2150            config.target_bad_percent_high = value;
2151            config.target_bad_percent_low = config.target_bad_percent_low.min(value);
2152        }
2153        "abrc" | "addbadreadscountup" => {
2154            config.add_bad_reads_countup = parse_bool(value, key)?;
2155        }
2156        _ => bail!("unknown or unsupported BBNorm option: {key}={value}"),
2157    }
2158    Ok(())
2159}
2160
2161fn validate(config: &mut Config) -> Result<()> {
2162    if config.in1.is_none() {
2163        bail!("missing input: provide in=<reads.fq>");
2164    }
2165    if !(1..=4).contains(&config.passes) {
2166        bail!("passes should be in range 1 through 4");
2167    }
2168    expand_hash_paired_input(config);
2169    validate_extra_inputs(config)?;
2170    if config.k == 0 {
2171        bail!("k must be greater than zero");
2172    }
2173    if !(0.0..1.0).contains(&config.min_prob) && (config.min_prob - 1.0).abs() > f64::EPSILON {
2174        bail!("minprob must be between 0 and 1");
2175    }
2176    if config.target_depth == 0 {
2177        bail!("target depth must be greater than zero");
2178    }
2179    if config.passes == 1 {
2180        config.target_bad_percent_low = 1.0;
2181        config.target_bad_percent_high = 1.0;
2182    }
2183    config.max_depth = Some(
2184        config
2185            .max_depth
2186            .unwrap_or(config.target_depth)
2187            .max(config.target_depth),
2188    );
2189    if config.error_detect_ratio == 0 {
2190        bail!("errordetectratio must be greater than zero");
2191    }
2192    if config.hist_columns == 0 || config.hist_columns > 3 {
2193        bail!("histcol must be 1, 2, or 3");
2194    }
2195    if config.hist_len < 2 {
2196        bail!("histlen must be at least 1");
2197    }
2198    if config.in2.is_some() {
2199        if config.out2.is_some() && config.out1.is_none() {
2200            bail!("out2 requires out=<file> for paired input");
2201        }
2202        if config.out_toss2.is_some() && config.out_toss1.is_none() {
2203            bail!("outt2 requires outt=<file> for paired input");
2204        }
2205        if config.out_low2.is_some() && config.out_low1.is_none() {
2206            bail!("outlow2 requires outlow=<file> for paired input");
2207        }
2208        if config.out_mid2.is_some() && config.out_mid1.is_none() {
2209            bail!("outmid2 requires outmid=<file> for paired input");
2210        }
2211        if config.out_high2.is_some() && config.out_high1.is_none() {
2212            bail!("outhigh2 requires outhigh=<file> for paired input");
2213        }
2214        if config.out_uncorrected2.is_some() && config.out_uncorrected1.is_none() {
2215            bail!("outuncorrected2 requires outuncorrected=<file> for paired input");
2216        }
2217    } else if config.interleaved {
2218        if config.out2.is_some() && config.out1.is_none() {
2219            bail!("out2 requires out=<file> for interleaved input");
2220        }
2221        if config.out_toss2.is_some() && config.out_toss1.is_none() {
2222            bail!("outt2 requires outt=<file> for interleaved input");
2223        }
2224        if config.out_low2.is_some() && config.out_low1.is_none() {
2225            bail!("outlow2 requires outlow=<file> for interleaved input");
2226        }
2227        if config.out_mid2.is_some() && config.out_mid1.is_none() {
2228            bail!("outmid2 requires outmid=<file> for interleaved input");
2229        }
2230        if config.out_high2.is_some() && config.out_high1.is_none() {
2231            bail!("outhigh2 requires outhigh=<file> for interleaved input");
2232        }
2233        if config.out_uncorrected2.is_some() && config.out_uncorrected1.is_none() {
2234            bail!("outuncorrected2 requires outuncorrected=<file> for interleaved input");
2235        }
2236    } else if !config.test_interleaved && (config.out2.is_some() || config.out_toss2.is_some()) {
2237        bail!("out2/outt2 require paired input with in2=<file> or interleaved=t");
2238    } else if !config.test_interleaved
2239        && (config.out_low2.is_some()
2240            || config.out_mid2.is_some()
2241            || config.out_high2.is_some()
2242            || config.out_uncorrected2.is_some())
2243    {
2244        bail!(
2245            "outlow2/outmid2/outhigh2/outuncorrected2 require paired input with in2=<file> or interleaved=t"
2246        );
2247    }
2248    Ok(())
2249}
2250
2251fn validate_extra_inputs(config: &Config) -> Result<()> {
2252    for extra in &config.extra {
2253        if !extra.exists() || !extra.is_file() {
2254            bail!("extra input {} does not exist", extra.display());
2255        }
2256    }
2257    Ok(())
2258}
2259
2260fn expand_hash_paired_input(config: &mut Config) {
2261    let Some(input) = config.in1.as_ref() else {
2262        return;
2263    };
2264    if input.exists() {
2265        return;
2266    }
2267    let text = input.to_string_lossy().into_owned();
2268    if !text.contains('#') {
2269        return;
2270    }
2271
2272    config.in1 = Some(PathBuf::from(text.replacen('#', "1", 1)));
2273    config.in2 = Some(PathBuf::from(text.replacen('#', "2", 1)));
2274}
2275
/// Converts an option value into an owned path without any normalisation.
fn path(value: &str) -> PathBuf {
    value.into()
}
2279
/// Splits a comma-separated list into paths.
///
/// Each entry is trimmed of surrounding whitespace; entries that are empty
/// after trimming are dropped.
fn split_paths(value: &str) -> Vec<PathBuf> {
    let mut paths = Vec::new();
    for piece in value.split(',').map(str::trim) {
        if !piece.is_empty() {
            paths.push(PathBuf::from(piece));
        }
    }
    paths
}
2287
2288fn extra_paths(value: &str) -> Vec<PathBuf> {
2289    let trimmed = value.trim();
2290    if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("null") {
2291        return Vec::new();
2292    }
2293    let literal = PathBuf::from(trimmed);
2294    if literal.exists() {
2295        vec![literal]
2296    } else {
2297        split_paths(trimmed)
2298    }
2299}
2300
2301fn parse_bool(value: &str, key: &str) -> Result<bool> {
2302    match value.to_ascii_lowercase().as_str() {
2303        "t" | "true" | "1" | "y" | "yes" => Ok(true),
2304        "f" | "false" | "0" | "n" | "no" => Ok(false),
2305        _ => bail!("{key} expects a boolean value, got {value}"),
2306    }
2307}
2308
/// Strips one trailing `_p1` or `_p2` suffix from an option key, if present.
///
/// Keys without either suffix are returned unchanged.
fn quality_recal_base_key(key: &str) -> &str {
    for suffix in ["_p1", "_p2"] {
        if let Some(base) = key.strip_suffix(suffix) {
            return base;
        }
    }
    key
}
2314
2315fn is_quality_recal_bool_key(key: &str) -> bool {
2316    matches!(
2317        quality_recal_base_key(key),
2318        "trackall"
2319            | "clearmatrices"
2320            | "loadq102"
2321            | "loadqap"
2322            | "loadqbp"
2323            | "loadqpt"
2324            | "loadqbt"
2325            | "loadq10"
2326            | "loadq12"
2327            | "loadqb12"
2328            | "loadqb012"
2329            | "loadqb123"
2330            | "loadqb234"
2331            | "loadq12b12"
2332            | "loadqp"
2333            | "loadq"
2334            | "recalwithposition"
2335            | "recalwithpos"
2336            | "recalusepos"
2337            | "recaltile"
2338            | "recaltiles"
2339            | "usetiles"
2340    )
2341}
2342
/// Lenient boolean parser that never fails.
///
/// - an empty value is `true` (a bare flag);
/// - a one-byte value is `true` only for `t`/`T`/`1`;
/// - any longer value is `true` only when it spells `true` in any ASCII case,
///   so `null`, `none`, and every other word are `false`.
fn parse_java_bool(value: &str) -> bool {
    match value.as_bytes() {
        [] => true,
        [only] => matches!(only.to_ascii_lowercase(), b't' | b'1'),
        _ => value.eq_ignore_ascii_case("true"),
    }
}
2356
2357fn parse_mpi_enabled(value: &str, key: &str) -> Result<bool> {
2358    if value
2359        .as_bytes()
2360        .first()
2361        .is_some_and(|byte| byte.is_ascii_digit())
2362    {
2363        Ok(parse_i32(value, key)? > 0)
2364    } else {
2365        Ok(parse_java_bool(value))
2366    }
2367}
2368
/// Result of parsing an option that accepts either a boolean or a positive
/// integer (see `parse_cardinality_bool_or_int`).
enum CardinalityToggle {
    /// The value was spelled as a boolean.
    Bool(bool),
    /// The value started with a digit and parsed as a positive integer.
    Int(usize),
}
2373
2374fn parse_cardinality_bool_or_int(value: &str, key: &str) -> Result<CardinalityToggle> {
2375    if value
2376        .as_bytes()
2377        .first()
2378        .is_some_and(|byte| byte.is_ascii_digit())
2379    {
2380        Ok(CardinalityToggle::Int(parse_cardinality_k(value, key)?))
2381    } else {
2382        Ok(CardinalityToggle::Bool(parse_bool(value, key)?))
2383    }
2384}
2385
2386fn parse_cardinality_k(value: &str, key: &str) -> Result<usize> {
2387    let parsed = parse_i32(value, key)?;
2388    if parsed <= 0 {
2389        bail!("{key} expects a positive integer, got {value}");
2390    }
2391    usize::try_from(parsed).map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))
2392}
2393
2394fn parse_cardinality_buckets(value: &str, key: &str) -> Result<usize> {
2395    let buckets = parse_kmg_i64(value, key)?;
2396    if buckets <= 0 {
2397        bail!("{key} expects a positive KMG value, got {value}");
2398    }
2399    let buckets = usize::try_from(buckets)
2400        .map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))?;
2401    if buckets > CARDINALITY_MAX_BUCKETS {
2402        bail!(
2403            "{key} requests {buckets} cardinality buckets, above the Rust safety cap of {CARDINALITY_MAX_BUCKETS}"
2404        );
2405    }
2406    Ok(buckets)
2407}
2408
2409fn parse_cardinality_seed(value: &str, key: &str) -> Result<u64> {
2410    let parsed = parse_i64(value, key)?;
2411    if parsed < 0 {
2412        Ok(parsed as u64)
2413    } else {
2414        Ok(u64::try_from(parsed)
2415            .map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))?)
2416    }
2417}
2418
2419fn parse_kcount_cell_bits(value: &str, key: &str) -> Result<u8> {
2420    let bits = parse_i64(value, key)?;
2421    if bits <= 0 || bits > 32 || !(bits as u64).is_power_of_two() {
2422        bail!("{key} expects a power-of-two integer from 1 to 32, got {value}");
2423    }
2424    Ok(bits as u8)
2425}
2426
2427fn parse_kcount_hashes(value: &str, key: &str) -> Result<usize> {
2428    let hashes = parse_i64(value, key)?;
2429    if !(1..=8).contains(&hashes) {
2430        bail!("{key} expects an integer from 1 to 8, got {value}");
2431    }
2432    Ok(hashes as usize)
2433}
2434
2435fn parse_prefilter_hashes(value: &str, key: &str) -> Result<usize> {
2436    let hashes = parse_i64(value, key)?;
2437    if !(0..=8).contains(&hashes) {
2438        bail!("{key} expects an integer from 0 to 8, got {value}");
2439    }
2440    Ok(hashes as usize)
2441}
2442
2443fn parse_matrixbits_cells(value: &str, key: &str) -> Result<usize> {
2444    let bits = parse_i64(value, key)?;
2445    if !(1..63).contains(&bits) {
2446        bail!("{key} expects an integer exponent from 1 to 62, got {value}");
2447    }
2448    1usize
2449        .checked_shl(bits as u32)
2450        .with_context(|| format!("{key} exponent is too large for this platform: {value}"))
2451}
2452
2453fn parse_auto_or_i32(value: &str, key: &str) -> Result<()> {
2454    if !value.eq_ignore_ascii_case("auto") {
2455        let _ = parse_i32(value, key)?;
2456    }
2457    Ok(())
2458}
2459
2460fn parse_auto_or_kmg_i64(value: &str, key: &str) -> Result<()> {
2461    if !value.eq_ignore_ascii_case("auto") {
2462        let _ = parse_kmg_i64(value, key)?;
2463    }
2464    Ok(())
2465}
2466
2467fn parse_preallocation_fraction(value: &str, key: &str) -> Result<Option<f64>> {
2468    if value
2469        .as_bytes()
2470        .first()
2471        .is_some_and(|byte| byte.is_ascii_digit() || *byte == b'.')
2472    {
2473        let fraction = parse_f64(value, key)?;
2474        if !(0.0..=1.0).contains(&fraction) {
2475            bail!("{key} expects a fraction between 0 and 1 or a boolean value, got {value}");
2476        }
2477        Ok((fraction > 0.0).then_some(fraction))
2478    } else if parse_java_bool(value) {
2479        Ok(Some(1.0))
2480    } else {
2481        Ok(None)
2482    }
2483}
2484
2485fn parse_fraction_micros(value: &str, key: &str) -> Result<u32> {
2486    let fraction = parse_f64(value, key)?;
2487    if !(0.0..=1.0).contains(&fraction) {
2488        bail!("{key} expects a fraction between 0 and 1, got {value}");
2489    }
2490    Ok((fraction * 1_000_000.0).round() as u32)
2491}
2492
2493fn parse_monitor(value: &str, key: &str) -> Result<()> {
2494    if value
2495        .as_bytes()
2496        .first()
2497        .is_some_and(|byte| byte.is_ascii_digit() || *byte == b'.')
2498    {
2499        let mut parts = value.split(',');
2500        let first = parts.next().unwrap_or_default();
2501        parse_f64(first, key)?;
2502        if let Some(second) = parts.next() {
2503            parse_f64(second, key)?;
2504        }
2505        if parts.next().is_some() {
2506            bail!("{key} expects one or two numeric watchdog values, got {value}");
2507        }
2508    } else {
2509        let _ = parse_java_bool(value);
2510    }
2511    Ok(())
2512}
2513
2514fn parse_qtrim(config: &mut Config, value: &str, key: &str) -> Result<()> {
2515    let lower = value.to_ascii_lowercase();
2516    match lower.as_str() {
2517        "" => {
2518            config.trim_left = true;
2519            config.trim_right = true;
2520        }
2521        "left" | "l" => {
2522            config.trim_left = true;
2523            config.trim_right = false;
2524        }
2525        "right" | "r" => {
2526            config.trim_left = false;
2527            config.trim_right = true;
2528        }
2529        "both" | "rl" | "lr" => {
2530            config.trim_left = true;
2531            config.trim_right = true;
2532        }
2533        "window" | "w" => {
2534            config.trim_left = false;
2535            config.trim_right = true;
2536            config.trim_window = true;
2537            config.trim_optimal = false;
2538            config.trim_optimal_bias = None;
2539        }
2540        _ if lower.starts_with("window,") || lower.starts_with("w,") => {
2541            let Some((_, length)) = value.split_once(',') else {
2542                unreachable!("guard requires a comma");
2543            };
2544            config.trim_window_length = parse_usize(length, key)?;
2545            config.trim_left = false;
2546            config.trim_right = true;
2547            config.trim_window = true;
2548            config.trim_optimal = false;
2549            config.trim_optimal_bias = None;
2550        }
2551        _ if value
2552            .as_bytes()
2553            .first()
2554            .is_some_and(|byte| byte.is_ascii_digit()) =>
2555        {
2556            config.trim_quality = parse_trim_quality(config, value, key)?;
2557            config.trim_right = true;
2558        }
2559        _ => {
2560            let enabled = parse_bool(value, key)?;
2561            config.trim_left = enabled;
2562            config.trim_right = enabled;
2563        }
2564    }
2565    Ok(())
2566}
2567
2568fn parse_trim_quality(config: &mut Config, value: &str, key: &str) -> Result<f64> {
2569    if value.contains(',') {
2570        let mut parts = value.split(',');
2571        let first = parts.next().unwrap_or_default();
2572        let trim_quality = parse_f64(first, key)?;
2573        for part in parts {
2574            parse_f64(part, key)?;
2575        }
2576        config.notes.push(format!(
2577            "{key}={value} requests position-specific trim qualities; Rust uses the first threshold {trim_quality} for the supported trimming path"
2578        ));
2579        return Ok(trim_quality);
2580    }
2581    parse_f64(value, key)
2582}
2583
2584fn parse_poly(value: &str, key: &str) -> Result<usize> {
2585    if value.is_empty() {
2586        bail!("{key} expects a polymer threshold or boolean value");
2587    }
2588    if value
2589        .as_bytes()
2590        .first()
2591        .is_some_and(|byte| byte.is_ascii_digit())
2592    {
2593        parse_usize(value, key)
2594    } else {
2595        Ok(if parse_bool(value, key)? { 2 } else { 0 })
2596    }
2597}
2598
2599fn parse_optitrim(config: &mut Config, value: &str, key: &str) -> Result<()> {
2600    if value
2601        .as_bytes()
2602        .first()
2603        .is_some_and(|byte| *byte == b'.' || byte.is_ascii_digit())
2604    {
2605        let bias = parse_f64(value, key)?;
2606        if !(0.0..1.0).contains(&bias) {
2607            bail!("{key} bias must be greater than or equal to 0 and less than 1");
2608        }
2609        config.trim_optimal = true;
2610        config.trim_optimal_bias = Some(bias);
2611    } else {
2612        config.trim_optimal = parse_bool(value, key)?;
2613        config.trim_optimal_bias = None;
2614    }
2615    Ok(())
2616}
2617
2618fn enable_error_correction_if_unset(config: &mut Config) {
2619    if !config.error_correct_first && !config.error_correct_final {
2620        config.error_correct_first = true;
2621    }
2622    config.error_correct = config.error_correct_first || config.error_correct_final;
2623}
2624
2625fn parse_u8(value: &str, key: &str) -> Result<u8> {
2626    value
2627        .parse::<u8>()
2628        .map_err(|_| anyhow::anyhow!("{key} expects an integer, got {value}"))
2629}
2630
2631fn parse_i8(value: &str, key: &str) -> Result<i8> {
2632    value
2633        .parse::<i8>()
2634        .map_err(|_| anyhow::anyhow!("{key} expects a byte integer, got {value}"))
2635}
2636
2637fn parse_usize(value: &str, key: &str) -> Result<usize> {
2638    value
2639        .parse::<usize>()
2640        .map_err(|_| anyhow::anyhow!("{key} expects a non-negative integer, got {value}"))
2641}
2642
2643fn parse_u64(value: &str, key: &str) -> Result<u64> {
2644    value
2645        .parse::<u64>()
2646        .map_err(|_| anyhow::anyhow!("{key} expects a non-negative integer, got {value}"))
2647}
2648
2649fn parse_i64(value: &str, key: &str) -> Result<i64> {
2650    value
2651        .parse::<i64>()
2652        .map_err(|_| anyhow::anyhow!("{key} expects an integer, got {value}"))
2653}
2654
2655fn parse_i32(value: &str, key: &str) -> Result<i32> {
2656    value
2657        .parse::<i32>()
2658        .map_err(|_| anyhow::anyhow!("{key} expects an integer, got {value}"))
2659}
2660
2661fn parse_i32_clamped(value: &str, key: &str, min: i32, max: i32) -> Result<i32> {
2662    Ok(parse_i32(value, key)?.clamp(min, max))
2663}
2664
2665fn parse_f64(value: &str, key: &str) -> Result<f64> {
2666    value
2667        .parse::<f64>()
2668        .map_err(|_| anyhow::anyhow!("{key} expects a number, got {value}"))
2669}
2670
2671fn parse_min_average_quality(value: &str, key: &str) -> Result<()> {
2672    let mut parts = value.split(',');
2673    let quality = parts.next().unwrap_or_default();
2674    parse_f64(quality, key)?;
2675    if let Some(bases) = parts.next() {
2676        parse_usize(bases, key)?;
2677    }
2678    if parts.next().is_some() {
2679        bail!("{key} expects quality or quality,bases, got {value}");
2680    }
2681    Ok(())
2682}
2683
2684fn parse_quality_offset(value: &str, key: &str) -> Result<Option<u8>> {
2685    match value.to_ascii_lowercase().as_str() {
2686        "auto" => Ok(None),
2687        "sanger" => Ok(Some(33)),
2688        "illumina" => Ok(Some(64)),
2689        "33" => Ok(Some(33)),
2690        "64" => Ok(Some(64)),
2691        _ => bail!("{key} expects auto, sanger, illumina, 33, or 64, got {value}"),
2692    }
2693}
2694
2695fn parse_fake_fasta_quality(config: &mut Config, value: &str) -> Result<()> {
2696    if value.is_empty() {
2697        return Ok(());
2698    }
2699    if value.as_bytes()[0].is_ascii_alphabetic() {
2700        let _ = parse_bool(value, "fakefastaquality")?;
2701        return Ok(());
2702    }
2703
2704    let parsed = parse_i32(value, "fakefastaquality")?;
2705    if parsed > 0 {
2706        config.fake_quality = parsed.min(50) as u8;
2707    }
2708    Ok(())
2709}
2710
2711fn parse_fasta_wrap(value: &str, key: &str) -> Result<usize> {
2712    let parsed = parse_kmg_i64(value, key)?;
2713    if parsed < 1 {
2714        Ok(0)
2715    } else {
2716        usize::try_from(parsed).map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))
2717    }
2718}
2719
2720fn parse_junk_mode(config: &mut Config, value: &str) -> Result<()> {
2721    match value.to_ascii_lowercase().as_str() {
2722        "ignore" => {
2723            config.fix_junk_and_iupac = false;
2724            config.junk_mode = JunkMode::Ignore;
2725        }
2726        "crash" | "fail" => {
2727            config.fix_junk_and_iupac = false;
2728            config.junk_mode = JunkMode::Crash;
2729        }
2730        "fix" => {
2731            config.fix_junk_and_iupac = false;
2732            config.junk_mode = JunkMode::Fix;
2733        }
2734        "flag" | "discard" => {
2735            config.fix_junk_and_iupac = false;
2736            config.junk_mode = JunkMode::Flag;
2737        }
2738        "iupacton" => {
2739            config.fix_junk_and_iupac = true;
2740            config.junk_mode = JunkMode::Fix;
2741        }
2742        _ => {
2743            bail!("junk expects ignore, crash, fail, fix, flag, discard, or iupacton, got {value}")
2744        }
2745    }
2746    Ok(())
2747}
2748
2749fn parse_percent(value: &str, key: &str) -> Result<f64> {
2750    let mut parsed = parse_f64(value, key)?;
2751    if parsed > 1.0 && parsed <= 100.0 {
2752        parsed /= 100.0;
2753    }
2754    if !(0.0..=1.0).contains(&parsed) {
2755        bail!("{key} must be between 0 and 100");
2756    }
2757    Ok(parsed)
2758}
2759
2760fn parse_limit(value: &str, key: &str) -> Result<Option<u64>> {
2761    let parsed = parse_kmg_i64(value, key)?;
2762    if parsed < 0 {
2763        Ok(None)
2764    } else {
2765        Ok(Some(parsed as u64))
2766    }
2767}
2768
2769fn parse_kmg_i64(value: &str, key: &str) -> Result<i64> {
2770    let lower = value.to_ascii_lowercase();
2771    if matches!(lower.as_str(), "big" | "inf" | "infinity" | "max" | "huge") {
2772        return Ok(i64::MAX);
2773    }
2774
2775    let Some(last) = lower.chars().last() else {
2776        bail!("{key} expects an integer or KMG value, got {value}");
2777    };
2778    let (number, multiplier) = match last {
2779        'k' => (&value[..value.len() - 1], 1_000_f64),
2780        'm' => (&value[..value.len() - 1], 1_000_000_f64),
2781        'g' | 'b' => (&value[..value.len() - 1], 1_000_000_000_f64),
2782        't' => (&value[..value.len() - 1], 1_000_000_000_000_f64),
2783        'p' | 'q' => (&value[..value.len() - 1], 1_000_000_000_000_000_f64),
2784        'e' => (&value[..value.len() - 1], 1_000_000_000_000_000_000_f64),
2785        'c' | 'h' => (&value[..value.len() - 1], 100_f64),
2786        'd' => (&value[..value.len() - 1], 10_f64),
2787        _ if last.is_ascii_alphabetic() => {
2788            bail!("{key} has an unsupported KMG suffix in {value}");
2789        }
2790        _ => (value, 1_f64),
2791    };
2792
2793    if number
2794        .chars()
2795        .last()
2796        .is_some_and(|char| char.is_ascii_alphabetic())
2797    {
2798        bail!("{key} has too many suffix letters in {value}");
2799    }
2800
2801    let parsed = if number.contains('.') || multiplier != 1.0 {
2802        let scaled = number
2803            .parse::<f64>()
2804            .map_err(|_| anyhow::anyhow!("{key} expects an integer or KMG value, got {value}"))?
2805            * multiplier;
2806        if scaled > i64::MAX as f64 || scaled < i64::MIN as f64 {
2807            bail!("{key} value is out of range: {value}");
2808        }
2809        scaled as i64
2810    } else {
2811        number
2812            .parse::<i64>()
2813            .map_err(|_| anyhow::anyhow!("{key} expects an integer or KMG value, got {value}"))?
2814    };
2815    Ok(parsed)
2816}
2817
2818fn parse_kmg_usize(value: &str, key: &str) -> Result<usize> {
2819    let parsed = parse_kmg_i64(value, key)?;
2820    if parsed < 0 {
2821        bail!("{key} expects a non-negative KMG value, got {value}");
2822    }
2823    usize::try_from(parsed).map_err(|_| anyhow::anyhow!("{key} value is out of range: {value}"))
2824}
2825
2826fn parse_positive_kmg_usize(value: &str, key: &str) -> Result<usize> {
2827    let parsed = parse_kmg_usize(value, key)?;
2828    if parsed == 0 {
2829        bail!("{key} expects a positive KMG value, got {value}");
2830    }
2831    Ok(parsed)
2832}
2833
2834#[cfg(test)]
2835mod tests {
2836    use super::*;
2837
2838    fn parse(values: &[&str]) -> Config {
2839        let mut args: Vec<OsString> = values.iter().map(OsString::from).collect();
2840        if !values.iter().any(|value| is_pass_selector(value)) {
2841            args.push(OsString::from("passes=1"));
2842        }
2843        parse_args(args).unwrap()
2844    }
2845
2846    fn is_pass_selector(value: &str) -> bool {
2847        let lower = value.to_ascii_lowercase();
2848        matches!(lower.as_str(), "1pass" | "1p" | "2pass" | "2p")
2849            || lower.split_once('=').is_some_and(|(key, _)| {
2850                matches!(key, "passes" | "p" | "1pass" | "1p" | "2pass" | "2p")
2851            })
2852    }
2853
2854    #[test]
2855    fn implicit_bbnorm_default_keeps_two_pass_mode() {
2856        let cfg = parse_args(["in=reads.fq"].into_iter().map(OsString::from)).unwrap();
2857        assert_eq!(cfg.passes, 2);
2858    }
2859
2860    #[test]
2861    fn one_pass_aliases_select_supported_single_pass_like_bbnorm() {
2862        let cfg = parse_args(["in=reads.fq", "1pass"].into_iter().map(OsString::from)).unwrap();
2863        assert_eq!(cfg.passes, 1);
2864
2865        let cfg = parse_args(["in=reads.fq", "1pass=f"].into_iter().map(OsString::from)).unwrap();
2866        assert_eq!(cfg.passes, 1);
2867    }
2868
2869    #[test]
2870    fn two_pass_aliases_select_multipass_like_bbnorm() {
2871        let cfg = parse_args(["in=reads.fq", "2pass=f"].into_iter().map(OsString::from)).unwrap();
2872        assert_eq!(cfg.passes, 2);
2873    }
2874
2875    #[test]
2876    fn parses_core_aliases() {
2877        let cfg = parse(&[
2878            "reads.fq",
2879            "out=keep.fq",
2880            "outt=toss.fq",
2881            "hist=hist.tsv",
2882            "k=21",
2883            "min=3",
2884            "max=9",
2885            "minkmers=2",
2886            "ml=42",
2887            "dp=60",
2888            "tbr=t",
2889            "rbb=t",
2890            "srr=t",
2891            "overwrite=t",
2892            "append=t",
2893        ]);
2894        assert_eq!(cfg.in1.unwrap(), PathBuf::from("reads.fq"));
2895        assert_eq!(cfg.out1.unwrap(), PathBuf::from("keep.fq"));
2896        assert_eq!(cfg.out_toss1.unwrap(), PathBuf::from("toss.fq"));
2897        assert_eq!(cfg.hist_in.unwrap(), PathBuf::from("hist.tsv"));
2898        assert_eq!(cfg.k, 21);
2899        assert_eq!(cfg.min_depth, 3);
2900        assert_eq!(cfg.max_depth, Some(100));
2901        assert_eq!(cfg.min_kmers_over_min_depth, 2);
2902        assert_eq!(cfg.min_length, 42);
2903        assert!((cfg.depth_percentile - 0.60).abs() < f64::EPSILON);
2904        assert!(cfg.toss_error_reads);
2905        assert!(cfg.require_both_bad);
2906        assert!(cfg.save_rare_reads);
2907        assert!(cfg.overwrite);
2908        assert!(cfg.append);
2909    }
2910
2911    #[test]
2912    fn accepts_shared_input_output_file_aliases() {
2913        let cfg = parse(&[
2914            "input=reads1.fq",
2915            "input2=reads2.fq",
2916            "output=keep1.fq",
2917            "output2=keep2.fq",
2918        ]);
2919
2920        assert_eq!(cfg.in1.unwrap(), PathBuf::from("reads1.fq"));
2921        assert_eq!(cfg.in2.unwrap(), PathBuf::from("reads2.fq"));
2922        assert_eq!(cfg.out1.unwrap(), PathBuf::from("keep1.fq"));
2923        assert_eq!(cfg.out2.unwrap(), PathBuf::from("keep2.fq"));
2924    }
2925
2926    #[test]
2927    fn parses_bare_boolean_flags_like_bbnorm() {
2928        let cfg = parse_args(
2929            [
2930                "reads.fq",
2931                "prefilter",
2932                "countup",
2933                "keepall",
2934                "ecc",
2935                "ecco",
2936                "ow",
2937            ]
2938            .into_iter()
2939            .map(OsString::from),
2940        )
2941        .unwrap();
2942        assert_eq!(cfg.in1.unwrap(), PathBuf::from("reads.fq"));
2943        assert!(cfg.in2.is_none());
2944        assert!(cfg.prefilter.enabled);
2945        assert!(!cfg.prefilter.force_disabled);
2946        assert!(cfg.count_up);
2947        assert!(cfg.keep_all);
2948        assert!(cfg.error_correct);
2949        assert!(cfg.overlap_error_correct);
2950        assert!(cfg.overwrite);
2951
2952        let cfg = parse_args(
2953            ["in=x.fq", "prefilter", "prefilter=f"]
2954                .into_iter()
2955                .map(OsString::from),
2956        )
2957        .unwrap();
2958        assert!(!cfg.prefilter.enabled);
2959        assert!(cfg.prefilter.force_disabled);
2960
2961        let cfg = parse_args(
2962            ["in=x.fq", "prefilter=f", "prefilter"]
2963                .into_iter()
2964                .map(OsString::from),
2965        )
2966        .unwrap();
2967        assert!(cfg.prefilter.enabled);
2968        assert!(!cfg.prefilter.force_disabled);
2969    }
2970
/// `max` below `target` is raised to the target, `minkmers=0` is raised to 1,
/// and a `max` above the target is kept as given.
#[test]
fn clamps_max_depth_and_minkmers_like_bbnorm() {
    let clamped = parse(&["in=reads.fq", "target=100", "max=50", "minkmers=0"]);
    assert_eq!(clamped.target_depth, 100);
    assert_eq!(clamped.max_depth, Some(100));
    assert_eq!(clamped.min_kmers_over_min_depth, 1);

    // Argument order is max-then-target here; the larger max must survive.
    let untouched = parse(&["in=reads.fq", "max=150", "target=100"]);
    assert_eq!(untouched.max_depth, Some(150));
}
2981
/// Both the long `fixspikes` key and the short `fs` key drive `fix_spikes`.
#[test]
fn parses_fixspikes_aliases() {
    for (arg, expected) in [("fixspikes=t", true), ("fs=f", false)] {
        let cfg = parse(&["in=reads.fq", arg]);
        assert_eq!(cfg.fix_spikes, expected);
    }
}
2990
/// Read limits accept suffixed numbers as well as the `-1` and `max` special values.
#[test]
fn parses_kmg_read_limits_like_bbnorm() {
    // `0.01k` and `1d` are both expected to come out as 10.
    let scaled = parse(&["in=reads.fq", "reads=0.01k", "tablereads=1d"]);
    assert_eq!(scaled.max_reads, Some(10));
    assert_eq!(scaled.table_reads, Some(10));

    // `-1` leaves the read limit unset; `max` maps to the largest signed 64-bit value.
    let special = parse(&["in=reads.fq", "reads=-1", "tablereads=max"]);
    assert_eq!(special.max_reads, None);
    assert_eq!(special.table_reads, Some(i64::MAX as u64));
}
3001
/// `minlen` accepts a fractional value with a `k` suffix (`0.101k` is 101).
#[test]
fn parses_kmg_min_length_like_bbnorm() {
    let min_length = parse(&["in=reads.fq", "minlen=0.101k"]).min_length;
    assert_eq!(min_length, 101);
}
3007
/// Covers the `qtrim` / `trimq` spellings: side selection, numeric shorthand,
/// comma-separated values (which must leave a note), and window mode.
#[test]
fn parses_quality_trimming_like_bbnorm() {
    // Scenarios that must all end up trimming the right side only, with the
    // given quality and, where listed, a note echoing the comma-separated value.
    let right_only: [(&[&str], f64, Option<&str>); 4] = [
        (&["in=reads.fq", "qtrim=r", "trimq=10"], 10.0, None),
        (&["in=reads.fq", "qtrim=12"], 12.0, None),
        (
            &["in=reads.fq", "qtrim=r", "trimq=10,20"],
            10.0,
            Some("trimq=10,20"),
        ),
        (&["in=reads.fq", "qtrim=12,20"], 12.0, Some("qtrim=12,20")),
    ];
    for (args, quality, noted) in right_only {
        let cfg = parse(args);
        assert!(!cfg.trim_left);
        assert!(cfg.trim_right);
        assert!((cfg.trim_quality - quality).abs() < f64::EPSILON);
        if let Some(fragment) = noted {
            assert!(cfg.notes.iter().any(|note| note.contains(fragment)));
        }
    }

    // `qtrim=t` trims both sides; the optimal-trim switch and good-interval are independent knobs.
    let both_sides = parse(&["in=reads.fq", "qtrim=t", "optitrim=f", "trimgoodinterval=3"]);
    assert!(both_sides.trim_left);
    assert!(both_sides.trim_right);
    assert!(!both_sides.trim_optimal);
    assert_eq!(both_sides.trim_min_good_interval, 3);

    // `qtrim=w,5` selects window trimming with a window length of 5.
    let windowed = parse(&["in=reads.fq", "qtrim=w,5"]);
    assert!(!windowed.trim_left);
    assert!(windowed.trim_right);
    assert!(windowed.trim_window);
    assert!(!windowed.trim_optimal);
    assert_eq!(windowed.trim_window_length, 5);
}
3045
/// Each quality-offset spelling must yield the same input and output offset;
/// `qauto=t` additionally has to leave a note mentioning `qauto`.
#[test]
fn parses_quality_output_offset_like_bbnorm() {
    // (arguments, offset expected for both input and output, optional note fragment)
    let cases: [(&[&str], _, Option<&str>); 6] = [
        (&["in=reads.fq", "qin=64", "qout=64"], 64, None),
        (&["in=reads.fq", "qout=auto", "qin=sanger"], 33, None),
        (&["in=reads.fq", "qual=illumina"], 64, None),
        (&["in=reads.fq", "asciiin=64", "qualityout=64"], 64, None),
        (&["in=reads.fq", "qauto=t"], 33, Some("qauto")),
        (&["in=reads.fq", "qin=64", "qauto=f", "qout=64"], 64, None),
    ];
    for (args, offset, noted) in cases {
        let cfg = parse(args);
        assert_eq!(cfg.quality_in_offset, offset);
        assert_eq!(cfg.quality_out_offset, offset);
        if let Some(fragment) = noted {
            assert!(cfg.notes.iter().any(|note| note.contains(fragment)));
        }
    }
}
3073
/// Checks the change-quality switch, its interaction with `ignorebadquality`,
/// and the clamping of the called-quality bounds.
#[test]
fn parses_quality_change_controls_like_bbnorm() {
    // In-range bounds are stored as given.
    let in_range = parse(&[
        "in=reads.fq",
        "changequality=f",
        "mincalledquality=5",
        "maxcalledquality=30",
    ]);
    assert!(!in_range.change_quality);
    assert_eq!(in_range.min_called_quality, 5);
    assert_eq!(in_range.max_called_quality, 30);

    // Out-of-range bounds are expected to be clamped to 0 and 93.
    let clamped = parse(&[
        "in=reads.fq",
        "cq=t",
        "mincalledquality=-5",
        "maxcalledquality=200",
    ]);
    assert!(clamped.change_quality);
    assert_eq!(clamped.min_called_quality, 0);
    assert_eq!(clamped.max_called_quality, 93);

    // Remaining scenarios only look at the resulting change-quality switch.
    let switch_only: [(&[&str], bool); 4] = [
        (&["in=reads.fq", "ignorebadquality=t"], false),
        (&["in=reads.fq", "ibq=t"], false),
        (&["in=reads.fq", "changequality=f", "ignorebadquality=f"], false),
        (&["in=reads.fq", "ignorebadquality=t", "changequality=t"], true),
    ];
    for (args, expected) in switch_only {
        let cfg = parse(args);
        assert_eq!(cfg.change_quality, expected);
    }
}
3108
/// Every fake-quality spelling is checked against the value it must produce,
/// including the out-of-range and boolean inputs.
#[test]
fn parses_fake_quality_controls_like_bbnorm() {
    let cases = [
        ("fakequality=20", 20),
        ("qfake=15", 15),
        ("fakefastaquality=80", 50),
        ("fakefastaquality=0", 30),
        ("ffq=t", 30),
    ];
    for (arg, expected) in cases {
        let cfg = parse(&["in=reads.fa", arg]);
        assert_eq!(cfg.fake_quality, expected);
    }
}
3126
/// FASTA wrap defaults to 70, follows `fastawrap` / `wrap`, and a negative
/// value ends up as 0.
#[test]
fn parses_fasta_wrap_like_bbnorm() {
    // No wrap argument at all: the default applies.
    let defaults = parse(&["in=reads.fq"]);
    assert_eq!(defaults.fasta_wrap, 70);

    for (arg, expected) in [("fastawrap=20", 20), ("wrap=0", 0), ("wrap=-1", 0)] {
        let cfg = parse(&["in=reads.fq", arg]);
        assert_eq!(cfg.fasta_wrap, expected);
    }
}
3141
/// Thread-count arguments: explicit numbers, the gzip-thread coupling, and the
/// `-1` / `auto` / `max` special values.
#[test]
fn accepts_thread_counts_like_bbnorm_as_rayon_controls() {
    // An explicit count is stored, mirrored into gzip threads, and explained by two notes.
    let explicit = parse(&["in=reads.fq", "threads=2"]);
    assert_eq!(explicit.threads, Some(2));
    assert_eq!(explicit.gzip_threads, Some(2));
    for fragment in [
        "threads=2 accepted",
        "also enables gzip input/output workers",
    ] {
        assert!(explicit.notes.iter().any(|note| note.contains(fragment)));
    }

    // An explicit zipthreads value takes precedence for gzip threads.
    let split = parse(&["in=reads.fq", "threads=2", "zipthreads=1"]);
    assert_eq!(split.threads, Some(2));
    assert_eq!(split.gzip_threads, Some(1));

    let with_unpigz = parse(&["in=reads.fq", "threads=2", "useunpigz=t"]);
    assert_eq!(with_unpigz.gzip_threads, Some(2));

    // `t=-1` leaves the thread count unset and adds no note.
    let negative = parse(&["in=reads.fq", "t=-1"]);
    assert_eq!(negative.threads, None);
    assert!(negative.notes.is_empty());

    // `auto` also leaves it unset, but is acknowledged with a note.
    let automatic = parse(&["in=reads.fq", "threads=auto"]);
    assert_eq!(automatic.threads, None);
    let auto_noted = automatic
        .notes
        .iter()
        .any(|note| note.contains("threads=auto accepted"));
    assert!(auto_noted);

    // `max` resolves to the machine's available parallelism (1 if unknown).
    let maxed = parse(&["in=reads.fq", "threads=max"]);
    let available = std::thread::available_parallelism().map_or(1, |cores| cores.get());
    assert_eq!(maxed.threads, Some(available));
    let max_noted = maxed
        .notes
        .iter()
        .any(|note| note.contains("threads=max accepted"));
    assert!(max_noted);
}
3192
/// Valid step-size arguments must leave a trusted-kmer sampling note; malformed
/// ones must be rejected with an "expects" error.
#[test]
fn accepts_build_step_size_controls_as_covered_noops() {
    for case in ["stepsize=2", "buildstepsize=4"] {
        let cfg = parse(&["in=reads.fq", case]);
        let noted = cfg
            .notes
            .iter()
            .any(|note| note.contains("trusted-kmer sampling control"));
        assert!(
            noted,
            "missing trusted-kmer note for {case}: {:?}",
            cfg.notes
        );
    }

    // Parse with a non-numeric value and return the error text.
    let reject = |case: &str| {
        let argv = ["in=reads.fq", "passes=1", case];
        parse_args(argv.iter().copied().map(OsString::from))
            .unwrap_err()
            .to_string()
    };
    for case in ["stepsize=abc", "buildstepsize=abc"] {
        let err = reject(case);
        assert!(
            err.contains("expects"),
            "unexpected error for malformed {case}: {err}"
        );
    }
}
3220
/// Sketch controls set to default-equivalent values must each leave a note;
/// the first-pass bit-width aliases must also record the width, and malformed
/// widths must be rejected.
#[test]
fn accepts_default_equivalent_sketch_controls_as_noops() {
    let accepted = [
        "bits=32",
        "bits1=16",
        "cbits1=16",
        "cellbits1=16",
        "hashes=3",
        "buildpasses=1",
        "prefilter=t",
    ];
    // Spellings of the first-pass bit-width key.
    let first_pass_aliases = ["bits1", "cbits1", "cellbits1"];

    for case in accepted {
        let cfg = parse(&["in=reads.fq", case]);
        assert!(
            !cfg.notes.is_empty(),
            "expected an explanatory no-op note for {case}"
        );
        let is_first_pass_width = first_pass_aliases.iter().any(|alias| case.contains(*alias));
        if is_first_pass_width {
            assert_eq!(
                cfg.count_min_bits_first,
                Some(16),
                "expected first-pass bit width for {case}"
            );
        }
    }

    for case in ["bits1=abc", "cbits1=abc", "cellbits1=abc"] {
        let argv = ["in=reads.fq", "passes=1", case];
        let err = parse_args(argv.iter().copied().map(OsString::from))
            .unwrap_err()
            .to_string();
        assert!(
            err.contains("expects"),
            "unexpected error for malformed {case}: {err}"
        );
    }
}
3260
/// Walks the prefilter options scenario by scenario, checking the stored
/// `prefilter` settings, the enabled / force-disabled switches, and the notes
/// expected alongside them. Later arguments are expected to win over earlier ones.
#[test]
fn accepts_prefilter_controls_with_constrained_sketch_settings() {
    // Every scenario parses `in=x.fq passes=1` followed by its own arguments.
    let prefix = ["in=x.fq", "passes=1"];
    let run = |extra: &[&str]| {
        parse_args(
            prefix
                .iter()
                .copied()
                .chain(extra.iter().copied())
                .map(OsString::from),
        )
        .unwrap()
    };

    // Explicit cells / hashes / bits enable the prefilter and are stored verbatim.
    let cfg = run(&["prefiltercells=1k", "prehashes=2", "pbits=8"]);
    assert_eq!(cfg.prefilter.cells, Some(1000));
    assert_eq!(cfg.prefilter.hashes, Some(2));
    assert_eq!(cfg.prefilter.bits, Some(8));
    assert_eq!(cfg.prefilter.memory_bytes, None);
    assert_eq!(cfg.prefilter.memory_fraction_micros, None);
    assert!(cfg.prefilter.enabled);
    assert!(!cfg.prefilter.force_disabled);
    let noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("prefilter collision estimates"));
    assert!(
        noted,
        "expected constrained prefilter note: {:?}",
        cfg.notes
    );

    // Cell-count scenarios: (arguments, stored cells, enabled). None is force-disabled.
    let cell_cases: [(&[&str], Option<usize>, bool); 3] = [
        (&["precells=1k"], Some(1000), true),
        (&["precells=0"], None, false),
        (&["prefilter=t", "prefiltercells=0"], None, true),
    ];
    for (extra, cells, enabled) in cell_cases {
        let cfg = run(extra);
        assert_eq!(cfg.prefilter.cells, cells);
        assert_eq!(cfg.prefilter.enabled, enabled);
        assert!(!cfg.prefilter.force_disabled);
    }

    // A hash count on its own implicitly enables the prefilter.
    let cfg = run(&["prefilterhashes=1"]);
    assert_eq!(cfg.prefilter.hashes, Some(1));
    assert!(cfg.prefilter.enabled);
    assert!(!cfg.prefilter.force_disabled);
    let noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("prefilter collision estimates"));
    assert!(noted, "expected implicit prefilter note: {:?}", cfg.notes);

    // A zero hash count stores nothing and does not change the enabled state.
    let zero_hash_cases: [(&[&str], bool); 2] = [
        (&["prehashes=0"], false),
        (&["prefilter=t", "prehashes=0"], true),
    ];
    for (extra, enabled) in zero_hash_cases {
        let cfg = run(extra);
        assert_eq!(cfg.prefilter.hashes, None);
        assert_eq!(cfg.prefilter.enabled, enabled);
        assert!(!cfg.prefilter.force_disabled);
    }

    // An absolute memory budget enables the prefilter.
    let cfg = run(&["prefiltermemory=1k"]);
    assert_eq!(cfg.prefilter.memory_bytes, Some(1000));
    assert!(cfg.prefilter.enabled);
    assert!(!cfg.prefilter.force_disabled);
    let noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("prefilter memory-sizing"));
    assert!(
        noted,
        "expected memory-backed prefilter note: {:?}",
        cfg.notes
    );

    // A non-zero fraction is stored in micro-units and enables the prefilter.
    for case in ["prefiltersize=0.1", "prefilterfraction=0.1"] {
        let cfg = run(&[case]);
        assert!(cfg.prefilter.enabled);
        assert!(!cfg.prefilter.force_disabled);
        assert_eq!(cfg.prefilter.memory_fraction_micros, Some(100_000));
        let noted = cfg
            .notes
            .iter()
            .any(|note| note.contains("prefilter collision memory"));
        assert!(noted, "expected prefilter fraction note for {case}");
    }

    // A zero fraction force-disables the prefilter instead.
    for case in ["prefiltersize=0", "prefilterfraction=0"] {
        let cfg = run(&[case]);
        assert!(!cfg.prefilter.enabled);
        assert!(cfg.prefilter.force_disabled);
        assert_eq!(cfg.prefilter.memory_fraction_micros, None);
        let noted = cfg
            .notes
            .iter()
            .any(|note| note.contains("disables fraction-derived"));
        assert!(noted, "expected zero-fraction note for {case}");
    }

    // Plain `prefilter=t` with no sizing arguments.
    let cfg = run(&["prefilter=t"]);
    assert!(cfg.prefilter.enabled);
    assert!(!cfg.prefilter.force_disabled);
    let noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("default prefilter partitioning"));
    assert!(noted, "expected enabled prefilter note: {:?}", cfg.notes);

    // Ordering between `prehashes=1` and explicit on/off switches:
    // (arguments, enabled, force_disabled); the hash count is always kept.
    let hash_order_cases: [(&[&str], bool, bool); 3] = [
        (&["prehashes=1", "prefilter=f"], false, true),
        (&["prehashes=1", "prefilter=f", "prefilter=t"], true, false),
        (&["prefilter=f", "prehashes=1"], true, false),
    ];
    for (extra, enabled, force_disabled) in hash_order_cases {
        let cfg = run(extra);
        assert_eq!(cfg.prefilter.hashes, Some(1));
        assert_eq!(cfg.prefilter.enabled, enabled);
        assert_eq!(cfg.prefilter.force_disabled, force_disabled);
    }

    // A trailing `prefilter=f` disables but keeps the stored cell count.
    let cfg = run(&["prefiltercells=1k", "prefilter=f"]);
    assert_eq!(cfg.prefilter.cells, Some(1000));
    assert!(!cfg.prefilter.enabled);
    assert!(cfg.prefilter.force_disabled);

    // The same ordering rule for a memory fraction.
    let fraction_order_cases: [(&[&str], bool, bool); 2] = [
        (&["prefilterfraction=0.1", "prefilter=f"], false, true),
        (&["prefilter=f", "prefilterfraction=0.1"], true, false),
    ];
    for (extra, enabled, force_disabled) in fraction_order_cases {
        let cfg = run(extra);
        assert_eq!(cfg.prefilter.memory_fraction_micros, Some(100_000));
        assert_eq!(cfg.prefilter.enabled, enabled);
        assert_eq!(cfg.prefilter.force_disabled, force_disabled);
    }

    // More than one build pass is stored and explained by a trusted-kmer note.
    let cfg = run(&["buildpasses=2"]);
    assert_eq!(cfg.build_passes, 2);
    let noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("trusted-kmer filtering"));
    assert!(
        noted,
        "expected build-pass trusted-filter note: {:?}",
        cfg.notes
    );
}
3517
/// Checks that explicit count-min, count-up spill, and auto-sizing arguments
/// land in the matching configuration fields, and that malformed or
/// out-of-range values for those keys are rejected.
#[test]
fn accepts_constrained_count_min_controls_as_real_sketch_settings() {
    // Every scenario parses `in=x.fq passes=1` followed by its own arguments.
    let prefix = ["in=x.fq", "passes=1"];
    let run = |extra: &[&str]| {
        parse_args(
            prefix
                .iter()
                .copied()
                .chain(extra.iter().copied())
                .map(OsString::from),
        )
        .unwrap()
    };

    // Explicit bits / hashes / cells.
    let cfg = run(&["bits=16", "hashes=2", "cells=1k"]);
    assert_eq!(cfg.count_min.bits, Some(16));
    assert_eq!(cfg.count_min.hashes, Some(2));
    assert_eq!(cfg.count_min.cells, Some(1000));
    let noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("fixed-memory count-min input sketch"));
    assert!(
        noted,
        "expected fixed-memory count-min sketch note: {:?}",
        cfg.notes
    );

    // `matrixbits=10` is expected to yield 1024 cells.
    let cfg = run(&["matrixbits=10"]);
    assert_eq!(cfg.count_min.cells, Some(1024));

    // A sketch memory budget is stored in bytes.
    let cfg = run(&["bits=8", "hashes=2", "sketchmemory=1k"]);
    assert_eq!(cfg.count_min.memory_bytes, Some(1000));
    let noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("count-min memory budget"));
    assert!(
        noted,
        "expected count-min memory-budget note: {:?}",
        cfg.notes
    );

    // Count-up spill caps.
    let cfg = run(&[
        "maxcountupspillbytes=64m",
        "maxcountupspillfinallivebytes=96m",
        "maxcountupspillwritebytes=128m",
        "maxcountupspillinitialruns=10",
        "maxcountupspillmergeruns=2",
        "maxcountupspillfinalruns=4",
    ]);
    assert_eq!(cfg.max_countup_spill_live_bytes, Some(64_000_000));
    assert_eq!(cfg.max_countup_spill_final_live_bytes, Some(96_000_000));
    assert_eq!(cfg.max_countup_spill_write_bytes, Some(128_000_000));
    assert_eq!(cfg.max_countup_spill_initial_runs, Some(10));
    assert_eq!(cfg.max_countup_spill_merge_runs, Some(2));
    assert_eq!(cfg.max_countup_spill_final_runs, Some(4));
    let live_cap_noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("count-up temp-spill safety cap"));
    assert!(
        live_cap_noted,
        "expected count-up spill live cap note: {:?}",
        cfg.notes
    );
    let write_cap_noted = cfg
        .notes
        .iter()
        .any(|note| note.contains("count-up temp-spill I/O safety cap"));
    assert!(
        write_cap_noted,
        "expected count-up spill write cap note: {:?}",
        cfg.notes
    );

    // Automatic count-min sizing controls.
    let cfg = run(&[
        "mem=2g",
        "autocountmin=f",
        "exact=t",
        "autosketchbytes=4m",
        "autocountminreads=500",
    ]);
    assert_eq!(cfg.auto_count_min_memory_bytes, Some(2_000_000_000));
    assert!(!cfg.auto_count_min);
    assert!(cfg.force_exact_counts);
    assert_eq!(cfg.auto_count_min_input_bytes, 4_000_000);
    assert_eq!(cfg.auto_count_min_read_threshold, 500);

    // Each of these single arguments must make parsing fail with one of the
    // recognised error phrasings.
    let rejected = [
        "bits=abc",
        "bits=0",
        "bits=3",
        "bits=64",
        "hashes=abc",
        "hashes=0",
        "hashes=9",
        "cells=abc",
        "cells=0",
        "matrixbits=abc",
        "matrixbits=0",
        "matrixbits=64",
        "sketchmemory=abc",
        "sketchmemory=0",
        "maxcountupspillbytes=abc",
        "maxcountupspillfinallivebytes=abc",
        "maxcountupspillwritebytes=-1",
        "maxcountupspillinitialruns=abc",
        "maxcountupspillmergeruns=-1",
        "maxcountupspillfinalruns=abc",
        "mem=abc",
        "autosketchbytes=0",
        "autocountminreads=0x",
        "buildpasses=abc",
        "prehashes=abc",
        "prefilterhashes=abc",
        "prefiltercells=abc",
        "precells=abc",
        "prefiltersize=abc",
        "prefilterfraction=abc",
        "prefilterbits=abc",
        "prefilterbits=64",
        "prebits=abc",
        "prebits=3",
        "pbits=3",
        "prehashes=9",
    ];
    let accepted_phrasings = [
        "expects",
        "unsupported KMG suffix",
        "too many suffix letters",
    ];
    for case in rejected {
        let err = parse_args(
            prefix
                .iter()
                .copied()
                .chain(std::iter::once(case))
                .map(OsString::from),
        )
        .unwrap_err()
        .to_string();
        let recognised = accepted_phrasings
            .iter()
            .any(|phrase| err.contains(*phrase));
        assert!(recognised, "unexpected error for malformed {case}: {err}");
    }
}
3675
/// Kmer-table runtime controls must be accepted with an explanatory note, a
/// few of them must also be stored, and malformed values must be rejected.
#[test]
fn accepts_kmer_table_runtime_controls_as_working_fallbacks() {
    let prefix = ["in=x.fq", "passes=1"];
    // Argument vector for one extra control after the shared prefix.
    let argv_for = |case: &'static str| {
        prefix
            .iter()
            .copied()
            .chain(std::iter::once(case))
            .map(OsString::from)
    };

    let accepted = [
        "initialsize=1k",
        "ways=31",
        "buflen=64k",
        "bufflen=64k",
        "bufferlength=64k",
        "tabletype=2",
        "rcomp=t",
        "maskmiddle=f",
        "showstats=t",
        "stats=f",
        "showspeed=f",
        "ss=t",
        "verbose2=t",
        "prealloc=0.25",
        "preallocate=f",
        "filtermemory=1k",
        "prefiltermemory=1k",
        "filtermem=1k",
        "filtermemoryoverride=1k",
        "minprobprefilter=f",
        "mpp=t",
        "minprobmain=t",
        "mpm=f",
        "prefilterpasses=auto",
        "prepasses=1",
        "onepass=t",
    ];
    // Any one of these fragments in any note counts as the expected explanation.
    let note_fragments = [
        "kmer-table",
        "prefilter memory-sizing",
        "prefilter pass-count",
    ];
    for case in accepted {
        let cfg = parse_args(argv_for(case)).unwrap();
        let explained = cfg.notes.iter().any(|note| {
            note_fragments
                .iter()
                .any(|fragment| note.contains(*fragment))
        });
        assert!(
            explained,
            "expected kmer-table fallback note for {case}: {:?}",
            cfg.notes
        );
    }

    // Initial size and preallocation fraction are stored, not just noted.
    let sized = parse(&["in=x.fq", "passes=1", "initialsize=1k", "prealloc=0.25"]);
    assert_eq!(sized.table_initial_size, Some(1000));
    assert_eq!(sized.table_prealloc_fraction, Some(0.25));

    // Boolean preallocation: on means a full fraction, off means none.
    let prealloc_on = parse(&["in=x.fq", "passes=1", "preallocate=t"]);
    assert_eq!(prealloc_on.table_prealloc_fraction, Some(1.0));

    let prealloc_off = parse(&["in=x.fq", "passes=1", "preallocate=f"]);
    assert_eq!(prealloc_off.table_prealloc_fraction, None);

    let rejected = [
        "initialsize=abc",
        "ways=abc",
        "buflen=abc",
        "tabletype=abc",
        "prealloc=0.abc",
        "prealloc=1.5",
        "filtermemory=abc",
        "prepasses=abc",
    ];
    let accepted_phrasings = [
        "expects",
        "unsupported KMG suffix",
        "too many suffix letters",
    ];
    for case in rejected {
        let err = parse_args(argv_for(case)).unwrap_err().to_string();
        let recognised = accepted_phrasings
            .iter()
            .any(|phrase| err.contains(*phrase));
        assert!(recognised, "unexpected error for malformed {case}: {err}");
    }
}
3758
/// Runtime controls accepted as no-ops must each leave at least one note;
/// malformed FASTA minimum-length values must be rejected.
#[test]
fn accepts_covered_runtime_noops_and_manual_auto_sizing_fallback() {
    let accepted = [
        "auto=t",
        "auto=f",
        "ordered=f",
        "verbose=t",
        "printcoverage=t",
        "tmpdir=/tmp",
        "usetmpdir=t",
        "usetmpdir=f",
        "usetempdir=f",
        "fastareadlen=4",
        "fastareadlength=4",
        "fastaminread=1",
        "fastaminlen=1",
        "fastaminlength=1",
        "forcesectionname=t",
        "fastadump=f",
    ];
    for case in accepted {
        let notes = parse(&["in=reads.fq", case]).notes;
        assert!(
            !notes.is_empty(),
            "expected an explanatory no-op note for {case}"
        );
    }

    let rejected = ["fastaminread=abc", "fastaminlen=abc", "fastaminlength=abc"];
    for case in rejected {
        let argv = ["in=reads.fq", "passes=1", case];
        let err = parse_args(argv.iter().copied().map(OsString::from))
            .unwrap_err()
            .to_string();
        assert!(
            err.contains("expects"),
            "unexpected error for malformed {case}: {err}"
        );
    }
}
3800
/// Temporary-directory arguments must leave a note, `tmpdir=` must store the
/// path, and `usetmpdir=f` must switch temp-dir usage off without dropping it.
#[test]
fn accepts_temporary_directory_controls_for_managed_temp_paths() {
    let controls = [
        "tmpdir=/tmp/bbnorm",
        "usetmpdir=t",
        "usetmpdir=f",
        "usetempdir=t",
    ];
    for case in controls {
        let cfg = parse(&["in=reads.fq", "passes=1", case]);
        let noted = cfg
            .notes
            .iter()
            .any(|note| note.contains("temporary-directory control"));
        assert!(
            noted,
            "expected temporary-directory note for {case}: {:?}",
            cfg.notes
        );
    }

    // Giving a directory stores it and leaves temp-dir usage on.
    let with_dir = parse(&["in=reads.fq", "tmpdir=/tmp/bbnorm"]);
    assert_eq!(with_dir.temp_dir, Some(PathBuf::from("/tmp/bbnorm")));
    assert!(with_dir.use_temp_dir);

    // A later `usetmpdir=f` keeps the path but turns usage off.
    let switched_off = parse(&["in=reads.fq", "tmpdir=/tmp/bbnorm", "usetmpdir=f"]);
    assert_eq!(switched_off.temp_dir, Some(PathBuf::from("/tmp/bbnorm")));
    assert!(!switched_off.use_temp_dir);
}
3826
/// Each header-trimming spelling must parse and leave at least one note.
#[test]
fn parses_header_trimming_controls_like_bbnorm() {
    let spellings = ["trd=t", "trc=t", "trimreaddescriptions=f", "trimrname=t"];
    for case in spellings {
        let notes = parse(&["in=reads.fq", case]).notes;
        assert!(
            !notes.is_empty(),
            "expected an explanatory no-op note for {case}"
        );
    }
}
3837
/// Exercises the shared I/O runtime options: a large accepted set must yield exactly 56 notes
/// and `gzip_threads == Some(1)`, malformed values must be rejected with an "expects" or
/// "suffix" error, and MPI / pairing requests must leave matching notes.
#[test]
fn accepts_shared_io_runtime_controls_as_noops_and_validates_values() {
    let accepted = [
        "in=reads.fq",
        "null",
        "monitor=f",
        "killswitch=600,0.002",
        "json=t",
        "silent=t",
        "printexecuting=f",
        "proxyhost=localhost",
        "proxyport=8080",
        "metadatafile=metadata.json",
        "testsize=t",
        "extin=.fq.gz",
        "extout=.fq",
        "bufferbf=f",
        "bufferbf1=f",
        "usejni=f",
        "bytefile1=t",
        "bytefile2=maybe",
        "bf1bufferlen=64k",
        "bfthreads=1",
        "readbufferlength=64k",
        "readbufferdata=1m",
        "readbuffers=1",
        "workers=auto",
        "workerthreads=1",
        "wt=auto",
        "threadsin=1",
        "tin=auto",
        "threadsout=1",
        "tout=auto",
        "ziplevel=2",
        "pigz=2",
        "bgzip=f",
        "zipthreads=1",
        "ztd=2.0",
        "blocksize=128",
        "nativebgzip=f",
        "usebzip2=f",
        "skipvalidation=t",
        "validate=maybe",
        "vic=f",
        "usempi=f",
        "mpi=0",
        "crismpi=f",
        "mpikeepall=f",
        "tossbrokenreads=f",
        "nullifybrokenquality=f",
        "deleteoldname=f",
        "renamebymapping=f",
        "assertcigar=f",
        "parsecustom=f",
        "shrinkheaders=f",
        "fixheader=f",
        "allownullheader=f",
        "recalpairnum=f",
        "pairreads=f",
        "flipr2=f",
        "int=f",
        "testinterleaved=f",
        "forceinterleaved=f",
        "overrideinterleaved=t",
    ];
    let parsed = parse(&accepted);
    assert_eq!(parsed.notes.len(), 56);
    assert_eq!(parsed.gzip_threads, Some(1));

    let malformed = [
        "monitor=1,2,3",
        "bf1bufferlen=abc",
        "bfthreads=abc",
        "readbufferlength=abc",
        "readbuffers=abc",
        "workers=abc",
        "threadsin=abc",
        "threadsout=abc",
        "mpi=2k",
        "ziplevel=abc",
        "pigz=2k",
        "zipthreads=abc",
        "ztd=abc",
        "blocksize=abc",
    ];
    for case in malformed {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let recognized = err.contains("expects") || err.contains("suffix");
        assert!(recognized, "unexpected error for malformed {case}: {err}");
    }

    let mpi_requests = ["usempi=t", "mpi=2", "crismpi=t", "mpikeepall=t"];
    for case in mpi_requests {
        let parsed = parse(&["in=reads.fq", "passes=1", case]);
        let mentions_mpi = parsed.notes.iter().any(|note| note.contains("MPI"));
        assert!(
            mentions_mpi,
            "missing MPI fallback note for {case}: {:?}",
            parsed.notes
        );
    }

    let pairing_requests = ["pairreads=t", "flipr2=t"];
    for case in pairing_requests {
        let parsed = parse(&["in=reads.fq", "passes=1", case]);
        let mentions_pairing = parsed.notes.iter().any(|note| note.contains("pairing"));
        assert!(
            mentions_pairing,
            "missing pairing fallback note for {case}: {:?}",
            parsed.notes
        );
    }
}
3953
/// Every SAM/BAM-oriented option must parse successfully and leave a note mentioning "SAM" or
/// "read-group"; malformed numeric values must fail with an "expects" or "invalid float" error.
#[test]
fn accepts_shared_sam_runtime_controls_as_fastq_noops_and_validates_values() {
    let accepted = [
        "sam=1.4",
        "samv=1.6",
        "samtools=f",
        "sambamba=f",
        "printHeaderWait=f",
        "nativebam=f",
        "prefernativebam=f",
        "userssw=f",
        "attachedsamline=f",
        "streamerthreads=1",
        "fastqstreamerthreads=1",
        "fastastreamerthreads=1",
        "samwriterthreads=1",
        "bamwriterthreads=1",
        "fastqwriterthreads=1",
        "fastastreamer2=f",
        "prefermd=f",
        "notags=f",
        "mdtag=f",
        "idtag=f",
        "mateqtag=f",
        "xmtag=f",
        "smtag=f",
        "amtag=f",
        "nmtag=f",
        "xttag=f",
        "stoptag=f",
        "lengthtag=f",
        "boundstag=f",
        "scoretag=f",
        "sortscaffolds=f",
        "customtag=f",
        "nhtag=f",
        "keepnames=f",
        "saa=f",
        "inserttag=f",
        "correctnesstag=f",
        "intronlen=10",
        "suppressheader=f",
        "noheadersequences=f",
        "tophat=f",
        "xs=us",
        "xstag=fr-ss",
        "flipsam=f",
        "readgroupid=rg1",
        "rgsm=sample",
    ];
    for case in accepted {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let parsed = parse_args(args).unwrap();
        let has_note = parsed
            .notes
            .iter()
            .any(|note| note.contains("SAM") || note.contains("read-group"));
        assert!(
            has_note,
            "expected SAM/read-group no-op note for {case}: {:?}",
            parsed.notes
        );
    }

    let malformed = [
        "sam=abc",
        "streamerthreads=abc",
        "fastqwriterthreads=abc",
        "intronlen=abc",
    ];
    for case in malformed {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let recognized = err.contains("expects") || err.contains("invalid float");
        assert!(recognized, "unexpected error for malformed {case}: {err}");
    }
}
4038
/// Covers the side-output histogram options in three stages: each option alone must leave a
/// "side-output" note, a combined invocation must populate the corresponding output paths and
/// bin settings, and malformed bin/length values must be rejected.
#[test]
fn accepts_side_output_stats_histograms_and_emits_quality_length_gc_and_base_histograms() {
    let single_options = [
        "qhist=qual.tsv",
        "bqhist=basequal.tsv",
        "qchist=qcount.tsv",
        "aqhist=avg.tsv",
        "obqhist=overall.tsv",
        "mhist=match.tsv",
        "ihist=insert.tsv",
        "bhist=base.tsv",
        "qahist=qacc.tsv",
        "indelhist=indel.tsv",
        "ehist=error.tsv",
        "lhist=length.tsv",
        "gchist=gc.tsv",
        "enthist=entropy.tsv",
        "barcodestats=barcode.tsv",
        "thist=time.tsv",
        "idhist=id.tsv",
        "gcbins=auto",
        "gchistbins=100",
        "entropybins=auto",
        "enthistbins=100",
        "idhistbins=auto",
        "idbins=100",
        "gcplot=f",
        "entropyns=t",
        "maxhistlen=1k",
        "fixindels=f",
    ];
    for case in single_options {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let parsed = parse_args(args).unwrap();
        let has_note = parsed.notes.iter().any(|note| note.contains("side-output"));
        assert!(
            has_note,
            "expected side-output fallback note for {case}: {:?}",
            parsed.notes
        );
    }

    let combined = [
        "in=reads.fq",
        "passes=1",
        "qhist=quality.tsv",
        "bqhist=basequal.tsv",
        "qchist=qcount.tsv",
        "aqhist=avg.tsv",
        "obqhist=overall.tsv",
        "mhist=match.tsv",
        "ihist=insert.tsv",
        "qahist=qacc.tsv",
        "indelhist=indel.tsv",
        "ehist=error.tsv",
        "lhist=length.tsv",
        "gchist=gc.tsv",
        "bhist=base.tsv",
        "enthist=entropy.tsv",
        "idhist=id.tsv",
        "gcbins=100",
        "entropybins=100",
        "idbins=100",
        "maxhistlen=1k",
    ];
    let parsed = parse_args(combined.into_iter().map(OsString::from)).unwrap();

    // Shorthand for the expected value of an output-path field.
    let out_path = |name: &str| Some(PathBuf::from(name));
    assert_eq!(parsed.quality_hist_out, out_path("quality.tsv"));
    assert_eq!(parsed.match_hist_out, out_path("match.tsv"));
    assert_eq!(parsed.insert_hist_out, out_path("insert.tsv"));
    assert_eq!(parsed.quality_accuracy_hist_out, out_path("qacc.tsv"));
    assert_eq!(parsed.indel_hist_out, out_path("indel.tsv"));
    assert_eq!(parsed.error_hist_out, out_path("error.tsv"));
    assert_eq!(parsed.base_quality_hist_out, out_path("basequal.tsv"));
    assert_eq!(parsed.quality_count_hist_out, out_path("qcount.tsv"));
    assert_eq!(parsed.average_quality_hist_out, out_path("avg.tsv"));
    assert_eq!(parsed.overall_base_quality_hist_out, out_path("overall.tsv"));
    assert_eq!(parsed.length_hist_out, out_path("length.tsv"));
    assert_eq!(parsed.gc_hist_out, out_path("gc.tsv"));
    assert_eq!(parsed.base_hist_out, out_path("base.tsv"));
    assert_eq!(parsed.entropy_hist_out, out_path("entropy.tsv"));
    assert_eq!(parsed.identity_hist_out, out_path("id.tsv"));
    // `barcodestats` was not part of the combined invocation.
    assert_eq!(parsed.barcode_stats_out, None);
    assert_eq!(parsed.gc_bins, Some(100));
    assert_eq!(parsed.entropy_bins, 100);
    assert_eq!(parsed.identity_bins, 100);
    assert_eq!(parsed.side_hist_len, Some(1000));

    let barcode = parse(&["in=reads.fq", "barcodestats=barcode.tsv"]);
    assert_eq!(barcode.barcode_stats_out, Some(PathBuf::from("barcode.tsv")));

    let malformed = [
        "gcbins=abc",
        "entropybins=abc",
        "idhistbins=abc",
        "maxhistlen=abc",
        "maxhistlen=0",
    ];
    for case in malformed {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let recognized = err.contains("expects") || err.contains("suffix");
        assert!(recognized, "unexpected error for malformed {case}: {err}");
    }
}
4167
/// Covers the cardinality/loglog options: each one alone must leave a "cardinality/loglog"
/// note, a combined invocation must populate `cfg.cardinality`, later `=f` values must switch
/// the input/output flags back off, and invalid values must be rejected.
#[test]
fn accepts_cardinality_loglog_controls_as_bounded_estimates_and_validates_values() {
    let single_options = [
        "cardinality=t",
        "cardinality=31",
        "loglog=f",
        "loglogin=t",
        "cardinalityout=t",
        "loglogout=f",
        "buckets=1k",
        "loglogbuckets=100",
        "loglogcorrection=t",
        "loglogcf=f",
        "loglogbits=16",
        "loglogk=31",
        "cardinalityk=31",
        "kcardinality=31",
        "loglogklist=21,31",
        "loglogseed=42",
        "loglogminprob=0.5",
        "loglogtype=loglog2",
        "loglogmean=t",
        "loglogmedian=t",
        "loglogmwa=t",
        "logloghmean=t",
        "logloggmean=t",
        "loglogmantissa=8",
        "loglogcounts=t",
        "loglogcount=f",
    ];
    for case in single_options {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let parsed = parse_args(args).unwrap();
        let has_note = parsed
            .notes
            .iter()
            .any(|note| note.contains("cardinality/loglog"));
        assert!(
            has_note,
            "expected cardinality/loglog fallback note for {case}: {:?}",
            parsed.notes
        );
    }

    let configured = parse(&[
        "in=reads.fq",
        "passes=1",
        "cardinality=t",
        "cardinalityout=t",
        "buckets=1k",
        "loglogseed=42",
        "loglogk=25",
        "loglogminprob=0.25",
    ]);
    assert!(configured.cardinality.input);
    assert!(configured.cardinality.output);
    assert_eq!(configured.cardinality.buckets, 1000);
    assert_eq!(configured.cardinality.seed, 42);
    assert_eq!(configured.cardinality.k, Some(25));
    assert_eq!(configured.cardinality.min_probability, 0.25);

    // A later `=f` for the same flag overrides the earlier `=t`.
    let overridden = parse(&[
        "in=reads.fq",
        "passes=1",
        "cardinality=t",
        "cardinality=f",
        "cardinalityout=t",
        "loglogout=f",
    ]);
    assert!(!overridden.cardinality.input);
    assert!(!overridden.cardinality.output);

    let malformed = [
        "cardinality=maybe",
        "buckets=0",
        "buckets=100g",
        "loglogbits=abc",
        "loglogklist=21,abc",
        "loglogseed=abc",
        "loglogminprob=abc",
        "loglogminprob=2",
    ];
    for case in malformed {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let recognized = err.contains("expects") || err.contains("above the Rust safety cap");
        assert!(recognized, "unexpected error for malformed {case}: {err}");
    }
}
4264
/// Quality-recalibration tuning options (plain and `_p1` / `_p2` suffixed) must each add one
/// note, and malformed numeric values must be rejected with an "expects" or "suffix" error.
#[test]
fn accepts_quality_recalibration_controls_as_noops_and_validates_values() {
    let plain = [
        "in=reads.fq",
        "trackall=f",
        "clearmatrices=f",
        "loadq=f",
        "loadq102=f",
        "loadqap=f",
        "loadqbp=f",
        "loadqpt=f",
        "loadqbt=f",
        "loadq10=f",
        "loadq12=f",
        "loadqb12=f",
        "loadqb012=f",
        "loadqb123=f",
        "loadqb234=f",
        "loadq12b12=f",
        "loadqp=f",
        "observationcutoff=1k",
        "recalpasses=1",
        "recalqmax=50",
        "recalqmin=2",
        "recalwithposition=t",
        "qmatrixmode=max",
        "recaltile=f",
    ];
    let parsed = parse(&plain);
    assert_eq!(parsed.notes.len(), 23);

    let pass_suffixed = [
        "in=reads.fq",
        "loadq102_p1=f",
        "loadq_p2=t",
        "observationcutoff_p1=1k",
        "recalpasses_p2=1",
        "recalqmax_p1=50",
        "recalqmin_p2=2",
        "recalwithposition_p1=t",
        "qmatrixmode_p2=max",
        "recaltile_p1=f",
    ];
    let parsed = parse(&pass_suffixed);
    assert_eq!(parsed.notes.len(), 9);

    let malformed = [
        "observationcutoff=abc",
        "recalpasses=abc",
        "recalqmax=abc",
        "observationcutoff_p1=abc",
        "recalpasses_p2=abc",
        "recalqmax_p1=abc",
    ];
    for case in malformed {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let recognized = err.contains("expects") || err.contains("suffix");
        assert!(recognized, "unexpected error for malformed {case}: {err}");
    }
}
4330
/// `recalibrate`-family options set to false must each add a "keeps … disabled" note; setting
/// them to true must fail, and a non-boolean value must produce a boolean-expectation error.
#[test]
fn accepts_disabled_recalibrate_controls_and_rejects_enabled_recalibration() {
    let disabled = parse(&[
        "in=reads.fq",
        "recalibrate=f",
        "recalibratequality=f",
        "recal=f",
        "recalibrate_p1=f",
    ]);
    assert_eq!(disabled.notes.len(), 4);
    let all_explain_disabled = disabled
        .notes
        .iter()
        .all(|note| note.contains("keeps BBTools quality recalibration disabled"));
    assert!(all_explain_disabled);

    let enabling = ["recalibrate=t", "recalibratequality=t", "recal=t"];
    for case in enabling {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let rejected = err.contains("enables BBTools quality recalibration");
        assert!(rejected, "unexpected error for enabled {case}: {err}");
    }

    let args = ["in=reads.fq", "passes=1", "recalibrate=maybe"]
        .into_iter()
        .map(OsString::from);
    let err = parse_args(args).unwrap_err().to_string();
    let wants_boolean = err.contains("recalibrate expects a boolean value");
    assert!(
        wants_boolean,
        "unexpected error for malformed recalibrate: {err}"
    );
}
4373
/// Non-positive `breaklen` / `breaklength` values must be accepted with a "keeps … disabled"
/// note each, positive values must be rejected, and a non-numeric value must fail to parse.
#[test]
fn accepts_disabled_break_length_controls_and_rejects_read_splitting() {
    let disabled = parse(&["in=reads.fq", "breaklen=0", "breaklength=-1"]);
    assert_eq!(disabled.notes.len(), 2);
    let all_explain_disabled = disabled
        .notes
        .iter()
        .all(|note| note.contains("keeps BBTools read breaking disabled"));
    assert!(all_explain_disabled);

    let enabling = ["breaklen=50", "breaklength=1"];
    for case in enabling {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let rejected = err.contains("enables BBTools read breaking");
        assert!(rejected, "unexpected error for enabled {case}: {err}");
    }

    let args = ["in=reads.fq", "passes=1", "breaklen=abc"]
        .into_iter()
        .map(OsString::from);
    let err = parse_args(args).unwrap_err().to_string();
    let names_option = err.contains("breaklen expects");
    assert!(
        names_option,
        "unexpected error for malformed breaklen: {err}"
    );
}
4410
/// Shared environment/runtime options must be accepted with 37 notes in total and
/// `locked_increment == Some(false)` for the argument list below; malformed entropy values and
/// enabled amino-acid modes must be rejected.
#[test]
fn accepts_shared_environment_runtime_controls_as_noops_and_validates_values() {
    let accepted = [
        "in=reads.fq",
        "amino=f",
        "amino8=f",
        "validatebranchless=maybe",
        "fairqueues=t",
        "fixextensions=f",
        "2passresize=f",
        "parallelsort=f",
        "gcbeforemem=t",
        "warnifnosequence=f",
        "warnfirsttimeonly=f",
        "kmg=t",
        "forceJavaParseDouble=f",
        "simd=auto",
        "simdsparse=f",
        "simdmultsparse=f",
        "simdfmasparse=f",
        "simdcopy=f",
        "aws=f",
        "nersc=t",
        "lowmem=f",
        "lockedincrement=auto",
        "symmetricwrite=f",
        "buffer=10",
        "buffered=f",
        "sidechannelstats=f",
        "silva=f",
        "unite=f",
        "imghq=f",
        "callins=f",
        "calldel=f",
        "callsub=f",
        "callindel=f",
        "calljunct=f",
        "callnocall=f",
        "protFull=t",
        "entropyk=3",
        "entropywindow=50",
    ];
    let parsed = parse(&accepted);
    assert_eq!(parsed.notes.len(), 37);
    assert_eq!(parsed.locked_increment, Some(false));

    let malformed = ["entropyk=abc", "entropywindow=abc"];
    for case in malformed {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let recognized = err.contains("expects");
        assert!(recognized, "unexpected error for malformed {case}: {err}");
    }

    let amino_modes = ["amino=t", "amino8=t"];
    for case in amino_modes {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let rejected = err.contains("amino-acid kmer mode");
        assert!(rejected, "unexpected error for enabled {case}: {err}");
    }
}
4484
/// Verifies the base-cleanup boolean flags and the mapping from each junk-handling option to
/// the resulting `JunkMode`.
#[test]
fn parses_base_cleanup_controls_like_bbnorm() {
    let all_flags = parse(&[
        "in=reads.fq",
        "utot=t",
        "tuc=t",
        "lctn=t",
        "dotdashxton=t",
        "itn=t",
        "fixjunk=t",
    ]);
    assert!(all_flags.u_to_t);
    assert!(all_flags.to_upper_case);
    assert!(all_flags.lower_case_to_n);
    assert!(all_flags.dot_dash_x_to_n);
    assert!(all_flags.iupac_to_n);
    assert_eq!(all_flags.junk_mode, JunkMode::Fix);

    let ignore_on = parse(&["in=reads.fq", "ignorejunk=t"]);
    assert_eq!(ignore_on.junk_mode, JunkMode::Ignore);

    let flag_on = parse(&["in=reads.fq", "flagjunk=t"]);
    assert_eq!(flag_on.junk_mode, JunkMode::Flag);

    let toss_on = parse(&["in=reads.fq", "tossjunk=t"]);
    assert_eq!(toss_on.junk_mode, JunkMode::Flag);

    let junk_discard = parse(&["in=reads.fq", "junk=discard"]);
    assert_eq!(junk_discard.junk_mode, JunkMode::Flag);

    // Turning the crash/fail aliases off selects the ignore mode.
    let crash_off = parse(&["in=reads.fq", "crashjunk=f"]);
    assert_eq!(crash_off.junk_mode, JunkMode::Ignore);

    let fail_off = parse(&["in=reads.fq", "failjunk=f"]);
    assert_eq!(fail_off.junk_mode, JunkMode::Ignore);

    // The later option wins when two junk options are combined.
    let ignore_then_crash = parse(&["in=reads.fq", "ignorejunk=t", "crashjunk=t"]);
    assert_eq!(ignore_then_crash.junk_mode, JunkMode::Crash);

    let junk_fail = parse(&["in=reads.fq", "junk=fail"]);
    assert_eq!(junk_fail.junk_mode, JunkMode::Crash);

    let junk_iupac = parse(&["in=reads.fq", "junk=iupacton"]);
    assert!(junk_iupac.fix_junk_and_iupac);
    assert_eq!(junk_iupac.junk_mode, JunkMode::Fix);
}
4531
/// `flagjunk=f` after `flagjunk=t` must end in `JunkMode::Crash`, whereas `tossjunk=f` after
/// `tossjunk=t` must leave `JunkMode::Flag` in place.
#[test]
fn false_flagjunk_alias_resets_to_crash_like_bbnorm() {
    let flag_toggled = parse(&["in=reads.fq", "flagjunk=t", "flagjunk=f"]);
    assert_eq!(flag_toggled.junk_mode, JunkMode::Crash);

    let toss_toggled = parse(&["in=reads.fq", "tossjunk=t", "tossjunk=f"]);
    assert_eq!(toss_toggled.junk_mode, JunkMode::Flag);
}
4540
/// The ten trim-related options below must be accepted and produce exactly ten notes.
#[test]
fn accepts_bbnorm_inactive_trim_parser_options_as_noops() {
    let trim_args = [
        "in=reads.fq",
        "trimclip=t",
        "trimpolya=t",
        "trimpolyg=10",
        "trimpolygleft=f",
        "trimpolycright=2",
        "maxnonpoly=3",
        "ftr=10",
        "ftl=2",
        "ftm=4",
        "ftr2=7",
    ];
    let parsed = parse(&trim_args);
    assert_eq!(parsed.notes.len(), 10);
}
4558
/// The nineteen read-filter options below must be accepted with exactly nineteen notes, and
/// malformed `mintrimlen` / `badbarcodes` values must fail with an "expects" error.
#[test]
fn accepts_bbnorm_inactive_read_filter_parser_options_as_noops() {
    let filter_args = [
        "in=reads.fq",
        "maxlen=50",
        "minlenfraction=0.8",
        "maxns=0",
        "mingc=0.9",
        "maxgc=0.1",
        "usepairgc=t",
        "minconsecutivebases=200",
        "maq=40,20",
        "maqb=20",
        "mbq=30",
        "chastityfilter=t",
        "trimbadsequence=t",
        "failnobarcode=f",
        "badbarcodes=fail",
        "barcodefilter=f",
        "barcodes=ACGT,TGCA",
        "aqbp=t",
        "mintrimlen=10",
        "untrim=f",
    ];
    let parsed = parse(&filter_args);
    assert_eq!(parsed.notes.len(), 19);

    let malformed = ["mintrimlen=abc", "badbarcodes=maybe"];
    for case in malformed {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let recognized = err.contains("expects");
        assert!(recognized, "unexpected error for malformed {case}: {err}");
    }
}
4599
/// `build` / `genome` with a numeric value must leave a "genome-build context" note, while a
/// non-numeric genome and the `idfilter` / `subfilter` options must be rejected.
#[test]
fn accepts_genome_build_context_controls_as_normalization_noops() {
    let accepted = ["build=1", "genome=1"];
    for case in accepted {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let parsed = parse_args(args).unwrap();
        let has_note = parsed
            .notes
            .iter()
            .any(|note| note.contains("genome-build context"));
        assert!(
            has_note,
            "expected genome-build context no-op note for {case}: {:?}",
            parsed.notes
        );
    }

    let rejected = ["genome=abc", "idfilter=0.9", "subfilter=1"];
    for case in rejected {
        let args = ["in=reads.fq", "passes=1", case]
            .into_iter()
            .map(OsString::from);
        let err = parse_args(args).unwrap_err().to_string();
        let recognized = err.contains("expects") || err.contains("unknown or unsupported");
        assert!(recognized, "unexpected error for malformed {case}: {err}");
    }
}
4632
/// Checks how the explicit interleaving options set `interleaved` / `test_interleaved`, and
/// that single-stream outputs leave the second output slots empty.
#[test]
fn parses_explicit_interleaved_single_stream_outputs() {
    let explicit = parse(&[
        "in=reads.fq",
        "interleaved=t",
        "out=keep.fq",
        "outt=toss.fq",
    ]);
    assert!(explicit.interleaved);
    assert_eq!(explicit.in1.unwrap(), PathBuf::from("reads.fq"));
    assert_eq!(explicit.out1.unwrap(), PathBuf::from("keep.fq"));
    assert_eq!(explicit.out_toss1.unwrap(), PathBuf::from("toss.fq"));
    assert!(explicit.out2.is_none());
    assert!(explicit.out_toss2.is_none());

    // The short alias forces interleaving and disables auto-detection.
    let short_alias = parse(&["in=reads.fq", "int=t"]);
    assert!(short_alias.interleaved);
    assert!(!short_alias.test_interleaved);

    let forced = parse(&["in=reads.fq", "forceinterleaved=t"]);
    assert!(forced.interleaved);
    assert!(!forced.test_interleaved);

    let detection_off = parse(&["in=reads.fq", "testinterleaved=f"]);
    assert!(!detection_off.interleaved);
    assert!(!detection_off.test_interleaved);

    let overridden = parse(&["in=reads.fq", "overrideinterleaved=t"]);
    assert!(!overridden.notes.is_empty());
}
4663
/// Without any interleaving option, `interleaved` stays false and `test_interleaved` stays
/// true, even when paired outputs are requested.
#[test]
fn defaults_to_auto_interleaved_detection() {
    let paired_outputs = [
        "in=reads.fq",
        "out=keep1.fq",
        "out2=keep2.fq",
        "outt=toss1.fq",
        "outt2=toss2.fq",
    ];
    let parsed = parse(&paired_outputs);
    assert!(!parsed.interleaved);
    assert!(parsed.test_interleaved);
}
4676
/// With two input files, a `#`-pattern kept output and a single tossed output are stored
/// verbatim in the first slots, leaving `out2` / `out_toss2` unset.
#[test]
fn paired_input_allows_bbnorm_single_stream_or_hash_pattern_outputs() {
    let paired_args = [
        "in=reads1.fq",
        "in2=reads2.fq",
        "out=keep#.fq",
        "outt=toss.fq",
    ];
    let parsed = parse(&paired_args);
    assert_eq!(parsed.out1.unwrap(), PathBuf::from("keep#.fq"));
    assert!(parsed.out2.is_none());
    assert_eq!(parsed.out_toss1.unwrap(), PathBuf::from("toss.fq"));
    assert!(parsed.out_toss2.is_none());
}
4690
/// `interleaved=t` combined with `in2` keeps both input paths and the interleaved flag.
#[test]
fn interleaved_true_with_in2_remains_two_file_paired_like_bbnorm() {
    let two_file_args = ["in=reads1.fq", "in2=reads2.fq", "interleaved=t"];
    let parsed = parse(&two_file_args);
    assert!(parsed.interleaved);
    assert_eq!(parsed.in1.unwrap(), PathBuf::from("reads1.fq"));
    assert_eq!(parsed.in2.unwrap(), PathBuf::from("reads2.fq"));
}
4698
/// An `in=` value containing `#` is expected to expand into the `1` and `2` file names here.
#[test]
fn expands_missing_hash_input_pattern_like_bbnorm() {
    let parsed = parse(&["in=reads#.fq"]);
    let first = parsed.in1.unwrap();
    assert_eq!(first, PathBuf::from("reads1.fq"));
    let second = parsed.in2.unwrap();
    assert_eq!(second, PathBuf::from("reads2.fq"));
}
4705
/// When a file whose name literally contains `#` exists on disk, the input path is kept as-is
/// and no second input is derived.
#[test]
fn keeps_literal_hash_input_when_file_exists_like_bbnorm() {
    let scratch = tempfile::tempdir().unwrap();
    let literal = scratch.path().join("reads#.fq");
    std::fs::write(&literal, b"@r1\nACGT\n+\nIIII\n").unwrap();

    let args = [format!("in={}", literal.display()), "passes=1".to_string()]
        .into_iter()
        .map(OsString::from);
    let parsed = parse_args(args).unwrap();
    assert_eq!(parsed.in1.unwrap(), literal);
    assert!(parsed.in2.is_none());
}
4721
/// An existing `extra=` file whose name contains commas is kept as a single path, and a
/// following `extra=null` does not add or remove entries.
#[test]
fn keeps_literal_comma_extra_when_file_exists_like_bbnorm() {
    let scratch = tempfile::tempdir().unwrap();
    let input = scratch.path().join("main.fq");
    let literal = scratch.path().join("extra,with,commas.fq");
    std::fs::write(&input, b"@r1\nACGT\n+\nIIII\n").unwrap();
    std::fs::write(&literal, b"@r2\nACGT\n+\nIIII\n").unwrap();

    let raw_args = [
        format!("in={}", input.display()),
        format!("extra={}", literal.display()),
        "extra=null".to_string(),
        "passes=1".to_string(),
    ];
    let parsed = parse_args(raw_args.into_iter().map(OsString::from)).unwrap();
    assert_eq!(parsed.extra, vec![literal]);
}
4743
/// Two comma-separated config files are expanded into arguments; a `target=` given after the
/// `config=` argument takes precedence over the one inside the second file, and a note reports
/// the six expanded lines.
#[test]
fn expands_config_files_like_bbnorm() {
    let scratch = tempfile::tempdir().unwrap();
    let first_file = scratch.path().join("a.config");
    let second_file = scratch.path().join("b.config");
    // The blank line and the `#` line are not counted among the six expanded arguments.
    std::fs::write(
        &first_file,
        "\n# comment\nin=reads.fq\npasses=1\nkeepall=t\nk=21\n",
    )
    .unwrap();
    std::fs::write(&second_file, "target=7\nout=keep.fq\n").unwrap();

    let raw_args = [
        format!("config={},{}", first_file.display(), second_file.display()),
        "target=9".to_string(),
    ];
    let parsed = parse_args(raw_args.into_iter().map(OsString::from)).unwrap();

    assert_eq!(parsed.in1.unwrap(), PathBuf::from("reads.fq"));
    assert_eq!(parsed.k, 21);
    assert_eq!(parsed.target_depth, 9);
    assert_eq!(parsed.out1.unwrap(), PathBuf::from("keep.fq"));
    assert!(parsed.keep_all);
    let reports_expansion = parsed
        .notes
        .iter()
        .any(|note| note.contains("expanded into 6 BBTools-style argument line"));
    assert!(reports_expansion);
}
4777
4778    #[test]
4779    fn reports_missing_config_files_like_bbnorm() {
4780        let dir = tempfile::tempdir().unwrap();
4781        let missing = dir.path().join("missing.config");
4782        let err = parse_args(
4783            [format!("config={}", missing.display())]
4784                .into_iter()
4785                .map(OsString::from),
4786        )
4787        .unwrap_err()
4788        .to_string();
4789
4790        assert!(err.contains("could not process config file"));
4791    }
4792
4793    #[test]
4794    fn rejects_missing_extra_inputs_like_bbnorm() {
4795        let err = parse_args(
4796            ["in=reads.fq", "extra=missing#.fq", "passes=1"]
4797                .into_iter()
4798                .map(OsString::from),
4799        )
4800        .unwrap_err();
4801        assert!(err.to_string().contains("extra input missing#.fq"));
4802    }
4803
4804    #[test]
4805    fn parses_single_pass_final_stage_aliases() {
4806        let cfg = parse(&[
4807            "in=reads.fq",
4808            "tbrf=t",
4809            "dbo2=t",
4810            "tossbadreads1=t",
4811            "dbo1=t",
4812        ]);
4813        assert!(cfg.toss_error_reads);
4814        assert!(cfg.toss_error_reads_first);
4815        assert!(cfg.discard_bad_only);
4816        assert!(cfg.discard_bad_only_first);
4817    }
4818
4819    #[test]
4820    fn parses_multipass_and_countup_controls() {
4821        let cfg = parse(&[
4822            "in=reads.fq",
4823            "passes=2",
4824            "target1=7",
4825            "targetbadpercentilelow=20",
4826            "tbph=0.8",
4827            "abrc=t",
4828        ]);
4829        assert_eq!(cfg.target_depth_first, Some(7));
4830        assert_eq!(cfg.target_bad_percent_low, 0.2);
4831        assert_eq!(cfg.target_bad_percent_high, 0.8);
4832        assert!(cfg.add_bad_reads_countup);
4833
4834        for case in ["target1=abc", "targetbadpercentilelow=abc", "tbph=abc"] {
4835            let err = parse_args(
4836                ["in=reads.fq", "passes=1", case]
4837                    .into_iter()
4838                    .map(OsString::from),
4839            )
4840            .unwrap_err()
4841            .to_string();
4842            assert!(
4843                err.contains("expects"),
4844                "unexpected error for malformed {case}: {err}"
4845            );
4846        }
4847    }
4848
4849    #[test]
4850    fn allows_outuncorrected_in_multipass_runs() {
4851        let cfg = parse(&[
4852            "in=reads_1.fq",
4853            "in2=reads_2.fq",
4854            "passes=2",
4855            "out=keep_1.fq",
4856            "out2=keep_2.fq",
4857            "outuncorrected=unc_1.fq",
4858            "outuncorrected2=unc_2.fq",
4859        ]);
4860        assert_eq!(cfg.passes, 2);
4861        assert_eq!(
4862            cfg.out_uncorrected1.as_deref(),
4863            Some(std::path::Path::new("unc_1.fq"))
4864        );
4865        assert_eq!(
4866            cfg.out_uncorrected2.as_deref(),
4867            Some(std::path::Path::new("unc_2.fq"))
4868        );
4869    }
4870
4871    #[test]
4872    fn final_stage_alias_can_override_conflated_alias() {
4873        let cfg = parse(&["in=reads.fq", "tossbadreads=t", "tossbadreadsf=f"]);
4874        assert!(!cfg.toss_error_reads);
4875    }
4876
4877    #[test]
4878    fn remove_if_either_bad_alias_inverts_require_both_bad() {
4879        let cfg = parse(&["in=reads.fq", "requirebothbad=t", "removeifeitherbad=t"]);
4880        assert!(!cfg.require_both_bad);
4881
4882        let cfg = parse(&["in=reads.fq", "rieb=f"]);
4883        assert!(cfg.require_both_bad);
4884    }
4885
4886    #[test]
4887    fn explicit_interleaved_false_rejects_second_outputs_without_in2() {
4888        let err = parse_args(
4889            [
4890                "in=reads.fq",
4891                "interleaved=f",
4892                "out=keep1.fq",
4893                "out2=keep2.fq",
4894                "passes=1",
4895            ]
4896            .into_iter()
4897            .map(OsString::from),
4898        )
4899        .unwrap_err();
4900        assert!(err.to_string().contains("out2"));
4901    }
4902
4903    #[test]
4904    fn enabled_ecc_sets_real_correction_fields() {
4905        let cfg = parse_args(["in=x.fq", "ecc=t"].into_iter().map(OsString::from)).unwrap();
4906        assert_eq!(cfg.passes, 2);
4907        assert!(cfg.error_correct);
4908        assert!(cfg.error_correct_first);
4909        assert!(cfg.error_correct_final);
4910        assert!(!cfg.overlap_error_correct);
4911        assert!(!cfg.mark_errors_only);
4912        assert!(cfg.notes.is_empty());
4913
4914        let cfg = parse(&["in=x.fq", "ecc=f"]);
4915        assert!(!cfg.error_correct);
4916        assert!(!cfg.error_correct_first);
4917        assert!(!cfg.error_correct_final);
4918        assert!(!cfg.overlap_error_correct);
4919
4920        let cfg = parse(&["in=x.fq", "ecc1=t", "ecc2=f"]);
4921        assert!(cfg.error_correct);
4922        assert!(cfg.error_correct_first);
4923        assert!(!cfg.error_correct_final);
4924
4925        let cfg = parse(&["in=x.fq", "ecc1=f", "eccf=t"]);
4926        assert!(cfg.error_correct);
4927        assert!(!cfg.error_correct_first);
4928        assert!(cfg.error_correct_final);
4929
4930        let cfg = parse(&["in=x.fq", "markerrors=t"]);
4931        assert!(cfg.error_correct);
4932        assert!(cfg.error_correct_first);
4933        assert!(!cfg.error_correct_final);
4934
4935        let cfg = parse(&["in=x.fq", "ecco=t"]);
4936        assert!(cfg.error_correct);
4937        assert!(cfg.error_correct_first);
4938        assert!(cfg.error_correct_final);
4939        assert!(cfg.overlap_error_correct);
4940        assert!(!cfg.overlap_error_correct_auto);
4941        assert!(cfg.notes[0].contains("paired overlap repair"));
4942
4943        let cfg = parse(&["in=x.fq", "ecco=auto"]);
4944        assert!(cfg.error_correct);
4945        assert!(cfg.error_correct_first);
4946        assert!(cfg.error_correct_final);
4947        assert!(!cfg.overlap_error_correct);
4948        assert!(cfg.overlap_error_correct_auto);
4949        assert!(cfg.notes[0].contains("automatic overlap"));
4950
4951        let cfg = parse(&["in=x.fq", "ecco=t", "ecco=f"]);
4952        assert!(cfg.error_correct);
4953        assert!(!cfg.overlap_error_correct);
4954        assert!(!cfg.overlap_error_correct_auto);
4955    }
4956
4957    #[test]
4958    fn accepts_ecc_tuning_controls_and_validates_integers() {
4959        let cfg = parse(&[
4960            "in=reads.fq",
4961            "ecclimit=3",
4962            "eccmaxqual=127",
4963            "errorcorrectratio=140",
4964            "echighthresh=22",
4965            "eclowthresh=2",
4966            "suflen=3",
4967            "prefixlen=3",
4968            "cfl=t",
4969            "cfr=f",
4970        ]);
4971        assert_eq!(cfg.max_errors_to_correct, 3);
4972        assert_eq!(cfg.max_quality_to_correct, 127);
4973        assert_eq!(cfg.error_correct_ratio, 140);
4974        assert_eq!(cfg.error_correct_high_thresh, 22);
4975        assert_eq!(cfg.error_correct_low_thresh, 2);
4976        assert_eq!(cfg.suffix_len, 3);
4977        assert_eq!(cfg.prefix_len, 3);
4978        assert!(cfg.correct_from_left);
4979        assert!(!cfg.correct_from_right);
4980        assert!(cfg.notes.is_empty());
4981
4982        for case in [
4983            "ecclimit=abc",
4984            "eccmaxqual=abc",
4985            "ecr=abc",
4986            "echthresh=abc",
4987            "eclt=abc",
4988            "suflen=abc",
4989            "prelen=abc",
4990        ] {
4991            let err = parse_args(
4992                ["in=reads.fq", "passes=1", case]
4993                    .into_iter()
4994                    .map(OsString::from),
4995            )
4996            .unwrap_err()
4997            .to_string();
4998            assert!(
4999                err.contains("expects"),
5000                "unexpected error for malformed {case}: {err}"
5001            );
5002        }
5003    }
5004
5005    #[test]
5006    fn parses_countup_mode() {
5007        let cfg = parse_args(["in=x.fq", "countup=t"].into_iter().map(OsString::from)).unwrap();
5008        assert!(cfg.count_up);
5009
5010        let cfg = parse(&["in=x.fq", "countup=f"]);
5011        assert!(!cfg.count_up);
5012        assert!(cfg.notes.iter().any(|note| note.contains("countup=f")));
5013    }
5014
5015    #[test]
5016    fn parses_experimental_gpu_counting_controls() {
5017        let cfg = parse(&[
5018            "in=reads.fq",
5019            "gpucounting=t",
5020            "gpuhelper=tmp/cuda_kmer_reduce_runs",
5021            "gpupersistent=t",
5022        ]);
5023        assert!(cfg.gpu_counting);
5024        assert_eq!(
5025            cfg.gpu_helper,
5026            Some(PathBuf::from("tmp/cuda_kmer_reduce_runs"))
5027        );
5028        assert!(cfg.gpu_persistent);
5029        assert!(
5030            cfg.notes
5031                .iter()
5032                .any(|note| note.contains("experimental CUDA"))
5033        );
5034    }
5035
5036    #[test]
5037    fn wrapper_sampling_options_fall_back_to_supported_normalization() {
5038        for case in [
5039            "sampleoutput=1",
5040            "readsample=1",
5041            "kmersample=1",
5042            "samplerate=0.5",
5043            "sample=0.5",
5044            "sampleseed=1",
5045            "seed=1",
5046        ] {
5047            let cfg = parse_args(
5048                ["in=x.fq", "passes=1", case]
5049                    .into_iter()
5050                    .map(OsString::from),
5051            )
5052            .unwrap();
5053            assert!(
5054                cfg.notes
5055                    .iter()
5056                    .any(|note| note.contains("Rust ignores it")),
5057                "expected sampling fallback note for {case}"
5058            );
5059        }
5060    }
5061
5062    #[test]
5063    fn nondeterministic_mode_stays_enabled_for_random_selection() {
5064        for case in ["deterministic=t", "dr=t", "det=t"] {
5065            let cfg = parse(&["in=reads.fq", case]);
5066            assert!(cfg.deterministic, "expected deterministic mode for {case}");
5067        }
5068
5069        let cfg = parse_args(
5070            ["in=reads.fq", "passes=1", "deterministic=f"]
5071                .into_iter()
5072                .map(OsString::from),
5073        )
5074        .unwrap();
5075        assert!(!cfg.deterministic);
5076        assert!(
5077            cfg.notes
5078                .iter()
5079                .all(|note| !note.contains("deterministic=f is not implemented yet"))
5080        );
5081        assert!(
5082            cfg.notes
5083                .iter()
5084                .any(|note| note.contains("faster parallel replay"))
5085        );
5086    }
5087}