1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
// args.rs - Command-line argument definitions
use argh::FromArgs;
// Command-line interface of cgDist, parsed by the `argh` derive macro.
//
// Maintenance notes:
// * The `///` doc comments below are not just documentation: argh uses them as
//   the description shown in `--help`, so editing them changes program output.
//   Use plain `//` comments (like this one) for notes aimed at maintainers.
// * Flag names follow the field names with `_` replaced by `-`
//   (e.g. `hamming_fallback` is referred to as `--hamming-fallback` in the help text).
// * `#[argh(switch)]` fields are plain `bool` flags; `#[argh(option)]` fields take a
//   value. An `Option<T>` option may be omitted; a non-`Option` option carries an
//   explicit `default = "..."` expression.
// * This struct only declares and parses the options. Validation of the documented
//   value sets/ranges and handling of deprecated aliases is not visible here and
//   presumably lives in the caller -- TODO confirm.
#[derive(FromArgs)]
/// cgDist - High-performance distance matrix calculator
pub struct Args {
/// print the cgdist version and exit
#[argh(switch, short = 'V')]
pub version: bool,
// --- Input / output paths -------------------------------------------------
// All three are `Option`, so the parser itself does not require them.
/// path to FASTA schema directory or schema file
#[argh(option)]
pub schema: Option<String>,
/// path to allelic profile matrix (.tsv or .csv)
#[argh(option)]
pub profiles: Option<String>,
/// output distance matrix file
#[argh(option)]
pub output: Option<String>,
// --- Distance computation and output formatting ---------------------------
// `mode`, `format` and `missing_char` are free-form `String`s: the value lists in
// the help text are not enforced by the type.
/// distance mode: snps, snps-indel-contiguous, snps-indel-bases, hamming (default: snps).
/// Legacy alias: snps-indel-events == snps-indel-contiguous (deprecated).
#[argh(option, default = "String::from(\"snps\")")]
pub mode: String,
/// output format: tsv, csv, phylip, nexus (default: tsv)
#[argh(option, default = "String::from(\"tsv\")")]
pub format: String,
/// missing data character (default: -)
#[argh(option, default = "String::from(\"-\")")]
pub missing_char: String,
/// minimum number of shared loci required for distance calculation (default: 0)
#[argh(option, default = "0")]
pub min_loci: usize,
// `None` when the flag is omitted. The documented default of 1 and the special
// meaning of 0 are not applied in this struct.
/// number of threads (default: 1; pass 0 for auto-detect = number of physical cores).
/// On shared systems, auto-detect can interfere with other processes; users running on
/// dedicated hardware should explicitly request the desired number of threads.
#[argh(option)]
pub threads: Option<usize>,
// --- Quality filters -------------------------------------------------------
// Plain `f64` values; the documented 0.0-1.0 range is not checked by the type.
/// sample quality filter: minimum fraction of non-missing loci per sample (0.0-1.0, default: 0.0 = no filter)
#[argh(option, default = "0.0")]
pub sample_threshold: f64,
/// locus quality filter: minimum fraction of non-missing samples per locus (0.0-1.0, default: 0.0 = no filter)
#[argh(option, default = "0.0")]
pub locus_threshold: f64,
// --- Sample / locus selection ----------------------------------------------
// Regex patterns are stored as raw strings (not compiled here); the `*_list`
// variants hold file paths as described in their help text.
/// include only samples matching regex pattern
#[argh(option)]
pub include_samples: Option<String>,
/// exclude samples matching regex pattern
#[argh(option)]
pub exclude_samples: Option<String>,
/// include only loci matching regex pattern
#[argh(option)]
pub include_loci: Option<String>,
/// exclude loci matching regex pattern
#[argh(option)]
pub exclude_loci: Option<String>,
/// include only loci listed in a file (one locus per line)
#[argh(option)]
pub include_loci_list: Option<String>,
/// exclude loci listed in a file (one locus per line)
#[argh(option)]
pub exclude_loci_list: Option<String>,
/// include only samples listed in a file (one sample per line)
#[argh(option)]
pub include_samples_list: Option<String>,
/// exclude samples listed in a file (one sample per line)
#[argh(option)]
pub exclude_samples_list: Option<String>,
// --- Hamming fallback ------------------------------------------------------
// Two independent switches: the opt-in flag and a deprecated legacy flag that is
// still accepted by the parser. The warning mentioned in the help text is not
// emitted by this struct.
/// enable Hamming fallback for SNPs-only mode (opt-in: when an allele pair has 0 SNPs but
/// different hashes due to InDels, contribute +1 instead of 0; preserves cgDist >= Hamming
/// ordering at the cost of counting non-SNP positions as "SNPs"). Default: disabled.
#[argh(switch)]
pub hamming_fallback: bool,
/// [DEPRECATED] no longer needed: Hamming fallback is now opt-in via --hamming-fallback.
/// This flag is accepted for backward compatibility and is a no-op (a warning is printed
/// when supplied).
#[argh(switch)]
pub no_hamming_fallback: bool,
// --- Cache handling ----------------------------------------------------------
/// cache file path for ultra-fast reuse (.lz4 extension)
#[argh(option)]
pub cache_file: Option<String>,
/// user note to save with the cache for future reference
#[argh(option)]
pub cache_note: Option<String>,
/// enrich cache with nucleotide sequence lengths from schema
#[argh(switch)]
pub enrich_lengths: bool,
/// output file for enriched cache (default: overwrites input cache)
#[argh(option)]
pub enrich_output: Option<String>,
// --- Alignment settings ------------------------------------------------------
// The four scoring options are `Option<i32>`: `None` means the flag was not given.
// How they combine with `alignment_mode` is decided outside this struct.
/// save detailed alignments to file (TSV format)
#[argh(option)]
pub save_alignments: Option<String>,
/// alignment mode: dna, dna-strict, dna-permissive, custom (default: dna)
#[argh(option, default = "String::from(\"dna\")")]
pub alignment_mode: String,
/// custom match score (overrides preset mode, enables custom mode)
#[argh(option)]
pub match_score: Option<i32>,
/// custom mismatch penalty (overrides preset mode, enables custom mode)
#[argh(option)]
pub mismatch_penalty: Option<i32>,
/// custom gap open penalty (overrides preset mode, enables custom mode)
#[argh(option)]
pub gap_open: Option<i32>,
/// custom gap extend penalty (overrides preset mode, enables custom mode)
#[argh(option)]
pub gap_extend: Option<i32>,
/// force recomputation ignoring cache compatibility (start fresh)
#[argh(switch)]
pub force_recompute: bool,
/// build cache only without computing distance matrix
#[argh(switch)]
pub cache_only: bool,
// --- Candidate recombination flagging ---------------------------------------
// Each setting exists twice: the current name and a deprecated legacy name, as
// separate fields. Both thresholds are `Option<usize>`, so the documented default
// of 20 is not applied here. Reconciling the two names presumably happens in the
// caller -- TODO confirm.
/// flag candidate recombination regions: output log of allele pairs whose mutation density
/// exceeds --candidate-recombination-threshold. This is a heuristic flagging step, not a
/// validated recombination detection method (confirmation requires phylogeny-aware tools
/// such as Gubbins, ClonalFrameML, fastGEAR). Legacy alias --recombination-log is also
/// accepted.
#[argh(option)]
pub candidate_recombination_log: Option<String>,
/// [DEPRECATED ALIAS] use --candidate-recombination-log instead. Kept for backward
/// compatibility; a deprecation warning is printed when this name is used.
#[argh(option)]
pub recombination_log: Option<String>,
/// SNPs + InDel-bases threshold above which a locus is flagged as a recombination candidate
/// (default: 20). Legacy alias --recombination-threshold is also accepted.
#[argh(option)]
pub candidate_recombination_threshold: Option<usize>,
/// [DEPRECATED ALIAS] use --candidate-recombination-threshold instead. Kept for backward
/// compatibility; a deprecation warning is printed when this name is used.
#[argh(option)]
pub recombination_threshold: Option<usize>,
// --- Alternative run modes and miscellaneous options -------------------------
/// show matrix statistics and diversity metrics only, then exit
#[argh(switch)]
pub stats_only: bool,
/// benchmark mode: measure alignment processing speed (pairs/second) and exit
#[argh(switch)]
pub benchmark: bool,
/// benchmark duration in seconds (default: 15)
#[argh(option, default = "15")]
pub benchmark_duration: u64,
/// validate inputs without computation (dry run)
#[argh(switch)]
pub dry_run: bool,
/// allele hasher type: crc32, sha256, md5, sequence, hamming (default: crc32)
#[argh(option, default = "String::from(\"crc32\")")]
pub hasher_type: String,
/// inspect cache file instead of running distance calculation
#[argh(option)]
pub inspector: Option<String>,
/// path to TOML configuration file
#[argh(option)]
pub config: Option<String>,
/// generate sample configuration file and exit
#[argh(switch)]
pub generate_config: bool,
}