jam_rs/cli/args.rs
1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3
4#[derive(Debug, Parser)]
5#[command(name = "jam")]
6#[command(bin_name = "jam")]
7#[command(version = "0.9.10")]
8#[command(
9 about = "Just another (genomic) minhasher (jam), obviously blazingly fast",
10 long_about = "An optimized minhash implementation that focuses on quick scans for small sequences in large datasets."
11)]
12pub struct Cli {
13 #[command(subcommand)]
14 pub command: Commands,
15 /// Number of threads to use
16 #[arg(short, long, global = true, default_value = "1")]
17 pub threads: Option<usize>,
18 /// Overwrite output files
19 #[arg(short, long, global = true, default_value = "false")]
20 pub force: bool,
21 /// Silent mode, no (additional) output to stdout
22 /// Only errors and output files will be printed
23 #[arg(short, long, global = true, default_value = "false")]
24 pub silent: bool,
25 /// Maximum memory usage in bytes in GB
26 #[arg(short, long, global = true, default_value = "2")]
27 pub memory: Option<usize>,
28}
29
30#[derive(Debug, Subcommand, Clone)]
31pub enum Commands {
32 /// Sketch one or more files and write the result to an output file
33 #[command(arg_required_else_help = true)]
34 Sketch {
35 /// Input file(s), directories, or file with list of files to be hashed
36 #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
37 input: Vec<PathBuf>,
38 /// Output file (.jam format)
39 #[arg(short, long)]
40 #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
41 output: PathBuf,
42 /// K-mer size, all sketches must have the same size to be compared and below 32
43 #[arg(short = 'k', long = "kmer-size", default_value = "21")]
44 kmer_size: u8,
45 /// Scale the hash space to a minimum fraction of the maximum hash value (FracMinHash)
46 #[arg(long)]
47 fscale: Option<u64>,
48 /// Complexity cut-off, only hash sequences with complexity above this value
49 /// This is created via shannon entropy
50 #[arg(long, default_value = "0.0")]
51 complexity: f64,
52 /// Create a separate sketch for each sequence record
53 /// Will increase the size of the output file
54 #[arg(long)]
55 singleton: bool,
56 /// Custom temporary directory for intermediate files during sorting
57 #[arg(long)]
58 temp_dir: Option<PathBuf>,
59 /// Path to a bias table file (.bias) for hash-based filtering
60 #[arg(long)]
61 bias_table: Option<PathBuf>,
62 },
63
64 /// Estimate containment of a query sequence against a sketch database.
65 /// Requires all sketches to have the same kmer size
66 #[command(arg_required_else_help = true)]
67 Dist {
68 /// Input FASTA/FASTQ file to query
69 #[arg(short, long)]
70 input: PathBuf,
71 /// Database sketch (.jam file)
72 #[arg(short, long)]
73 database: PathBuf,
74 /// Output to file instead of stdout
75 #[arg(short, long)]
76 #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
77 output: Option<PathBuf>,
78 /// Cut-off value for similarity/containment
79 #[arg(short, long, default_value = "0.0")]
80 cutoff: f64,
81 /// Singleton mode, process each query sequence separately
82 #[arg(long, default_value = "false")]
83 singleton: bool,
84 },
85
86 /// Build and analyze hash bias tables for filtering
87 #[command(arg_required_else_help = true)]
88 Bias {
89 #[command(subcommand)]
90 command: BiasCommands,
91 },
92
93 /// Display statistics about a JAM database
94 #[command(arg_required_else_help = true)]
95 Stats {
96 /// Input JAM database (.jam file)
97 #[arg(short, long)]
98 input: PathBuf,
99 /// Short summary only
100 #[arg(long)]
101 short: bool,
102 /// Include the full entry statistics
103 #[arg(long)]
104 full: bool,
105 },
106}
107
108#[derive(Debug, Subcommand, Clone)]
109pub enum BiasCommands {
110 /// Create a bias table from positive (target) and negative (background) FASTA files.
111 /// Target signal is always subtracted from background before computing bias weights.
112 #[command(arg_required_else_help = true)]
113 Create {
114 /// Positive (target) FASTA file(s) - sequences to enrich for
115 #[arg(long, required = true, num_args = 1..)]
116 positive: Vec<PathBuf>,
117 /// Negative (background) FASTA file(s) - sequences to deplete.
118 /// Target signal is subtracted from background automatically.
119 #[arg(long, required = true, num_args = 1..)]
120 negative: Vec<PathBuf>,
121 /// Output bias table file (.bias)
122 #[arg(short, long)]
123 output: PathBuf,
124 /// K-mer size (must match sketch k-mer size)
125 #[arg(short = 'k', long = "kmer-size", default_value = "21")]
126 kmer_size: u8,
127 /// FracMinHash scale (must match sketch fscale)
128 #[arg(long, default_value = "1000")]
129 fscale: u64,
130 /// Count-Min Sketch width (columns, power of 2 recommended)
131 #[arg(long, default_value = "1048576")]
132 cms_width: usize,
133 /// Count-Min Sketch depth (number of hash functions)
134 #[arg(long, default_value = "5")]
135 cms_depth: usize,
136 /// Smoothing parameter for log-ratio computation
137 #[arg(long, default_value = "1.0")]
138 alpha: f32,
139 /// Target fold enrichment. If not set, maximizes automatically.
140 /// A warning is shown if the requested value exceeds the maximum
141 /// achievable by the data.
142 #[arg(long)]
143 fold_enrichment: Option<f32>,
144 /// Number of threads to use for bias sketching
145 #[arg(long)]
146 threads: Option<usize>,
147 },
148
149 /// Display statistics for a bias table (.bias file)
150 #[command(arg_required_else_help = true)]
151 Stats {
152 /// Input bias table file (.bias)
153 input: PathBuf,
154 /// Output JSON report to file instead of stderr
155 #[arg(short, long)]
156 output: Option<PathBuf>,
157 },
158}