Skip to main content

jam_rs/cli/
args.rs

1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3
4#[derive(Debug, Parser)]
5#[command(name = "jam")]
6#[command(bin_name = "jam")]
7#[command(version = "0.9.10")]
8#[command(
9    about = "Just another (genomic) minhasher (jam), obviously blazingly fast",
10    long_about = "An optimized minhash implementation that focuses on quick scans for small sequences in large datasets."
11)]
12pub struct Cli {
13    #[command(subcommand)]
14    pub command: Commands,
15    /// Number of threads to use
16    #[arg(short, long, global = true, default_value = "1")]
17    pub threads: Option<usize>,
18    /// Overwrite output files
19    #[arg(short, long, global = true, default_value = "false")]
20    pub force: bool,
21    /// Silent mode, no (additional) output to stdout
22    /// Only errors and output files will be printed
23    #[arg(short, long, global = true, default_value = "false")]
24    pub silent: bool,
25    /// Maximum memory usage in bytes in GB
26    #[arg(short, long, global = true, default_value = "2")]
27    pub memory: Option<usize>,
28}
29
30#[derive(Debug, Subcommand, Clone)]
31pub enum Commands {
32    /// Sketch one or more files and write the result to an output file
33    #[command(arg_required_else_help = true)]
34    Sketch {
35        /// Input file(s), directories, or file with list of files to be hashed
36        #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
37        input: Vec<PathBuf>,
38        /// Output file (.jam format)
39        #[arg(short, long)]
40        #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
41        output: PathBuf,
42        /// K-mer size, all sketches must have the same size to be compared and below 32
43        #[arg(short = 'k', long = "kmer-size", default_value = "21")]
44        kmer_size: u8,
45        /// Scale the hash space to a minimum fraction of the maximum hash value (FracMinHash)
46        #[arg(long)]
47        fscale: Option<u64>,
48        /// Complexity cut-off, only hash sequences with complexity above this value
49        /// This is created via shannon entropy
50        #[arg(long, default_value = "0.0")]
51        complexity: f64,
52        /// Create a separate sketch for each sequence record
53        /// Will increase the size of the output file
54        #[arg(long)]
55        singleton: bool,
56        /// Custom temporary directory for intermediate files during sorting
57        #[arg(long)]
58        temp_dir: Option<PathBuf>,
59        /// Path to a bias table file (.bias) for hash-based filtering
60        #[arg(long)]
61        bias_table: Option<PathBuf>,
62    },
63
64    /// Estimate containment of a query sequence against a sketch database.
65    /// Requires all sketches to have the same kmer size
66    #[command(arg_required_else_help = true)]
67    Dist {
68        /// Input FASTA/FASTQ file to query
69        #[arg(short, long)]
70        input: PathBuf,
71        /// Database sketch (.jam file)
72        #[arg(short, long)]
73        database: PathBuf,
74        /// Output to file instead of stdout
75        #[arg(short, long)]
76        #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
77        output: Option<PathBuf>,
78        /// Cut-off value for similarity/containment
79        #[arg(short, long, default_value = "0.0")]
80        cutoff: f64,
81        /// Singleton mode, process each query sequence separately
82        #[arg(long, default_value = "false")]
83        singleton: bool,
84    },
85
86    /// Build and analyze hash bias tables for filtering
87    #[command(arg_required_else_help = true)]
88    Bias {
89        #[command(subcommand)]
90        command: BiasCommands,
91    },
92
93    /// Display statistics about a JAM database
94    #[command(arg_required_else_help = true)]
95    Stats {
96        /// Input JAM database (.jam file)
97        #[arg(short, long)]
98        input: PathBuf,
99        /// Short summary only
100        #[arg(long)]
101        short: bool,
102        /// Include the full entry statistics
103        #[arg(long)]
104        full: bool,
105    },
106}
107
108#[derive(Debug, Subcommand, Clone)]
109pub enum BiasCommands {
110    /// Create a bias table from positive (target) and negative (background) FASTA files.
111    /// Target signal is always subtracted from background before computing bias weights.
112    #[command(arg_required_else_help = true)]
113    Create {
114        /// Positive (target) FASTA file(s) - sequences to enrich for
115        #[arg(long, required = true, num_args = 1..)]
116        positive: Vec<PathBuf>,
117        /// Negative (background) FASTA file(s) - sequences to deplete.
118        /// Target signal is subtracted from background automatically.
119        #[arg(long, required = true, num_args = 1..)]
120        negative: Vec<PathBuf>,
121        /// Output bias table file (.bias)
122        #[arg(short, long)]
123        output: PathBuf,
124        /// K-mer size (must match sketch k-mer size)
125        #[arg(short = 'k', long = "kmer-size", default_value = "21")]
126        kmer_size: u8,
127        /// FracMinHash scale (must match sketch fscale)
128        #[arg(long, default_value = "1000")]
129        fscale: u64,
130        /// Count-Min Sketch width (columns, power of 2 recommended)
131        #[arg(long, default_value = "1048576")]
132        cms_width: usize,
133        /// Count-Min Sketch depth (number of hash functions)
134        #[arg(long, default_value = "5")]
135        cms_depth: usize,
136        /// Smoothing parameter for log-ratio computation
137        #[arg(long, default_value = "1.0")]
138        alpha: f32,
139        /// Target fold enrichment. If not set, maximizes automatically.
140        /// A warning is shown if the requested value exceeds the maximum
141        /// achievable by the data.
142        #[arg(long)]
143        fold_enrichment: Option<f32>,
144        /// Number of threads to use for bias sketching
145        #[arg(long)]
146        threads: Option<usize>,
147    },
148
149    /// Display statistics for a bias table (.bias file)
150    #[command(arg_required_else_help = true)]
151    Stats {
152        /// Input bias table file (.bias)
153        input: PathBuf,
154        /// Output JSON report to file instead of stderr
155        #[arg(short, long)]
156        output: Option<PathBuf>,
157    },
158}