jam_rs/
cli.rs

1use clap::{Parser, Subcommand, ValueEnum};
2use serde::{Deserialize, Serialize};
3use std::path::PathBuf;
4
5#[derive(Debug, Parser)]
6#[command(name = "jam")]
7#[command(bin_name = "jam")]
8#[command(version = "0.1.0-beta.1")]
9#[command(
10    about = "Just another (genomic) minhasher (jam), obviously blazingly fast",
11    long_about = "An optimized minhash implementation that focuses on quick scans for small sequences in large datasets."
12)]
13pub struct Cli {
14    #[command(subcommand)]
15    pub command: Commands,
16    /// Number of threads to use
17    #[arg(short, long, global = true, default_value = "1")]
18    pub threads: Option<usize>,
19    /// Overwrite output files
20    #[arg(short, long, global = true, default_value = "false")]
21    pub force: bool,
22}
23
24#[derive(ValueEnum, Debug, Clone)]
25pub enum OutputFormats {
26    Bin,
27    // Sourmash compatible json
28    Sourmash,
29}
30
31#[derive(ValueEnum, Debug, Clone, Deserialize, Serialize)]
32pub enum HashAlgorithms {
33    Default, // AHash < 32 | Xxhash >= 32
34    Ahash,
35    Xxhash,
36    Murmur3,
37}
38
39#[derive(Debug, Subcommand, Clone)]
40pub enum Commands {
41    /// Sketch one or more files and write the result to an output file (or stdout)
42    #[command(arg_required_else_help = true)]
43    Sketch {
44        /// Input file(s), one directory or one file with list of files to be hashed
45        #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
46        input: Vec<PathBuf>,
47        /// Output file
48        #[arg(short, long)]
49        #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
50        output: Option<PathBuf>,
51        /// kmer size, all sketches must have the same size to be compared
52        #[arg(short = 'k', long = "kmer-size", default_value = "21")]
53        kmer_size: u8,
54        /// Scale the hash space to a minimum fraction of the maximum hash value (FracMinHash)
55        #[arg(long)]
56        fscale: Option<u64>,
57        /// Scale the hash space to a minimum fraction of all k-mers (SizeMinHash)
58        #[arg(long)]
59        kscale: Option<u64>,
60        /// Minimum number of k-mers (per record) to be hashed, bottom cut-off
61        #[arg(long)]
62        nmin: Option<u64>,
63        /// Maximum number of k-mers (per record) to be hashed, top cut-off
64        #[arg(long)]
65        nmax: Option<u64>,
66        /// Change to other output formats
67        #[arg(long, default_value = "bin")]
68        format: OutputFormats,
69        /// Change the hashing algorithm
70        #[arg(long, default_value = "default")]
71        algorithm: HashAlgorithms,
72        /// Create a separate sketch for each sequence record
73        #[arg(long)]
74        singleton: bool,
75    },
76    /// Merge multiple input sketches into a single sketch
77    #[command(arg_required_else_help = true)]
78    Merge {
79        /// One or more input sketches
80        #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
81        inputs: Vec<PathBuf>,
82        /// Output file
83        #[arg(short, long, required = true)]
84        #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
85        output: PathBuf,
86    },
87    /// Estimate containment of a (small) sketch against a subset of one or more sketches as database.
88    /// Requires all sketches to have the same kmer size
89    #[command(arg_required_else_help = true)]
90    Dist {
91        /// Input sketch or raw file
92        #[arg(short, long)]
93        input: PathBuf,
94        /// Database sketch(es)
95        #[arg(short, long)]
96        database: Vec<PathBuf>,
97        /// Output to file instead of stdout
98        #[arg(short, long)]
99        #[arg(value_parser = clap::value_parser!(std::path::PathBuf))]
100        output: Option<PathBuf>,
101        /// Cut-off value for similarity
102        #[arg(short, long, default_value = "0.0")]
103        cutoff: f64,
104        /// Use the Stats params for restricting results
105        #[arg(long)]
106        stats: bool,
107        /// Use GC stats with an upper bound of x% (gc_lower and gc_upper must be set)
108        #[arg(long)]
109        gc_lower: Option<u8>,
110        /// Use GC stats with an lower bound of y% (gc_lower and gc_upper must be set)
111        #[arg(long)]
112        gc_upper: Option<u8>,
113    },
114}