filterx/
args.rs

1use clap::{ArgAction, Args, Parser, Subcommand, ValueHint};
2use filterx_core::reader::FileContentType;
3use filterx_source::{FastaRecordType, QualityType};
4
5static LONG_ABOUT: &'static str = include_str!("./long.txt");
6
7#[derive(Debug, Clone, Parser)]
8#[clap(
9    long_about = LONG_ABOUT,
10    author,
11    version,
12    name = "filterx",
13)]
14pub struct Cli {
15    #[clap(subcommand)]
16    pub command: Command,
17
18    /// Set the number of threads to use. Defaults to the number of logical CPUs.
19    #[clap(short = 'j', long)]
20    pub threads: Option<usize>,
21}
22
23#[derive(Debug, Clone, Subcommand)]
24pub enum Command {
25    /// handle csv file
26    #[command(visible_alias = "c")]
27    Csv(CsvCommand),
28
29    /// handle fasta file
30    #[command(visible_alias = "fa")]
31    Fasta(FastaCommand),
32
33    /// handle fastq file
34    #[command(visible_alias = "fq")]
35    Fastq(FastqCommand),
36
37    /// handle sam file
38    Sam(SamCommand),
39
40    /// handle vcf file
41    Vcf(VcfCommand),
42
43    /// handle gff file
44    GFF(GFFCommand),
45
46    /// handle gtf file
47    GTF(GFFCommand),
48
49    /// builtin function help
50    Info(InfoArgs),
51}
52
/// Publishes the requested worker-thread count through environment variables.
///
/// When `thread_size` is `Some(n)`, both `POLARS_NUM_THREADS` and
/// `GZP_NUM_THREADS` are set to the decimal representation of `n` for the
/// current process. When it is `None`, the environment is left untouched.
///
/// This mutates the process environment via [`std::env::set_var`], which is
/// not safe to race with other threads touching the environment, so it is best
/// called early during start-up.
///
/// NOTE(review): Polars documents `POLARS_MAX_THREADS` as its thread-count
/// variable; confirm that something actually reads `POLARS_NUM_THREADS`.
pub fn set_thread_size(thread_size: Option<usize>) {
    if let Some(threads) = thread_size {
        // Stringify once and reuse the same value for both variables.
        let value = threads.to_string();
        // set polars thread size by env
        std::env::set_var("POLARS_NUM_THREADS", &value);
        // set gzp thread size by env
        std::env::set_var("GZP_NUM_THREADS", &value);
    }
}
62
63#[derive(Debug, Clone, Args)]
64pub struct ShareArgs {
65    /// The input string
66    #[clap(value_hint=ValueHint::FilePath)]
67    pub input: String,
68
69    /// expression to filter
70    #[clap(short = 'e', long, action = ArgAction::Append)]
71    pub expr: Option<Vec<String>>,
72
73    /// The output file, default is stdout.
74    #[clap(short='o', long, value_hint=ValueHint::FilePath)]
75    pub output: Option<String>,
76
77    /// output as table format, only output to stdout
78    #[clap(short = 't', long, default_value = "false", action = ArgAction::SetTrue)]
79    pub table: Option<bool>,
80
81    /// only works with -o.
82    #[clap(long, alias = "ot", default_value = "auto")]
83    pub output_type: Option<FileContentType>,
84
85    /// sql string
86    #[clap(long)]
87    pub sql: Option<String>,
88}
89
90#[derive(Debug, Clone, Parser)]
91pub struct CsvCommand {
92    #[clap(flatten)]
93    pub share_args: ShareArgs,
94
95    /// whether the input file has header, default is false
96    #[clap(short = 'H', long, default_value = "false", action = ArgAction::SetTrue)]
97    pub header: Option<bool>,
98
99    /// Output headers if -H was set. --no-header will disable it.
100    #[clap(long = "no-header", action = ArgAction::SetTrue, alias="nh")]
101    pub no_header: Option<bool>,
102
103    /// The comment prefix
104    #[clap(short = 'c', long, default_value = "#")]
105    pub comment_prefix: Option<String>,
106
107    /// The separator, if not set, will try to detect
108    #[clap(short = 's', long)]
109    pub separator: Option<String>,
110
111    /// The output separator, same as -s if not set
112    #[clap(long = "os")]
113    pub output_separator: Option<String>,
114
115    /// skip row number, 0 means no skip
116    #[clap(long, default_value = "0")]
117    pub skip: Option<usize>,
118
119    /// limit row number, 0 means no limit
120    #[clap(long, default_value = "0")]
121    pub limit: Option<usize>,
122}
123
124#[derive(Debug, Clone, Parser)]
125pub struct FastaCommand {
126    #[clap(flatten)]
127    pub share_args: ShareArgs,
128
129    /// number of sequence per chunk
130    #[clap(short = 'c', long, default_value = "4096")]
131    pub chunk: Option<usize>,
132
133    /// don't parse comment
134    #[clap(long, default_value = "false", action = ArgAction::SetTrue)]
135    pub no_comment: Option<bool>,
136
137    /// limit sequence number, 0 means no limit
138    #[clap(long, default_value = "0")]
139    pub limit: Option<usize>,
140
141    /// sequence type, default is DNA
142    #[clap(long, default_value = "auto")]
143    pub r#type: Option<FastaRecordType>,
144
145    /// detect sequence type by first N sequences
146    #[clap(long, default_value = "3")]
147    pub detect_size: Option<usize>,
148}
149
150#[derive(Debug, Clone, Parser)]
151pub struct FastqCommand {
152    #[clap(flatten)]
153    pub share_args: ShareArgs,
154
155    /// number of sequence per chunk
156    #[clap(short = 'c', long, default_value = "4096")]
157    pub chunk: Option<usize>,
158
159    /// don't parse quality
160    #[clap(long, default_value = "false", action = ArgAction::SetTrue, visible_alias="no-qual")]
161    pub no_quality: Option<bool>,
162
163    /// don't parse comment
164    #[clap(long, default_value = "false", action = ArgAction::SetTrue)]
165    pub no_comment: Option<bool>,
166
167    /// limit sequence number, 0 means no limit
168    #[clap(long, default_value = "0")]
169    pub limit: Option<usize>,
170
171    /// quality type, phred33, phred64, auto, auto: will try to detect
172    #[clap(long, default_value = "auto")]
173    pub phred: Option<QualityType>,
174
175    /// detect quality type by first N sequences
176    #[clap(long, default_value = "100")]
177    pub detect_size: Option<usize>,
178}
179
180#[derive(Debug, Clone, Parser)]
181pub struct SamCommand {
182    #[clap(flatten)]
183    pub share_args: ShareArgs,
184
185    #[clap(short = 'H', long, default_value = "false", action = ArgAction::SetTrue)]
186    pub header: Option<bool>,
187}
188
189#[derive(Debug, Clone, Parser)]
190pub struct VcfCommand {
191    #[clap(flatten)]
192    pub share_args: ShareArgs,
193
194    #[clap(short = 'H', long, default_value = "false", action = ArgAction::SetTrue)]
195    pub header: Option<bool>,
196}
197
198#[derive(Debug, Clone, Parser)]
199pub struct GFFCommand {
200    #[clap(flatten)]
201    pub share_args: ShareArgs,
202
203    #[clap(short = 'H', long, default_value = "false", action = ArgAction::SetTrue)]
204    pub header: Option<bool>,
205}
206
207#[derive(Debug, Clone, Parser)]
208pub struct InfoArgs {
209    /// builtin function name
210    pub name: Option<String>,
211
212    /// list all builtin functions
213    #[clap(short='l', long, default_value = "false", action = ArgAction::SetTrue)]
214    pub list: Option<bool>,
215}