1use std::path::PathBuf;
2
3use clap::{Args, Parser, Subcommand, ValueEnum};
4
// Top-level command-line definition, built with clap's `Parser` derive.
// `author` and `version` are requested without explicit values, `about` is
// set to a fixed string, and `long_about` is explicitly `None`.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Parser)]
#[command(author, version, about = "Manage CSV files efficiently", long_about = None)]
pub struct Cli {
    // The subcommand chosen on the command line; one variant of `Commands`.
    #[command(subcommand)]
    pub command: Commands,
}
11
// Subcommands accepted by the CLI. Every variant wraps the argument struct
// that declares that subcommand's flags.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
// NOTE(review): what each subcommand actually does is implemented outside
// this file; the notes below only summarise the declared arguments.
#[derive(Debug, Subcommand)]
pub enum Commands {
    // Required input and schema paths plus sampling options (`ProbeArgs`).
    Probe(ProbeArgs),
    // Output path and one or more `--column` values (`SchemaArgs`).
    Schema(SchemaArgs),
    // Input path, index path, and column/spec/combo lists (`IndexArgs`).
    Index(IndexArgs),
    // Input path with optional output, schema, index, sort, filter, and
    // formatting options (`ProcessArgs`).
    Process(ProcessArgs),
    // One or more input paths and an optional output path (`AppendArgs`).
    Append(AppendArgs),
    // Schema path and one or more input paths (`VerifyArgs`).
    Verify(VerifyArgs),
    // Input path and a row count that defaults to 10 (`PreviewArgs`).
    Preview(PreviewArgs),
    // Input path, optional schema, and a column list (`StatsArgs`).
    Stats(StatsArgs),
    // Input path, optional schema, column list, and a `--top` count
    // (`FrequencyArgs`).
    Frequency(FrequencyArgs),
    // Left/right paths and key names plus a join type (`JoinArgs`).
    Join(JoinArgs),
    // Optional version and root values plus `--force`/`--locked` switches
    // (`InstallArgs`).
    Install(InstallArgs),
    // Required schema path only (`ColumnsArgs`).
    Columns(ColumnsArgs),
}
39
// Arguments for the `Commands::Probe` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct ProbeArgs {
    // Input path, given as `-i` / `--input`; non-optional with no default.
    #[arg(short = 'i', long = "input")]
    pub input: PathBuf,
    // Schema path, given as `-m` / `--schema`; `--meta` is accepted as an alias.
    // NOTE(review): whether this file is read or written is not visible here.
    #[arg(short = 'm', long = "schema", alias = "meta")]
    pub schema: PathBuf,
    // Row count with a default of 2000; the long flag name is derived by clap
    // from the field name.
    #[arg(long, default_value_t = 2000)]
    pub sample_rows: usize,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`.
    #[arg(long, value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional encoding label passed as `--input-encoding`, kept as a plain
    // string (no validation happens in this struct).
    #[arg(long = "input-encoding")]
    pub input_encoding: Option<String>,
    // Boolean switch set by `--mapping`.
    // NOTE(review): the effect of this switch is implemented elsewhere.
    #[arg(long = "mapping")]
    pub mapping: bool,
    // Boolean switch set by `--replace` (the flag name differs from the
    // field name).
    // NOTE(review): the effect of this switch is implemented elsewhere.
    #[arg(long = "replace")]
    pub replace_template: bool,
}
64
// Arguments for the `Commands::Schema` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct SchemaArgs {
    // Output path, given as `-o` / `--output`; non-optional with no default.
    #[arg(short = 'o', long = "output")]
    pub output: PathBuf,
    // Column definitions collected from repeated `-c` / `--column` flags;
    // at least one is required (`required = true`).
    // NOTE(review): the expected text format of each value is parsed elsewhere.
    #[arg(short = 'c', long = "column", action = clap::ArgAction::Append, required = true)]
    pub columns: Vec<String>,
    // Values collected from repeated `--replace` flags; not marked required.
    // NOTE(review): the expected text format of each value is parsed elsewhere.
    #[arg(long = "replace", action = clap::ArgAction::Append)]
    pub replacements: Vec<String>,
}
77
// Arguments for the `Commands::Index` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct IndexArgs {
    // Input path; the short and long flag names are derived by clap from the
    // field name rather than spelled out.
    #[arg(short, long)]
    pub input: PathBuf,
    // Index path, given as `-o` / `--index`; non-optional with no default.
    #[arg(short = 'o', long = "index")]
    pub index: PathBuf,
    // Column names from `-C` / `--columns`. Unlike the other `--columns`
    // flags in this file, this one splits its value on commas at parse time.
    #[arg(short = 'C', long = "columns", value_delimiter = ',')]
    pub columns: Vec<String>,
    // Values collected from repeated `--spec` flags.
    // NOTE(review): the spec syntax is interpreted elsewhere.
    #[arg(long = "spec", action = clap::ArgAction::Append)]
    pub specs: Vec<String>,
    // Values collected from repeated `--combo` flags.
    // NOTE(review): the combo syntax is interpreted elsewhere.
    #[arg(long = "combo", action = clap::ArgAction::Append)]
    pub combos: Vec<String>,
    // Optional schema path, given as `-m` / `--schema`; `--meta` is an alias.
    #[arg(short = 'm', long = "schema", alias = "meta")]
    pub schema: Option<PathBuf>,
    // Optional numeric limit; the long flag name is derived from the field name.
    // NOTE(review): presumably a cap on rows processed; confirm with the caller.
    #[arg(long)]
    pub limit: Option<usize>,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`.
    #[arg(long, value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional encoding label passed as `--input-encoding`, kept as a plain
    // string.
    #[arg(long = "input-encoding")]
    pub input_encoding: Option<String>,
}
108
// Arguments for the `Commands::Process` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct ProcessArgs {
    // Input path, given as `-i` / `--input`; non-optional with no default.
    #[arg(short = 'i', long = "input")]
    pub input: PathBuf,
    // Optional output path, given as `-o` / `--output`.
    // NOTE(review): the fallback destination when absent is decided elsewhere.
    #[arg(short = 'o', long = "output")]
    pub output: Option<PathBuf>,
    // Optional schema path, given as `-m` / `--schema`; `--meta` is an alias.
    #[arg(short = 'm', long = "schema", alias = "meta")]
    pub schema: Option<PathBuf>,
    // Optional index path, given as `-x` / `--index`.
    #[arg(short = 'x', long = "index")]
    pub index: Option<PathBuf>,
    // Optional string passed as `--index-variant`.
    // NOTE(review): how a variant is selected from the index is not visible here.
    #[arg(long = "index-variant")]
    pub index_variant: Option<String>,
    // Values collected from repeated `--sort` flags.
    // NOTE(review): the sort-directive syntax is interpreted elsewhere.
    #[arg(long = "sort", action = clap::ArgAction::Append)]
    pub sort: Vec<String>,
    // Values collected from repeated `-C` / `--columns` flags. No
    // `value_delimiter` is set, so clap does not split the values.
    #[arg(short = 'C', long = "columns", action = clap::ArgAction::Append)]
    pub columns: Vec<String>,
    // Values collected from repeated `--exclude-columns` flags.
    #[arg(long = "exclude-columns", action = clap::ArgAction::Append)]
    pub exclude_columns: Vec<String>,
    // Values collected from repeated `--derive` flags.
    // NOTE(review): the expression syntax is interpreted elsewhere.
    #[arg(long = "derive", action = clap::ArgAction::Append)]
    pub derives: Vec<String>,
    // Values collected from repeated `--filter` flags.
    // NOTE(review): the expression syntax is interpreted elsewhere.
    #[arg(long = "filter", action = clap::ArgAction::Append)]
    pub filters: Vec<String>,
    // Boolean switch set by `--row-numbers`.
    #[arg(long = "row-numbers")]
    pub row_numbers: bool,
    // Optional numeric limit; the long flag name is derived from the field name.
    // NOTE(review): presumably a cap on rows emitted; confirm with the caller.
    #[arg(long)]
    pub limit: Option<usize>,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`.
    #[arg(long, value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional delimiter byte from `--output-delimiter`, converted by the
    // same `parse_delimiter` function.
    #[arg(long = "output-delimiter", value_parser = parse_delimiter)]
    pub output_delimiter: Option<u8>,
    // Optional encoding label passed as `--input-encoding`, kept as a plain
    // string.
    #[arg(long = "input-encoding")]
    pub input_encoding: Option<String>,
    // Optional encoding label passed as `--output-encoding`, kept as a plain
    // string.
    #[arg(long = "output-encoding")]
    pub output_encoding: Option<String>,
    // Boolean rendering choice from `--boolean-format`; the default value
    // string is "original". Accepted values come from `BooleanFormat`.
    #[arg(long = "boolean-format", default_value = "original")]
    pub boolean_format: BooleanFormat,
    // Boolean switch set by `--table`.
    // NOTE(review): the effect on output rendering is implemented elsewhere.
    #[arg(long = "table")]
    pub table: bool,
}
166
// Choices for the `--boolean-format` option of `ProcessArgs`.
// `rename_all = "kebab-case"` makes the command-line spellings `original`,
// `true-false`, and `one-zero`.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
// NOTE(review): how each choice affects written values is implemented
// elsewhere; the variant notes below restate the names only.
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq, Default)]
#[value(rename_all = "kebab-case")]
pub enum BooleanFormat {
    // Marked `#[default]`, so `BooleanFormat::default()` yields this variant.
    #[default]
    Original,
    // Selected by the value `true-false`.
    TrueFalse,
    // Selected by the value `one-zero`.
    OneZero,
}
175
// Arguments for the `Commands::Append` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct AppendArgs {
    // Input paths collected from repeated `-i` / `--input` flags; at least
    // one is required (`required = true`).
    #[arg(short = 'i', long = "input", required = true, action = clap::ArgAction::Append)]
    pub inputs: Vec<PathBuf>,
    // Optional output path, given as `-o` / `--output`.
    // NOTE(review): the fallback destination when absent is decided elsewhere.
    #[arg(short = 'o', long = "output")]
    pub output: Option<PathBuf>,
    // Optional schema path, given as `-m` / `--schema`; `--meta` is an alias.
    #[arg(short = 'm', long = "schema", alias = "meta")]
    pub schema: Option<PathBuf>,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`.
    #[arg(long, value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional encoding label passed as `--input-encoding`, kept as a plain
    // string.
    #[arg(long = "input-encoding")]
    pub input_encoding: Option<String>,
    // Optional encoding label passed as `--output-encoding`, kept as a plain
    // string.
    #[arg(long = "output-encoding")]
    pub output_encoding: Option<String>,
}
197
// Arguments for the `Commands::Verify` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct VerifyArgs {
    // Schema path, given as `-m` / `--schema`; `--meta` is an alias.
    // Non-optional with no default.
    #[arg(short = 'm', long = "schema", alias = "meta")]
    pub schema: PathBuf,
    // Input paths collected from repeated `-i` / `--input` flags; at least
    // one is required (`required = true`).
    #[arg(short = 'i', long = "input", required = true, action = clap::ArgAction::Append)]
    pub inputs: Vec<PathBuf>,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`.
    #[arg(long, value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional encoding label passed as `--input-encoding`, kept as a plain
    // string.
    #[arg(long = "input-encoding")]
    pub input_encoding: Option<String>,
    // `--report-invalid` accepts between zero and three values
    // (`num_args = 0..=3`), shown as `OPTIONS` in usage text.
    // NOTE(review): presumably `None` when the flag is absent and an empty
    // vector when it is given bare; the meaning of each option string is
    // interpreted elsewhere. Confirm against the caller.
    #[arg(long = "report-invalid", value_name = "OPTIONS", num_args = 0..=3)]
    pub report_invalid: Option<Vec<String>>,
}
216
// Arguments for the `Commands::Preview` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct PreviewArgs {
    // Input path, given as `-i` / `--input`; non-optional with no default.
    #[arg(short = 'i', long = "input")]
    pub input: PathBuf,
    // Row count with a default of 10; the long flag name is derived from the
    // field name.
    #[arg(long, default_value_t = 10)]
    pub rows: usize,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`.
    #[arg(long, value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional encoding label passed as `--input-encoding`, kept as a plain
    // string.
    #[arg(long = "input-encoding")]
    pub input_encoding: Option<String>,
}
232
// Arguments for the `Commands::Stats` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct StatsArgs {
    // Input path, given as `-i` / `--input`; non-optional with no default.
    #[arg(short = 'i', long = "input")]
    pub input: PathBuf,
    // Optional schema path, given as `-m` / `--schema`; `--meta` is an alias.
    #[arg(short = 'm', long = "schema", alias = "meta")]
    pub schema: Option<PathBuf>,
    // Values collected from repeated `-C` / `--columns` flags. No
    // `value_delimiter` is set, so clap does not split the values.
    #[arg(short = 'C', long = "columns", action = clap::ArgAction::Append)]
    pub columns: Vec<String>,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`.
    #[arg(long, value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional encoding label passed as `--input-encoding`, kept as a plain
    // string.
    #[arg(long = "input-encoding")]
    pub input_encoding: Option<String>,
    // Numeric limit defaulting to 0. Note this is a plain `usize`, whereas
    // the `limit` fields of `IndexArgs` and `ProcessArgs` are `Option<usize>`.
    // NOTE(review): presumably 0 means "no limit"; confirm with the caller.
    #[arg(long, default_value_t = 0)]
    pub limit: usize,
}
254
// Arguments for the `Commands::Frequency` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct FrequencyArgs {
    // Input path, given as `-i` / `--input`; non-optional with no default.
    #[arg(short = 'i', long = "input")]
    pub input: PathBuf,
    // Optional schema path, given as `-m` / `--schema`; `--meta` is an alias.
    #[arg(short = 'm', long = "schema", alias = "meta")]
    pub schema: Option<PathBuf>,
    // Values collected from repeated `-C` / `--columns` flags. No
    // `value_delimiter` is set, so clap does not split the values.
    #[arg(short = 'C', long = "columns", action = clap::ArgAction::Append)]
    pub columns: Vec<String>,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`.
    #[arg(long, value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional encoding label passed as `--input-encoding`, kept as a plain
    // string.
    #[arg(long = "input-encoding")]
    pub input_encoding: Option<String>,
    // Count defaulting to 0; the long flag name is derived from the field name.
    // NOTE(review): presumably 0 means "show every value"; confirm with the
    // caller.
    #[arg(long, default_value_t = 0)]
    pub top: usize,
}
276
// Arguments for the `Commands::Columns` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct ColumnsArgs {
    // Schema path, given as `-m` / `--schema`; `--meta` is an alias.
    // Non-optional with no default.
    #[arg(short = 'm', long = "schema", alias = "meta")]
    pub schema: PathBuf,
}
283
// Choices for the `--type` option of `JoinArgs`.
// `rename_all = "kebab-case"` makes the command-line spellings `inner`,
// `left`, `right`, and `full`.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
// NOTE(review): the variants presumably follow the usual relational join
// meanings; the join itself is implemented outside this file.
#[derive(Debug, Clone, Copy, ValueEnum)]
#[value(rename_all = "kebab-case")]
pub enum JoinKind {
    // Selected by the value `inner`; this is the default used by `JoinArgs`.
    Inner,
    // Selected by the value `left`.
    Left,
    // Selected by the value `right`.
    Right,
    // Selected by the value `full`.
    Full,
}
292
// Arguments for the `Commands::Join` subcommand.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
#[derive(Debug, Args)]
pub struct JoinArgs {
    // Left-hand input path from `--left`; non-optional, no short flag.
    #[arg(long = "left")]
    pub left: PathBuf,
    // Right-hand input path from `--right`; non-optional, no short flag.
    #[arg(long = "right")]
    pub right: PathBuf,
    // Optional output path, given as `-o` / `--output`.
    // NOTE(review): the fallback destination when absent is decided elsewhere.
    #[arg(short = 'o', long = "output")]
    pub output: Option<PathBuf>,
    // Key string for the left input from `--left-key`; non-optional.
    // NOTE(review): whether this may name several columns is decided elsewhere.
    #[arg(long = "left-key")]
    pub left_key: String,
    // Key string for the right input from `--right-key`; non-optional.
    #[arg(long = "right-key")]
    pub right_key: String,
    // Join type from `--type` (the flag name differs from the field name);
    // the default value string is "inner". Accepted values come from `JoinKind`.
    #[arg(long = "type", value_enum, default_value = "inner")]
    pub kind: JoinKind,
    // Optional schema path for the left input; `--left-meta` is an alias.
    #[arg(long = "left-schema", alias = "left-meta")]
    pub left_schema: Option<PathBuf>,
    // Optional schema path for the right input; `--right-meta` is an alias.
    #[arg(long = "right-schema", alias = "right-meta")]
    pub right_schema: Option<PathBuf>,
    // Optional delimiter byte; the raw argument text is converted by
    // `parse_delimiter`. Only one delimiter option is declared for this
    // subcommand.
    #[arg(long = "delimiter", value_parser = parse_delimiter)]
    pub delimiter: Option<u8>,
    // Optional encoding label passed as `--left-encoding`, kept as a plain
    // string.
    #[arg(long = "left-encoding")]
    pub left_encoding: Option<String>,
    // Optional encoding label passed as `--right-encoding`, kept as a plain
    // string.
    #[arg(long = "right-encoding")]
    pub right_encoding: Option<String>,
    // Optional encoding label passed as `--output-encoding`, kept as a plain
    // string.
    #[arg(long = "output-encoding")]
    pub output_encoding: Option<String>,
}
332
// Arguments for the `Commands::Install` subcommand. Every long flag name here
// is derived by clap from the field name.
// (Plain `//` comments are deliberate: clap's derive would turn `///` text
// into help output.)
// NOTE(review): what is installed, and how these options are forwarded, is
// implemented outside this file.
#[derive(Debug, Args)]
pub struct InstallArgs {
    // Optional version string.
    #[arg(long)]
    pub version: Option<String>,
    // Boolean switch; false unless the flag is passed.
    #[arg(long)]
    pub force: bool,
    // Boolean switch; false unless the flag is passed.
    #[arg(long)]
    pub locked: bool,
    // Optional root directory path.
    #[arg(long)]
    pub root: Option<PathBuf>,
}
348
/// Converts a command-line delimiter argument into a single ASCII byte.
///
/// This function is used as the clap `value_parser` for the delimiter options
/// declared in this file.
///
/// Accepted spellings:
/// - the names `tab`, `comma`, `pipe`, and `semicolon`, matched without regard
///   to ASCII case;
/// - the two-character escape `\t` (backslash followed by `t`), which is what
///   most shells pass for `--delimiter '\t'`;
/// - any single ASCII character, taken literally (including a real tab).
///
/// # Errors
///
/// Returns a human-readable message when `value` is empty, is longer than one
/// character without being a recognised name or escape, or is a single
/// non-ASCII character.
pub fn parse_delimiter(value: &str) -> Result<u8, String> {
    // Every name is longer than one character, so none of them can shadow a
    // literal single-character delimiter handled below.
    const NAMED_DELIMITERS: [(&str, u8); 4] = [
        ("tab", b'\t'),
        ("comma", b','),
        ("pipe", b'|'),
        ("semicolon", b';'),
    ];

    // Shells usually hand over `\t` as backslash + `t` rather than a real tab.
    if value == "\\t" {
        return Ok(b'\t');
    }

    let named = NAMED_DELIMITERS
        .iter()
        .find(|entry| value.eq_ignore_ascii_case(entry.0))
        .map(|entry| entry.1);
    if let Some(byte) = named {
        return Ok(byte);
    }

    let mut chars = value.chars();
    let first = chars
        .next()
        .ok_or_else(|| "Delimiter cannot be empty".to_string())?;
    if chars.next().is_some() {
        return Err("Delimiter must be a single character".to_string());
    }

    // Checked conversion instead of an `as` cast: a non-ASCII character can
    // never be truncated into an unrelated byte.
    u8::try_from(first)
        .ok()
        .filter(u8::is_ascii)
        .ok_or_else(|| "Delimiter must be ASCII".to_string())
}