1use std::path::PathBuf;
2
3use clap::{Args, Parser, Subcommand, ValueEnum};
4
5#[derive(Debug, Parser)]
6#[command(author, version, about = "Manage CSV files efficiently", long_about = None)]
7pub struct Cli {
8 #[command(subcommand)]
9 pub command: Commands,
10}
11
12#[derive(Debug, Subcommand)]
13pub enum Commands {
14 Schema(SchemaArgs),
16 Index(IndexArgs),
18 Process(ProcessArgs),
20 Append(AppendArgs),
22 Stats(StatsArgs),
24 Install(InstallArgs),
28}
29
30#[derive(Debug, Args)]
31pub struct SchemaArgs {
32 #[command(subcommand)]
34 pub mode: Option<SchemaMode>,
35 #[arg(short = 'o', long = "output", alias = "schema", short_alias = 'm')]
37 pub output: Option<PathBuf>,
38 #[arg(short = 'c', long = "column", action = clap::ArgAction::Append)]
40 pub columns: Vec<String>,
41 #[arg(long = "replace", action = clap::ArgAction::Append)]
43 pub replacements: Vec<String>,
44}
45
46#[derive(Debug, Subcommand)]
47pub enum SchemaMode {
48 Probe(SchemaProbeArgs),
50 Infer(SchemaInferArgs),
52 Verify(SchemaVerifyArgs),
54 Columns(SchemaColumnsArgs),
56}
57
58#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
59#[value(rename_all = "kebab-case")]
60pub enum NaPlaceholderBehavior {
61 Empty,
62 Fill,
63}
64
65#[derive(Debug, Args, Clone)]
66pub struct SchemaProbeArgs {
67 #[arg(short = 'i', long = "input")]
69 pub input: PathBuf,
70 #[arg(long = "sample-rows", default_value_t = 2000)]
72 pub sample_rows: usize,
73 #[arg(long, value_parser = parse_delimiter)]
75 pub delimiter: Option<u8>,
76 #[arg(long = "input-encoding")]
78 pub input_encoding: Option<String>,
79 #[arg(long = "mapping")]
81 pub mapping: bool,
82 #[arg(long = "override", action = clap::ArgAction::Append)]
84 pub overrides: Vec<String>,
85 #[arg(long = "snapshot")]
87 pub snapshot: Option<PathBuf>,
88 #[arg(long = "na-behavior", value_enum, default_value = "empty")]
90 pub na_behavior: NaPlaceholderBehavior,
91 #[arg(long = "na-fill")]
93 pub na_fill: Option<String>,
94 #[arg(long = "assume-header", value_name = "true|false")]
96 pub assume_header: Option<bool>,
97}
98
99#[derive(Debug, Args, Clone)]
100pub struct SchemaInferArgs {
101 #[command(flatten)]
102 pub probe: SchemaProbeArgs,
103 #[arg(short = 'o', long = "output", alias = "schema", short_alias = 'm')]
105 pub output: Option<PathBuf>,
106 #[arg(long = "replace-template")]
108 pub replace_template: bool,
109 #[arg(long = "preview")]
111 pub preview: bool,
112 #[arg(long = "diff")]
114 pub diff: Option<PathBuf>,
115}
116
117#[derive(Debug, Args, Clone)]
118pub struct SchemaVerifyArgs {
119 #[arg(short = 'm', long = "schema", alias = "meta")]
121 pub schema: PathBuf,
122 #[arg(short = 'i', long = "input", required = true, action = clap::ArgAction::Append)]
124 pub inputs: Vec<PathBuf>,
125 #[arg(long, value_parser = parse_delimiter)]
127 pub delimiter: Option<u8>,
128 #[arg(long = "input-encoding")]
130 pub input_encoding: Option<String>,
131 #[arg(long = "report-invalid", value_name = "OPTIONS", num_args = 0..=3)]
133 pub report_invalid: Option<Vec<String>>,
134}
135
136#[derive(Debug, Args)]
137pub struct IndexArgs {
138 #[arg(short, long)]
140 pub input: PathBuf,
141 #[arg(short = 'o', long = "index")]
143 pub index: PathBuf,
144 #[arg(short = 'C', long = "columns", value_delimiter = ',')]
146 pub columns: Vec<String>,
147 #[arg(long = "spec", action = clap::ArgAction::Append)]
149 pub specs: Vec<String>,
150 #[arg(long = "covering", action = clap::ArgAction::Append)]
152 pub coverings: Vec<String>,
153 #[arg(short = 'm', long = "schema", alias = "meta")]
155 pub schema: Option<PathBuf>,
156 #[arg(long)]
158 pub limit: Option<usize>,
159 #[arg(long, value_parser = parse_delimiter)]
161 pub delimiter: Option<u8>,
162 #[arg(long = "input-encoding")]
164 pub input_encoding: Option<String>,
165}
166
167#[derive(Debug, Args)]
168pub struct ProcessArgs {
169 #[arg(short = 'i', long = "input")]
171 pub input: PathBuf,
172 #[arg(short = 'o', long = "output")]
174 pub output: Option<PathBuf>,
175 #[arg(short = 'm', long = "schema", alias = "meta")]
177 pub schema: Option<PathBuf>,
178 #[arg(short = 'x', long = "index")]
180 pub index: Option<PathBuf>,
181 #[arg(long = "index-variant")]
183 pub index_variant: Option<String>,
184 #[arg(long = "sort", action = clap::ArgAction::Append)]
186 pub sort: Vec<String>,
187 #[arg(short = 'C', long = "columns", action = clap::ArgAction::Append)]
189 pub columns: Vec<String>,
190 #[arg(long = "exclude-columns", action = clap::ArgAction::Append)]
192 pub exclude_columns: Vec<String>,
193 #[arg(long = "derive", action = clap::ArgAction::Append)]
195 pub derives: Vec<String>,
196 #[arg(long = "filter", action = clap::ArgAction::Append)]
198 pub filters: Vec<String>,
199 #[arg(long = "filter-expr", action = clap::ArgAction::Append)]
201 pub filter_exprs: Vec<String>,
202 #[arg(long = "row-numbers")]
204 pub row_numbers: bool,
205 #[arg(long)]
207 pub limit: Option<usize>,
208 #[arg(long, value_parser = parse_delimiter)]
210 pub delimiter: Option<u8>,
211 #[arg(long = "output-delimiter", value_parser = parse_delimiter)]
213 pub output_delimiter: Option<u8>,
214 #[arg(long = "input-encoding")]
216 pub input_encoding: Option<String>,
217 #[arg(long = "output-encoding")]
219 pub output_encoding: Option<String>,
220 #[arg(long = "boolean-format", default_value = "original")]
222 pub boolean_format: BooleanFormat,
223 #[arg(long = "preview")]
225 pub preview: bool,
226 #[arg(long = "table")]
228 pub table: bool,
229 #[arg(long = "apply-mappings")]
231 pub apply_mappings: bool,
232 #[arg(long = "skip-mappings")]
234 pub skip_mappings: bool,
235}
236
237#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq, Default)]
238#[value(rename_all = "kebab-case")]
239pub enum BooleanFormat {
240 #[default]
241 Original,
242 TrueFalse,
243 OneZero,
244}
245
246#[derive(Debug, Args)]
247pub struct AppendArgs {
248 #[arg(short = 'i', long = "input", required = true, action = clap::ArgAction::Append)]
250 pub inputs: Vec<PathBuf>,
251 #[arg(short = 'o', long = "output")]
253 pub output: Option<PathBuf>,
254 #[arg(short = 'm', long = "schema", alias = "meta")]
256 pub schema: Option<PathBuf>,
257 #[arg(long, value_parser = parse_delimiter)]
259 pub delimiter: Option<u8>,
260 #[arg(long = "input-encoding")]
262 pub input_encoding: Option<String>,
263 #[arg(long = "output-encoding")]
265 pub output_encoding: Option<String>,
266}
267
268#[derive(Debug, Args)]
269pub struct StatsArgs {
270 #[arg(short = 'i', long = "input")]
272 pub input: PathBuf,
273 #[arg(short = 'm', long = "schema", alias = "meta")]
275 pub schema: Option<PathBuf>,
276 #[arg(short = 'C', long = "columns", action = clap::ArgAction::Append)]
278 pub columns: Vec<String>,
279 #[arg(long = "filter", action = clap::ArgAction::Append)]
281 pub filters: Vec<String>,
282 #[arg(long = "filter-expr", action = clap::ArgAction::Append)]
284 pub filter_exprs: Vec<String>,
285 #[arg(long, value_parser = parse_delimiter)]
287 pub delimiter: Option<u8>,
288 #[arg(long = "input-encoding")]
290 pub input_encoding: Option<String>,
291 #[arg(long, default_value_t = 0)]
293 pub limit: usize,
294 #[arg(long)]
296 pub frequency: bool,
297 #[arg(long, default_value_t = 0)]
299 pub top: usize,
300}
301
302#[derive(Debug, Args)]
303pub struct SchemaColumnsArgs {
304 #[arg(short = 'm', long = "schema", alias = "meta")]
306 pub schema: PathBuf,
307}
308
309#[derive(Debug, Clone, Copy, ValueEnum)]
310#[value(rename_all = "kebab-case")]
311pub enum JoinKind {
312 Inner,
313 Left,
314 Right,
315 Full,
316}
317
318#[derive(Debug, Args)]
319pub struct JoinArgs {
320 #[arg(long = "left")]
322 pub left: PathBuf,
323 #[arg(long = "right")]
325 pub right: PathBuf,
326 #[arg(short = 'o', long = "output")]
328 pub output: Option<PathBuf>,
329 #[arg(long = "left-key")]
331 pub left_key: String,
332 #[arg(long = "right-key")]
334 pub right_key: String,
335 #[arg(long = "type", value_enum, default_value = "inner")]
337 pub kind: JoinKind,
338 #[arg(long = "left-schema", alias = "left-meta")]
340 pub left_schema: Option<PathBuf>,
341 #[arg(long = "right-schema", alias = "right-meta")]
343 pub right_schema: Option<PathBuf>,
344 #[arg(long = "delimiter", value_parser = parse_delimiter)]
346 pub delimiter: Option<u8>,
347 #[arg(long = "left-encoding")]
349 pub left_encoding: Option<String>,
350 #[arg(long = "right-encoding")]
352 pub right_encoding: Option<String>,
353 #[arg(long = "output-encoding")]
355 pub output_encoding: Option<String>,
356}
357
358#[derive(Debug, Args)]
359pub struct InstallArgs {
360 #[arg(long)]
362 pub version: Option<String>,
363 #[arg(long)]
365 pub force: bool,
366 #[arg(long)]
368 pub locked: bool,
369 #[arg(long)]
371 pub root: Option<PathBuf>,
372}
373
/// Parses a delimiter argument into a single ASCII byte.
///
/// Accepted spellings:
/// - the names `tab`, `comma`, `pipe` and `semicolon`;
/// - the two-character escape `\t` (a backslash followed by `t`), which is
///   what a shell hands over for `--delimiter '\t'`;
/// - any single ASCII character, which is used as-is.
///
/// # Errors
///
/// Returns a human-readable message when `value` is empty, when it is longer
/// than one character (and is not one of the names or the escape above), or
/// when its single character is not ASCII.
pub fn parse_delimiter(value: &str) -> Result<u8, String> {
    match value {
        "tab" | "\t" | "\\t" => Ok(b'\t'),
        "comma" | "," => Ok(b','),
        "pipe" | "|" => Ok(b'|'),
        "semicolon" | ";" => Ok(b';'),
        other => {
            let mut chars = other.chars();
            // Tuple fields are evaluated left to right, so this pulls the
            // first and then the second character (if any).
            match (chars.next(), chars.next()) {
                (None, _) => Err("Delimiter cannot be empty".to_string()),
                (Some(_), Some(_)) => Err("Delimiter must be a single character".to_string()),
                // The guard ensures the char fits in one byte, so the cast
                // below cannot truncate.
                (Some(ch), None) if ch.is_ascii() => Ok(ch as u8),
                (Some(_), None) => Err("Delimiter must be ASCII".to_string()),
            }
        }
    }
}