1use clap::{CommandFactory, Parser, ValueEnum};
7use std::path::Path;
8
/// Data file formats known to the command line.
///
/// This is the value type of the `--format` option on `Args` and the result
/// of the extension-based detection in `FileFormat::from_path` and
/// `FileFormat::from_extension`.
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
pub enum FileFormat {
    // Variant notes are plain `//` comments on purpose: clap's `ValueEnum`
    // derive uses `///` docs on variants as possible-value help text, so
    // adding doc comments here would change the CLI help output.

    // Detected from the `parquet` extension.
    Parquet,
    // Detected from the `csv` extension.
    Csv,
    // Detected from the `tsv` extension.
    Tsv,
    // Detected from the `psv` extension.
    Psv,
    // Detected from the `json` extension.
    Json,
    // Detected from the `jsonl` and `ndjson` extensions.
    Jsonl,
    // Detected from the `arrow`, `ipc` and `feather` extensions.
    Arrow,
    // Detected from the `avro` extension.
    Avro,
    // Detected from the `orc` extension.
    Orc,
    // Detected from the `xls`, `xlsx`, `xlsm` and `xlsb` extensions.
    Excel,
}
34
35impl FileFormat {
36 pub fn from_path(path: &Path) -> Option<Self> {
38 path.extension()
39 .and_then(|e| e.to_str())
40 .and_then(Self::from_extension)
41 }
42
43 pub fn from_extension(ext: &str) -> Option<Self> {
45 match ext.to_lowercase().as_str() {
46 "parquet" => Some(Self::Parquet),
47 "csv" => Some(Self::Csv),
48 "tsv" => Some(Self::Tsv),
49 "psv" => Some(Self::Psv),
50 "json" => Some(Self::Json),
51 "jsonl" | "ndjson" => Some(Self::Jsonl),
52 "arrow" | "ipc" | "feather" => Some(Self::Arrow),
53 "avro" => Some(Self::Avro),
54 "orc" => Some(Self::Orc),
55 "xls" | "xlsx" | "xlsm" | "xlsb" => Some(Self::Excel),
56 _ => None,
57 }
58 }
59}
60
/// Compression codecs known to the command line.
///
/// This is the value type of the `--compression` option on `Args` and the
/// result of `CompressionFormat::from_extension`; the canonical extension for
/// each codec is returned by `CompressionFormat::extension`.
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
pub enum CompressionFormat {
    // Variant notes are plain `//` comments on purpose: clap's `ValueEnum`
    // derive uses `///` docs on variants as possible-value help text, so
    // adding doc comments here would change the CLI help output.

    // Detected from `gz`; canonical extension `gz`.
    Gzip,
    // Detected from `zst` or `zstd`; canonical extension `zst`.
    Zstd,
    // Detected from `bz2` or `bz`; canonical extension `bz2`.
    Bzip2,
    // Detected from `xz`; canonical extension `xz`.
    Xz,
}
73
74impl CompressionFormat {
75 pub fn from_extension(path: &Path) -> Option<Self> {
77 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
78 match ext.to_lowercase().as_str() {
79 "gz" => Some(Self::Gzip),
80 "zst" | "zstd" => Some(Self::Zstd),
81 "bz2" | "bz" => Some(Self::Bzip2),
82 "xz" => Some(Self::Xz),
83 _ => None,
84 }
85 } else {
86 None
87 }
88 }
89
90 pub fn extension(&self) -> &'static str {
92 match self {
93 Self::Gzip => "gz",
94 Self::Zstd => "zst",
95 Self::Bzip2 => "bz2",
96 Self::Xz => "xz",
97 }
98 }
99}
100
// Command-line arguments for the `datui` binary, parsed with clap's derive API.
//
// The long description is embedded at compile time from `../long_about.txt`.
//
// Comments in this struct are deliberately plain `//` comments: clap's derive
// uses `///` doc comments on fields as help text, which would change what
// `--help` prints and what `render_options_markdown` puts in its table.
//
// The notes below describe only how each value is parsed. What an option
// ultimately does is decided by the code that consumes `Args`, which is not
// visible here.
#[derive(Clone, Parser, Debug)]
#[command(
    name = "datui",
    version,
    about = "Data Exploration in the Terminal",
    long_about = include_str!("../long_about.txt")
)]
pub struct Args {
    // Positional `<PATH>` values, one or more. Required unless
    // `generate_config`, `clear_cache` or `remove_templates` is present.
    #[arg(required_unless_present_any = ["generate_config", "clear_cache", "remove_templates"], num_args = 1.., value_name = "PATH")]
    pub paths: Vec<std::path::PathBuf>,

    // `--skip-lines`: optional unsigned count.
    #[arg(long = "skip-lines")]
    pub skip_lines: Option<usize>,

    // `--skip-rows`: optional unsigned count.
    #[arg(long = "skip-rows")]
    pub skip_rows: Option<usize>,

    // `--skip-tail-rows <N>`: optional unsigned count.
    #[arg(long = "skip-tail-rows", value_name = "N")]
    pub skip_tail_rows: Option<usize>,

    // `--no-header`: declared as `Option<bool>` without `action`.
    // NOTE(review): this presumably makes the option take an explicit boolean
    // value rather than act as a bare switch — confirm that is intended.
    #[arg(long = "no-header")]
    pub no_header: Option<bool>,

    // `--delimiter`: optional `u8`.
    // NOTE(review): with no custom parser this is presumably read as a number
    // (e.g. 44), not as a literal character — confirm against clap's `u8` parser.
    #[arg(long = "delimiter")]
    pub delimiter: Option<u8>,

    // `--infer-schema-length <N>`: optional unsigned count.
    #[arg(long = "infer-schema-length", value_name = "N")]
    pub infer_schema_length: Option<usize>,

    // `--ignore-errors <BOOL>`: optional explicit boolean.
    #[arg(long = "ignore-errors", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub ignore_errors: Option<bool>,

    // `--null-value <VAL>`: string values collected into a list.
    #[arg(long = "null-value", value_name = "VAL")]
    pub null_value: Vec<String>,

    // `--compression`: optional; one of the `CompressionFormat` variants.
    #[arg(long = "compression", value_enum)]
    pub compression: Option<CompressionFormat>,

    // `--format`: optional; one of the `FileFormat` variants.
    #[arg(long = "format", value_enum)]
    pub format: Option<FileFormat>,

    // `--debug`: boolean switch.
    #[arg(long = "debug", action)]
    pub debug: bool,

    // `--hive`: boolean switch.
    #[arg(long = "hive", action)]
    pub hive: bool,

    // `--single-spine-schema <BOOL>`: optional explicit boolean.
    #[arg(long = "single-spine-schema", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub single_spine_schema: Option<bool>,

    // `--parse-dates <BOOL>`: optional explicit boolean.
    #[arg(long = "parse-dates", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub parse_dates: Option<bool>,

    // `--parse-strings [COL...]`: accepts zero or more values; when the flag
    // is given without a value, the empty string is used as the missing value.
    #[arg(long = "parse-strings", value_name = "COL", num_args = 0.., default_missing_value = "")]
    pub parse_strings: Vec<String>,

    // `--no-parse-strings`: boolean switch.
    #[arg(long = "no-parse-strings", action)]
    pub no_parse_strings: bool,

    // `--decompress-in-memory [BOOL]`: accepts zero or one value; the bare
    // flag is read as "true".
    #[arg(long = "decompress-in-memory", default_missing_value = "true", num_args = 0..=1, value_parser = clap::value_parser!(bool))]
    pub decompress_in_memory: Option<bool>,

    // `--temp-dir <DIR>`: optional path.
    #[arg(long = "temp-dir", value_name = "DIR")]
    pub temp_dir: Option<std::path::PathBuf>,

    // `--sheet <SHEET>`: optional string. Note the flag is `--sheet` even
    // though the field is named `excel_sheet`.
    #[arg(long = "sheet", value_name = "SHEET")]
    pub excel_sheet: Option<String>,

    // `--clear-cache`: boolean switch; its presence makes `paths` optional.
    #[arg(long = "clear-cache", action)]
    pub clear_cache: bool,

    // `--template`: optional string.
    #[arg(long = "template")]
    pub template: Option<String>,

    // `--remove-templates`: boolean switch; its presence makes `paths` optional.
    #[arg(long = "remove-templates", action)]
    pub remove_templates: bool,

    // `--sampling-threshold <N>`: optional unsigned count.
    #[arg(long = "sampling-threshold", value_name = "N")]
    pub sampling_threshold: Option<usize>,

    // `--polars-streaming <BOOL>`: optional explicit boolean.
    #[arg(long = "polars-streaming", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub polars_streaming: Option<bool>,

    // `--workaround-pivot-date-index <BOOL>`: optional explicit boolean.
    #[arg(long = "workaround-pivot-date-index", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub workaround_pivot_date_index: Option<bool>,

    // `--pages-lookahead`: optional unsigned count.
    #[arg(long = "pages-lookahead")]
    pub pages_lookahead: Option<usize>,

    // `--pages-lookback`: optional unsigned count.
    #[arg(long = "pages-lookback")]
    pub pages_lookback: Option<usize>,

    // `--row-numbers`: boolean switch.
    #[arg(long = "row-numbers", action)]
    pub row_numbers: bool,

    // `--row-start-index`: optional unsigned integer.
    #[arg(long = "row-start-index")]
    pub row_start_index: Option<usize>,

    // `--column-colors <BOOL>`: optional explicit boolean.
    #[arg(long = "column-colors", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub column_colors: Option<bool>,

    // `--generate-config`: boolean switch; its presence makes `paths` optional.
    #[arg(long = "generate-config", action)]
    pub generate_config: bool,

    // `--force`: boolean switch that is only accepted together with
    // `--generate-config`.
    #[arg(long = "force", requires = "generate_config", action)]
    pub force: bool,

    // `--s3-endpoint-url <URL>`: optional string.
    #[arg(long = "s3-endpoint-url", value_name = "URL")]
    pub s3_endpoint_url: Option<String>,

    // `--s3-access-key-id <KEY>`: optional string.
    #[arg(long = "s3-access-key-id", value_name = "KEY")]
    pub s3_access_key_id: Option<String>,

    // `--s3-secret-access-key <SECRET>`: optional string.
    #[arg(long = "s3-secret-access-key", value_name = "SECRET")]
    pub s3_secret_access_key: Option<String>,

    // `--s3-region <REGION>`: optional string.
    #[arg(long = "s3-region", value_name = "REGION")]
    pub s3_region: Option<String>,
}
265
/// Makes `s` safe to place inside a single Markdown table cell.
///
/// Every `|` is prefixed with a backslash so it is not read as a column
/// separator, and every `\n` or `\r` becomes one space so the row stays on a
/// single line.
fn escape_table_cell(s: &str) -> String {
    let mut cell = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '|' => cell.push_str("\\|"),
            '\n' | '\r' => cell.push(' '),
            other => cell.push(other),
        }
    }
    cell
}
270
271pub fn render_options_markdown() -> String {
276 let mut cmd = Args::command();
277 cmd.build();
278
279 let mut out = String::from("# Command Line Options\n\n");
280
281 out.push_str("## Usage\n\n```\n");
282 let usage = cmd.render_usage();
283 out.push_str(&usage.to_string());
284 out.push_str("\n```\n\n");
285
286 out.push_str("## Options\n\n");
287 out.push_str("| Option | Description |\n");
288 out.push_str("|--------|-------------|\n");
289
290 for arg in cmd.get_arguments() {
291 let id = arg.get_id().as_ref().to_string();
292 if id == "help" || id == "version" {
293 continue;
294 }
295
296 let option_str = if arg.is_positional() {
297 let placeholder: String = arg
298 .get_value_names()
299 .map(|names| {
300 names
301 .iter()
302 .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
303 .collect::<Vec<_>>()
304 .join(" ")
305 })
306 .unwrap_or_default();
307 if arg.is_required_set() {
308 placeholder
309 } else {
310 format!("[{placeholder}]")
311 }
312 } else {
313 let mut parts = Vec::new();
314 if let Some(s) = arg.get_short() {
315 parts.push(format!("-{s}"));
316 }
317 if let Some(l) = arg.get_long() {
318 parts.push(format!("--{l}"));
319 }
320 let op = parts.join(", ");
321 let takes_val = arg.get_action().takes_values();
322 let placeholder: String = if takes_val {
323 arg.get_value_names()
324 .map(|names| {
325 names
326 .iter()
327 .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
328 .collect::<Vec<_>>()
329 .join(" ")
330 })
331 .unwrap_or_default()
332 } else {
333 String::new()
334 };
335 if placeholder.is_empty() {
336 op
337 } else {
338 format!("{op} {placeholder}")
339 }
340 };
341
342 let help = arg
343 .get_help()
344 .map(|h| escape_table_cell(&h.to_string()))
345 .unwrap_or_else(|| "-".to_string());
346
347 out.push_str(&format!("| `{option_str}` | {help} |\n"));
348 }
349
350 out
351}
352
#[cfg(test)]
mod tests {
    use super::*;

    /// Each compression extension maps to its codec; anything else is `None`.
    #[test]
    fn test_compression_detection() {
        let cases = [
            ("file.csv.gz", Some(CompressionFormat::Gzip)),
            ("file.csv.zst", Some(CompressionFormat::Zstd)),
            ("file.csv.bz2", Some(CompressionFormat::Bzip2)),
            ("file.csv.xz", Some(CompressionFormat::Xz)),
            ("file.csv", None),
            ("file", None),
        ];

        for (path, expected) in cases {
            assert_eq!(CompressionFormat::from_extension(Path::new(path)), expected);
        }
    }

    /// Every codec reports its canonical extension.
    #[test]
    fn test_compression_extension() {
        let cases = [
            (CompressionFormat::Gzip, "gz"),
            (CompressionFormat::Zstd, "zst"),
            (CompressionFormat::Bzip2, "bz2"),
            (CompressionFormat::Xz, "xz"),
        ];

        for (codec, expected) in cases {
            assert_eq!(codec.extension(), expected);
        }
    }

    /// Format detection uses the path's extension and ignores letter case.
    #[test]
    fn test_file_format_from_path() {
        let cases = [
            ("data.parquet", Some(FileFormat::Parquet)),
            ("data.csv", Some(FileFormat::Csv)),
            ("file.jsonl", Some(FileFormat::Jsonl)),
            ("noext", None),
            ("file.NDJSON", Some(FileFormat::Jsonl)),
        ];

        for (path, expected) in cases {
            assert_eq!(FileFormat::from_path(Path::new(path)), expected);
        }
    }
}