1use clap::{CommandFactory, Parser, ValueEnum};
7use std::path::Path;
8
// Data file formats the application can be told about, either explicitly via
// the `--format` option (this enum derives `ValueEnum`) or through the
// extension lookup in `FileFormat::from_extension`.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros
// turn `///` doc comments on `ValueEnum` variants into help text, so adding
// them would alter the generated `--help` output.
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
pub enum FileFormat {
    // Extension: `parquet`.
    Parquet,
    // Extension: `csv`.
    Csv,
    // Extension: `tsv`.
    Tsv,
    // Extension: `psv`.
    Psv,
    // Extension: `json`.
    Json,
    // Extensions: `jsonl`, `ndjson`.
    Jsonl,
    // Extensions: `arrow`, `ipc`, `feather`.
    Arrow,
    // Extension: `avro`.
    Avro,
    // Extension: `orc`.
    Orc,
    // Extensions: `xls`, `xlsx`, `xlsm`, `xlsb`.
    Excel,
}
34
35impl FileFormat {
36 pub fn from_path(path: &Path) -> Option<Self> {
38 path.extension()
39 .and_then(|e| e.to_str())
40 .and_then(Self::from_extension)
41 }
42
43 pub fn from_extension(ext: &str) -> Option<Self> {
45 match ext.to_lowercase().as_str() {
46 "parquet" => Some(Self::Parquet),
47 "csv" => Some(Self::Csv),
48 "tsv" => Some(Self::Tsv),
49 "psv" => Some(Self::Psv),
50 "json" => Some(Self::Json),
51 "jsonl" | "ndjson" => Some(Self::Jsonl),
52 "arrow" | "ipc" | "feather" => Some(Self::Arrow),
53 "avro" => Some(Self::Avro),
54 "orc" => Some(Self::Orc),
55 "xls" | "xlsx" | "xlsm" | "xlsb" => Some(Self::Excel),
56 _ => None,
57 }
58 }
59}
60
// Compression schemes selectable with the `--compression` option (this enum
// derives `ValueEnum`) or detected from a path's extension by
// `CompressionFormat::from_extension`.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros
// turn `///` doc comments on `ValueEnum` variants into help text, so adding
// them would alter the generated `--help` output.
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
pub enum CompressionFormat {
    // Detected from `gz`; canonical extension `gz`.
    Gzip,
    // Detected from `zst` or `zstd`; canonical extension `zst`.
    Zstd,
    // Detected from `bz2` or `bz`; canonical extension `bz2`.
    Bzip2,
    // Detected from `xz`; canonical extension `xz`.
    Xz,
}
73
74impl CompressionFormat {
75 pub fn from_extension(path: &Path) -> Option<Self> {
77 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
78 match ext.to_lowercase().as_str() {
79 "gz" => Some(Self::Gzip),
80 "zst" | "zstd" => Some(Self::Zstd),
81 "bz2" | "bz" => Some(Self::Bzip2),
82 "xz" => Some(Self::Xz),
83 _ => None,
84 }
85 } else {
86 None
87 }
88 }
89
90 pub fn extension(&self) -> &'static str {
92 match self {
93 Self::Gzip => "gz",
94 Self::Zstd => "zst",
95 Self::Bzip2 => "bz2",
96 Self::Xz => "xz",
97 }
98 }
99}
100
// Command-line arguments for the `datui` binary, parsed with clap's derive
// API.
//
// The `#[command]` attribute sets the program name, takes the version from the
// crate metadata, and embeds `../long_about.txt` at compile time as the long
// description.
//
// NOTE: the field notes below are deliberately plain `//` comments. clap
// derive turns `///` doc comments on fields into help text, which would change
// `--help` and the table produced by `render_options_markdown`.
#[derive(Clone, Parser, Debug)]
#[command(
    name = "datui",
    version,
    about = "Data Exploration in the Terminal",
    long_about = include_str!("../long_about.txt")
)]
pub struct Args {
    // Positional PATH values (one or more). Required unless one of
    // `generate_config`, `clear_cache` or `remove_templates` is present.
    #[arg(required_unless_present_any = ["generate_config", "clear_cache", "remove_templates"], num_args = 1.., value_name = "PATH")]
    pub paths: Vec<std::path::PathBuf>,

    // Optional count from `--skip-lines`; `None` when the option is absent.
    #[arg(long = "skip-lines")]
    pub skip_lines: Option<usize>,

    // Optional count from `--skip-rows`; `None` when the option is absent.
    #[arg(long = "skip-rows")]
    pub skip_rows: Option<usize>,

    // Optional count from `--skip-tail-rows <N>`.
    #[arg(long = "skip-tail-rows", value_name = "N")]
    pub skip_tail_rows: Option<usize>,

    // Optional boolean from `--no-header`. As an `Option<bool>` this expects
    // an explicit `true`/`false` value rather than acting as a bare switch.
    #[arg(long = "no-header")]
    pub no_header: Option<bool>,

    // Optional byte from `--delimiter`.
    // NOTE(review): a `u8` field is parsed by clap as a number in 0..=255
    // (e.g. `44`), not as a literal character — confirm this is intended.
    #[arg(long = "delimiter")]
    pub delimiter: Option<u8>,

    // Values collected from `--null-value <VAL>`; the option may be repeated.
    // Empty when never given.
    #[arg(long = "null-value", value_name = "VAL")]
    pub null_value: Vec<String>,

    // Explicit compression choice from `--compression`, restricted to the
    // `CompressionFormat` variants.
    #[arg(long = "compression", value_enum)]
    pub compression: Option<CompressionFormat>,

    // Explicit file format from `--format`, restricted to the `FileFormat`
    // variants.
    #[arg(long = "format", value_enum)]
    pub format: Option<FileFormat>,

    // Switch: `true` when `--debug` is present.
    #[arg(long = "debug", action)]
    pub debug: bool,

    // Switch: `true` when `--hive` is present.
    #[arg(long = "hive", action)]
    pub hive: bool,

    // Optional boolean from `--single-spine-schema <BOOL>`.
    #[arg(long = "single-spine-schema", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub single_spine_schema: Option<bool>,

    // Optional boolean from `--parse-dates <BOOL>`.
    #[arg(long = "parse-dates", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub parse_dates: Option<bool>,

    // Zero or more COL values from `--parse-strings`. When the option is
    // given without any value, the `default_missing_value` of "" is stored.
    #[arg(long = "parse-strings", value_name = "COL", num_args = 0.., default_missing_value = "")]
    pub parse_strings: Vec<String>,

    // Switch: `true` when `--no-parse-strings` is present.
    #[arg(long = "no-parse-strings", action)]
    pub no_parse_strings: bool,

    // Optional boolean from `--decompress-in-memory`. The value may be
    // omitted (`num_args = 0..=1`), in which case it defaults to `true`.
    #[arg(long = "decompress-in-memory", default_missing_value = "true", num_args = 0..=1, value_parser = clap::value_parser!(bool))]
    pub decompress_in_memory: Option<bool>,

    // Optional directory from `--temp-dir <DIR>`.
    #[arg(long = "temp-dir", value_name = "DIR")]
    pub temp_dir: Option<std::path::PathBuf>,

    // Optional string from `--sheet <SHEET>` (note the flag name differs from
    // the field name).
    #[arg(long = "sheet", value_name = "SHEET")]
    pub excel_sheet: Option<String>,

    // Switch: `true` when `--clear-cache` is present. Its presence also lifts
    // the requirement to pass a PATH.
    #[arg(long = "clear-cache", action)]
    pub clear_cache: bool,

    // Optional string from `--template`.
    #[arg(long = "template")]
    pub template: Option<String>,

    // Switch: `true` when `--remove-templates` is present. Its presence also
    // lifts the requirement to pass a PATH.
    #[arg(long = "remove-templates", action)]
    pub remove_templates: bool,

    // Optional count from `--sampling-threshold <N>`.
    #[arg(long = "sampling-threshold", value_name = "N")]
    pub sampling_threshold: Option<usize>,

    // Optional boolean from `--polars-streaming <BOOL>`.
    #[arg(long = "polars-streaming", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub polars_streaming: Option<bool>,

    // Optional boolean from `--workaround-pivot-date-index <BOOL>`.
    #[arg(long = "workaround-pivot-date-index", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub workaround_pivot_date_index: Option<bool>,

    // Optional count from `--pages-lookahead`.
    #[arg(long = "pages-lookahead")]
    pub pages_lookahead: Option<usize>,

    // Optional count from `--pages-lookback`.
    #[arg(long = "pages-lookback")]
    pub pages_lookback: Option<usize>,

    // Switch: `true` when `--row-numbers` is present.
    #[arg(long = "row-numbers", action)]
    pub row_numbers: bool,

    // Optional index from `--row-start-index`.
    #[arg(long = "row-start-index")]
    pub row_start_index: Option<usize>,

    // Optional boolean from `--column-colors <BOOL>`.
    #[arg(long = "column-colors", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub column_colors: Option<bool>,

    // Switch: `true` when `--generate-config` is present. Its presence also
    // lifts the requirement to pass a PATH.
    #[arg(long = "generate-config", action)]
    pub generate_config: bool,

    // Switch: `true` when `--force` is present. Only accepted together with
    // `--generate-config` (`requires = "generate_config"`).
    #[arg(long = "force", requires = "generate_config", action)]
    pub force: bool,

    // Optional string from `--s3-endpoint-url <URL>`.
    #[arg(long = "s3-endpoint-url", value_name = "URL")]
    pub s3_endpoint_url: Option<String>,

    // Optional string from `--s3-access-key-id <KEY>`.
    #[arg(long = "s3-access-key-id", value_name = "KEY")]
    pub s3_access_key_id: Option<String>,

    // Optional string from `--s3-secret-access-key <SECRET>`.
    // NOTE(review): values passed on the command line are typically visible
    // in process listings and shell history — confirm this is acceptable.
    #[arg(long = "s3-secret-access-key", value_name = "SECRET")]
    pub s3_secret_access_key: Option<String>,

    // Optional string from `--s3-region <REGION>`.
    #[arg(long = "s3-region", value_name = "REGION")]
    pub s3_region: Option<String>,
}
257
/// Makes `s` safe to embed in a single Markdown table cell.
///
/// Every `|` is prefixed with a backslash so it is not read as a column
/// separator, and each `\n` or `\r` is replaced by one space so the cell stays
/// on a single line (a `\r\n` pair therefore becomes two spaces).
fn escape_table_cell(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '|' => escaped.push_str("\\|"),
            '\n' | '\r' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped
}
262
263pub fn render_options_markdown() -> String {
268 let mut cmd = Args::command();
269 cmd.build();
270
271 let mut out = String::from("# Command Line Options\n\n");
272
273 out.push_str("## Usage\n\n```\n");
274 let usage = cmd.render_usage();
275 out.push_str(&usage.to_string());
276 out.push_str("\n```\n\n");
277
278 out.push_str("## Options\n\n");
279 out.push_str("| Option | Description |\n");
280 out.push_str("|--------|-------------|\n");
281
282 for arg in cmd.get_arguments() {
283 let id = arg.get_id().as_ref().to_string();
284 if id == "help" || id == "version" {
285 continue;
286 }
287
288 let option_str = if arg.is_positional() {
289 let placeholder: String = arg
290 .get_value_names()
291 .map(|names| {
292 names
293 .iter()
294 .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
295 .collect::<Vec<_>>()
296 .join(" ")
297 })
298 .unwrap_or_default();
299 if arg.is_required_set() {
300 placeholder
301 } else {
302 format!("[{placeholder}]")
303 }
304 } else {
305 let mut parts = Vec::new();
306 if let Some(s) = arg.get_short() {
307 parts.push(format!("-{s}"));
308 }
309 if let Some(l) = arg.get_long() {
310 parts.push(format!("--{l}"));
311 }
312 let op = parts.join(", ");
313 let takes_val = arg.get_action().takes_values();
314 let placeholder: String = if takes_val {
315 arg.get_value_names()
316 .map(|names| {
317 names
318 .iter()
319 .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
320 .collect::<Vec<_>>()
321 .join(" ")
322 })
323 .unwrap_or_default()
324 } else {
325 String::new()
326 };
327 if placeholder.is_empty() {
328 op
329 } else {
330 format!("{op} {placeholder}")
331 }
332 };
333
334 let help = arg
335 .get_help()
336 .map(|h| escape_table_cell(&h.to_string()))
337 .unwrap_or_else(|| "-".to_string());
338
339 out.push_str(&format!("| `{option_str}` | {help} |\n"));
340 }
341
342 out
343}
344
#[cfg(test)]
mod tests {
    use super::*;

    /// Known compression suffixes map to their variant; anything else, or a
    /// path without an extension, yields `None`.
    #[test]
    fn test_compression_detection() {
        let cases = [
            ("file.csv.gz", Some(CompressionFormat::Gzip)),
            ("file.csv.zst", Some(CompressionFormat::Zstd)),
            ("file.csv.bz2", Some(CompressionFormat::Bzip2)),
            ("file.csv.xz", Some(CompressionFormat::Xz)),
            ("file.csv", None),
            ("file", None),
        ];
        for (name, expected) in cases {
            assert_eq!(
                CompressionFormat::from_extension(Path::new(name)),
                expected
            );
        }
    }

    /// Each compression variant reports its canonical extension.
    #[test]
    fn test_compression_extension() {
        let cases = [
            (CompressionFormat::Gzip, "gz"),
            (CompressionFormat::Zstd, "zst"),
            (CompressionFormat::Bzip2, "bz2"),
            (CompressionFormat::Xz, "xz"),
        ];
        for (format, expected) in cases {
            assert_eq!(format.extension(), expected);
        }
    }

    /// File formats are detected from the path's extension, ignoring case;
    /// a path without an extension yields `None`.
    #[test]
    fn test_file_format_from_path() {
        let cases = [
            ("data.parquet", Some(FileFormat::Parquet)),
            ("data.csv", Some(FileFormat::Csv)),
            ("file.jsonl", Some(FileFormat::Jsonl)),
            ("noext", None),
            ("file.NDJSON", Some(FileFormat::Jsonl)),
        ];
        for (name, expected) in cases {
            assert_eq!(FileFormat::from_path(Path::new(name)), expected);
        }
    }
}