1use clap::{CommandFactory, Parser, ValueEnum};
7use std::path::Path;
8
// File formats that can be selected explicitly with `--format` or detected
// from a file extension by `FileFormat::from_extension`.
//
// NOTE: plain `//` comments are used on purpose. clap's `ValueEnum` derive
// turns `///` doc comments on variants into help text for the possible
// values, so adding them would change the CLI's `--help` output.
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
pub enum FileFormat {
    // Detected from the `parquet` extension.
    Parquet,
    // Detected from the `csv` extension.
    Csv,
    // Detected from the `tsv` extension.
    Tsv,
    // Detected from the `psv` extension.
    Psv,
    // Detected from the `json` extension.
    Json,
    // Detected from the `jsonl` and `ndjson` extensions.
    Jsonl,
    // Detected from the `arrow`, `ipc` and `feather` extensions.
    Arrow,
    // Detected from the `avro` extension.
    Avro,
    // Detected from the `orc` extension.
    Orc,
    // Detected from the `xls`, `xlsx`, `xlsm` and `xlsb` extensions.
    Excel,
}
34
35impl FileFormat {
36 pub fn from_path(path: &Path) -> Option<Self> {
38 path.extension()
39 .and_then(|e| e.to_str())
40 .and_then(Self::from_extension)
41 }
42
43 pub fn from_extension(ext: &str) -> Option<Self> {
45 match ext.to_lowercase().as_str() {
46 "parquet" => Some(Self::Parquet),
47 "csv" => Some(Self::Csv),
48 "tsv" => Some(Self::Tsv),
49 "psv" => Some(Self::Psv),
50 "json" => Some(Self::Json),
51 "jsonl" | "ndjson" => Some(Self::Jsonl),
52 "arrow" | "ipc" | "feather" => Some(Self::Arrow),
53 "avro" => Some(Self::Avro),
54 "orc" => Some(Self::Orc),
55 "xls" | "xlsx" | "xlsm" | "xlsb" => Some(Self::Excel),
56 _ => None,
57 }
58 }
59}
60
// Compression formats that can be selected with `--compression` or detected
// from a path's extension by `CompressionFormat::from_extension`.
//
// NOTE: plain `//` comments are used on purpose. clap's `ValueEnum` derive
// turns `///` doc comments on variants into help text for the possible
// values, so adding them would change the CLI's `--help` output.
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
pub enum CompressionFormat {
    // Detected from `gz`; canonical extension `gz`.
    Gzip,
    // Detected from `zst` or `zstd`; canonical extension `zst`.
    Zstd,
    // Detected from `bz2` or `bz`; canonical extension `bz2`.
    Bzip2,
    // Detected from `xz`; canonical extension `xz`.
    Xz,
}
73
74impl CompressionFormat {
75 pub fn from_extension(path: &Path) -> Option<Self> {
77 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
78 match ext.to_lowercase().as_str() {
79 "gz" => Some(Self::Gzip),
80 "zst" | "zstd" => Some(Self::Zstd),
81 "bz2" | "bz" => Some(Self::Bzip2),
82 "xz" => Some(Self::Xz),
83 _ => None,
84 }
85 } else {
86 None
87 }
88 }
89
90 pub fn extension(&self) -> &'static str {
92 match self {
93 Self::Gzip => "gz",
94 Self::Zstd => "zst",
95 Self::Bzip2 => "bz2",
96 Self::Xz => "xz",
97 }
98 }
99}
100
// Command-line arguments for the `datui` binary, parsed with clap's derive API.
//
// NOTE: comments in this struct are plain `//` on purpose. clap's derive
// macros turn `///` doc comments into help text, so adding them would change
// `--help` and the table produced by `render_options_markdown` (which prints
// "-" for arguments without help).
//
// Most fields are `Option<_>`, so "not given on the command line" is `None`.
// What each option ultimately does is decided by code outside this file;
// comments below that go beyond the clap attributes are marked as assumptions.
#[derive(Parser, Debug)]
#[command(
    name = "datui",
    version,
    about = "Data Exploration in the Terminal",
    long_about = include_str!("../long_about.txt")
)]
pub struct Args {
    // Positional PATH values (one or more). Required unless
    // --generate-config, --clear-cache or --remove-templates is present.
    #[arg(required_unless_present_any = ["generate_config", "clear_cache", "remove_templates"], num_args = 1.., value_name = "PATH")]
    pub paths: Vec<std::path::PathBuf>,

    // --skip-lines <N>. Presumably lines to skip before parsing starts — confirm against the loader.
    #[arg(long = "skip-lines")]
    pub skip_lines: Option<usize>,

    // --skip-rows <N>. Presumably data rows to skip — confirm against the loader.
    #[arg(long = "skip-rows")]
    pub skip_rows: Option<usize>,

    // --no-header. NOTE(review): as an `Option<bool>` without an explicit
    // action this appears to take a value (`--no-header true`) rather than
    // acting as a bare flag — confirm against the clap derive reference.
    #[arg(long = "no-header")]
    pub no_header: Option<bool>,

    // --delimiter. NOTE(review): typed as `u8`, so clap presumably parses a
    // numeric byte value (e.g. 44 for ','), not a literal character — confirm
    // this is the intended UX.
    #[arg(long = "delimiter")]
    pub delimiter: Option<u8>,

    // --null-value <VAL>. Collected into a Vec, so the option may be repeated.
    // Presumably strings to treat as null — confirm.
    #[arg(long = "null-value", value_name = "VAL")]
    pub null_value: Vec<String>,

    // --compression: one of the `CompressionFormat` variants.
    // Presumably overrides extension-based detection — confirm.
    #[arg(long = "compression", value_enum)]
    pub compression: Option<CompressionFormat>,

    // --format: one of the `FileFormat` variants.
    // Presumably overrides extension-based detection — confirm.
    #[arg(long = "format", value_enum)]
    pub format: Option<FileFormat>,

    // --debug flag; false when absent.
    #[arg(long = "debug", action)]
    pub debug: bool,

    // --hive flag; false when absent. Presumably enables hive-partitioned input handling — confirm.
    #[arg(long = "hive", action)]
    pub hive: bool,

    // --single-spine-schema <BOOL>; explicit true/false value.
    #[arg(long = "single-spine-schema", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub single_spine_schema: Option<bool>,

    // --parse-dates <BOOL>; explicit true/false value.
    #[arg(long = "parse-dates", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub parse_dates: Option<bool>,

    // --decompress-in-memory [BOOL]; the value is optional and a bare
    // `--decompress-in-memory` is treated as "true" (default_missing_value).
    #[arg(long = "decompress-in-memory", default_missing_value = "true", num_args = 0..=1, value_parser = clap::value_parser!(bool))]
    pub decompress_in_memory: Option<bool>,

    // --temp-dir <DIR>. Presumably where temporary files are written — confirm.
    #[arg(long = "temp-dir", value_name = "DIR")]
    pub temp_dir: Option<std::path::PathBuf>,

    // --sheet <SHEET>. Note the CLI name (`sheet`) differs from the field name.
    #[arg(long = "sheet", value_name = "SHEET")]
    pub excel_sheet: Option<String>,

    // --clear-cache flag; its presence also lifts the PATH requirement.
    #[arg(long = "clear-cache", action)]
    pub clear_cache: bool,

    // --template <TEMPLATE>. Presumably the name of a template to apply — confirm.
    #[arg(long = "template")]
    pub template: Option<String>,

    // --remove-templates flag; its presence also lifts the PATH requirement.
    #[arg(long = "remove-templates", action)]
    pub remove_templates: bool,

    // --sampling-threshold <N>.
    #[arg(long = "sampling-threshold", value_name = "N")]
    pub sampling_threshold: Option<usize>,

    // --polars-streaming <BOOL>; explicit true/false value.
    #[arg(long = "polars-streaming", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub polars_streaming: Option<bool>,

    // --workaround-pivot-date-index <BOOL>; explicit true/false value.
    #[arg(long = "workaround-pivot-date-index", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub workaround_pivot_date_index: Option<bool>,

    // --pages-lookahead <N>.
    #[arg(long = "pages-lookahead")]
    pub pages_lookahead: Option<usize>,

    // --pages-lookback <N>.
    #[arg(long = "pages-lookback")]
    pub pages_lookback: Option<usize>,

    // --row-numbers flag; false when absent.
    #[arg(long = "row-numbers", action)]
    pub row_numbers: bool,

    // --row-start-index <N>. Presumably the number shown for the first row — confirm.
    #[arg(long = "row-start-index")]
    pub row_start_index: Option<usize>,

    // --column-colors <BOOL>; explicit true/false value.
    #[arg(long = "column-colors", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub column_colors: Option<bool>,

    // --generate-config flag; its presence also lifts the PATH requirement.
    #[arg(long = "generate-config", action)]
    pub generate_config: bool,

    // --force flag; clap rejects it unless --generate-config is also given.
    #[arg(long = "force", requires = "generate_config", action)]
    pub force: bool,

    // --s3-endpoint-url <URL>.
    #[arg(long = "s3-endpoint-url", value_name = "URL")]
    pub s3_endpoint_url: Option<String>,

    // --s3-access-key-id <KEY>.
    #[arg(long = "s3-access-key-id", value_name = "KEY")]
    pub s3_access_key_id: Option<String>,

    // --s3-secret-access-key <SECRET>.
    // NOTE(review): secrets passed as command-line arguments are typically
    // visible to other local users (process list, shell history) — confirm an
    // alternative (environment/config) is available and documented.
    #[arg(long = "s3-secret-access-key", value_name = "SECRET")]
    pub s3_secret_access_key: Option<String>,

    // --s3-region <REGION>.
    #[arg(long = "s3-region", value_name = "REGION")]
    pub s3_region: Option<String>,
}
245
/// Makes `s` safe to place inside a single Markdown table cell.
///
/// Each `|` becomes `\|` so it is not read as a column separator, and each
/// `\n` or `\r` becomes one space so the cell stays on a single line (a
/// `\r\n` pair therefore yields two spaces).
fn escape_table_cell(s: &str) -> String {
    let mut cell = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '|' => cell.push_str("\\|"),
            '\n' | '\r' => cell.push(' '),
            other => cell.push(other),
        }
    }
    cell
}
250
251pub fn render_options_markdown() -> String {
256 let mut cmd = Args::command();
257 cmd.build();
258
259 let mut out = String::from("# Command Line Options\n\n");
260
261 out.push_str("## Usage\n\n```\n");
262 let usage = cmd.render_usage();
263 out.push_str(&usage.to_string());
264 out.push_str("\n```\n\n");
265
266 out.push_str("## Options\n\n");
267 out.push_str("| Option | Description |\n");
268 out.push_str("|--------|-------------|\n");
269
270 for arg in cmd.get_arguments() {
271 let id = arg.get_id().as_ref().to_string();
272 if id == "help" || id == "version" {
273 continue;
274 }
275
276 let option_str = if arg.is_positional() {
277 let placeholder: String = arg
278 .get_value_names()
279 .map(|names| {
280 names
281 .iter()
282 .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
283 .collect::<Vec<_>>()
284 .join(" ")
285 })
286 .unwrap_or_default();
287 if arg.is_required_set() {
288 placeholder
289 } else {
290 format!("[{placeholder}]")
291 }
292 } else {
293 let mut parts = Vec::new();
294 if let Some(s) = arg.get_short() {
295 parts.push(format!("-{s}"));
296 }
297 if let Some(l) = arg.get_long() {
298 parts.push(format!("--{l}"));
299 }
300 let op = parts.join(", ");
301 let takes_val = arg.get_action().takes_values();
302 let placeholder: String = if takes_val {
303 arg.get_value_names()
304 .map(|names| {
305 names
306 .iter()
307 .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
308 .collect::<Vec<_>>()
309 .join(" ")
310 })
311 .unwrap_or_default()
312 } else {
313 String::new()
314 };
315 if placeholder.is_empty() {
316 op
317 } else {
318 format!("{op} {placeholder}")
319 }
320 };
321
322 let help = arg
323 .get_help()
324 .map(|h| escape_table_cell(&h.to_string()))
325 .unwrap_or_else(|| "-".to_string());
326
327 out.push_str(&format!("| `{option_str}` | {help} |\n"));
328 }
329
330 out
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    /// Compression is detected from the last extension only; plain or
    /// extension-less names yield `None`.
    #[test]
    fn test_compression_detection() {
        let cases = [
            ("file.csv.gz", Some(CompressionFormat::Gzip)),
            ("file.csv.zst", Some(CompressionFormat::Zstd)),
            ("file.csv.bz2", Some(CompressionFormat::Bzip2)),
            ("file.csv.xz", Some(CompressionFormat::Xz)),
            ("file.csv", None),
            ("file", None),
        ];
        for &(name, expected) in &cases {
            let detected = CompressionFormat::from_extension(Path::new(name));
            assert_eq!(detected, expected, "path: {name}");
        }
    }

    /// Each compression format reports its canonical extension.
    #[test]
    fn test_compression_extension() {
        let cases = [
            (CompressionFormat::Gzip, "gz"),
            (CompressionFormat::Zstd, "zst"),
            (CompressionFormat::Bzip2, "bz2"),
            (CompressionFormat::Xz, "xz"),
        ];
        for &(format, expected) in &cases {
            assert_eq!(format.extension(), expected);
        }
    }

    /// File formats are detected from the path extension, ignoring case;
    /// extension-less names yield `None`.
    #[test]
    fn test_file_format_from_path() {
        let cases = [
            ("data.parquet", Some(FileFormat::Parquet)),
            ("data.csv", Some(FileFormat::Csv)),
            ("file.jsonl", Some(FileFormat::Jsonl)),
            ("noext", None),
            ("file.NDJSON", Some(FileFormat::Jsonl)),
        ];
        for &(name, expected) in &cases {
            let detected = FileFormat::from_path(Path::new(name));
            assert_eq!(detected, expected, "path: {name}");
        }
    }
}