// datui_cli/lib.rs

//! Shared CLI definitions for datui.
//!
//! Used by the main application, by the build script (to generate the manpage),
//! and by the `gen_docs` binary (to generate the command-line-options markdown).
5
6use clap::{CommandFactory, Parser, ValueEnum};
7use std::path::Path;
8
9/// Compression format for data files
10#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
11pub enum CompressionFormat {
12    /// Gzip compression (.gz) - Most common, good balance of speed and compression
13    Gzip,
14    /// Zstandard compression (.zst) - Modern, fast compression with good ratios
15    Zstd,
16    /// Bzip2 compression (.bz2) - Good compression ratio, slower than gzip
17    Bzip2,
18    /// XZ compression (.xz) - Excellent compression ratio, slower than bzip2
19    Xz,
20}
21
22impl CompressionFormat {
23    /// Detect compression format from file extension
24    pub fn from_extension(path: &Path) -> Option<Self> {
25        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
26            match ext.to_lowercase().as_str() {
27                "gz" => Some(Self::Gzip),
28                "zst" | "zstd" => Some(Self::Zstd),
29                "bz2" | "bz" => Some(Self::Bzip2),
30                "xz" => Some(Self::Xz),
31                _ => None,
32            }
33        } else {
34            None
35        }
36    }
37
38    /// Get file extension for this compression format
39    pub fn extension(&self) -> &'static str {
40        match self {
41            Self::Gzip => "gz",
42            Self::Zstd => "zst",
43            Self::Bzip2 => "bz2",
44            Self::Xz => "xz",
45        }
46    }
47}
48
49/// Command-line arguments for datui
50#[derive(Parser, Debug)]
51#[command(
52    name = "datui",
53    version,
54    about = "Data Exploration in the Terminal",
55    long_about = include_str!("../long_about.txt")
56)]
57pub struct Args {
58    /// Path(s) to the data file(s) to open.
59    /// Multiple files of the same format are concatenated into one table (not required with --generate-config, --clear-cache, or --remove-templates)
60    #[arg(required_unless_present_any = ["generate_config", "clear_cache", "remove_templates"], num_args = 1.., value_name = "PATH")]
61    pub paths: Vec<std::path::PathBuf>,
62
63    /// Skip this many lines when reading a file
64    #[arg(long = "skip-lines")]
65    pub skip_lines: Option<usize>,
66
67    /// Skip this many rows when reading a file
68    #[arg(long = "skip-rows")]
69    pub skip_rows: Option<usize>,
70
71    /// Specify that the file has no header
72    #[arg(long = "no-header")]
73    pub no_header: Option<bool>,
74
75    /// Specify the delimiter to use when reading a delimited text file
76    #[arg(long = "delimiter")]
77    pub delimiter: Option<u8>,
78
79    /// Treat these values as null when reading CSV. Use once per value; no "=" means all columns, COL=VAL means column COL only (first "=" separates column from value). Example: --null-value NA --null-value amount=
80    #[arg(long = "null-value", value_name = "VAL")]
81    pub null_value: Vec<String>,
82
83    /// Specify the compression format explicitly (gzip, zstd, bzip2, xz)
84    /// If not specified, compression is auto-detected from file extension.
85    #[arg(long = "compression", value_enum)]
86    pub compression: Option<CompressionFormat>,
87
88    /// Enable debug mode to show operational information
89    #[arg(long = "debug", action)]
90    pub debug: bool,
91
92    /// Enable Hive-style partitioning for directory or glob paths; ignored for a single file
93    #[arg(long = "hive", action)]
94    pub hive: bool,
95
96    /// Infer Hive/partitioned Parquet schema from one file for faster load (default: true). Set to false to use full schema scan.
97    #[arg(long = "single-spine-schema", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
98    pub single_spine_schema: Option<bool>,
99
100    /// Try to parse CSV string columns as dates (e.g. YYYY-MM-DD, ISO datetime). Default: true
101    #[arg(long = "parse-dates", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
102    pub parse_dates: Option<bool>,
103
104    /// Decompress into memory. Default: decompress to temp file and use lazy scan
105    #[arg(long = "decompress-in-memory", default_missing_value = "true", num_args = 0..=1, value_parser = clap::value_parser!(bool))]
106    pub decompress_in_memory: Option<bool>,
107
108    /// Directory for decompression temp files (default: system temp, e.g. TMPDIR)
109    #[arg(long = "temp-dir", value_name = "DIR")]
110    pub temp_dir: Option<std::path::PathBuf>,
111
112    /// Excel sheet to load: 0-based index (e.g. 0) or sheet name (e.g. "Sales")
113    #[arg(long = "sheet", value_name = "SHEET")]
114    pub excel_sheet: Option<String>,
115
116    /// Clear all cache data and exit
117    #[arg(long = "clear-cache", action)]
118    pub clear_cache: bool,
119
120    /// Apply a template by name when starting the application
121    #[arg(long = "template")]
122    pub template: Option<String>,
123
124    /// Remove all templates and exit
125    #[arg(long = "remove-templates", action)]
126    pub remove_templates: bool,
127
128    /// When set, datasets with this many or more rows are sampled for analysis (faster, less memory).
129    /// Overrides config [performance] sampling_threshold. Use 0 to disable sampling (full dataset) for this run.
130    /// When omitted, config or full-dataset mode is used.
131    #[arg(long = "sampling-threshold", value_name = "N")]
132    pub sampling_threshold: Option<usize>,
133
134    /// Use Polars streaming engine for LazyFrame collect when available (default: true). Set to false to disable.
135    #[arg(long = "polars-streaming", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
136    pub polars_streaming: Option<bool>,
137
138    /// Apply workaround for Polars 0.52 pivot with Date/Datetime index (default: true). Set to false to test without it.
139    #[arg(long = "workaround-pivot-date-index", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
140    pub workaround_pivot_date_index: Option<bool>,
141
142    /// Number of pages to buffer ahead of the visible area (default: 3)
143    /// Larger values provide smoother scrolling but use more memory
144    #[arg(long = "pages-lookahead")]
145    pub pages_lookahead: Option<usize>,
146
147    /// Number of pages to buffer behind the visible area (default: 3)
148    /// Larger values provide smoother scrolling but use more memory
149    #[arg(long = "pages-lookback")]
150    pub pages_lookback: Option<usize>,
151
152    /// Display row numbers on the left side of the table
153    #[arg(long = "row-numbers", action)]
154    pub row_numbers: bool,
155
156    /// Starting index for row numbers (default: 1)
157    #[arg(long = "row-start-index")]
158    pub row_start_index: Option<usize>,
159
160    /// Colorize main table cells by column type (default: true). Set to false to disable.
161    #[arg(long = "column-colors", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
162    pub column_colors: Option<bool>,
163
164    /// Generate default configuration file at ~/.config/datui/config.toml
165    #[arg(long = "generate-config", action)]
166    pub generate_config: bool,
167
168    /// Force overwrite existing config file when using --generate-config
169    #[arg(long = "force", requires = "generate_config", action)]
170    pub force: bool,
171
172    /// S3-compatible endpoint URL (overrides config and AWS_ENDPOINT_URL). Example: http://localhost:9000
173    #[arg(long = "s3-endpoint-url", value_name = "URL")]
174    pub s3_endpoint_url: Option<String>,
175
176    /// S3 access key (overrides config and AWS_ACCESS_KEY_ID)
177    #[arg(long = "s3-access-key-id", value_name = "KEY")]
178    pub s3_access_key_id: Option<String>,
179
180    /// S3 secret key (overrides config and AWS_SECRET_ACCESS_KEY)
181    #[arg(long = "s3-secret-access-key", value_name = "SECRET")]
182    pub s3_secret_access_key: Option<String>,
183
184    /// S3 region (overrides config and AWS_REGION). Example: us-east-1
185    #[arg(long = "s3-region", value_name = "REGION")]
186    pub s3_region: Option<String>,
187}
188
/// Escape `|` and line breaks for use in markdown table cells.
///
/// Every `|` becomes `\|` so it is not read as a column separator. Each line
/// break (`\r\n`, `\n`, or a lone `\r`) becomes a single space so the cell
/// stays on one table row. All other characters are copied unchanged.
fn escape_table_cell(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '|' => escaped.push_str("\\|"),
            '\r' => {
                // Consume the `\n` of a CRLF pair so it yields one space, not two.
                chars.next_if_eq(&'\n');
                escaped.push(' ');
            }
            '\n' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped
}
193
194/// Render command-line options as markdown.
195///
196/// Used by the gen_docs binary; output is written to stdout and then
197/// to `docs/reference/command-line-options.md` by the docs build process.
198pub fn render_options_markdown() -> String {
199    let mut cmd = Args::command();
200    cmd.build();
201
202    let mut out = String::from("# Command Line Options\n\n");
203
204    out.push_str("## Usage\n\n```\n");
205    let usage = cmd.render_usage();
206    out.push_str(&usage.to_string());
207    out.push_str("\n```\n\n");
208
209    out.push_str("## Options\n\n");
210    out.push_str("| Option | Description |\n");
211    out.push_str("|--------|-------------|\n");
212
213    for arg in cmd.get_arguments() {
214        let id = arg.get_id().as_ref().to_string();
215        if id == "help" || id == "version" {
216            continue;
217        }
218
219        let option_str = if arg.is_positional() {
220            let placeholder: String = arg
221                .get_value_names()
222                .map(|names| {
223                    names
224                        .iter()
225                        .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
226                        .collect::<Vec<_>>()
227                        .join(" ")
228                })
229                .unwrap_or_default();
230            if arg.is_required_set() {
231                placeholder
232            } else {
233                format!("[{placeholder}]")
234            }
235        } else {
236            let mut parts = Vec::new();
237            if let Some(s) = arg.get_short() {
238                parts.push(format!("-{s}"));
239            }
240            if let Some(l) = arg.get_long() {
241                parts.push(format!("--{l}"));
242            }
243            let op = parts.join(", ");
244            let takes_val = arg.get_action().takes_values();
245            let placeholder: String = if takes_val {
246                arg.get_value_names()
247                    .map(|names| {
248                        names
249                            .iter()
250                            .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
251                            .collect::<Vec<_>>()
252                            .join(" ")
253                    })
254                    .unwrap_or_default()
255            } else {
256                String::new()
257            };
258            if placeholder.is_empty() {
259                op
260            } else {
261                format!("{op} {placeholder}")
262            }
263        };
264
265        let help = arg
266            .get_help()
267            .map(|h| escape_table_cell(&h.to_string()))
268            .unwrap_or_else(|| "-".to_string());
269
270        out.push_str(&format!("| `{option_str}` | {help} |\n"));
271    }
272
273    out
274}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// Each supported extension maps to its format; anything else yields `None`.
    #[test]
    fn test_compression_detection() {
        let cases = [
            ("file.csv.gz", Some(CompressionFormat::Gzip)),
            ("file.csv.zst", Some(CompressionFormat::Zstd)),
            ("file.csv.bz2", Some(CompressionFormat::Bzip2)),
            ("file.csv.xz", Some(CompressionFormat::Xz)),
            ("file.csv", None),
            ("file", None),
        ];

        for &(file_name, expected) in &cases {
            let detected = CompressionFormat::from_extension(Path::new(file_name));
            assert_eq!(detected, expected);
        }
    }

    /// Every format reports its canonical extension (no leading dot).
    #[test]
    fn test_compression_extension() {
        let cases = [
            (CompressionFormat::Gzip, "gz"),
            (CompressionFormat::Zstd, "zst"),
            (CompressionFormat::Bzip2, "bz2"),
            (CompressionFormat::Xz, "xz"),
        ];

        for &(format, expected) in &cases {
            assert_eq!(format.extension(), expected);
        }
    }
}