Skip to main content

datui_cli/
lib.rs

1//! Shared CLI definitions for datui.
2//!
3//! Used by the main application and by the build script (manpage) and
4//! gen_docs binary (command-line-options markdown).
5
6use clap::{CommandFactory, Parser, ValueEnum};
7use std::path::Path;
8
// NOTE(review): with the `ValueEnum` derive, clap presumably exposes the variant
// doc comments below as help text for the accepted `--compression` values, so
// their wording is user-facing — confirm before editing them.
/// Compression format for data files
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
pub enum CompressionFormat {
    /// Gzip compression (.gz) - Most common, good balance of speed and compression
    Gzip,
    /// Zstandard compression (.zst) - Modern, fast compression with good ratios
    Zstd,
    /// Bzip2 compression (.bz2) - Good compression ratio, slower than gzip
    Bzip2,
    /// XZ compression (.xz) - Excellent compression ratio, slower than bzip2
    Xz,
}
21
22impl CompressionFormat {
23    /// Detect compression format from file extension
24    pub fn from_extension(path: &Path) -> Option<Self> {
25        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
26            match ext.to_lowercase().as_str() {
27                "gz" => Some(Self::Gzip),
28                "zst" | "zstd" => Some(Self::Zstd),
29                "bz2" | "bz" => Some(Self::Bzip2),
30                "xz" => Some(Self::Xz),
31                _ => None,
32            }
33        } else {
34            None
35        }
36    }
37
38    /// Get file extension for this compression format
39    pub fn extension(&self) -> &'static str {
40        match self {
41            Self::Gzip => "gz",
42            Self::Zstd => "zst",
43            Self::Bzip2 => "bz2",
44            Self::Xz => "xz",
45        }
46    }
47}
48
// NOTE(review): clap's derive presumably turns the `///` comments in this struct
// into --help text, and `render_options_markdown` reads the same text back via
// `get_help()`, so their wording is user-facing output — confirm before editing.
/// Command-line arguments for datui
#[derive(Parser, Debug)]
// `long_about` is pulled in at compile time from `../long_about.txt`.
#[command(
    name = "datui",
    version,
    about = "Data Exploration in the Terminal",
    long_about = include_str!("../long_about.txt")
)]
pub struct Args {
    // Declared without `long`/`short`; takes one or more values and may be
    // omitted only when one of the flags listed in `required_unless_present_any`
    // is given.
    /// Path(s) to the data file(s) to open.
    /// Multiple files of the same format are concatenated into one table (not required with --generate-config, --clear-cache, or --remove-templates)
    #[arg(required_unless_present_any = ["generate_config", "clear_cache", "remove_templates"], num_args = 1.., value_name = "PATH")]
    pub paths: Vec<std::path::PathBuf>,

    /// Skip this many lines when reading a file
    #[arg(long = "skip-lines")]
    pub skip_lines: Option<usize>,

    /// Skip this many rows when reading a file
    #[arg(long = "skip-rows")]
    pub skip_rows: Option<usize>,

    // NOTE(review): declared as `Option<bool>` rather than a bare flag, so it
    // presumably expects an explicit true/false value — confirm this is intended.
    /// Specify that the file has no header
    #[arg(long = "no-header")]
    pub no_header: Option<bool>,

    // NOTE(review): typed as `u8`; clap presumably parses this as a numeric byte
    // value (e.g. 44 for ',') rather than a literal character — confirm against
    // how callers and users are expected to pass the delimiter.
    /// Specify the delimiter to use when reading a delimited text file
    #[arg(long = "delimiter")]
    pub delimiter: Option<u8>,

    /// Specify the compression format explicitly (gzip, zstd, bzip2, xz)
    /// If not specified, compression is auto-detected from file extension.
    #[arg(long = "compression", value_enum)]
    pub compression: Option<CompressionFormat>,

    /// Enable debug mode to show operational information
    #[arg(long = "debug", action)]
    pub debug: bool,

    /// Enable Hive-style partitioning for directory or glob paths; ignored for a single file
    #[arg(long = "hive", action)]
    pub hive: bool,

    /// Infer Hive/partitioned Parquet schema from one file for faster load (default: true). Set to false to use full schema scan.
    #[arg(long = "single-spine-schema", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub single_spine_schema: Option<bool>,

    /// Try to parse CSV string columns as dates (e.g. YYYY-MM-DD, ISO datetime). Default: true
    #[arg(long = "parse-dates", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub parse_dates: Option<bool>,

    // `num_args = 0..=1` together with `default_missing_value = "true"`:
    // presumably a bare `--decompress-in-memory` means true while an explicit
    // boolean value is also accepted — confirm.
    /// Decompress into memory. Default: decompress to temp file and use lazy scan
    #[arg(long = "decompress-in-memory", default_missing_value = "true", num_args = 0..=1, value_parser = clap::value_parser!(bool))]
    pub decompress_in_memory: Option<bool>,

    /// Directory for decompression temp files (default: system temp, e.g. TMPDIR)
    #[arg(long = "temp-dir", value_name = "DIR")]
    pub temp_dir: Option<std::path::PathBuf>,

    // Exposed on the command line as `--sheet`, not `--excel-sheet`.
    /// Excel sheet to load: 0-based index (e.g. 0) or sheet name (e.g. "Sales")
    #[arg(long = "sheet", value_name = "SHEET")]
    pub excel_sheet: Option<String>,

    /// Clear all cache data and exit
    #[arg(long = "clear-cache", action)]
    pub clear_cache: bool,

    /// Apply a template by name when starting the application
    #[arg(long = "template")]
    pub template: Option<String>,

    /// Remove all templates and exit
    #[arg(long = "remove-templates", action)]
    pub remove_templates: bool,

    /// When set, datasets with this many or more rows are sampled for analysis (faster, less memory).
    /// Overrides config [performance] sampling_threshold. Use 0 to disable sampling (full dataset) for this run.
    /// When omitted, config or full-dataset mode is used.
    #[arg(long = "sampling-threshold", value_name = "N")]
    pub sampling_threshold: Option<usize>,

    /// Use Polars streaming engine for LazyFrame collect when available (default: true). Set to false to disable.
    #[arg(long = "polars-streaming", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub polars_streaming: Option<bool>,

    /// Apply workaround for Polars 0.52 pivot with Date/Datetime index (default: true). Set to false to test without it.
    #[arg(long = "workaround-pivot-date-index", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub workaround_pivot_date_index: Option<bool>,

    /// Number of pages to buffer ahead of the visible area (default: 3)
    /// Larger values provide smoother scrolling but use more memory
    #[arg(long = "pages-lookahead")]
    pub pages_lookahead: Option<usize>,

    /// Number of pages to buffer behind the visible area (default: 3)
    /// Larger values provide smoother scrolling but use more memory
    #[arg(long = "pages-lookback")]
    pub pages_lookback: Option<usize>,

    /// Display row numbers on the left side of the table
    #[arg(long = "row-numbers", action)]
    pub row_numbers: bool,

    /// Starting index for row numbers (default: 1)
    #[arg(long = "row-start-index")]
    pub row_start_index: Option<usize>,

    /// Colorize main table cells by column type (default: true). Set to false to disable.
    #[arg(long = "column-colors", value_name = "BOOL", value_parser = clap::value_parser!(bool))]
    pub column_colors: Option<bool>,

    /// Generate default configuration file at ~/.config/datui/config.toml
    #[arg(long = "generate-config", action)]
    pub generate_config: bool,

    // Only accepted together with --generate-config (see `requires`).
    /// Force overwrite existing config file when using --generate-config
    #[arg(long = "force", requires = "generate_config", action)]
    pub force: bool,

    /// S3-compatible endpoint URL (overrides config and AWS_ENDPOINT_URL). Example: http://localhost:9000
    #[arg(long = "s3-endpoint-url", value_name = "URL")]
    pub s3_endpoint_url: Option<String>,

    /// S3 access key (overrides config and AWS_ACCESS_KEY_ID)
    #[arg(long = "s3-access-key-id", value_name = "KEY")]
    pub s3_access_key_id: Option<String>,

    /// S3 secret key (overrides config and AWS_SECRET_ACCESS_KEY)
    #[arg(long = "s3-secret-access-key", value_name = "SECRET")]
    pub s3_secret_access_key: Option<String>,

    /// S3 region (overrides config and AWS_REGION). Example: us-east-1
    #[arg(long = "s3-region", value_name = "REGION")]
    pub s3_region: Option<String>,
}
184
/// Prepare `s` for use inside a markdown table cell.
///
/// Each `|` is rewritten as `\|`, and each `\n` or `\r` is replaced by a
/// single space (so a `\r\n` pair yields two spaces). All other characters
/// are copied through unchanged.
fn escape_table_cell(s: &str) -> String {
    let mut cell = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '|' => cell.push_str("\\|"),
            '\n' | '\r' => cell.push(' '),
            other => cell.push(other),
        }
    }
    cell
}
189
190/// Render command-line options as markdown.
191///
192/// Used by the gen_docs binary; output is written to stdout and then
193/// to `docs/reference/command-line-options.md` by the docs build process.
194pub fn render_options_markdown() -> String {
195    let mut cmd = Args::command();
196    cmd.build();
197
198    let mut out = String::from("# Command Line Options\n\n");
199
200    out.push_str("## Usage\n\n```\n");
201    let usage = cmd.render_usage();
202    out.push_str(&usage.to_string());
203    out.push_str("\n```\n\n");
204
205    out.push_str("## Options\n\n");
206    out.push_str("| Option | Description |\n");
207    out.push_str("|--------|-------------|\n");
208
209    for arg in cmd.get_arguments() {
210        let id = arg.get_id().as_ref().to_string();
211        if id == "help" || id == "version" {
212            continue;
213        }
214
215        let option_str = if arg.is_positional() {
216            let placeholder: String = arg
217                .get_value_names()
218                .map(|names| {
219                    names
220                        .iter()
221                        .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
222                        .collect::<Vec<_>>()
223                        .join(" ")
224                })
225                .unwrap_or_default();
226            if arg.is_required_set() {
227                placeholder
228            } else {
229                format!("[{placeholder}]")
230            }
231        } else {
232            let mut parts = Vec::new();
233            if let Some(s) = arg.get_short() {
234                parts.push(format!("-{s}"));
235            }
236            if let Some(l) = arg.get_long() {
237                parts.push(format!("--{l}"));
238            }
239            let op = parts.join(", ");
240            let takes_val = arg.get_action().takes_values();
241            let placeholder: String = if takes_val {
242                arg.get_value_names()
243                    .map(|names| {
244                        names
245                            .iter()
246                            .map(|n: &clap::builder::Str| format!("<{}>", n.as_ref() as &str))
247                            .collect::<Vec<_>>()
248                            .join(" ")
249                    })
250                    .unwrap_or_default()
251            } else {
252                String::new()
253            };
254            if placeholder.is_empty() {
255                op
256            } else {
257                format!("{op} {placeholder}")
258            }
259        };
260
261        let help = arg
262            .get_help()
263            .map(|h| escape_table_cell(&h.to_string()))
264            .unwrap_or_else(|| "-".to_string());
265
266        out.push_str(&format!("| `{option_str}` | {help} |\n"));
267    }
268
269    out
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Every supported format, for table-driven checks.
    const ALL_FORMATS: [CompressionFormat; 4] = [
        CompressionFormat::Gzip,
        CompressionFormat::Zstd,
        CompressionFormat::Bzip2,
        CompressionFormat::Xz,
    ];

    #[test]
    fn test_compression_detection() {
        assert_eq!(
            CompressionFormat::from_extension(Path::new("file.csv.gz")),
            Some(CompressionFormat::Gzip)
        );
        assert_eq!(
            CompressionFormat::from_extension(Path::new("file.csv.zst")),
            Some(CompressionFormat::Zstd)
        );
        assert_eq!(
            CompressionFormat::from_extension(Path::new("file.csv.bz2")),
            Some(CompressionFormat::Bzip2)
        );
        assert_eq!(
            CompressionFormat::from_extension(Path::new("file.csv.xz")),
            Some(CompressionFormat::Xz)
        );
        assert_eq!(
            CompressionFormat::from_extension(Path::new("file.csv")),
            None
        );
        assert_eq!(CompressionFormat::from_extension(Path::new("file")), None);
    }

    /// The alternative spellings accepted by `from_extension` must keep working.
    #[test]
    fn test_compression_detection_aliases() {
        assert_eq!(
            CompressionFormat::from_extension(Path::new("file.csv.zstd")),
            Some(CompressionFormat::Zstd)
        );
        assert_eq!(
            CompressionFormat::from_extension(Path::new("file.csv.bz")),
            Some(CompressionFormat::Bzip2)
        );
    }

    /// Extensions are lowercased before matching, so case must not matter.
    #[test]
    fn test_compression_detection_ignores_case() {
        assert_eq!(
            CompressionFormat::from_extension(Path::new("FILE.CSV.GZ")),
            Some(CompressionFormat::Gzip)
        );
        assert_eq!(
            CompressionFormat::from_extension(Path::new("data.Zst")),
            Some(CompressionFormat::Zstd)
        );
        assert_eq!(
            CompressionFormat::from_extension(Path::new("data.Bz2")),
            Some(CompressionFormat::Bzip2)
        );
        assert_eq!(
            CompressionFormat::from_extension(Path::new("data.XZ")),
            Some(CompressionFormat::Xz)
        );
    }

    /// `Path::extension` reports no extension for a dot-file such as `.gz`,
    /// and an empty path has none either.
    #[test]
    fn test_compression_detection_without_extension() {
        assert_eq!(CompressionFormat::from_extension(Path::new(".gz")), None);
        assert_eq!(CompressionFormat::from_extension(Path::new("")), None);
    }

    #[test]
    fn test_compression_extension() {
        assert_eq!(CompressionFormat::Gzip.extension(), "gz");
        assert_eq!(CompressionFormat::Zstd.extension(), "zst");
        assert_eq!(CompressionFormat::Bzip2.extension(), "bz2");
        assert_eq!(CompressionFormat::Xz.extension(), "xz");
    }

    /// Whatever `extension()` returns must be detected as the same format.
    #[test]
    fn test_compression_extension_round_trip() {
        for format in ALL_FORMATS {
            let name = format!("data.csv.{}", format.extension());
            assert_eq!(
                CompressionFormat::from_extension(Path::new(&name)),
                Some(format)
            );
        }
    }
}