polars_view/
args.rs

1use crate::{DEFAULT_CSV_DELIMITER, NULL_VALUES, PolarsViewError, PolarsViewResult};
2
3use clap::Parser;
4use regex::Regex;
5use std::path::PathBuf;
6
7// https://stackoverflow.com/questions/74068168/clap-rs-not-printing-colors-during-help
8fn get_styles() -> clap::builder::Styles {
9    let cyan = anstyle::Color::Ansi(anstyle::AnsiColor::Cyan);
10    let green = anstyle::Color::Ansi(anstyle::AnsiColor::Green);
11    let yellow = anstyle::Color::Ansi(anstyle::AnsiColor::Yellow);
12
13    clap::builder::Styles::styled()
14        .placeholder(anstyle::Style::new().fg_color(Some(yellow)))
15        .usage(anstyle::Style::new().fg_color(Some(cyan)).bold())
16        .header(
17            anstyle::Style::new()
18                .fg_color(Some(cyan))
19                .bold()
20                .underline(),
21        )
22        .literal(anstyle::Style::new().fg_color(Some(green)))
23}
24
/// Layout of the help screen, handed to clap through `help_template`.
///
/// The `{...}` tags are placeholders that clap substitutes when rendering; see
/// https://docs.rs/clap/latest/clap/struct.Command.html#method.help_template
const APPLET_TEMPLATE: &str = concat!(
    "{before-help}\n",
    "{about-with-newline}\n",
    "{usage-heading} {usage}\n",
    "\n",
    "{all-args}\n",
    "{after-help}",
);

// Usage examples appended to the help output (see `after_help` on `Arguments`).
// They are raw strings, so the backslashes before the inner quotes are printed
// literally, exactly as a user would type them in a shell.

/// Example: open a file with every option left at its default.
const EX1: &str = r#" polars-view data.csv"#;
/// Example: select columns by regex with `-f`.
const EX2: &str = r#" polars-view data.csv -f "^(Chave|Key).*$""#;
/// Example: run a SQL query (`-q`) against the loaded data.
const EX3: &str =
    r#" polars-view data.csv -q "SELECT * FROM AllData WHERE \"Col Name\" Like '%ABC%'""#;
/// Example: combine a SQL query (`-q`) with a column regex (`-r`).
const EX4: &str = r#" polars-view -q "SELECT * FROM AllData WHERE \"Valor Total\" > 5000" -r "^Val.*$" data.parquet"#;
39
40/// Command-line arguments for the PolarsView application.
41#[derive(Parser, Debug, Clone)]
42#[command(
43    // Read from `Cargo.toml`.
44    author, version, about,
45    long_about = None,
46    next_line_help = true,
47    help_template = APPLET_TEMPLATE,
48    styles=get_styles(),
49    after_help = format!("EXAMPLES:\n{EX1}\n{EX2}\n{EX3}\n{EX4}")
50)]
51pub struct Arguments {
52    /// CSV delimiter character. [Default: ';']
53    #[arg(
54        short = 'd',
55        long,
56        default_value = DEFAULT_CSV_DELIMITER,
57        help = "CSV delimiter character",
58        long_help = "Sets the CSV delimiter.\n\
59        Auto-detect tries common separators (, ; | \\t) if initial parse fails.",
60        requires = "path"
61    )]
62    pub delimiter: String,
63
64    /// Exclude columns containing only null values [requires data file].
65    #[arg(
66        short = 'e',
67        long,
68        help = "Exclude columns containing only null values [requires FILE_PATH]",
69        long_help = "If present, drops columns with only NULLs after load/query.",
70        action = clap::ArgAction::SetTrue,
71        // requires path implicitly holds
72    )]
73    pub exclude_null_cols: bool,
74
75    /// Regex pattern(s) matching columns to force read as String type [requires FILE_PATH].
76    #[arg(
77        short = 'f',
78        long = "force-string-cols",
79        value_name = "REGEX_PATTERN",      // Indicate expected value format
80        help = "Regex matching columns to force read as String (overrides inference)",
81        long_help = "\
82Forces columns whose names match the provided REGEX_PATTERN to be read as String type.
83Crucial for columns with large numeric IDs/keys often misinterpreted by type inference.
84
85REGEX_PATTERN Requirements:
86- Matching is case-sensitive by default (depends on regex engine).
87- Example 1: --force-string-cols \"^Chave.*$\"
88- Example 2: --force-string-cols \"^(Chave|ID|Code).*$\"
89
90[NOTE] Primarily affects CSV/JSON reading where type inference occurs.
91",
92        requires = "path",
93        value_parser = validate_force_string_argument_regex
94    )]
95    pub force_string_patterns: Option<String>,
96
97    /// Comma-separated values to treat as NULL. [Default: \"\", <N/D>]
98    #[arg(
99        short = 'n',
100        long,
101        value_name = "NULL_LIST",
102        default_value = NULL_VALUES,
103        help = "Comma-separated values interpreted as NULL",
104        long_help = "Specify custom null strings. Whitespace trimmed.\n\
105        Use quotes for values with commas/spaces (e.g., \"NA\",\"-\").",
106        requires = "path"
107    )]
108    pub null_values: String,
109
110    /// Optional path to the data file (CSV, JSON, NDJSON, Parquet).
111    #[arg(
112        value_name = "FILE_PATH",
113        default_value = ".",
114        required = false,
115        help = "Path to data file (CSV/JSON/NDJSON/Parquet) [Optional]",
116        long_help = "Path to the input data file.\n\
117        If omitted, opens the UI to load a file manually (menu or drag-drop)."
118    )]
119    pub path: PathBuf,
120
121    /// SQL query to apply after loading data [requires data file].
122    #[arg(
123        short = 'q',
124        long,
125        value_name = "SQL_QUERY",
126        help = "SQL query to apply to loaded data (use quotes) [requires FILE_PATH]",
127        long_help = "Optional Polars SQL query to execute after loading.\n\
128        Example: -q \"SELECT * FROM AllData WHERE count > 10\"",
129        requires = "path"
130    )]
131    pub query: Option<String>,
132
133    /// Apply regex to normalize specific string columns containing European-style numbers.
134    #[arg(
135        short = 'r',
136        long,
137        value_name = "REGEX_PATTERN", // Indicate expected value format
138        help = "Normalize Euro-style number strings in selected columns (via regex) to Float64",
139        long_help = "\
140    Selects string columns using the provided regex pattern and converts their contents
141    from a European-style numeric format (e.g., '1.234,56') to standard Float64 values
142    (e.g., 1234.56).
143    
144    The normalization removes '.' (thousands separators) and replaces ',' with '.' (decimal separator)
145    before casting to Float64.
146    
147    REGEX_PATTERN Requirements:
148    - Must match *entire* column names.
149    - Must be '*' (wildcard for ALL string columns - CAUTION!) OR
150    - Must be a regex starting with '^' and ending with '$'.
151    Examples: \"^Amount_EUR$\", \"^Value_.*$\", \"^(Total|Subtotal)$\"
152    
153    [WARNING] Applying to non-string columns via '*' or incorrect regex will likely cause errors.
154    Invalid regex patterns (e.g., '^Val[') can also cause errors.
155    
156    Application example:
157        polars-view data.csv -a \"^Val.*$\"
158    ",
159        requires = "path",
160        value_parser = validate_normalize_argument_regex
161    )]
162    pub regex: Option<String>,
163
164    /// Table name for SQL queries [requires -q/--query]. [Default: AllData]
165    #[arg(
166        short = 't',
167        long,
168        value_name = "TABLE_NAME",
169        default_value = "AllData",
170        help = "Table name for SQL queries [Default: AllData; requires -q]",
171        long_help = "Sets the table name used in the FROM clause of the SQL query (--query).",
172        requires = "query"
173    )]
174    pub table_name: String,
175}
176
177impl Arguments {
178    /// Build `Arguments` struct.
179    pub fn build() -> Arguments {
180        Arguments::parse()
181    }
182}
183
184// --- Regex Validation Functions ---
185
186/// Validates command-line regex pattern: must be '*' or '^...$' format AND syntactically correct.
187fn validate_cli_regex(pattern: &str, arg_name: &str) -> PolarsViewResult<String> {
188    // 1. Check Format Constraint
189    let is_wildcard = pattern == "*";
190    let is_formatted_regex = pattern.starts_with('^') && pattern.ends_with('$');
191
192    if !is_wildcard && !is_formatted_regex {
193        let reason = "Pattern must be '*' or (start with '^' and end with '$')".to_string();
194        return Err(PolarsViewError::InvalidArgument {
195            arg_name: arg_name.to_string(),
196            reason,
197        });
198    }
199
200    if !is_wildcard && !pattern.is_empty() {
201        match Regex::new(pattern) {
202            Ok(_) => Ok(pattern.to_string()),
203            Err(e) => {
204                let reason = format!("Invalid regex syntax: {e}");
205                Err(PolarsViewError::InvalidArgument {
206                    arg_name: arg_name.to_string(),
207                    reason,
208                })
209            }
210        }
211    } else {
212        Ok(pattern.to_string())
213    }
214}
215
216// --- Wrapper Validator Functions for specific arguments ---
217
218/// clap validator specifically for the '--regex' (normalization) argument.
219fn validate_normalize_argument_regex(s: &str) -> PolarsViewResult<String> {
220    validate_cli_regex(s, "--regex")
221}
222
223/// clap validator specifically for the '--force-string-cols' argument.
224fn validate_force_string_argument_regex(s: &str) -> PolarsViewResult<String> {
225    validate_cli_regex(s, "--force-string-cols")
226}
227
228//----------------------------------------------------------------------------//
229//                                   Tests                                    //
230//----------------------------------------------------------------------------//
231
/// Run tests with:
/// `cargo test -- --show-output tests_args`
#[cfg(test)]
mod tests_args {
    use super::*;
    use crate::{DEFAULT_CSV_DELIMITER, NULL_VALUES};
    use std::path::PathBuf;

    // Helper to create a dummy PathBuf for testing command line parsing.
    // clap doesn't need the file to exist for basic parsing tests.
    fn test_path(name: &str) -> PathBuf {
        PathBuf::from(name)
    }

    #[test]
    fn test_args_basic_path_only() {
        let path_str = "data.csv";
        let args = Arguments::parse_from(["polars-view", path_str]);

        assert_eq!(args.path, test_path(path_str));
        // Check defaults
        assert_eq!(args.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(args.null_values, NULL_VALUES);
        assert_eq!(args.query, None); // Query is optional, defaults to None
        assert_eq!(args.table_name, "AllData"); // Table name defaults even without query
        assert!(!args.exclude_null_cols); // Flag defaults to false
        assert_eq!(args.regex, None); // Optional, defaults to None
        assert_eq!(args.force_string_patterns, None); // Optional, defaults to None
    }

    #[test]
    fn test_args_defaults_with_dot_path() {
        let args = Arguments::parse_from(["polars-view", "."]); // Explicitly use default path

        assert_eq!(args.path, test_path("."));
        // Check defaults
        assert_eq!(args.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(args.null_values, NULL_VALUES);
        assert_eq!(args.query, None);
        assert_eq!(args.table_name, "AllData");
        assert!(!args.exclude_null_cols);
        assert_eq!(args.regex, None);
    }

    #[test]
    fn test_args_all_options_short() {
        let path_str = "input.parquet";
        let query_str = "SELECT c1 FROM MyData WHERE c2 > 0";
        let regex_str = "^Col_\\d+$";
        let nulls_str = "NA,-99";
        let table_str = "MyData";
        let delim_str = ",";

        let args = Arguments::parse_from([
            "polars-view",
            "-d",
            delim_str,
            "-n",
            nulls_str,
            "-q",
            query_str,
            "-t",
            table_str, // requires -q
            "-e",      // exclude_null_cols flag
            "-r",
            regex_str,
            path_str, // Path comes last usually
        ]);

        assert_eq!(args.path, test_path(path_str));
        assert_eq!(args.delimiter, delim_str);
        assert_eq!(args.null_values, nulls_str);
        assert_eq!(args.query, Some(query_str.to_string()));
        assert_eq!(args.table_name, table_str);
        assert!(args.exclude_null_cols);
        assert_eq!(args.regex, Some(regex_str.to_string()));
    }

    #[test]
    fn test_args_all_options_long() {
        let path_str = "log.ndjson";
        let query_str = "SELECT *";
        let regex_str = "*";
        let nulls_str = "\"-\", \"?\"";
        let table_str = "LogData";
        let delim_str = ";"; // Delimiter specified but won't be used for ndjson

        let args = Arguments::parse_from([
            "polars-view",
            "--delimiter",
            delim_str,
            "--null-values",
            nulls_str,
            "--query",
            query_str,
            "--table-name",
            table_str,
            "--exclude-null-cols", // Long flag
            "--regex",
            regex_str,
            path_str,
        ]);

        assert_eq!(args.path, test_path(path_str));
        assert_eq!(args.delimiter, delim_str); // Value is captured even if not used for this format
        assert_eq!(args.null_values, nulls_str);
        assert_eq!(args.query, Some(query_str.to_string()));
        assert_eq!(args.table_name, table_str);
        assert!(args.exclude_null_cols);
        assert_eq!(args.regex, Some(regex_str.to_string()));
    }

    #[test]
    fn test_args_no_path_provided_uses_default() {
        // No path provided, clap should use the default_value "."
        let args = Arguments::parse_from(["polars-view"]);

        assert_eq!(args.path, test_path("."));
        // Defaults for others
        assert_eq!(args.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(args.null_values, NULL_VALUES);
        assert_eq!(args.query, None);
        assert_eq!(args.table_name, "AllData");
        assert!(!args.exclude_null_cols);
        assert_eq!(args.regex, None);
    }

    #[test]
    fn test_args_query_without_tablename() {
        // Should use default table_name 'AllData'
        let path_str = "metrics.csv";
        let query_str = "SELECT count(*) FROM AllData";
        let args = Arguments::parse_from(["polars-view", "-q", query_str, path_str]);

        assert_eq!(args.path, test_path(path_str));
        assert_eq!(args.query, Some(query_str.to_string()));
        assert_eq!(args.table_name, "AllData"); // Default table name used
        // Check other defaults
        assert_eq!(args.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(args.null_values, NULL_VALUES);
        assert!(!args.exclude_null_cols);
        assert_eq!(args.regex, None);
    }

    #[test]
    fn test_args_flags_only() {
        let path_str = "config.json";
        // Just the path and the exclude-null-cols flag.
        let args = Arguments::parse_from(["polars-view", "-e", path_str]);

        assert_eq!(args.path, test_path(path_str));
        assert!(args.exclude_null_cols); // Flag sets it to true
        // Check other defaults
        assert_eq!(args.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(args.null_values, NULL_VALUES);
        assert_eq!(args.query, None);
        assert_eq!(args.table_name, "AllData");
        assert_eq!(args.regex, None);
    }

    #[test]
    fn test_args_force_string_cols_short_and_long() {
        let path_str = "keys.csv";
        let pattern = "^(Chave|Key).*$";

        let short = Arguments::parse_from(["polars-view", "-f", pattern, path_str]);
        assert_eq!(short.path, test_path(path_str));
        assert_eq!(short.force_string_patterns, Some(pattern.to_string()));
        assert_eq!(short.regex, None); // -f must not leak into --regex

        let long = Arguments::parse_from(["polars-view", "--force-string-cols", pattern, path_str]);
        assert_eq!(long.force_string_patterns, Some(pattern.to_string()));
    }

    #[test]
    fn test_args_rejects_unanchored_regex() {
        // Both regex options share the same validator: the value must be '*' or '^...$'.
        for flag in ["-r", "-f"] {
            let result = Arguments::try_parse_from(["polars-view", flag, "Val.*", "data.csv"]);
            assert!(result.is_err(), "{flag} accepted an unanchored pattern");
        }
    }

    #[test]
    fn test_args_rejects_invalid_regex_syntax() {
        // Correctly anchored, but the character class is never closed.
        for flag in ["-r", "-f"] {
            let result = Arguments::try_parse_from(["polars-view", flag, "^Val[$", "data.csv"]);
            assert!(result.is_err(), "{flag} accepted an invalid regex");
        }
    }

    #[test]
    fn test_args_table_name_requires_query() {
        // An explicit --table-name is declared with `requires = "query"`.
        let result = Arguments::try_parse_from(["polars-view", "-t", "MyData", "data.csv"]);
        assert!(result.is_err());
    }
}