use crate::{DEFAULT_CSV_DELIMITER, NULL_VALUES, PolarsViewError, PolarsViewResult};
use clap::Parser;
use regex::Regex;
use std::path::PathBuf;
fn get_styles() -> clap::builder::Styles {
let cyan = anstyle::Color::Ansi(anstyle::AnsiColor::Cyan);
let green = anstyle::Color::Ansi(anstyle::AnsiColor::Green);
let yellow = anstyle::Color::Ansi(anstyle::AnsiColor::Yellow);
clap::builder::Styles::styled()
.placeholder(anstyle::Style::new().fg_color(Some(yellow)))
.usage(anstyle::Style::new().fg_color(Some(cyan)).bold())
.header(
anstyle::Style::new()
.fg_color(Some(cyan))
.bold()
.underline(),
)
.literal(anstyle::Style::new().fg_color(Some(green)))
}
/// Help-text layout passed to clap as the `help_template` of `Arguments`.
///
/// Each `{...}` tag names a section of the rendered help. The `\` directly after
/// the opening quote is a line continuation, so the string itself begins at
/// `{before-help}` rather than with an empty line.
const APPLET_TEMPLATE: &str = "\
{before-help}
{about-with-newline}
{usage-heading} {usage}
{all-args}
{after-help}";
// Example invocations; `Arguments` joins them under an "EXAMPLES:" heading in
// its `after_help` text. Raw strings are used so the embedded double quotes and
// backslashes appear exactly as a user would type them in a shell.

/// Example: open a file with default settings.
const EX1: &str = r#" polars-view data.csv"#;
/// Example: pass a regex to `-f` (force matching columns to be read as strings).
const EX2: &str = r#" polars-view data.csv -f "^(Chave|Key).*$""#;
/// Example: pass a SQL query with `-q`; the column name containing a space is quoted.
const EX3: &str =
r#" polars-view data.csv -q "SELECT * FROM AllData WHERE \"Col Name\" Like '%ABC%'""#;
/// Example: combine a SQL query (`-q`) with a normalization regex (`-r`) on a Parquet file.
const EX4: &str = r#" polars-view -q "SELECT * FROM AllData WHERE \"Valor Total\" > 5000" -r "^Val.*$" data.parquet"#;
/// Command-line arguments of `polars-view`, parsed through clap's derive API.
///
/// Every option sets `help` and `long_help` explicitly (and the command sets
/// `about`/`long_about`), so the doc comments in this struct are maintainer notes
/// and are not what the user sees in `--help`.
///
/// Multi-line `long_help` literals that start with `"\` keep their text flush-left
/// on purpose: any indentation inside those literals would become part of the
/// rendered help.
#[derive(Parser, Debug, Clone)]
#[command(
    // Read from `Cargo.toml`.
    author, version, about,
    long_about = None,
    next_line_help = true,
    help_template = APPLET_TEMPLATE,
    styles=get_styles(),
    after_help = format!("EXAMPLES:\n{EX1}\n{EX2}\n{EX3}\n{EX4}")
)]
pub struct Arguments {
    /// CSV delimiter (`-d` / `--delimiter`); falls back to `DEFAULT_CSV_DELIMITER`.
    #[arg(
        short = 'd',
        long,
        default_value = DEFAULT_CSV_DELIMITER,
        help = "CSV delimiter character",
        long_help = "Sets the CSV delimiter.\n\
                     Auto-detect tries common separators (, ; | \\t) if initial parse fails.",
        requires = "path"
    )]
    pub delimiter: String,

    /// Flag (`-e` / `--exclude-null-cols`) asking for all-null columns to be dropped.
    #[arg(
        short = 'e',
        long,
        help = "Exclude columns containing only null values [requires FILE_PATH]",
        long_help = "If present, drops columns with only NULLs after load/query.",
        action = clap::ArgAction::SetTrue,
        // NOTE: unlike the other options, no `requires = "path"` is declared here,
        // so clap itself does not enforce the FILE_PATH requirement stated in `help`.
    )]
    pub exclude_null_cols: bool,

    /// Regex (`-f` / `--force-string-cols`) selecting columns to read as strings.
    ///
    /// The value is checked by `validate_force_string_argument_regex` at parse time.
    #[arg(
        short = 'f',
        long = "force-string-cols",
        value_name = "REGEX_PATTERN", // Indicate expected value format
        help = "Regex matching columns to force read as String (overrides inference)",
        long_help = "\
Forces columns whose names match the provided REGEX_PATTERN to be read as String type.
Crucial for columns with large numeric IDs/keys often misinterpreted by type inference.
REGEX_PATTERN Requirements:
- Must be '*' OR a regex starting with '^' and ending with '$'.
- Matching is case-sensitive by default (depends on regex engine).
- Example 1: --force-string-cols \"^Chave.*$\"
- Example 2: --force-string-cols \"^(Chave|ID|Code).*$\"
[NOTE] Primarily affects CSV/JSON reading where type inference occurs.
",
        requires = "path",
        value_parser = validate_force_string_argument_regex
    )]
    pub force_string_patterns: Option<String>,

    /// Comma-separated list (`-n` / `--null-values`) of strings treated as NULL;
    /// falls back to `NULL_VALUES`.
    #[arg(
        short = 'n',
        long,
        value_name = "NULL_LIST",
        default_value = NULL_VALUES,
        help = "Comma-separated values interpreted as NULL",
        long_help = "Specify custom null strings. Whitespace trimmed.\n\
                     Use quotes for values with commas/spaces (e.g., \"NA\",\"-\").",
        requires = "path"
    )]
    pub null_values: String,

    /// Optional positional path of the data file; `None` when it is omitted.
    #[arg(
        value_name = "FILE_PATH",
        required = false,
        help = "Path to data file (CSV/JSON/NDJSON/Parquet) [Optional]",
        long_help = "Path to the input data file.\n\
                     If omitted, opens the UI to load a file manually (menu or drag-drop)."
    )]
    pub path: Option<PathBuf>,

    /// Optional SQL query (`-q` / `--query`); only accepted together with a path.
    #[arg(
        short = 'q',
        long,
        value_name = "SQL_QUERY",
        help = "SQL query to apply to loaded data (use quotes) [requires FILE_PATH]",
        long_help = "Optional Polars SQL query to execute after loading.\n\
                     Example: -q \"SELECT * FROM AllData WHERE count > 10\"",
        requires = "path"
    )]
    pub query: Option<String>,

    /// Regex (`-r` / `--regex`) selecting columns for Euro-style number normalization.
    ///
    /// The value is checked by `validate_normalize_argument_regex` at parse time.
    #[arg(
        short = 'r',
        long,
        value_name = "REGEX_PATTERN", // Indicate expected value format
        help = "Normalize Euro-style number strings in selected columns (via regex) to Float64",
        // The application example below uses `-r`, the short flag declared above.
        long_help = "\
Selects string columns using the provided regex pattern and converts their contents
from a European-style numeric format (e.g., '1.234,56') to standard Float64 values
(e.g., 1234.56).
The normalization removes '.' (thousands separators) and replaces ',' with '.' (decimal separator)
before casting to Float64.
REGEX_PATTERN Requirements:
- Must match *entire* column names.
- Must be '*' (wildcard for ALL string columns - CAUTION!) OR
- Must be a regex starting with '^' and ending with '$'.
Examples: \"^Amount_EUR$\", \"^Value_.*$\", \"^(Total|Subtotal)$\"
[WARNING] Applying to non-string columns via '*' or incorrect regex will likely cause errors.
Invalid regex patterns (e.g., '^Val[') can also cause errors.
Application example:
polars-view data.csv -r \"^Val.*$\"
",
        requires = "path",
        value_parser = validate_normalize_argument_regex
    )]
    pub regex: Option<String>,

    /// Table name (`-t` / `--table-name`) for SQL queries; defaults to `"AllData"`
    /// and may only be given explicitly together with `--query`.
    #[arg(
        short = 't',
        long,
        value_name = "TABLE_NAME",
        default_value = "AllData",
        help = "Table name for SQL queries [Default: AllData; requires -q]",
        long_help = "Sets the table name used in the FROM clause of the SQL query (--query).",
        requires = "query"
    )]
    pub table_name: String,
}
impl Arguments {
pub fn build() -> Arguments {
Arguments::parse()
}
}
/// Validates a regex-valued CLI argument and returns it unchanged on success.
///
/// Accepted inputs:
/// - the bare wildcard `"*"`, which is returned as-is and never compiled;
/// - a pattern that starts with `'^'`, ends with `'$'`, and compiles with
///   [`Regex::new`].
///
/// # Errors
///
/// Returns `PolarsViewError::InvalidArgument` carrying `arg_name` when the
/// pattern is neither `"*"` nor `^…$`-shaped (this includes the empty string),
/// or when it has the right shape but is not valid regex syntax.
fn validate_cli_regex(pattern: &str, arg_name: &str) -> PolarsViewResult<String> {
    // Both failure paths report the same argument name; only the reason differs.
    let invalid = |reason: String| PolarsViewError::InvalidArgument {
        arg_name: arg_name.to_string(),
        reason,
    };

    if pattern == "*" {
        return Ok(pattern.to_string());
    }

    let anchored = pattern.starts_with('^') && pattern.ends_with('$');
    if !anchored {
        return Err(invalid(
            "Pattern must be '*' or (start with '^' and end with '$')".to_string(),
        ));
    }

    // Compile only to check the syntax; the caller receives the original text.
    Regex::new(pattern)
        .map(|_| pattern.to_string())
        .map_err(|e| invalid(format!("Invalid regex syntax: {e}")))
}
/// clap value parser for the `--regex` option.
///
/// Forwards to `validate_cli_regex`, tagging any error with the `--regex` flag name.
fn validate_normalize_argument_regex(s: &str) -> PolarsViewResult<String> {
    const FLAG: &str = "--regex";
    validate_cli_regex(s, FLAG)
}
/// clap value parser for the `--force-string-cols` option.
///
/// Forwards to `validate_cli_regex`, tagging any error with the
/// `--force-string-cols` flag name.
fn validate_force_string_argument_regex(s: &str) -> PolarsViewResult<String> {
    const FLAG: &str = "--force-string-cols";
    validate_cli_regex(s, FLAG)
}
#[cfg(test)]
mod tests_args {
    use super::*;
    use crate::{DEFAULT_CSV_DELIMITER, NULL_VALUES};
    use std::path::PathBuf;

    /// Program name placed at `argv[0]` of every simulated command line.
    const BIN: &str = "polars-view";

    /// Converts a path literal into the `PathBuf` the parser is expected to produce.
    fn test_path(name: &str) -> PathBuf {
        name.into()
    }

    /// A lone positional argument sets `path`; every option keeps its default.
    #[test]
    fn test_args_basic_path_only() {
        let file = "data.csv";
        let parsed = Arguments::parse_from([BIN, file]);

        assert_eq!(parsed.path, Some(test_path(file)));
        assert_eq!(parsed.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(parsed.null_values, NULL_VALUES);
        assert!(parsed.query.is_none());
        assert_eq!(parsed.table_name, "AllData");
        assert!(!parsed.exclude_null_cols);
        assert!(parsed.regex.is_none());
    }

    /// `"."` is accepted as a path like any other string; options keep their defaults.
    #[test]
    fn test_args_defaults_with_dot_path() {
        let parsed = Arguments::parse_from([BIN, "."]);

        assert_eq!(parsed.path, Some(test_path(".")));
        assert_eq!(parsed.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(parsed.null_values, NULL_VALUES);
        assert!(parsed.query.is_none());
        assert_eq!(parsed.table_name, "AllData");
        assert!(!parsed.exclude_null_cols);
        assert!(parsed.regex.is_none());
    }

    /// The short flags exercised here (`-d -n -q -t -e -r`) are parsed into their fields.
    #[test]
    fn test_args_all_options_short() {
        let file = "input.parquet";
        let sql = "SELECT c1 FROM MyData WHERE c2 > 0";
        let pattern = "^Col_\\d+$";
        let nulls = "NA,-99";
        let table = "MyData";
        let delim = ",";

        let argv = [
            BIN, "-d", delim, "-n", nulls, "-q", sql, "-t", table, "-e", "-r", pattern, file,
        ];
        let parsed = Arguments::parse_from(argv);

        assert_eq!(parsed.path, Some(test_path(file)));
        assert_eq!(parsed.delimiter, delim);
        assert_eq!(parsed.null_values, nulls);
        assert_eq!(parsed.query.as_deref(), Some(sql));
        assert_eq!(parsed.table_name, table);
        assert!(parsed.exclude_null_cols);
        assert_eq!(parsed.regex.as_deref(), Some(pattern));
    }

    /// The long flags are parsed into their fields; `--regex` also accepts the `*` wildcard.
    #[test]
    fn test_args_all_options_long() {
        let file = "log.ndjson";
        let sql = "SELECT *";
        let pattern = "*";
        let nulls = "\"-\", \"?\"";
        let table = "LogData";
        let delim = ";";

        let argv = [
            BIN,
            "--delimiter",
            delim,
            "--null-values",
            nulls,
            "--query",
            sql,
            "--table-name",
            table,
            "--exclude-null-cols",
            "--regex",
            pattern,
            file,
        ];
        let parsed = Arguments::parse_from(argv);

        assert_eq!(parsed.path, Some(test_path(file)));
        assert_eq!(parsed.delimiter, delim);
        assert_eq!(parsed.null_values, nulls);
        assert_eq!(parsed.query.as_deref(), Some(sql));
        assert_eq!(parsed.table_name, table);
        assert!(parsed.exclude_null_cols);
        assert_eq!(parsed.regex.as_deref(), Some(pattern));
    }

    /// With no arguments at all, `path` is `None` and every option keeps its default.
    #[test]
    fn test_args_no_path_provided_uses_default() {
        let parsed = Arguments::parse_from([BIN]);

        assert!(parsed.path.is_none());
        assert_eq!(parsed.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(parsed.null_values, NULL_VALUES);
        assert!(parsed.query.is_none());
        assert_eq!(parsed.table_name, "AllData");
        assert!(!parsed.exclude_null_cols);
        assert!(parsed.regex.is_none());
    }

    /// A query given without `-t` still gets the default table name.
    #[test]
    fn test_args_query_without_tablename() {
        let file = "metrics.csv";
        let sql = "SELECT count(*) FROM AllData";
        let parsed = Arguments::parse_from([BIN, "-q", sql, file]);

        assert_eq!(parsed.path, Some(test_path(file)));
        assert_eq!(parsed.query.as_deref(), Some(sql));
        assert_eq!(parsed.table_name, "AllData");
        assert_eq!(parsed.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(parsed.null_values, NULL_VALUES);
        assert!(!parsed.exclude_null_cols);
        assert!(parsed.regex.is_none());
    }

    /// The boolean `-e` flag combined with a path leaves the other options at their defaults.
    #[test]
    fn test_args_flags_only() {
        let file = "config.json";
        let parsed = Arguments::parse_from([BIN, "-e", file]);

        assert_eq!(parsed.path, Some(test_path(file)));
        assert!(parsed.exclude_null_cols);
        assert_eq!(parsed.delimiter, DEFAULT_CSV_DELIMITER);
        assert_eq!(parsed.null_values, NULL_VALUES);
        assert!(parsed.query.is_none());
        assert_eq!(parsed.table_name, "AllData");
        assert!(parsed.regex.is_none());
    }
}