use crate::errors::{CsvCliError, CsvCliResult};
use std::fs;
use std::io;
/// Configuration used to build CSV readers and to resolve column selectors.
#[derive(Debug, PartialEq)]
pub struct CsvSettings {
/// Field delimiter byte, passed to `csv::ReaderBuilder::delimiter`.
delimiter: u8,
/// Passed to `csv::ReaderBuilder::has_headers`. When `false`, column
/// selectors must be numeric (see `get_field_index`).
has_header: bool,
}
impl Default for CsvSettings {
fn default() -> CsvSettings {
CsvSettings {
delimiter: b',',
has_header: true,
}
}
}
impl CsvSettings {
/// Builds settings from command-line style inputs.
///
/// The delimiter is resolved by `parse_delimiter` from the optional explicit
/// `delim` and the optional file name `fname`; `has_header` is stored as
/// given.
///
/// # Errors
/// Propagates the error returned by `parse_delimiter`.
pub fn parse_new(
    fname: &Option<&str>,
    delim: Option<&str>,
    has_header: bool,
) -> CsvCliResult<CsvSettings> {
    CsvSettings::parse_delimiter(fname, delim).map(|delimiter| CsvSettings {
        delimiter,
        has_header,
    })
}
/// Opens `filename` as a CSV reader configured with this delimiter and
/// header setting, using `csv::Trim::All`.
///
/// # Errors
/// Returns whatever `csv::ReaderBuilder::from_path` reports.
pub fn get_reader_from_path(&self, filename: &str) -> csv::Result<csv::Reader<fs::File>> {
    let mut builder = csv::ReaderBuilder::new();
    builder
        .has_headers(self.has_header)
        .trim(csv::Trim::All)
        .delimiter(self.delimiter);
    builder.from_path(filename)
}
/// Wraps standard input in a CSV reader configured with this delimiter and
/// header setting, using `csv::Trim::All`.
pub fn get_reader_from_stdin(&self) -> csv::Reader<io::Stdin> {
    let mut builder = csv::ReaderBuilder::new();
    builder
        .has_headers(self.has_header)
        .trim(csv::Trim::All)
        .delimiter(self.delimiter);
    builder.from_reader(io::stdin())
}
/// Resolves the delimiter byte to use.
///
/// Precedence:
/// 1. An explicit `delim` always wins; the two-character text `\t`
///    (backslash, `t`) is treated as a tab.
/// 2. Otherwise a file name ending in `.tsv` or `.tab` selects a tab.
/// 3. Otherwise a comma is used.
///
/// # Errors
/// Returns `CsvCliError::InvalidConfiguration` when the chosen delimiter is
/// not exactly one byte long (empty, several characters, or a multi-byte
/// character).
fn parse_delimiter(fname: &Option<&str>, delim: Option<&str>) -> CsvCliResult<u8> {
    let chosen: &str = match (delim, *fname) {
        (Some(r"\t"), _) => "\t",
        (Some(explicit), _) => explicit,
        (None, Some(path)) if path.ends_with(".tsv") || path.ends_with(".tab") => "\t",
        (None, _) => ",",
    };
    // A one-byte `&str` is necessarily a single ASCII character, so checking
    // the byte length is enough.
    match chosen.as_bytes() {
        [single] => Ok(*single),
        _ => Err(CsvCliError::InvalidConfiguration(format!(
            "Could not convert `{}` delimiter to a single ASCII character",
            chosen
        ))),
    }
}
/// Resolves one column selector to a zero-based index into `headers`.
///
/// A selector made only of ASCII digits (surrounding whitespace allowed) is
/// used directly as an index. Any other selector is looked up by name via
/// `get_string_index`, which is only permitted when the input has a header
/// row.
///
/// # Errors
/// Returns `CsvCliError::InvalidConfiguration` when
/// * a numeric selector is not smaller than `headers.len()`,
/// * a non-numeric selector is used while `has_header` is `false`, or
/// * the name lookup in `get_string_index` fails.
pub fn get_field_index(&self, colname: &str, headers: &Vec<&str>) -> CsvCliResult<usize> {
    match self.get_numeric_index(colname) {
        Some(num) if num < headers.len() => Ok(num),
        Some(_) => Err(CsvCliError::InvalidConfiguration(format!(
            "Could not properly configure. Column selection needs to be between 0 and `{}`",
            // Report the largest valid index rather than the column count;
            // `saturating_sub` avoids underflow when there are no columns.
            headers.len().saturating_sub(1)
        ))),
        None if !self.has_header => Err(CsvCliError::InvalidConfiguration(
            "Columns must be numeric if you don't have a header".to_string(),
        )),
        None => self.get_string_index(colname, headers),
    }
}
/// Resolves every selector in `user_defs` to a column index.
///
/// Each entry may hold several comma-separated selectors (split by
/// `split_arg_string`); the resulting indexes are returned in the order the
/// selectors appear.
///
/// # Errors
/// Stops at, and returns, the first error produced by `get_field_index`.
pub fn get_field_indexes(
    &self,
    user_defs: &Vec<&str>,
    headers: &Vec<&str>,
) -> CsvCliResult<Vec<usize>> {
    user_defs
        .iter()
        .flat_map(|spec| self.split_arg_string(spec))
        .map(|selector| self.get_field_index(&selector, headers))
        .collect()
}
/// Splits `combined_cols` on commas that are not inside a quoted section.
///
/// A quoted section starts at `'` or `"` and ends at the next occurrence of
/// the same character; the quote characters stay in the output. An
/// unterminated quote therefore keeps the rest of the input in one piece.
/// A trailing empty piece is dropped, while empty pieces elsewhere are kept.
fn split_arg_string(&self, combined_cols: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut open_quote: Option<char> = None;
    for ch in combined_cols.chars() {
        match (open_quote, ch) {
            // Unquoted comma: finish the current piece and skip the comma.
            (None, ',') => {
                pieces.push(std::mem::replace(&mut pending, String::new()));
                continue;
            }
            (None, '\'') | (None, '"') => open_quote = Some(ch),
            (Some(opener), _) if opener == ch => open_quote = None,
            _ => {}
        }
        pending.push(ch);
    }
    if !pending.is_empty() {
        pieces.push(pending);
    }
    pieces
}
/// Interprets `colname` as a plain numeric column index.
///
/// Surrounding whitespace is ignored. Returns `None` when the remaining text
/// is empty, contains anything other than ASCII digits, or names a value
/// that does not fit in `usize`.
fn get_numeric_index(&self, colname: &str) -> Option<usize> {
    let trimmed = colname.trim();
    // The explicit digit check stays: `usize::from_str` would also accept a
    // leading `+`, which is not a valid selector here.
    if trimmed.is_empty() || !trimmed.chars().all(|c| c.is_ascii_digit()) {
        return None;
    }
    // An all-digit string can still overflow `usize`; treat that as "not a
    // numeric index" instead of panicking on user input.
    trimmed.parse().ok()
}
/// Resolves a by-name selector to a column index.
///
/// Selector syntax, after trimming whitespace:
/// * `NAME` - the first header equal to `NAME`;
/// * `NAME[k]` - the header equal to `NAME` after skipping `k` earlier
///   matches (`k` counts from 0);
/// * `'NAME'` / `"NAME"` - quoting lets the name itself contain `[`; the
///   quote characters are not part of the name. A bracket suffix may follow
///   the closing quote.
///
/// Once an unquoted `[` is seen, every following character except `]` is
/// collected as the occurrence number.
///
/// # Errors
/// Returns `CsvCliError::InvalidConfiguration` when the bracket content is
/// not a valid `usize`, or when `find_index_from_expected` finds no match.
fn get_string_index(&self, colname: &str, headers: &Vec<&str>) -> CsvCliResult<usize> {
    let mut open_quote: Option<char> = None;
    let mut inside_brackets = false;
    let mut header_name = String::new();
    let mut occurrence = String::new();
    for ch in colname.trim().chars() {
        match (open_quote, ch) {
            // Closing quote: leave quoted mode without keeping the quote.
            (Some(opener), _) if opener == ch => open_quote = None,
            (Some(_), _) => header_name.push(ch),
            (None, _) if inside_brackets => {
                if ch != ']' {
                    occurrence.push(ch);
                }
            }
            (None, '\'') | (None, '"') => open_quote = Some(ch),
            (None, '[') => inside_brackets = true,
            (None, _) => header_name.push(ch),
        }
    }
    // No bracket suffix means "first occurrence".
    let order = if occurrence.is_empty() {
        0
    } else {
        occurrence.parse::<usize>().map_err(|_| {
            CsvCliError::InvalidConfiguration(format!(
                "Could not convert column name `{}`. Hint: consider enclosing the column in quotes",
                colname
            ))
        })?
    };
    self.find_index_from_expected(&header_name, order, headers)
}
/// Finds the position in `headers` of the entry equal to `expected_header`,
/// skipping the first `expected_order` matches.
///
/// # Errors
/// Returns `CsvCliError::InvalidConfiguration` when fewer than
/// `expected_order + 1` headers match.
fn find_index_from_expected(
    &self,
    expected_header: &str,
    expected_order: usize,
    headers: &Vec<&str>,
) -> CsvCliResult<usize> {
    let mut matching_positions = headers.iter().enumerate().filter_map(|(pos, field)| {
        if *field == expected_header {
            Some(pos)
        } else {
            None
        }
    });
    match matching_positions.nth(expected_order) {
        Some(pos) => Ok(pos),
        None => Err(CsvCliError::InvalidConfiguration(format!(
            "Could not find `{}` in header row",
            expected_header
        ))),
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
use std::panic;
// Property-based tests. Every function in this block needs its own `#[test]`
// attribute: `proptest!` forwards attributes as written, and a function
// without one is generated but never executed by the test harness.
proptest! {
    // Arbitrary printable text as a delimiter must yield Ok or Err, not a panic.
    #[test]
    fn delimiter_never_panics(s in "\\PC*") {
        let result = panic::catch_unwind(|| {
            let _settings = CsvSettings::parse_new(&None, Some(&s), true);
        });
        assert!(result.is_ok());
    }

    // Splitting arbitrary selector text must not panic.
    #[test]
    fn split_header_never_panics(s in "\\PC*") {
        let settings = CsvSettings::default();
        let result = panic::catch_unwind(|| {
            let _valid = settings.split_arg_string(&s);
        });
        assert!(result.is_ok());
    }

    // Numeric parsing of arbitrary text must not panic.
    #[test]
    fn numeric_index_never_panics(s in "\\PC*") {
        let settings = CsvSettings::default();
        let result = panic::catch_unwind(|| {
            let _valid = settings.get_numeric_index(&s);
        });
        assert!(result.is_ok());
    }

    // A digit surrounded by whitespace is still recognised as numeric.
    #[test]
    fn leading_whitespace_parses(s in " [0-9]\t") {
        let settings = CsvSettings::default();
        assert!(settings.get_numeric_index(&s).is_some());
    }

    // Every `usize` round-trips through its decimal representation.
    #[test]
    fn nums_correctly_parse(n: usize) {
        let settings = CsvSettings::default();
        assert_eq!(settings.get_numeric_index(&n.to_string()), Some(n));
    }

    // Name lookup with arbitrary selector text must not panic.
    #[test]
    fn string_index_never_panics(s in "\\PC*") {
        let settings = CsvSettings::default();
        let sample_header = vec!["hello"];
        let result = panic::catch_unwind(|| {
            let _valid = settings.get_string_index(&s, &sample_header);
        });
        assert!(result.is_ok());
    }

    // A single-letter name resolves to its own column, and `NAME[1]` picks
    // the second of two identical headers.
    #[test]
    fn matching_strings_get_idx(s in "[A-Za-z]") {
        let header = vec![s.as_ref()];
        let settings = CsvSettings::default();
        assert_eq!(settings.get_string_index(&s, &header).unwrap(), 0);
        let header = vec![s.as_ref(), s.as_ref()];
        let new_str = format!("{}[1]", s);
        assert_eq!(settings.get_string_index(&new_str, &header).unwrap(), 1);
    }
}
// Checks comma splitting, including quoted sections and an unterminated
// quote that keeps the remainder in one piece.
#[test]
fn test_split_single_str() {
    let settings = CsvSettings::default();
    let cases: Vec<(&str, Vec<&str>)> = vec![
        ("FIELDNAME", vec!["FIELDNAME"]),
        ("'FIELDNAME'", vec!["'FIELDNAME'"]),
        ("'FIELDNAME,a'", vec!["'FIELDNAME,a'"]),
        ("'FIELDNAME,a',a", vec!["'FIELDNAME,a'", "a"]),
        ("a,b", vec!["a", "b"]),
        ("\"FIELDNAME',a", vec!["\"FIELDNAME',a"]),
    ];
    for (input, expected) in cases {
        assert_eq!(settings.split_arg_string(input), expected);
    }
}
// Checks by-name lookup: plain names, `[k]` occurrence suffixes, and quoted
// names that themselves contain brackets.
#[test]
fn test_str_indexes() {
    let settings = CsvSettings::default();
    let header = vec![
        "FIELDNAME1",
        "FIELDNAME2",
        "FIELDNAME1",
        "FIELDNAME2[0]",
        "FIELDNAME2[0]",
        "",
    ];

    let resolvable = [
        ("FIELDNAME1", 0),
        ("FIELDNAME1[1]", 2),
        ("'FIELDNAME2[0]'", 3),
        ("'FIELDNAME2[0]'[1]", 4),
    ];
    for (selector, expected) in resolvable.iter() {
        assert_eq!(
            settings.get_string_index(selector, &header).unwrap(),
            *expected
        );
    }

    let unresolvable = ["BLABLABLA", "'FIELDNAME2[0]'[2]", "FIELDNAME2[0][0]"];
    for selector in unresolvable.iter() {
        assert!(settings.get_string_index(selector, &header).is_err());
    }
}
// Numeric-looking characters outside ASCII 0-9 must not count as an index.
#[test]
fn test_non_ascii_unicode_digits_fail_numeric_parsing() {
    let settings = CsvSettings::default();
    let rejected = ["۳", "᠐", "ᛯ", "Ⅿ", "¼", "౸"];
    for candidate in rejected.iter() {
        assert!(settings.get_numeric_index(candidate).is_none());
    }
}
// An empty selector is not a numeric index.
#[test]
fn test_empty_numeric_index_doesnt_parse() {
    let parsed = CsvSettings::default().get_numeric_index("");
    assert!(parsed.is_none());
}
// Without a header row, selecting a column by name must be rejected.
#[test]
fn test_no_header_doesnt_parse() {
    let headerless = CsvSettings::parse_new(&None, None, false).unwrap();
    let columns = vec!["a", "b"];
    let selectors = vec!["a"];
    let outcome = headerless.get_field_indexes(&selectors, &columns);
    assert!(outcome.is_err());
}
}