use clap::Parser;
use csvlint::validate;
use std::fs::File;
use std::io::{self, BufReader};
use std::process;
// Command-line arguments, parsed by clap's derive API.
// (Kept as a plain `//` comment on purpose: the `about` text continues to come
// from the package metadata via the bare `about` attribute below.)
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Field delimiter: a single one-byte character, or the two characters \t for a tab
    #[arg(short, long, default_value = ",")]
    delimiter: String,

    /// Enable lazy quote handling (ignored when --rfc4180 is set)
    #[arg(short, long, default_value_t = false)]
    lazyquotes: bool,

    /// Validate in strict RFC 4180 mode: comma delimiter, CRLF line endings, no lazy quotes
    #[arg(long, default_value_t = false)]
    rfc4180: bool,

    /// Path of the CSV file to validate
    file: String,
}
fn main() {
let args = Args::parse();
let (delimiter_byte, lazy_quotes) = if args.rfc4180 {
if args.delimiter != "," {
eprintln!(
"Warning: --rfc4180 mode requires comma delimiter, ignoring --delimiter option"
);
}
if args.lazyquotes {
eprintln!("Warning: --rfc4180 mode disables lazy quotes, ignoring --lazyquotes option");
}
(b',', false)
} else {
let delimiter_byte = match parse_delimiter(&args.delimiter) {
Ok(d) => d,
Err(e) => {
eprintln!("{}", e);
process::exit(1);
}
};
(delimiter_byte, args.lazyquotes)
};
if !args.rfc4180 && (args.delimiter != "," || args.lazyquotes) {
eprintln!("Warning: not using defaults, may not validate CSV to RFC 4180");
}
if args.rfc4180 {
println!("Running in strict RFC 4180 compliance mode");
println!("- Delimiter: comma (,)");
println!("- Line endings: CRLF required");
println!("- Quote escaping: strict");
println!();
}
let file = match File::open(&args.file) {
Ok(f) => f,
Err(e) => {
if e.kind() == io::ErrorKind::NotFound {
eprintln!("file '{}' does not exist", args.file);
} else {
eprintln!("error opening file '{}': {}", args.file, e);
}
process::exit(1);
}
};
let reader = BufReader::new(file);
let result = match validate(reader, delimiter_byte, lazy_quotes, args.rfc4180) {
Ok(result) => result,
Err(e) => {
eprintln!("validation error: {}", e);
process::exit(1);
}
};
if result.errors.is_empty() {
if args.rfc4180 {
println!("file is valid and complies with RFC 4180");
} else {
println!("file is valid");
}
process::exit(0);
}
let mut field_count_errors = 0;
let mut line_ending_errors = 0;
let mut quote_errors = 0;
let mut other_errors = 0;
for error in &result.errors {
match error.error {
csvlint::CsvErrorKind::FieldCount => field_count_errors += 1,
csvlint::CsvErrorKind::InvalidLineEnding => line_ending_errors += 1,
csvlint::CsvErrorKind::BareQuote
| csvlint::CsvErrorKind::Quote
| csvlint::CsvErrorKind::UnterminatedQuote => quote_errors += 1,
_ => other_errors += 1,
}
}
println!("Found {} validation error(s):", result.errors.len());
if field_count_errors > 0 {
println!(" - {} field count error(s)", field_count_errors);
}
if line_ending_errors > 0 {
println!(
" - {} line ending error(s) (RFC 4180 requires CRLF)",
line_ending_errors
);
}
if quote_errors > 0 {
println!(" - {} quote/escaping error(s)", quote_errors);
}
if other_errors > 0 {
println!(" - {} other error(s)", other_errors);
}
println!();
for error in &result.errors {
println!("{}", error);
}
if result.halted {
println!("\nunable to parse any further");
process::exit(1);
}
process::exit(2);
}
/// Converts the textual `--delimiter` argument into the single byte used as
/// the field separator.
///
/// The two-character sequence backslash + `t` is mapped to the tab byte; any
/// other input must be exactly one byte long and is returned as-is.
///
/// # Errors
///
/// Returns a descriptive message when the input is empty or longer than one
/// byte (which includes any single character that is multi-byte in UTF-8).
fn parse_delimiter(delimiter_str: &str) -> Result<u8, String> {
    // The escaped form of a tab is the only accepted multi-byte spelling.
    if delimiter_str == "\\t" {
        return Ok(b'\t');
    }

    match delimiter_str.as_bytes() {
        [only] => Ok(*only),
        _ => Err(format!(
            "error parsing delimiter '{}', note that only one-character delimiters are supported",
            delimiter_str
        )),
    }
}
#[cfg(test)]
mod tests {
    use super::parse_delimiter;

    /// Checks the accepted spellings (including the escaped tab) and that
    /// empty or multi-character input is rejected.
    #[test]
    fn test_parse_delimiter() {
        let accepted: [(&str, u8); 6] = [
            (",", b','),
            ("\\t", b'\t'),
            ("|", b'|'),
            (":", b':'),
            (";", b';'),
            ("x", b'x'),
        ];
        for &(input, expected) in accepted.iter() {
            assert_eq!(parse_delimiter(input).unwrap(), expected);
        }

        let rejected: [&str; 2] = ["", "ab"];
        for &input in rejected.iter() {
            assert!(parse_delimiter(input).is_err());
        }
    }
}