// convert2json/csv.rs
#![cfg(any(feature = "csv", feature = "csv2json", feature = "cq"))]
extern crate csv;

use super::{exit, Error};
use csv::{ReaderBuilder, Trim};
use pico_args::{Arguments, Error as picoError};
use std::collections::HashMap;
use std::ffi::OsString;
use std::io::Read;

/// Usage text for the `csv2json` command line.
///
/// Printed to standard error by `CsvReader::args` when one of the help flags
/// (`-h`, `--help`, `-?` or `-help`) is present.
const HELP: &str = "\
Usage: csv2json [-d <delimiter>] [-q <quote>] [-E <escape>] [--no-trim] [files...]

Reads CSV from files or standard input and converts this to JSON, emitted on standard output. Any errors are reported to standard error and result in a non-zero exit code.

Options:
  -d, --delimiter   field delimiter to use when parsing CSV, defaults to: ,
                    (comma)
  -q, --quote       quote character to use when parsing CSV, defaults to: \"
                    (double quote)
  -E, --escape      escape character to use when parsing CSV, to escape quote
                    characters within a field. By default, quotes get escaped by
                    doubling them.
  --no-trim         do not trim headers & fields. By default, both get trimmed
                    of starting or trailing whitespace characters.
  -h, --help        display usage information
";

/// A single CSV record as deserialized by `CsvReader::append`: string keys
/// (presumably the column headers -- confirm against the `csv` crate's map
/// deserialization) mapped to JSON values.
pub type CsvMap = HashMap<String, serde_json::Value>;

/// CSV parsing options collected from the command line.
///
/// The `Option` fields are `None` when the corresponding flag was not given,
/// in which case the reader builder's own default is left untouched.
struct CsvParameters {
    // Field delimiter byte from `-d` / `--delimiter`.
    delimiter: Option<u8>,
    // Quote byte from `-q` / `--quote`.
    quote: Option<u8>,
    // Escape byte from `-E` / `--escape`.
    escape: Option<u8>,
    // True when `--no-trim` was passed; trimming is then not enabled.
    no_trim: bool,
}

/// Reads CSV input into `CsvMap` records using a pre-configured
/// `csv::ReaderBuilder`.
pub struct CsvReader {
    // Builder holding the parsing configuration; a fresh reader is created
    // from it for every input passed to `append`.
    read: ReaderBuilder,
}

impl CsvReader {
    pub fn new(exit_on_help: bool) -> Self {
        let arguments = match Self::args(exit_on_help) {
            Ok(a) => a,
            Err(e) => {
                eprintln!("Error {e}");
                exit(Error::ArgumentParsing as i32);
            }
        };
        let mut read = ReaderBuilder::new();
        read.flexible(true);
        if let Some(delimiter) = arguments.delimiter {
            read.delimiter(delimiter);
        }
        if let Some(quote) = arguments.quote {
            read.quote(quote);
        }
        if arguments.escape.is_some() {
            // note that setting this to None would disable escape sequences entirely
            read.escape(arguments.escape).double_quote(false);
        }
        if !arguments.no_trim {
            read.trim(Trim::All);
        }
        Self { read }
    }

    pub fn append<R: Read>(&mut self, results: &mut Vec<CsvMap>, reader: R) {
        for row in self.read.from_reader(reader).deserialize() {
            let record: CsvMap = match row {
                Ok(values) => values,
                Err(e) => {
                    eprintln!("Error parsing input: {e}");
                    exit(Error::InputParsing as i32);
                }
            };
            results.push(record);
        }
    }

    /// Extracts the CSV options from the process's command line arguments.
    ///
    /// When a help flag is found the usage text is written to standard error;
    /// the process then exits with status 0 if `exit_on_help` is set, otherwise
    /// parsing carries on.
    ///
    /// # Errors
    ///
    /// Returns the `pico_args` error produced when an option's value is missing
    /// or rejected by `Self::arg_u8`.
    fn args(exit_on_help: bool) -> Result<CsvParameters, picoError> {
        // Shared by both help checks below.
        let show_help = || {
            eprintln!("{HELP}");
            if exit_on_help {
                exit(0);
            }
        };

        let mut pargs = Arguments::from_env();
        if pargs.contains(["-h", "--help"]) || pargs.contains("-?") {
            show_help();
        }

        let delimiter = pargs.opt_value_from_fn(["-d", "--delimiter"], Self::arg_u8)?;
        let quote = pargs.opt_value_from_fn(["-q", "--quote"], Self::arg_u8)?;
        let escape = pargs.opt_value_from_fn(["-E", "--escape"], Self::arg_u8)?;
        let no_trim = pargs.contains("--no-trim");

        // pico-args doesn't support -help:
        // > short keys should be a single character or a repeated character
        // so it is looked up among the arguments left over after parsing.
        let help_flag = OsString::from("-help");
        if pargs.finish().contains(&help_flag) {
            show_help();
        }

        Ok(CsvParameters {
            delimiter,
            quote,
            escape,
            no_trim,
        })
    }

    /// Converts a command line value into the single byte it consists of.
    ///
    /// # Errors
    ///
    /// Returns an error message unless `s` is exactly one byte long. Because
    /// `s` is valid UTF-8, a one-byte string is always a single ASCII
    /// character; empty strings, longer strings and multi-byte characters are
    /// all rejected.
    fn arg_u8(s: &str) -> Result<u8, &'static str> {
        // Matching on the byte slice states the "exactly one byte" invariant
        // directly, so no truncating `char as u8` cast is needed.
        match s.as_bytes() {
            [byte] => Ok(*byte),
            _ => Err("argument requires a single character"),
        }
    }
}