Skip to main content

pick/formats/
csv_format.rs

1use crate::error::PickError;
2use serde_json::Value;
3
4pub fn parse(input: &str) -> Result<Value, PickError> {
5    // Detect delimiter (comma or tab)
6    let delimiter = detect_delimiter(input);
7
8    let mut reader = csv::ReaderBuilder::new()
9        .has_headers(true)
10        .flexible(true)
11        .delimiter(delimiter)
12        .from_reader(input.as_bytes());
13
14    let headers = reader
15        .headers()
16        .map_err(|e| PickError::ParseError("CSV".into(), e.to_string()))?
17        .clone();
18
19    if headers.is_empty() {
20        return Err(PickError::ParseError(
21            "CSV".into(),
22            "no headers found".into(),
23        ));
24    }
25
26    let mut rows = Vec::new();
27
28    for result in reader.records() {
29        let record = result.map_err(|e| PickError::ParseError("CSV".into(), e.to_string()))?;
30        let mut map = serde_json::Map::new();
31
32        for (i, field) in record.iter().enumerate() {
33            let key = headers
34                .get(i)
35                .map(|h| h.to_string())
36                .unwrap_or_else(|| i.to_string());
37            map.insert(key, Value::String(field.to_string()));
38        }
39
40        rows.push(Value::Object(map));
41    }
42
43    Ok(Value::Array(rows))
44}
45
/// Guesses whether `input` is comma- or tab-separated.
///
/// The first non-empty line is scanned and commas and tabs are counted, but
/// only those that sit outside double-quoted fields: a delimiter character
/// inside a quoted header such as `"last, first"` is data, not a separator.
/// Inside a quoted field a doubled quote (`""`) is treated as an escaped
/// quote and does not end the field. A quote is only taken as opening a
/// quoted field when it is the first character of a field, so a literal
/// quote in the middle of an unquoted field does not hide later delimiters.
///
/// Returns `b'\t'` when tabs strictly outnumber commas, otherwise `b','`
/// (this includes ties and input with no delimiters at all).
fn detect_delimiter(input: &str) -> u8 {
    // Leading blank lines carry no delimiter evidence, so look past them.
    let first_line = input.lines().find(|line| !line.is_empty()).unwrap_or("");

    let mut commas = 0usize;
    let mut tabs = 0usize;
    let mut in_quotes = false;
    let mut at_field_start = true;

    let mut chars = first_line.chars().peekable();
    while let Some(ch) = chars.next() {
        if in_quotes {
            if ch == '"' {
                if chars.peek() == Some(&'"') {
                    // Escaped quote (`""`): consume the pair, stay quoted.
                    chars.next();
                } else {
                    in_quotes = false;
                }
            }
            continue;
        }

        match ch {
            '"' if at_field_start => {
                in_quotes = true;
                at_field_start = false;
            }
            ',' => {
                commas += 1;
                at_field_start = true;
            }
            '\t' => {
                tabs += 1;
                at_field_start = true;
            }
            _ => at_field_start = false,
        }
    }

    if tabs > commas {
        b'\t'
    } else {
        b','
    }
}
53
#[cfg(test)]
mod tests {
    //! Unit tests for CSV/TSV parsing into JSON values.

    use super::*;
    use serde_json::json;

    #[test]
    fn parse_simple_csv() {
        let input = "name,age,city\nAlice,30,NYC\nBob,25,LA";
        let v = parse(input).unwrap();
        assert_eq!(v[0]["name"], json!("Alice"));
        assert_eq!(v[0]["age"], json!("30"));
        assert_eq!(v[1]["city"], json!("LA"));
    }

    #[test]
    fn parse_tsv() {
        let input = "name\tage\tcity\nAlice\t30\tNYC";
        let v = parse(input).unwrap();
        assert_eq!(v[0]["name"], json!("Alice"));
        assert_eq!(v[0]["age"], json!("30"));
    }

    #[test]
    fn parse_quoted_fields() {
        let input = "name,desc\nAlice,\"hello, world\"\nBob,\"line1\nline2\"";
        let v = parse(input).unwrap();
        // A quoted delimiter stays inside the field.
        assert_eq!(v[0]["desc"], json!("hello, world"));
        // A quoted newline stays inside the field instead of starting a new record.
        assert_eq!(v.as_array().unwrap().len(), 2);
        assert_eq!(v[1]["name"], json!("Bob"));
        assert_eq!(v[1]["desc"], json!("line1\nline2"));
    }

    #[test]
    fn parse_empty_fields() {
        let input = "a,b,c\n1,,3\n,2,";
        let v = parse(input).unwrap();
        assert_eq!(v[0]["b"], json!(""));
        assert_eq!(v[1]["a"], json!(""));
        assert_eq!(v[1]["c"], json!(""));
    }

    #[test]
    fn parse_single_column() {
        let input = "name\nAlice\nBob";
        let v = parse(input).unwrap();
        assert_eq!(v[0]["name"], json!("Alice"));
        assert_eq!(v[1]["name"], json!("Bob"));
    }

    #[test]
    fn parse_single_row() {
        let input = "name,age\nAlice,30";
        let v = parse(input).unwrap();
        assert_eq!(v.as_array().unwrap().len(), 1);
    }

    #[test]
    fn parse_many_columns() {
        let input = "a,b,c,d,e\n1,2,3,4,5";
        let v = parse(input).unwrap();
        assert_eq!(v[0]["e"], json!("5"));
    }

    #[test]
    fn parse_headers_only() {
        let input = "name,age,city";
        let v = parse(input).unwrap();
        assert_eq!(v.as_array().unwrap().len(), 0);
    }

    #[test]
    fn parse_numeric_looking_values() {
        let input = "id,count\n001,42";
        let v = parse(input).unwrap();
        // CSV values are always strings
        assert_eq!(v[0]["id"], json!("001"));
        assert_eq!(v[0]["count"], json!("42"));
    }

    #[test]
    fn parse_empty_input_is_error() {
        // No header row at all is reported as a parse error.
        assert!(parse("").is_err());
    }

    #[test]
    fn parse_row_with_extra_fields() {
        let input = "a,b\n1,2,3";
        let v = parse(input).unwrap();
        assert_eq!(v[0]["a"], json!("1"));
        assert_eq!(v[0]["b"], json!("2"));
        // A field beyond the header row is keyed by its zero-based column index.
        assert_eq!(v[0]["2"], json!("3"));
    }

    #[test]
    fn parse_row_with_missing_fields() {
        let input = "a,b\n1";
        let v = parse(input).unwrap();
        assert_eq!(v[0]["a"], json!("1"));
        // A short row simply omits the keys it has no field for.
        assert!(v[0].get("b").is_none());
    }
}