csv_parser/
lib.rs

1use std::{
2    fs::File,
3    io::{self, Error, Read},
4    path::PathBuf,
5};
6
/// Reads the whole CSV input into memory and strips a leading UTF-8 byte-order mark.
///
/// If `filename` refers to an existing path, that file is read. Otherwise the
/// input is assumed to be piped and standard input is read until end-of-file.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or if reading
/// from the file or from standard input fails.
pub fn read(filename: PathBuf) -> Result<Vec<u8>, Error> {
    const UTF8_BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];

    let mut buf = Vec::new();

    if filename.exists() {
        // valid file - read it
        File::open(&filename)?.read_to_end(&mut buf)?;
    } else {
        // assume stdin pipe
        io::stdin().read_to_end(&mut buf)?;
    }

    // `starts_with` is length-safe: inputs shorter than the BOM (including an
    // empty input) simply do not match, so no slice index can go out of bounds.
    if buf.starts_with(&UTF8_BOM) {
        // Drop the three BOM bytes with a single shift of the remaining data.
        buf.drain(..UTF8_BOM.len());
    }

    Ok(buf)
}
35
/// Parses CSV bytes into rows of columns.
///
/// The input is processed in a single pass with the following rules:
///
/// * Columns are separated by `,`; rows are terminated by `\n` or `\r\n`.
/// * A column whose first character is `"` is quoted. Inside a quoted column,
///   commas and line breaks are data and `""` stands for one literal `"`.
/// * A `"` that appears after a column has already started is kept as data,
///   as is any text that follows a closing quote before the next delimiter.
/// * A trailing line terminator does not create an extra row; an empty line
///   in the middle of the input yields a row with one empty column.
/// * An unterminated quoted column runs to the end of the input.
///
/// An empty buffer yields an empty result.
///
/// # Errors
///
/// Returns an error of kind [`io::ErrorKind::InvalidData`] when `buf` is not
/// valid UTF-8.
pub fn parse(buf: Vec<u8>) -> Result<Vec<Vec<String>>, Error> {
    // Validate the encoding once up front instead of panicking per line.
    let text = String::from_utf8(buf).map_err(|e| Error::new(io::ErrorKind::InvalidData, e))?;

    let mut rows: Vec<Vec<String>> = Vec::new();
    let mut row: Vec<String> = Vec::new();
    let mut field = String::new();
    let mut in_quotes = false;
    // True once anything has been consumed for the current row, so that the
    // final row is flushed at end of input even without a trailing newline
    // (and even when its last column is empty, e.g. `a,` or `""`).
    let mut row_open = false;

    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        if in_quotes {
            match c {
                // doubled quote inside a quoted column: emit a single quote
                '"' if chars.peek() == Some(&'"') => {
                    chars.next();
                    field.push('"');
                }
                // closing quote
                '"' => in_quotes = false,
                other => field.push(other),
            }
            continue;
        }

        match c {
            // A quote only opens a quoted column at the very start of the
            // column. This can never fire right after a closing quote: a
            // quote directly following one is consumed above as an escape.
            '"' if field.is_empty() => {
                in_quotes = true;
                row_open = true;
            }
            ',' => {
                row.push(std::mem::take(&mut field));
                row_open = true;
            }
            // CR of a CRLF pair: swallow it, the LF ends the row
            '\r' if chars.peek() == Some(&'\n') => {}
            '\n' => {
                row.push(std::mem::take(&mut field));
                rows.push(std::mem::take(&mut row));
                row_open = false;
            }
            other => {
                field.push(other);
                row_open = true;
            }
        }
    }

    // Flush the last row when the input does not end with a line terminator.
    if row_open {
        row.push(field);
        rows.push(row);
    }

    Ok(rows)
}
149
/// Renders parsed rows as text lines of a `|`-separated table.
///
/// Each line starts with `"| "` and every column is followed by `" | "`.
/// Columns are left-aligned and padded to the widest cell seen at that column
/// index across all rows. Rows may have different lengths; a shorter row
/// simply renders fewer columns.
///
/// Widths are measured in characters (`char`s), which is the same unit the
/// formatter uses when padding, so cells containing multi-byte UTF-8 text do
/// not inflate their column.
pub fn to_lines(row_cols: Vec<Vec<String>>) -> Vec<String> {
    // Widest cell (in chars) per column index, grown as longer rows appear.
    let mut widths: Vec<usize> = Vec::new();
    for row in &row_cols {
        if widths.len() < row.len() {
            widths.resize(row.len(), 0);
        }
        for (width, cell) in widths.iter_mut().zip(row) {
            *width = (*width).max(cell.chars().count());
        }
    }

    row_cols
        .iter()
        .map(|row| {
            let mut line = String::from("| ");
            // `widths` is at least as long as every row, so zip visits every cell.
            for (cell, &width) in row.iter().zip(&widths) {
                line.push_str(&format!("{:<w$} | ", cell, w = width));
            }
            line
        })
        .collect()
}