csv_parser/lib.rs
1use std::{
2 fs::File,
3 io::{self, Error, Read},
4 path::PathBuf,
5};
6
/// Reads the raw bytes of a CSV source.
///
/// If `filename` refers to an existing path, that file is opened and read in full.
/// Otherwise the data is assumed to be piped in and standard input is read until EOF.
/// A leading UTF-8 byte-order mark (`EF BB BF`) is removed from the returned buffer.
///
/// # Errors
///
/// Returns any I/O error raised while opening or reading the file, or while reading
/// standard input.
pub fn read(filename: PathBuf) -> Result<Vec<u8>, Error> {
    const UTF8_BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];

    let mut buf = Vec::new();

    if filename.exists() {
        // `read_to_end` already reads in large chunks, so no extra buffering is needed.
        File::open(&filename)?.read_to_end(&mut buf)?;
    } else {
        // Path does not exist: assume the input arrives through a stdin pipe.
        io::stdin().read_to_end(&mut buf)?;
    }

    // `starts_with` is safe for inputs shorter than the BOM (including empty input),
    // whereas slicing `buf[0..3]` would panic on a 1- or 2-byte buffer.
    if buf.starts_with(&UTF8_BOM) {
        // Single shift of the remaining bytes instead of three `remove(0)` calls.
        buf.drain(..UTF8_BOM.len());
    }

    Ok(buf)
}
35
/// Parses CSV bytes into rows of string columns.
///
/// Columns are separated by `,` and rows by `\n`; a `\r` directly in front of a
/// row-ending `\n` is dropped, so CRLF input is accepted. A column whose first byte is
/// `"` is quoted: it may contain commas and line breaks, and a doubled `""` inside it
/// stands for one literal `"`. The surrounding quotes are not part of the value.
///
/// Malformed input is handled leniently rather than rejected:
/// * a `"` that is not the first byte of a column is kept as a literal character;
/// * text following a closing quote is appended to the column;
/// * an unterminated quoted column runs to the end of the input.
///
/// An empty line produces a row with a single empty column. A newline at the very end
/// of the input does not produce an additional row. Empty input yields no rows.
///
/// # Errors
///
/// Returns an error of kind [`io::ErrorKind::InvalidData`] if a column is not valid
/// UTF-8.
pub fn parse(buf: Vec<u8>) -> Result<Vec<Vec<String>>, Error> {
    const QUOTE: u8 = b'"';
    const COMMA: u8 = b',';
    const LF: u8 = b'\n';
    const CR: u8 = b'\r';

    // Converts the accumulated column bytes into a `String`, leaving `field` empty.
    fn take_field(field: &mut Vec<u8>) -> Result<String, Error> {
        String::from_utf8(std::mem::take(field))
            .map_err(|e| Error::new(io::ErrorKind::InvalidData, e))
    }

    let mut rows: Vec<Vec<String>> = Vec::new();
    let mut row: Vec<String> = Vec::new();
    let mut field: Vec<u8> = Vec::new();
    let mut in_quotes = false;
    // True once any byte of the current row has been consumed; decides whether a final
    // row without a trailing newline still has to be emitted.
    let mut row_open = false;

    // All delimiters are ASCII, and ASCII bytes never occur inside a multi-byte UTF-8
    // sequence, so scanning bytes is safe and avoids decoding up front.
    let mut bytes = buf.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        if in_quotes {
            match byte {
                // `""` inside a quoted column is an escaped quote.
                QUOTE if bytes.peek() == Some(&QUOTE) => {
                    bytes.next();
                    field.push(QUOTE);
                }
                QUOTE => in_quotes = false,
                other => field.push(other),
            }
            continue;
        }

        if byte == LF {
            row.push(take_field(&mut field)?);
            rows.push(std::mem::take(&mut row));
            row_open = false;
            continue;
        }

        row_open = true;
        match byte {
            COMMA => row.push(take_field(&mut field)?),
            // Right after a closing quote the next byte is never a quote (it would have
            // been consumed as an escape), so an empty `field` here means the quote is
            // the first byte of the column.
            QUOTE if field.is_empty() => in_quotes = true,
            // CR of a CRLF pair: skip it, the following LF ends the row.
            CR if bytes.peek() == Some(&LF) => {}
            other => field.push(other),
        }
    }

    // Input did not end with a newline: flush the row that is still being built.
    if row_open {
        row.push(take_field(&mut field)?);
        rows.push(row);
    }

    Ok(rows)
}
149
/// Renders parsed rows as left-aligned, pipe-delimited table lines.
///
/// Every column is padded to the width of its widest cell across all rows. Each line
/// starts with `"| "` and every cell is followed by `" | "`. Rows may have different
/// numbers of columns; a shorter row simply produces a shorter line, and an empty row
/// produces just `"| "`.
///
/// Widths are measured in characters (Unicode scalar values), which is the unit the
/// formatter's width padding uses, so non-ASCII cells are not padded wider than needed.
pub fn to_lines(row_cols: Vec<Vec<String>>) -> Vec<String> {
    // Widest cell seen so far for each column index.
    let mut widths: Vec<usize> = Vec::new();
    for row in &row_cols {
        // Rows can be longer than any seen before; grow the table to cover them.
        if widths.len() < row.len() {
            widths.resize(row.len(), 0);
        }
        for (width, cell) in widths.iter_mut().zip(row) {
            *width = (*width).max(cell.chars().count());
        }
    }

    row_cols
        .iter()
        .map(|row| {
            let mut line = String::from("| ");
            // `widths` is at least as long as every row, so `zip` visits every cell.
            for (cell, &width) in row.iter().zip(&widths) {
                line.push_str(&format!("{:<w$} | ", cell, w = width));
            }
            line
        })
        .collect()
}