1use std::cmp::min;
2use std::io::{self, BufRead, Write};
3
4use crate::column::{Column, MeasureColumn};
5use crate::errors::*;
6use crate::parser::{Row, RowParser};
7use crate::range::{Range, Ranges};
8
9pub mod column;
10pub mod parser;
11pub mod range;
12mod utils;
13
/// Error and result types used throughout the crate.
pub mod errors {
    /// Result alias whose error type is this module's [`Error`].
    pub type Result<T> = std::result::Result<T, Error>;

    /// Errors reported by this crate.
    #[derive(Debug, thiserror::Error)]
    pub enum Error {
        /// An I/O operation failed. Built automatically from `std::io::Error`
        /// through the `#[from]` conversion, so `?` works on I/O results.
        #[error("IO error")]
        Io(#[from] ::std::io::Error),

        /// The text `s` could not be parsed as a range.
        #[error("could not parse '{}' as a range", .s)]
        RangeParseError {
            // The text that failed to parse; echoed in the error message.
            s: String
        },

        /// The range written as `s` is decreasing, which is rejected as invalid.
        #[error("invalid decreasing range: {}", .s)]
        InvalidDecreasingRange {
            // The offending range text; echoed in the error message.
            s: String
        },

        /// A column number was given that does not respect 1-based numbering.
        // NOTE(review): presumably raised for column 0 by the range parser — confirm there.
        #[error("columns are numbered starting from 1")]
        ColumnsStartAtOne,
    }
}
36
/// Settings that control how [`process`] measures and prints its input.
#[derive(Debug)]
pub struct Options {
    /// Columns (1-based) to mark as truncated while measuring; `None` marks none.
    pub truncate: Option<Ranges>,
    /// Ratio handed to `MeasureColumn::calculate_size` when the measurements are
    /// turned into final column sizes.
    pub ratio: f64,
    /// Number of input lines to measure before column sizes are fixed.
    /// `0` means every line of the input is measured.
    pub lines: usize,
    /// When `Some`, only columns (1-based) inside these ranges are kept; every other
    /// column is treated as excluded. `None` keeps all columns.
    pub include_cols: Option<Ranges>,
    /// Columns (1-based) that are excluded from the output.
    pub exclude_cols: Ranges,
    /// Input delimiter specification handed to `RowParser::new`.
    pub delim: String,
    /// Text written between two printed cells of a row.
    pub output_delim: String,
    /// Strict-delimiter flag handed to `RowParser::new`.
    pub strict_delim: bool,
    /// When `true`, per-column information is collected while measuring and written
    /// once measuring finishes; processing then stops.
    pub print_info: bool,
    /// When `true`, each measured row is printed immediately with the sizes known so
    /// far instead of being buffered until measuring finishes.
    pub online: bool,
}
50
51pub fn process<R: BufRead, W: Write>(input: R, mut output: W, opts: &Options) -> Result<()> {
52 #[derive(Debug)]
53 enum ProcessingState {
54 Measuring {
55 lines_measured: usize,
56 backlog: Vec<Row>,
57 },
58 PrintBacklog {
59 backlog: Vec<Row>,
60 },
61 ProcessInput,
62 }
63
64 let mut state = ProcessingState::Measuring {
65 lines_measured: 1,
66 backlog: Vec::new(),
67 };
68 let mut measure_columns = Vec::new();
69 let mut columns = Vec::new();
70 let parser = RowParser::new(opts.delim.clone(), opts.strict_delim);
71 let mut row = Row::new();
72 let mut lines = input.lines();
73
74 loop {
75 state = match state {
76 ProcessingState::Measuring {
77 lines_measured,
78 mut backlog,
79 } => {
80 if let Some(line) = lines.next() {
81 let line = line?;
82 parser.parse_into(&mut row, line);
83 update_columns(
84 &mut measure_columns,
85 &row,
86 opts.include_cols.as_ref(),
87 &opts.exclude_cols,
88 opts.truncate.as_ref(),
89 opts.print_info,
90 );
91 if opts.online {
92 columns.clear();
93 columns
94 .extend(measure_columns.iter().map(|c| c.calculate_size(opts.ratio)));
95 print_row(&mut output, &columns[..], &row, &opts.output_delim)?;
96 } else {
97 backlog.push(row.clone());
98 }
99 if opts.lines == 0 || lines_measured < opts.lines {
100 ProcessingState::Measuring {
101 lines_measured: lines_measured + 1,
102 backlog,
103 }
104 } else {
105 ProcessingState::PrintBacklog { backlog }
106 }
107 } else {
108 ProcessingState::PrintBacklog { backlog }
109 }
110 }
111 ProcessingState::PrintBacklog { backlog } => {
112 columns.clear();
113 columns.extend(measure_columns.iter().map(|c| c.calculate_size(opts.ratio)));
114
115 if opts.print_info {
116 for (i, col) in columns.iter_mut().enumerate() {
117 writeln!(output, "Column {}", i + 1)?;
118 col.print_info(&mut output)?;
119 writeln!(output)?;
120 }
121 return Ok(());
122 }
123
124 for row in backlog {
125 print_row(&mut output, &columns[..], &row, &opts.output_delim)?;
126 }
127
128 ProcessingState::ProcessInput
129 }
130 ProcessingState::ProcessInput => {
131 if let Some(line) = lines.next() {
132 let line = line?;
133 parser.parse_into(&mut row, line);
134 print_row(&mut output, &columns[..], &row, &opts.output_delim)?;
135
136 ProcessingState::ProcessInput
137 } else {
138 break;
139 }
140 }
141 }
142 }
143
144 Ok(())
145}
146
147fn update_columns(
148 columns: &mut Vec<MeasureColumn>,
149 row: &Row,
150 include_cols: Option<&Ranges>,
151 excluded_cols: &Ranges,
152 truncate_cols: Option<&Ranges>,
153 collect_info: bool,
154) {
155 for i in 0..min(columns.len(), row.len()) {
156 columns[i].add_sample(&row[i]);
157 }
158 #[allow(clippy::needless_range_loop)]
159 for i in columns.len()..row.len() {
160 let mut col = MeasureColumn::new(collect_info);
161 let col_num = (i + 1) as u32;
162
163 let included = include_cols
164 .map(|rs| rs.any_contains(col_num))
165 .unwrap_or(true);
166
167 let excluded = excluded_cols.any_contains(col_num);
168
169 let truncated = truncate_cols
170 .map(|rs| rs.any_contains(col_num))
171 .unwrap_or(false);
172
173 col.set_excluded(!included || excluded);
174 col.set_truncated(truncated);
175
176 col.add_sample(&row[i]);
177
178 columns.push(col);
179 }
180}
181
182fn print_row<W: Write>(out: &mut W, columns: &[Column], row: &Row, output_delim: &str) -> io::Result<()> {
183 let mut overflow: usize = 0;
184 for ((cell, col), first, last) in utils::first_last_iter(
185 row.get_parts()
186 .zip(columns)
187 .filter(|&(_, col)| !col.is_excluded()),
188 ) {
189 if !first {
190 write!(out, "{}", output_delim)?;
191 }
192 overflow = col.print_cell(out, cell, overflow, last)?;
193 }
194 writeln!(out)?;
195 Ok(())
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::BufReader;

    /// Baseline options shared by all tests; each test overrides only what it needs.
    fn base_options() -> Options {
        Options {
            truncate: None,
            ratio: 1.0,
            lines: 1000,
            include_cols: None,
            exclude_cols: Ranges::new(),
            delim: " \t".to_string(),
            output_delim: " ".to_string(),
            strict_delim: false,
            print_info: false,
            online: false,
        }
    }

    /// Runs `process` over `input` and returns everything it wrote.
    fn run(input: &[u8], opts: &Options) -> Vec<u8> {
        let mut sink: Vec<u8> = Vec::new();
        process(BufReader::new(input), &mut sink, opts).unwrap();
        sink
    }

    /// Plain input with the default options.
    #[test]
    fn basic_test() {
        let produced = run(b"aa bb cc\n1 2 3\n", &base_options());
        assert_eq!(&produced, b"aa bb cc\n1 2 3\n");
    }

    /// Excluded columns are dropped for each supported range form.
    #[test]
    fn exclude_column() {
        let input: &[u8] = b"aa bb cc\n1 2 3\n";
        let cases: Vec<(Ranges, &[u8])> = vec![
            (Ranges(vec![Range::Between(2, 2)]), &b"aa cc\n1 3\n"[..]),
            (Ranges(vec![Range::From(2)]), &b"aa\n1\n"[..]),
            (Ranges(vec![Range::To(2)]), &b"cc\n3\n"[..]),
            (
                Ranges(vec![Range::Between(1, 1), Range::Between(3, 3)]),
                &b"bb\n2\n"[..],
            ),
        ];

        for (exclude_cols, expected) in cases {
            let opts = Options {
                exclude_cols,
                ..base_options()
            };
            assert_eq!(&run(input, &opts)[..], expected);
        }
    }

    /// Only the first line is measured when `lines` is 1.
    #[test]
    fn lines_opt() {
        let opts = Options {
            lines: 1,
            ..base_options()
        };

        let produced = run(b"1 1\naaaa aaaa\n", &opts);
        assert_eq!(&produced, b"1 1\naaaa aaaa\n");
    }

    /// Rows after the measured line contain cells of different widths.
    #[test]
    fn overflow() {
        let opts = Options {
            lines: 1,
            ..base_options()
        };

        let input = ("a a aaaaaaaaaaa a\n".repeat(10) + "bbbbbb bb b b\n").into_bytes();
        let expected = "a a aaaaaaaaaaa a\n".repeat(10) + "bbbbbb bb b b\n";
        let produced = run(&input, &opts);
        assert_eq!(std::str::from_utf8(&produced).unwrap(), expected);
    }

    /// A custom output delimiter is written between cells.
    #[test]
    fn output_delimiter() {
        let opts = Options {
            lines: 1,
            output_delim: " & ".to_string(),
            ..base_options()
        };

        let input = ("a a aaaaaaaaaaa a\n".repeat(10) + "bbbbbb bb b b\n").into_bytes();
        let expected = "a & a & aaaaaaaaaaa & a\n".repeat(10) + "bbbbbb & bb & b & b\n";
        let produced = run(&input, &opts);
        assert_eq!(std::str::from_utf8(&produced).unwrap(), expected);
    }
}