tabulate/
lib.rs

1use std::cmp::min;
2use std::io::{self, BufRead, Write};
3
4use crate::column::{Column, MeasureColumn};
5use crate::errors::*;
6use crate::parser::{Row, RowParser};
7use crate::range::{Range, Ranges};
8
9pub mod column;
10pub mod parser;
11pub mod range;
12mod utils;
13
14pub mod errors {
15    pub type Result<T> = std::result::Result<T, Error>;
16
17    #[derive(Debug, thiserror::Error)]
18    pub enum Error {
19        #[error("IO error")]
20        Io(#[from] ::std::io::Error),
21
22        #[error("could not parse '{}' as a range", .s)]
23        RangeParseError {
24            s: String
25        },
26
27        #[error("invalid decreasing range: {}", .s)]
28        InvalidDecreasingRange {
29            s: String
30        },
31
32        #[error("columns are numbered starting from 1")]
33        ColumnsStartAtOne,
34    }
35}
36
37#[derive(Debug)]
38pub struct Options {
39    pub truncate: Option<Ranges>,
40    pub ratio: f64,
41    pub lines: usize,
42    pub include_cols: Option<Ranges>,
43    pub exclude_cols: Ranges,
44    pub delim: String,
45    pub output_delim: String,
46    pub strict_delim: bool,
47    pub print_info: bool,
48    pub online: bool,
49}
50
51pub fn process<R: BufRead, W: Write>(input: R, mut output: W, opts: &Options) -> Result<()> {
52    #[derive(Debug)]
53    enum ProcessingState {
54        Measuring {
55            lines_measured: usize,
56            backlog: Vec<Row>,
57        },
58        PrintBacklog {
59            backlog: Vec<Row>,
60        },
61        ProcessInput,
62    }
63
64    let mut state = ProcessingState::Measuring {
65        lines_measured: 1,
66        backlog: Vec::new(),
67    };
68    let mut measure_columns = Vec::new();
69    let mut columns = Vec::new();
70    let parser = RowParser::new(opts.delim.clone(), opts.strict_delim);
71    let mut row = Row::new();
72    let mut lines = input.lines();
73
74    loop {
75        state = match state {
76            ProcessingState::Measuring {
77                lines_measured,
78                mut backlog,
79            } => {
80                if let Some(line) = lines.next() {
81                    let line = line?;
82                    parser.parse_into(&mut row, line);
83                    update_columns(
84                        &mut measure_columns,
85                        &row,
86                        opts.include_cols.as_ref(),
87                        &opts.exclude_cols,
88                        opts.truncate.as_ref(),
89                        opts.print_info,
90                    );
91                    if opts.online {
92                        columns.clear();
93                        columns
94                            .extend(measure_columns.iter().map(|c| c.calculate_size(opts.ratio)));
95                        print_row(&mut output, &columns[..], &row, &opts.output_delim)?;
96                    } else {
97                        backlog.push(row.clone());
98                    }
99                    if opts.lines == 0 || lines_measured < opts.lines {
100                        ProcessingState::Measuring {
101                            lines_measured: lines_measured + 1,
102                            backlog,
103                        }
104                    } else {
105                        ProcessingState::PrintBacklog { backlog }
106                    }
107                } else {
108                    ProcessingState::PrintBacklog { backlog }
109                }
110            }
111            ProcessingState::PrintBacklog { backlog } => {
112                columns.clear();
113                columns.extend(measure_columns.iter().map(|c| c.calculate_size(opts.ratio)));
114
115                if opts.print_info {
116                    for (i, col) in columns.iter_mut().enumerate() {
117                        writeln!(output, "Column {}", i + 1)?;
118                        col.print_info(&mut output)?;
119                        writeln!(output)?;
120                    }
121                    return Ok(());
122                }
123
124                for row in backlog {
125                    print_row(&mut output, &columns[..], &row, &opts.output_delim)?;
126                }
127
128                ProcessingState::ProcessInput
129            }
130            ProcessingState::ProcessInput => {
131                if let Some(line) = lines.next() {
132                    let line = line?;
133                    parser.parse_into(&mut row, line);
134                    print_row(&mut output, &columns[..], &row, &opts.output_delim)?;
135
136                    ProcessingState::ProcessInput
137                } else {
138                    break;
139                }
140            }
141        }
142    }
143
144    Ok(())
145}
146
147fn update_columns(
148    columns: &mut Vec<MeasureColumn>,
149    row: &Row,
150    include_cols: Option<&Ranges>,
151    excluded_cols: &Ranges,
152    truncate_cols: Option<&Ranges>,
153    collect_info: bool,
154) {
155    for i in 0..min(columns.len(), row.len()) {
156        columns[i].add_sample(&row[i]);
157    }
158    #[allow(clippy::needless_range_loop)]
159    for i in columns.len()..row.len() {
160        let mut col = MeasureColumn::new(collect_info);
161        let col_num = (i + 1) as u32;
162
163        let included = include_cols
164            .map(|rs| rs.any_contains(col_num))
165            .unwrap_or(true);
166
167        let excluded = excluded_cols.any_contains(col_num);
168
169        let truncated = truncate_cols
170            .map(|rs| rs.any_contains(col_num))
171            .unwrap_or(false);
172
173        col.set_excluded(!included || excluded);
174        col.set_truncated(truncated);
175
176        col.add_sample(&row[i]);
177
178        columns.push(col);
179    }
180}
181
182fn print_row<W: Write>(out: &mut W, columns: &[Column], row: &Row, output_delim: &str) -> io::Result<()> {
183    let mut overflow: usize = 0;
184    for ((cell, col), first, last) in utils::first_last_iter(
185        row.get_parts()
186            .zip(columns)
187            .filter(|&(_, col)| !col.is_excluded()),
188    ) {
189        if !first {
190            write!(out, "{}", output_delim)?;
191        }
192        overflow = col.print_cell(out, cell, overflow, last)?;
193    }
194    writeln!(out)?;
195    Ok(())
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::BufReader;

    /// Baseline options shared by every test: whitespace input delimiter,
    /// two-space output delimiter, nothing excluded or truncated, and `lines`
    /// lines measured.
    fn options(lines: usize) -> Options {
        Options {
            truncate: None,
            ratio: 1.0,
            lines,
            include_cols: None,
            exclude_cols: Ranges::new(),
            delim: " \t".to_string(),
            output_delim: "  ".to_string(),
            strict_delim: false,
            print_info: false,
            online: false,
        }
    }

    /// Runs `process` over `input` and returns everything it wrote.
    fn tabulate(input: &[u8], opts: &Options) -> Vec<u8> {
        let mut output = Vec::new();
        process(BufReader::new(input), &mut output, opts).unwrap();
        output
    }

    #[test]
    fn basic_test() {
        let output = tabulate(b"aa bb cc\n1 2 3\n", &options(1000));
        assert_eq!(&output, b"aa  bb  cc\n1   2   3\n");
    }

    #[test]
    fn exclude_column() {
        let input: &[u8] = b"aa bb cc\n1 2 3\n";
        let cases: Vec<(Ranges, &[u8])> = vec![
            (Ranges(vec![Range::Between(2, 2)]), b"aa  cc\n1   3\n"),
            (Ranges(vec![Range::From(2)]), b"aa\n1\n"),
            (Ranges(vec![Range::To(2)]), b"cc\n3\n"),
            (
                Ranges(vec![Range::Between(1, 1), Range::Between(3, 3)]),
                b"bb\n2\n",
            ),
        ];

        let mut opts = options(1000);
        for (excluded, expected) in cases {
            opts.exclude_cols = excluded;
            let output = tabulate(input, &opts);
            assert_eq!(&output[..], expected);
        }
    }

    #[test]
    fn lines_opt() {
        // Only the first line is measured, so the second one overflows.
        let output = tabulate(b"1 1\naaaa aaaa\n", &options(1));
        assert_eq!(&output, b"1  1\naaaa  aaaa\n");
    }

    #[test]
    fn overflow() {
        // a  a  aaaaaaaaaaa  a
        // a  a  aaaaaaaaaaa  a
        // a  a  aaaaaaaaaaa  a
        // bbbbbb  bb  b      b
        let input = ("a a aaaaaaaaaaa a\n".repeat(10) + "bbbbbb bb b b\n").into_bytes();
        let expected = "a  a  aaaaaaaaaaa  a\n".repeat(10) + "bbbbbb  bb  b      b\n";

        let output = tabulate(&input, &options(1));
        assert_eq!(std::str::from_utf8(&output).unwrap(), expected);
    }

    #[test]
    fn output_delimiter() {
        let opts = Options {
            output_delim: " & ".to_string(),
            ..options(1)
        };

        // a & a & aaaaaaaaaaa & a
        // a & a & aaaaaaaaaaa & a
        // a & a & aaaaaaaaaaa & a
        // bbbbbb & bb & b     & b
        let input = ("a a aaaaaaaaaaa a\n".repeat(10) + "bbbbbb bb b b\n").into_bytes();
        let expected = "a & a & aaaaaaaaaaa & a\n".repeat(10) + "bbbbbb & bb & b     & b\n";

        let output = tabulate(&input, &opts);
        assert_eq!(std::str::from_utf8(&output).unwrap(), expected);
    }
}