qsv_tabwriter/
lib.rs

1//! This crate provides an implementation of
2//! [elastic tabstops](http://nickgravgaard.com/elastictabstops/index.html).
3//! It is a minimal port of Go's
4//! [tabwriter](http://golang.org/pkg/text/tabwriter/) package.
5//! Namely, its main mode of operation is to wrap a `Writer` and implement
6//! elastic tabstops for the text written to the wrapped `Writer`.
7//!
8//! This package is also bundled with a program, `tabwriter`,
9//! that exposes this functionality at the command line.
10//!
11//! Here's an example that shows basic alignment:
12//!
13//! ```rust
14//! use std::io::Write;
15//! use qsv_tabwriter::TabWriter;
16//!
17//! let mut tw = TabWriter::new(vec![]);
18//! write!(&mut tw, "
19//! Bruce Springsteen\tBorn to Run
20//! Bob Seger\tNight Moves
21//! Metallica\tBlack
22//! The Boss\tDarkness on the Edge of Town
23//! ").unwrap();
24//! tw.flush().unwrap();
25//!
26//! let written = String::from_utf8(tw.into_inner().unwrap()).unwrap();
27//! assert_eq!(&*written, "
28//! Bruce Springsteen  Born to Run
29//! Bob Seger          Night Moves
30//! Metallica          Black
31//! The Boss           Darkness on the Edge of Town
32//! ");
33//! ```
34//!
35//! Note that `flush` **must** be called or else `TabWriter` may never write
36//! anything. This is because elastic tabstops requires knowing about future
37//! lines in order to align output. More precisely, all text considered in a
38//! single alignment must fit into memory.
39//!
40//! Here's another example that demonstrates how *only* contiguous columns
41//! are aligned:
42//!
43//! ```rust
44//! use std::io::Write;
45//! use qsv_tabwriter::TabWriter;
46//!
47//! let mut tw = TabWriter::new(vec![]).padding(1);
48//! write!(&mut tw, "
49//!fn foobar() {{
50//!    let mut x = 1+1;\t// addition
51//!    x += 1;\t// increment in place
52//!    let y = x * x * x * x;\t// multiply!
53//!
54//!    y += 1;\t// this is another group
55//!    y += 2 * 2;\t// that is separately aligned
56//!}}
57//!").unwrap();
58//! tw.flush().unwrap();
59//!
60//! let written = String::from_utf8(tw.into_inner().unwrap()).unwrap();
61//! assert_eq!(&*written, "
62//!fn foobar() {
63//!    let mut x = 1+1;       // addition
64//!    x += 1;                // increment in place
65//!    let y = x * x * x * x; // multiply!
66//!
67//!    y += 1;     // this is another group
68//!    y += 2 * 2; // that is separately aligned
69//!}
70//!");
71//! ```
72
73#![deny(missing_docs)]
74
75use std::cmp;
76use std::error;
77use std::fmt;
78use std::io::{self, BufWriter, Write};
79use std::mem;
80use std::str;
81
82#[cfg(test)]
83mod test;
84
/// `TabWriter` wraps an arbitrary writer and aligns tabbed output.
///
/// Elastic tabstops work by aligning *contiguous* tab-delimited fields
/// known as *column blocks*. When a line appears that breaks all contiguous
/// blocks, all buffered output will be flushed to the underlying writer.
/// Otherwise, output will stay buffered until `flush` is explicitly called.
#[derive(Debug)]
pub struct TabWriter<W: io::Write> {
    // Destination for aligned output; the caller's writer wrapped in a
    // `BufWriter`.
    w: BufWriter<W>,
    // Raw bytes of every cell that has been written but not yet aligned.
    buf: io::Cursor<Vec<u8>>,
    // Pending lines, each a list of terminated cells. The last entry is the
    // line currently being written.
    lines: Vec<Vec<Cell>>,
    // The cell currently being accumulated (not yet terminated by a tab or
    // newline).
    curcell: Cell,
    // Minimum width of every aligned column.
    minwidth: usize,
    // Number of padding characters appended after every aligned column.
    padding: usize,
    // How cell contents are positioned within their column.
    alignment: Alignment,
    // When true, ANSI escape sequences are ignored when measuring cell width.
    ansi: bool,
    // When true, leading empty cells are emitted as tabs instead of spaces.
    tab_indent: bool,
}
103
/// `Alignment` represents how a `TabWriter` should align text within its cell.
///
/// The type is a plain, fieldless enum, so it is `Copy`: a value can be kept
/// around (for example in a configuration struct) and still be handed to
/// `TabWriter::alignment` by value. `Alignment::default()` is
/// `Alignment::Left`, matching the default used by `TabWriter::new`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum Alignment {
    /// Text should be aligned with the left edge of the cell
    #[default]
    Left,
    /// Text should be centered within the cell
    Center,
    /// Text should be aligned with the right edge of the cell
    Right,
    /// Like Left, but the last whitespace is a tab
    /// This produces a valid TSV file
    LeftEndTab,
    /// Like Left, but adds a comment line at the top that comma-delimited
    /// enumerates the starting position of each column (Fixed Width Format).
    /// Positions are 1-indexed.
    LeftFwf,
}
121
/// Internal three-way alignment used while cells are written out.
///
/// The flush logic maps every public `Alignment` variant onto one of these
/// (`Left`, `LeftEndTab` and `LeftFwf` all become `Left`) so the per-cell
/// `match` only has three arms.
enum MainAlignment {
    /// All extra space goes to the right of the cell contents.
    Left,
    /// All extra space goes to the left of the cell contents.
    Right,
    /// Extra space is split between both sides of the cell contents.
    Center,
}
127
/// A single tab- or newline-terminated field, described as a slice of the
/// `TabWriter`'s byte buffer.
#[derive(Debug)]
struct Cell {
    /// Offset of the cell's first byte in `TabWriter.buf`.
    start: usize, // offset into TabWriter.buf
    /// Display width of the cell, in characters.
    width: usize, // in characters
    /// Length of the cell's contents, in bytes.
    size: usize,  // in bytes
}
134
135impl<W: io::Write> TabWriter<W> {
136    /// Create a new `TabWriter` from an existing `Writer`.
137    ///
138    /// All output written to `Writer` is passed through `TabWriter`.
139    /// Contiguous column blocks indicated by tabs are aligned.
140    ///
141    /// Note that `flush` must be called to guarantee that `TabWriter` will
142    /// write to the given writer.
143    pub fn new(w: W) -> Self {
144        Self {
145            w: BufWriter::with_capacity(65536, w),
146            buf: io::Cursor::new(Vec::with_capacity(1024)),
147            lines: vec![vec![]],
148            curcell: Cell::new(0),
149            minwidth: 2,
150            padding: 2,
151            alignment: Alignment::Left,
152            ansi: cfg!(feature = "ansi_formatting"),
153            tab_indent: false,
154        }
155    }
156
157    /// Set the minimum width of each column. That is, all columns will have
158    /// *at least* the size given here. If a column is smaller than `minwidth`,
159    /// then it is padded with spaces.
160    ///
161    /// The default minimum width is `2`.
162    #[must_use]
163    pub const fn minwidth(mut self, minwidth: usize) -> Self {
164        self.minwidth = minwidth;
165        self
166    }
167
168    /// Set the padding between columns. All columns will be separated by
169    /// *at least* the number of spaces indicated by `padding`. If `padding`
170    /// is zero, then columns may run up against each other without any
171    /// separation.
172    ///
173    /// The default padding is `2`.
174    #[must_use]
175    pub const fn padding(mut self, padding: usize) -> Self {
176        self.padding = padding;
177        self
178    }
179
180    /// Set the alignment of text within cells. This will effect future flushes.
181    ///
182    /// The default alignment is `Alignment::Left`.
183    #[must_use]
184    pub const fn alignment(mut self, alignment: Alignment) -> Self {
185        self.alignment = alignment;
186        self
187    }
188
189    /// Ignore ANSI escape codes when computing the number of display columns.
190    ///
191    /// This is disabled by default. (But is enabled by default when the
192    /// deprecated `ansi_formatting` crate feature is enabled.)
193    #[must_use]
194    pub const fn ansi(mut self, yes: bool) -> Self {
195        self.ansi = yes;
196        self
197    }
198
199    /// Always use tabs for indentation columns (i.e., padding of
200    /// leading empty cells on the left).
201    ///
202    /// This is disabled by default.
203    #[must_use]
204    pub const fn tab_indent(mut self, yes: bool) -> Self {
205        self.tab_indent = yes;
206        self
207    }
208
209    /// Unwraps this `TabWriter`, returning the underlying writer.
210    ///
211    /// This internal buffer is flushed before returning the writer. If the
212    /// flush fails, then an error is returned.
213    ///
214    /// # Errors
215    ///
216    /// This function will return an error if flushing the internal buffer fails.
217    /// The error is wrapped in an `IntoInnerError` along with the original `TabWriter`.
218    ///
219    /// # Panics
220    ///
221    /// This method will panic if `BufWriter::into_inner()` fails after a successful
222    /// flush, which would indicate a serious system-level problem.
223    #[allow(clippy::result_large_err)]
224    pub fn into_inner(mut self) -> Result<W, IntoInnerError<W>> {
225        // First flush our internal buffer
226        if let Err(err) = self.flush() {
227            return Err(IntoInnerError(self, err));
228        }
229
230        // Now extract the BufWriter and try to get the inner writer
231        // BufWriter::into_inner() can only fail if there was a previous write error,
232        // which would have been caught by our flush() call above.
233        self.w.into_inner().map_or_else(|_|
234            // This panic should never happen since we flushed above, but if it does,
235            // panic as it indicates a serious system-level problem.
236            panic!("BufWriter::into_inner() failed unexpectedly after successful flush"),
237            |inner_w| Ok(inner_w))
238    }
239
240    /// Resets the state of the aligner. Once the aligner is reset, all future
241    /// writes will start producing a new alignment.
242    fn reset(&mut self) {
243        self.buf = io::Cursor::new(Vec::with_capacity(1024));
244        self.lines = vec![vec![]];
245        self.curcell = Cell::new(0);
246    }
247
248    /// Adds the bytes received into the buffer and updates the size of
249    /// the current cell.
250    fn add_bytes(&mut self, bytes: &[u8]) {
251        self.curcell.size += bytes.len();
252        let _ = self.buf.write_all(bytes); // cannot fail
253    }
254
255    /// Ends the current cell, updates the UTF8 width of the cell and starts
256    /// a fresh cell.
257    fn term_curcell(&mut self) {
258        #[allow(clippy::cast_possible_truncation)]
259        let mut curcell = Cell::new(self.buf.position() as usize);
260        mem::swap(&mut self.curcell, &mut curcell);
261
262        if self.ansi {
263            curcell.update_width(self.buf.get_ref(), count_columns_ansi);
264        } else {
265            curcell.update_width(self.buf.get_ref(), count_columns_noansi);
266        }
267        self.curline_mut().push(curcell);
268    }
269
270    /// Return a view of the current line of cells.
271    fn curline(&self) -> &[Cell] {
272        let i = self.lines.len() - 1;
273        &self.lines[i]
274    }
275
276    /// Return a mutable view of the current line of cells.
277    fn curline_mut(&mut self) -> &mut Vec<Cell> {
278        let i = self.lines.len() - 1;
279        &mut self.lines[i]
280    }
281}
282
283impl Cell {
284    const fn new(start: usize) -> Self {
285        Self { start, width: 0, size: 0 }
286    }
287
288    fn update_width(
289        &mut self,
290        buf: &[u8],
291        count_columns: impl Fn(&[u8]) -> usize,
292    ) {
293        let end = self.start + self.size;
294        self.width = count_columns(&buf[self.start..end]);
295    }
296}
297
298impl<W: io::Write> io::Write for TabWriter<W> {
299    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
300        let mut lastterm = 0usize;
301        for (i, &c) in buf.iter().enumerate() {
302            match c {
303                b'\t' | b'\n' => {
304                    self.add_bytes(&buf[lastterm..i]);
305                    self.term_curcell();
306                    lastterm = i + 1;
307                    if c == b'\n' {
308                        let ncells = self.curline().len();
309                        self.lines.push(vec![]);
310                        // Having a single cell means that *all* previous
311                        // columns have been broken, so we should just flush.
312                        if ncells == 1 {
313                            self.flush()?;
314                        }
315                    }
316                }
317                _ => {}
318            }
319        }
320        self.add_bytes(&buf[lastterm..]);
321        Ok(buf.len())
322    }
323
324    fn flush(&mut self) -> io::Result<()> {
325        if self.curcell.size > 0 {
326            self.term_curcell();
327        }
328        let widths = cell_widths(&self.lines, self.minwidth);
329
330        // This is a trick to avoid allocating padding for every cell.
331        // Just allocate the most we'll ever need and borrow from it.
332        let biggest_width = widths
333            .iter()
334            .map(|ws| ws.iter().copied().max().unwrap_or(0))
335            .max()
336            .unwrap_or(0);
337        let padding: String =
338            std::iter::repeat_n(' ', biggest_width + self.padding).collect();
339
340        // Generate comment line for Leftfwf alignment
341        if self.alignment == Alignment::LeftFwf
342            && !self.lines.is_empty()
343            && !self.lines[0].is_empty()
344        {
345            let comment_line = generate_fwf_comment_line(
346                &self.lines[0],
347                &widths[0],
348                self.padding,
349            );
350            self.w.write_all(comment_line.as_bytes())?;
351        }
352
353        let mut first = true;
354
355        // we do this so that we have a more efficient match pattern
356        // in the hot loop below
357        let main_alignment = match self.alignment {
358            Alignment::Left | Alignment::LeftEndTab | Alignment::LeftFwf => {
359                MainAlignment::Left
360            }
361            Alignment::Right => MainAlignment::Right,
362            Alignment::Center => MainAlignment::Center,
363        };
364
365        for (line, widths) in self.lines.iter().zip(widths.iter()) {
366            if first {
367                first = false;
368            } else {
369                self.w.write_all(b"\n")?;
370            }
371
372            let mut use_tabs = self.tab_indent;
373            for (i, cell) in line.iter().enumerate() {
374                let bytes =
375                    &self.buf.get_ref()[cell.start..cell.start + cell.size];
376                if i >= widths.len() {
377                    // There is no width for the last column
378                    assert_eq!(i, line.len() - 1);
379                    self.w.write_all(bytes)?;
380                } else {
381                    if use_tabs && cell.size == 0 {
382                        write!(&mut self.w, "\t")?;
383                        continue;
384                    }
385                    use_tabs = false;
386
387                    assert!(widths[i] >= cell.width);
388                    let extra_space = widths[i] - cell.width;
389                    let (left_spaces, mut right_spaces) = match main_alignment
390                    {
391                        MainAlignment::Left => (0, extra_space),
392                        MainAlignment::Right => (extra_space, 0),
393                        MainAlignment::Center => {
394                            (extra_space / 2, extra_space - extra_space / 2)
395                        }
396                    };
397                    right_spaces += self.padding;
398
399                    write!(&mut self.w, "{}", &padding[0..left_spaces])?;
400                    self.w.write_all(bytes)?;
401
402                    // Handle LeftEndTab alignment
403                    if self.alignment == Alignment::LeftEndTab {
404                        // use spaces for padding except the last character is a tab
405                        if right_spaces > 1 {
406                            write!(
407                                &mut self.w,
408                                "{}",
409                                &padding[0..right_spaces - 1]
410                            )?;
411                        }
412                        if right_spaces > 0 {
413                            write!(&mut self.w, "\t")?;
414                        }
415                    } else {
416                        write!(&mut self.w, "{}", &padding[0..right_spaces])?;
417                    }
418                }
419            }
420        }
421
422        self.reset();
423        Ok(())
424    }
425}
426
/// An error returned by `into_inner`.
///
/// This combines the error that happened while flushing the buffer with the
/// `TabWriter` itself.
///
/// Field `0` holds the `TabWriter` that failed to flush and field `1` holds
/// the I/O error that caused the failure.
pub struct IntoInnerError<W: io::Write>(TabWriter<W>, io::Error);
432
433impl<W: io::Write> IntoInnerError<W> {
434    /// Returns the error which caused the `into_error()` call to fail.
435    pub const fn error(&self) -> &io::Error {
436        &self.1
437    }
438
439    /// Returns the `TabWriter` instance which generated the error.
440    pub fn into_inner(self) -> TabWriter<W> {
441        self.0
442    }
443}
444
445impl<W: io::Write> fmt::Debug for IntoInnerError<W> {
446    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
447        self.error().fmt(f)
448    }
449}
450
451impl<W: io::Write> fmt::Display for IntoInnerError<W> {
452    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
453        self.error().fmt(f)
454    }
455}
456
457impl<W: io::Write + ::std::any::Any> error::Error for IntoInnerError<W> {
458    fn cause(&self) -> Option<&dyn error::Error> {
459        Some(self.error())
460    }
461}
462
463/// Generate a comment line for the Fixed Width Format alignment.
464///
465/// The comment line is a comma-delimited list of the starting position of each
466/// column. Positions are 1-indexed.
467///
468/// # Arguments
469/// * `cells` - The cells on the first line of the table.
470/// * `widths` - The widths of the columns.
471/// * `padding` - The padding between columns.
472///
473/// # Returns
474/// A string containing the comment line.
475fn generate_fwf_comment_line(
476    cells: &[Cell],
477    widths: &[usize],
478    padding: usize,
479) -> String {
480    let mut positions = Vec::new();
481    let mut current_pos = 1; // Start with 1-indexed positions
482
483    // Calculate positions for all columns
484    for &width in widths {
485        positions.push(current_pos.to_string());
486        current_pos += width + padding;
487    }
488
489    // Add position for the last column if it exists
490    if cells.len() > widths.len() {
491        positions.push(current_pos.to_string());
492    }
493
494    format!("#{}\n", positions.join(","))
495}
496
497fn cell_widths(lines: &[Vec<Cell>], minwidth: usize) -> Vec<Vec<usize>> {
498    // Naively, this algorithm looks like it could be O(n^2m) where `n` is
499    // the number of lines and `m` is the number of contiguous columns.
500    //
501    // However, I claim that it is actually O(nm). That is, the width for
502    // every contiguous column is computed exactly once.
503    let mut ws: Vec<_> = (0..lines.len()).map(|_| vec![]).collect();
504    let mut width;
505    let mut contig_count;
506    for (i, iline) in lines.iter().enumerate() {
507        if iline.is_empty() {
508            continue;
509        }
510        for col in ws[i].len()..(iline.len() - 1) {
511            width = minwidth;
512            contig_count = 0;
513            for line in &lines[i..] {
514                if col + 1 >= line.len() {
515                    // ignores last column
516                    break;
517                }
518                contig_count += 1;
519                width = cmp::max(width, line[col].width);
520            }
521            for line_widths in ws.iter_mut().skip(i).take(contig_count) {
522                line_widths.push(width);
523            }
524        }
525    }
526    ws
527}
528
529fn count_columns_noansi(bytes: &[u8]) -> usize {
530    use unicode_width::UnicodeWidthChar;
531
532    // If we have a Unicode string, then attempt to guess the number of
533    // *display* columns used.
534    //
535    str::from_utf8(bytes).map_or(bytes.len(), |s| {
536        s.chars()
537            .map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
538            .sum::<usize>()
539    })
540}
541
542fn count_columns_ansi(bytes: &[u8]) -> usize {
543    use unicode_width::UnicodeWidthChar;
544
545    // If we have a Unicode string, then attempt to guess the number of
546    // *display* columns used.
547    str::from_utf8(bytes).map_or(bytes.len(), |s| {
548        strip_formatting(s)
549            .chars()
550            .map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
551            .sum::<usize>()
552    })
553}
554
555fn strip_formatting(input: &str) -> std::borrow::Cow<'_, str> {
556    let mut escapes = find_ansi_escapes(input).peekable();
557    if escapes.peek().is_none() {
558        return std::borrow::Cow::Borrowed(input);
559    }
560    let mut without_escapes = String::with_capacity(input.len());
561    let mut last_end = 0;
562    for mat in escapes {
563        without_escapes.push_str(&input[last_end..mat.start]);
564        last_end = mat.end;
565    }
566    without_escapes.push_str(&input[last_end..]);
567    std::borrow::Cow::Owned(without_escapes)
568}
569
/// Yields the byte range of every ANSI CSI escape sequence in `input`, in
/// order of appearance.
///
/// A sequence is `ESC [`, followed by any number of parameter bytes
/// (`0x30..=0x3F`), any number of intermediate bytes (`0x20..=0x2F`) and one
/// final byte (`0x40..=0x7E`). This covers colour/style sequences such as
/// `ESC[1;31m` as well as other control sequences such as `ESC[2K`, so a
/// sequence that does not end in `m` no longer swallows the visible text up
/// to the next literal `m`.
///
/// An `ESC [` that is not followed by a complete sequence is not reported.
/// Every yielded range starts and ends on a character boundary.
fn find_ansi_escapes(
    input: &str,
) -> impl Iterator<Item = std::ops::Range<usize>> + '_ {
    const ESCAPE_PREFIX: &str = "\x1B[";
    let bytes = input.as_bytes();
    let mut search_from = 0;
    std::iter::from_fn(move || loop {
        let start = search_from + input[search_from..].find(ESCAPE_PREFIX)?;
        let after_prefix = start + ESCAPE_PREFIX.len();

        // Skip the parameter bytes, then the intermediate bytes.
        let rest = &bytes[after_prefix..];
        let params = rest
            .iter()
            .take_while(|byte| (0x30..=0x3F).contains(*byte))
            .count();
        let intermediates = rest[params..]
            .iter()
            .take_while(|byte| (0x20..=0x2F).contains(*byte))
            .count();
        let final_at = after_prefix + params + intermediates;

        match bytes.get(final_at).copied() {
            Some(0x40..=0x7E) => {
                search_from = final_at + 1;
                return Some(start..final_at + 1);
            }
            // Truncated or malformed: not an escape sequence. Keep looking
            // after this prefix.
            _ => search_from = after_prefix,
        }
    })
}
585}