Skip to main content

linuxutils_text/
column.rs

1use linuxutils_common::man::ManContent;
2
/// Manual-page content exported for this utility; currently the empty
/// placeholder produced by `ManContent::empty()`.
pub const MAN: ManContent = ManContent::empty();
4
5use clap::Parser;
6use std::{
7    fs::File,
8    io::{self, BufRead, BufReader, Write},
9    path::PathBuf,
10    process::ExitCode,
11};
12
/// Distance between tab stops, in character cells, assumed when output is
/// padded with tab characters.
const TAB_WIDTH: usize = 8;
14
// Command-line options for `column`, parsed with clap's derive API.
//
// NOTE: the `///` doc comments in this struct double as the `--help` text
// generated by clap, so rewording them changes user-visible output.
/// Columnate lists.
#[derive(Parser)]
#[command(name = "column", version, about)]
pub struct Args {
    /// Create a table from delimited input.
    #[arg(short = 't', long = "table")]
    table: bool,

    // NOTE: the "requires -N" wording is not enforced by any clap attribute
    // here; `run` switches to table mode on `-J` alone.
    /// Use JSON output format (requires -N).
    #[arg(short = 'J', long = "json")]
    json: bool,

    /// Specify the table name for JSON output.
    #[arg(short = 'n', long = "table-name", value_name = "name")]
    table_name: Option<String>,

    // Giving `-r` also selects table mode in `run`.
    /// Specify column to use for tree-like output.
    #[arg(short = 'r', long = "tree", value_name = "column")]
    tree: Option<String>,

    /// Specify column containing line IDs for tree parent-child relations.
    #[arg(short = 'i', long = "tree-id", value_name = "column")]
    tree_id: Option<String>,

    /// Specify column containing parent IDs for tree parent-child relations.
    #[arg(short = 'p', long = "tree-parent", value_name = "column")]
    tree_parent: Option<String>,

    // Each character of the string is an individual separator (see
    // `parse_table_input`); only consulted in table mode.
    /// Possible input item delimiters (default is whitespace).
    #[arg(short = 's', long = "separator", value_name = "separators")]
    separator: Option<String>,

    /// Column delimiter for table output (default is two spaces).
    #[arg(short = 'o', long = "output-separator", value_name = "string")]
    output_separator: Option<String>,

    // Without `-N`, `table_mode` names columns `COL1`, `COL2`, ... and
    // suppresses the header row.
    /// Specify column names (comma-separated).
    #[arg(short = 'N', long = "table-columns", value_name = "names")]
    table_columns: Option<String>,

    /// Omit printing the table header.
    #[arg(short = 'd', long = "table-noheadings")]
    table_noheadings: bool,

    /// Maximum number of input columns.
    #[arg(short = 'l', long = "table-columns-limit", value_name = "number")]
    table_columns_limit: Option<usize>,

    // The `columns` values of -R/-T/-E/-W/-H use the syntax understood by
    // `resolve_columns` (1-based indices, ranges, `0`, `-1`, names).
    /// Right align text in specified columns.
    #[arg(short = 'R', long = "table-right", value_name = "columns")]
    table_right: Option<String>,

    /// Truncate text in specified columns when necessary.
    #[arg(short = 'T', long = "table-truncate", value_name = "columns")]
    table_truncate: Option<String>,

    /// Ignore unusually long cells when calculating column width.
    #[arg(short = 'E', long = "table-noextreme", value_name = "columns")]
    table_noextreme: Option<String>,

    /// Allow multi-line wrapping in specified columns.
    #[arg(short = 'W', long = "table-wrap", value_name = "columns")]
    table_wrap: Option<String>,

    /// Don't print specified columns.
    #[arg(short = 'H', long = "table-hide", value_name = "columns")]
    table_hide: Option<String>,

    // NOTE: accepted on the command line but not applied; `table_mode`
    // never reads this field and leaves the column order unchanged.
    /// Specify output column order.
    #[arg(short = 'O', long = "table-order", value_name = "columns")]
    table_order: Option<String>,

    /// Print header line for each page.
    #[arg(short = 'e', long = "table-header-repeat")]
    table_header_repeat: bool,

    // Each occurrence is a comma-separated attribute list parsed by
    // `parse_column_attrs`, e.g. "name=FOO,right,trunc".
    /// Define a column with attributes (can be repeated).
    #[arg(short = 'C', long = "table-column", value_name = "attributes")]
    table_column: Vec<String>,

    /// Fill all available space on output.
    #[arg(short = 'm', long = "table-maxout")]
    table_maxout: bool,

    // Only consulted by the non-table `columnate` path.
    /// Fill rows before columns.
    #[arg(short = 'x', long = "fillrows")]
    fill_rows: bool,

    // Kept as a string so the keyword "unlimited" can be accepted; the value
    // is interpreted by `parse_output_width`.
    /// Output width (default: terminal width or 80). Use 0 or "unlimited" for no limit.
    #[arg(short = 'c', long = "output-width", value_name = "width")]
    output_width: Option<String>,

    // Only consulted by the non-table `columnate` path.
    /// Use spaces instead of tabs, with this minimum spacing between columns.
    #[arg(short = 'S', long = "use-spaces", value_name = "number")]
    use_spaces: Option<usize>,

    // Only consulted by the non-table `columnate` path; `parse_table_input`
    // always drops empty lines.
    /// Preserve blank lines in input.
    #[arg(short = 'L', long = "keep-empty-lines")]
    keep_empty_lines: bool,

    /// Files to read. If none are given, reads from standard input.
    files: Vec<PathBuf>,
}
118
119fn terminal_width() -> usize {
120    let stdout = io::stdout();
121    match rustix::termios::tcgetwinsize(&stdout) {
122        Ok(ws) if ws.ws_col > 0 => ws.ws_col as usize,
123        _ => 80,
124    }
125}
126
/// Interpret the argument of `-c` / `--output-width`.
///
/// Returns `Some(n)` for a positive column count and `None` (meaning "no
/// limit") for the keyword `unlimited` and for every spelling of zero
/// (`0`, `00`, `+0`, ...).
///
/// Text that is not a valid unsigned integer also yields `None`, so an
/// unparsable width results in unlimited output rather than an error.
fn parse_output_width(s: &str) -> Option<usize> {
    if s == "unlimited" {
        return None;
    }
    // Test the parsed value rather than the literal "0": otherwise "00" or
    // "+0" would slip through as a zero-width limit instead of "unlimited".
    s.parse::<usize>().ok().filter(|&width| width != 0)
}
133
134fn round_up_to_tab(width: usize) -> usize {
135    width.div_ceil(TAB_WIDTH) * TAB_WIDTH
136}
137
138fn pad_with_tabs(
139    item: &str,
140    col_width: usize,
141    writer: &mut impl Write,
142) -> io::Result<()> {
143    writer.write_all(item.as_bytes())?;
144    let chars_used = item.len();
145    let target = round_up_to_tab(col_width);
146    let mut pos = chars_used;
147    while pos < target {
148        let next_tab = ((pos / TAB_WIDTH) + 1) * TAB_WIDTH;
149        writer.write_all(b"\t")?;
150        pos = next_tab;
151    }
152    Ok(())
153}
154
/// Write `item` left-aligned in a field of `col_width + spacing` characters,
/// filling the remainder with spaces.
///
/// An item longer than the field is written in full without truncation.
fn pad_with_spaces(
    item: &str,
    col_width: usize,
    spacing: usize,
    writer: &mut impl Write,
) -> io::Result<()> {
    let field = col_width + spacing;
    write!(writer, "{item:<field$}")
}
164
165pub fn run(args: Args) -> ExitCode {
166    let width_limit = match &args.output_width {
167        Some(s) => parse_output_width(s),
168        None => Some(terminal_width()),
169    };
170
171    let input = match read_input(&args.files) {
172        Ok(s) => s,
173        Err(e) => {
174            eprintln!("column: {e}");
175            return ExitCode::FAILURE;
176        }
177    };
178
179    let stdout = io::stdout();
180    let mut writer = stdout.lock();
181
182    let result = if args.table || args.json || args.tree.is_some() {
183        table_mode(&input, &args, width_limit, &mut writer)
184    } else {
185        columnate(
186            &input,
187            width_limit,
188            args.fill_rows,
189            args.use_spaces,
190            args.keep_empty_lines,
191            &mut writer,
192        )
193    };
194
195    if let Err(e) = result {
196        eprintln!("column: {e}");
197        return ExitCode::FAILURE;
198    }
199
200    ExitCode::SUCCESS
201}
202
/// Read the complete input into one string.
///
/// With no `files`, standard input is consumed; otherwise the files are read
/// in order and concatenated. Every line is stored with a single trailing
/// `\n`, whatever line ending it had and even if the last line of a source
/// lacked one.
///
/// # Errors
///
/// Returns the first I/O error hit (including invalid UTF-8). A failure to
/// open a file is re-wrapped so the message starts with that file's path.
fn read_input(files: &[PathBuf]) -> io::Result<String> {
    /// Append every line of `source` to `sink`, newline-terminated.
    fn append_lines(source: impl BufRead, sink: &mut String) -> io::Result<()> {
        for line in source.lines() {
            sink.push_str(&line?);
            sink.push('\n');
        }
        Ok(())
    }

    let mut text = String::new();

    if files.is_empty() {
        append_lines(io::stdin().lock(), &mut text)?;
        return Ok(text);
    }

    for path in files {
        let handle = match File::open(path) {
            Ok(handle) => handle,
            Err(e) => {
                let message = format!("{}: {e}", path.display());
                return Err(io::Error::new(e.kind(), message));
            }
        };
        append_lines(BufReader::new(handle), &mut text)?;
    }

    Ok(text)
}
226
227fn table_mode(
228    input: &str,
229    args: &Args,
230    width_limit: Option<usize>,
231    writer: &mut impl Write,
232) -> io::Result<()> {
233    use cols::{OutputMode, Table, TermForce, print_table};
234    use std::collections::HashMap;
235
236    // Parse input into rows of fields.
237    let rows = parse_table_input(
238        input,
239        args.separator.as_deref(),
240        args.table_columns_limit,
241    );
242
243    if rows.is_empty() {
244        return Ok(());
245    }
246
247    // Determine column count from the widest row.
248    let ncols = rows.iter().map(|r| r.len()).max().unwrap_or(0);
249    if ncols == 0 {
250        return Ok(());
251    }
252
253    // Build column names.
254    let col_names: Vec<String> = if let Some(ref names_str) = args.table_columns
255    {
256        let user_names: Vec<&str> = names_str.split(',').collect();
257        (0..ncols)
258            .map(|i| {
259                user_names
260                    .get(i)
261                    .map(|s| s.to_string())
262                    .unwrap_or_else(|| format!("COL{}", i + 1))
263            })
264            .collect()
265    } else {
266        (0..ncols).map(|i| format!("COL{}", i + 1)).collect()
267    };
268
269    // Build the table.
270    let mut table = Table::new();
271
272    if let Some(width) = width_limit {
273        table.termwidth_set(width);
274        table.termforce_set(TermForce::Always);
275    }
276
277    if args.table_noheadings || args.table_columns.is_none() {
278        table.headings_set(false);
279    }
280
281    if args.table_maxout {
282        table.maxout_set(true);
283    }
284
285    if let Some(ref sep) = args.output_separator {
286        table.column_separator_set(sep);
287    }
288
289    // If -C (per-column attributes) is given, use those for column definitions.
290    if !args.table_column.is_empty() {
291        for (i, attr_str) in args.table_column.iter().enumerate() {
292            let attrs = parse_column_attrs(attr_str);
293            let name = attrs
294                .iter()
295                .find_map(
296                    |(k, v)| {
297                        if k == "name" { v.as_deref() } else { None }
298                    },
299                )
300                .or_else(|| col_names.get(i).map(|s| s.as_str()))
301                .unwrap_or("?");
302            let idx = table.new_column(name);
303            let col = table.column_mut(idx).unwrap();
304            for (key, _val) in &attrs {
305                match key.as_str() {
306                    "right" => {
307                        col.right_set(true);
308                    }
309                    "trunc" => {
310                        col.truncate_set(true);
311                    }
312                    "noextreme" => {
313                        col.no_extremes_set(true);
314                    }
315                    "wrap" => {
316                        col.wrap_set(true);
317                    }
318                    "hide" => {
319                        col.hidden_set(true);
320                    }
321                    "strictwidth" => {
322                        col.strict_width_set(true);
323                    }
324                    _ => {}
325                }
326            }
327        }
328        // Add remaining columns if input has more than -C definitions.
329        for name in &col_names[args.table_column.len()..] {
330            table.new_column(name);
331        }
332    } else {
333        for name in &col_names {
334            table.new_column(name);
335        }
336    }
337
338    // Apply column flags from -R, -T, -E, -W, -H options.
339    if let Some(ref spec) = args.table_right {
340        for idx in resolve_columns(spec, &col_names) {
341            if let Some(col) = table.column_mut(idx) {
342                col.right_set(true);
343            }
344        }
345    }
346    if let Some(ref spec) = args.table_truncate {
347        for idx in resolve_columns(spec, &col_names) {
348            if let Some(col) = table.column_mut(idx) {
349                col.truncate_set(true);
350            }
351        }
352    }
353    if let Some(ref spec) = args.table_noextreme {
354        for idx in resolve_columns(spec, &col_names) {
355            if let Some(col) = table.column_mut(idx) {
356                col.no_extremes_set(true);
357            }
358        }
359    }
360    if let Some(ref spec) = args.table_wrap {
361        for idx in resolve_columns(spec, &col_names) {
362            if let Some(col) = table.column_mut(idx) {
363                col.wrap_set(true);
364            }
365        }
366    }
367    if let Some(ref spec) = args.table_hide {
368        for idx in resolve_columns(spec, &col_names) {
369            if let Some(col) = table.column_mut(idx) {
370                col.hidden_set(true);
371            }
372        }
373    }
374
375    if args.table_header_repeat {
376        table.header_repeat_set(true);
377    }
378
379    // Handle -O (column order) by reordering columns.
380    // For now we don't reorder — cols doesn't support column reordering
381    // after creation. This would need to remap column indices.
382
383    // JSON mode.
384    if args.json {
385        table.output_mode_set(OutputMode::Json);
386        if let Some(ref name) = args.table_name {
387            table.name_set(name);
388        } else {
389            table.name_set("table");
390        }
391    }
392
393    // Resolve tree column indices.
394    let tree_col = args
395        .tree
396        .as_ref()
397        .and_then(|s| resolve_single_column(s, &col_names));
398    let tree_id_col = args
399        .tree_id
400        .as_ref()
401        .and_then(|s| resolve_single_column(s, &col_names));
402    let tree_parent_col = args
403        .tree_parent
404        .as_ref()
405        .and_then(|s| resolve_single_column(s, &col_names));
406
407    // Mark the tree column.
408    if let Some(tc) = tree_col
409        && let Some(col) = table.column_mut(tc)
410    {
411        col.tree_set(true);
412    }
413
414    // Populate rows.
415    if let (Some(id_col), Some(parent_col)) = (tree_id_col, tree_parent_col) {
416        // Tree mode: build parent-child relationships from data.
417        let mut id_to_line: HashMap<String, cols::LineId> = HashMap::new();
418        let mut deferred_parents: Vec<(cols::LineId, String)> = Vec::new();
419
420        for row in &rows {
421            let line_id = table.new_line(None);
422            let line = table.line_mut(line_id);
423            for (ci, cell) in row.iter().enumerate() {
424                line.data_set(ci, cell);
425            }
426            if let Some(id_val) = row.get(id_col) {
427                id_to_line.insert(id_val.clone(), line_id);
428            }
429            if let Some(parent_val) = row.get(parent_col)
430                && !parent_val.is_empty()
431                && parent_val != "0"
432            {
433                deferred_parents.push((line_id, parent_val.clone()));
434            }
435        }
436
437        // Wire up parent-child relationships.
438        for (child_line, parent_id) in &deferred_parents {
439            if let Some(&parent_line) = id_to_line.get(parent_id) {
440                table.add_child(parent_line, *child_line);
441            }
442        }
443    } else {
444        for row in &rows {
445            let line_id = table.new_line(None);
446            let line = table.line_mut(line_id);
447            for (ci, cell) in row.iter().enumerate() {
448                line.data_set(ci, cell);
449            }
450        }
451    }
452
453    print_table(&table, writer)
454}
455
/// Resolve a column specification string to 0-based column indices.
///
/// The spec is a comma-separated list; surrounding whitespace of each entry
/// is ignored. Each entry is one of:
/// - `0` — all columns
/// - `-` — all columns that still carry an auto-generated `COLn` name
/// - `-1` — last column
/// - `N` — 1-based index (ignored when out of range)
/// - `N-M` — inclusive range of 1-based indices, clipped to the column count
/// - `name` — column name (matched exactly against `col_names`)
///
/// An entry that is neither a number nor a valid range is treated as a name,
/// so names containing a hyphen (such as `first-name`) resolve correctly.
/// Entries matching nothing contribute no index. Indices are returned in
/// spec order and may repeat.
fn resolve_columns(spec: &str, col_names: &[String]) -> Vec<usize> {
    /// Parse `N-M` with `1 <= N <= M` into its two bounds.
    fn parse_range(part: &str) -> Option<(usize, usize)> {
        let (lo, hi) = part.split_once('-')?;
        let lo = lo.parse::<usize>().ok()?;
        let hi = hi.parse::<usize>().ok()?;
        (lo >= 1 && hi >= lo).then_some((lo, hi))
    }

    /// True for names of the form `COL<number>`.
    fn is_auto_name(name: &str) -> bool {
        name.strip_prefix("COL")
            .is_some_and(|digits| digits.parse::<usize>().is_ok())
    }

    let ncols = col_names.len();
    let mut result = Vec::new();

    for part in spec.split(',').map(str::trim) {
        match part {
            "0" => result.extend(0..ncols),
            "-" => result.extend(
                col_names
                    .iter()
                    .enumerate()
                    .filter(|(_, name)| is_auto_name(name))
                    .map(|(i, _)| i),
            ),
            // `checked_sub` yields nothing when there are no columns.
            "-1" => result.extend(ncols.checked_sub(1)),
            _ => {
                if let Ok(n) = part.parse::<usize>() {
                    if (1..=ncols).contains(&n) {
                        result.push(n - 1);
                    }
                } else if let Some((start, end)) = parse_range(part) {
                    // `start >= 1`, so the subtraction cannot underflow.
                    result.extend((start..=end.min(ncols)).map(|i| i - 1));
                } else {
                    // Everything else, including text that merely contains
                    // a '-', is looked up by name.
                    result.extend(
                        col_names
                            .iter()
                            .enumerate()
                            .filter(|(_, name)| name.as_str() == part)
                            .map(|(i, _)| i),
                    );
                }
            }
        }
    }

    result
}
516
/// Parse a `-C` attribute list such as `"name=FOO,right,trunc"`.
///
/// Each comma-separated entry becomes a `(key, value)` pair: `key=value`
/// yields `Some(value)`, a bare `key` yields `None`. Keys and values are
/// trimmed of surrounding whitespace. Only the first `=` in an entry
/// separates key from value.
fn parse_column_attrs(s: &str) -> Vec<(String, Option<String>)> {
    let mut attrs = Vec::new();
    for entry in s.split(',') {
        let pair = match entry.split_once('=') {
            Some((key, value)) => {
                (key.trim().to_owned(), Some(value.trim().to_owned()))
            }
            None => (entry.trim().to_owned(), None),
        };
        attrs.push(pair);
    }
    attrs
}
529
530/// Resolve a single column reference to a 0-based index.
531fn resolve_single_column(spec: &str, col_names: &[String]) -> Option<usize> {
532    resolve_columns(spec, col_names).into_iter().next()
533}
534
535/// Parse input text into rows of fields based on separator.
536fn parse_table_input(
537    input: &str,
538    separator: Option<&str>,
539    columns_limit: Option<usize>,
540) -> Vec<Vec<String>> {
541    let mut rows = Vec::new();
542
543    for line in input.lines() {
544        if line.is_empty() {
545            continue;
546        }
547
548        let fields: Vec<String> = if let Some(sep) = separator {
549            // Split by any character in the separator string (non-greedy).
550            let sep_chars: Vec<char> = sep.chars().collect();
551            split_by_chars(line, &sep_chars, columns_limit)
552        } else {
553            // Default: split by whitespace runs.
554            split_by_whitespace(line, columns_limit)
555        };
556
557        rows.push(fields);
558    }
559
560    rows
561}
562
/// Split `line` at every occurrence of any character in `sep_chars`.
///
/// Splitting is non-greedy (like util-linux >= 2.23): adjacent separators
/// produce empty fields, and a line always yields at least one field.
/// With `limit`, at most that many fields are produced and the final field
/// keeps the rest of the line, separators included; a limit of 0 behaves
/// like a limit of 1.
fn split_by_chars(
    line: &str,
    sep_chars: &[char],
    limit: Option<usize>,
) -> Vec<String> {
    let is_separator = |c: char| sep_chars.contains(&c);

    match limit {
        // `splitn(0, ..)` would yield nothing, hence the floor of one field.
        Some(max) => line
            .splitn(max.max(1), is_separator)
            .map(str::to_owned)
            .collect(),
        None => line.split(is_separator).map(str::to_owned).collect(),
    }
}
590
/// Split `line` into fields separated by runs of whitespace.
///
/// Leading and trailing whitespace never produces a field. With `limit`, at
/// most that many fields are returned: once `limit - 1` fields have been
/// taken, the remainder of the line (inner whitespace preserved, outer
/// whitespace trimmed) becomes the final field. A limit of 0 behaves like a
/// limit of 1. A blank line yields no fields.
fn split_by_whitespace(line: &str, limit: Option<usize>) -> Vec<String> {
    let Some(max) = limit else {
        return line.split_whitespace().map(String::from).collect();
    };

    let mut fields: Vec<String> = Vec::new();
    // Trimming up front keeps trailing blanks out of the last field and
    // prevents a whitespace-only remainder from becoming an empty field.
    let mut rest = line.trim();

    while !rest.is_empty() && fields.len() + 1 < max {
        let (field, tail) = match rest.find(char::is_whitespace) {
            Some(pos) => rest.split_at(pos),
            None => (rest, ""),
        };
        fields.push(field.to_string());
        rest = tail.trim_start();
    }

    if !rest.is_empty() {
        fields.push(rest.to_string());
    }
    fields
}
619
620pub fn columnate(
621    input: &str,
622    width_limit: Option<usize>,
623    fill_rows: bool,
624    use_spaces: Option<usize>,
625    keep_empty_lines: bool,
626    writer: &mut impl Write,
627) -> io::Result<()> {
628    let mut items: Vec<&str> = Vec::new();
629    for line in input.lines() {
630        if line.trim().is_empty() {
631            if keep_empty_lines {
632                items.push("");
633            }
634            continue;
635        }
636        for word in line.split_whitespace() {
637            items.push(word);
638        }
639    }
640
641    if items.is_empty() {
642        return Ok(());
643    }
644
645    let max_item_width = items.iter().map(|s| s.len()).max().unwrap_or(0);
646
647    let (num_cols, col_width) = match width_limit {
648        None => (items.len(), max_item_width),
649        Some(width) => {
650            let effective_col_width = match use_spaces {
651                Some(spacing) => max_item_width + spacing,
652                None => round_up_to_tab(max_item_width + 1).max(TAB_WIDTH),
653            };
654            if effective_col_width == 0 || effective_col_width > width {
655                (1, max_item_width)
656            } else {
657                let cols = width / effective_col_width;
658                (cols.max(1), max_item_width)
659            }
660        }
661    };
662
663    let num_rows = items.len().div_ceil(num_cols);
664
665    for row in 0..num_rows {
666        let mut first = true;
667        for col in 0..num_cols {
668            let idx = if fill_rows {
669                row * num_cols + col
670            } else {
671                col * num_rows + row
672            };
673
674            if idx >= items.len() {
675                continue;
676            }
677
678            let item = items[idx];
679
680            if item.is_empty() && keep_empty_lines {
681                writeln!(writer)?;
682                first = true;
683                continue;
684            }
685
686            if !first {
687                // Padding was already applied to the previous item
688            } else {
689                first = false;
690            }
691
692            let is_last_in_row = {
693                let mut last = true;
694                for next_col in (col + 1)..num_cols {
695                    let next_idx = if fill_rows {
696                        row * num_cols + next_col
697                    } else {
698                        next_col * num_rows + row
699                    };
700                    if next_idx < items.len() {
701                        last = false;
702                        break;
703                    }
704                }
705                last
706            };
707
708            if is_last_in_row {
709                writer.write_all(item.as_bytes())?;
710            } else {
711                match use_spaces {
712                    Some(spacing) => {
713                        pad_with_spaces(item, col_width, spacing, writer)?
714                    }
715                    None => pad_with_tabs(item, col_width, writer)?,
716                }
717            }
718        }
719        writeln!(writer)?;
720    }
721
722    Ok(())
723}
724
#[cfg(test)]
mod tests {
    use super::*;

    /// Six single-letter items, one per line.
    const SIX_ITEMS: &str = "a\nb\nc\nd\ne\nf\n";

    /// Run `columnate` into an in-memory buffer and return the output text.
    fn run_columnate(
        input: &str,
        width: Option<usize>,
        fill_rows: bool,
        use_spaces: Option<usize>,
        keep_empty_lines: bool,
    ) -> String {
        let mut buffer: Vec<u8> = Vec::new();
        let outcome = columnate(
            input,
            width,
            fill_rows,
            use_spaces,
            keep_empty_lines,
            &mut buffer,
        );
        outcome.unwrap();
        String::from_utf8(buffer).unwrap()
    }

    #[test]
    fn fill_columns_default() {
        // Width 10 with 3-cell slots gives 3 columns, filled top to bottom.
        let rendered = run_columnate(SIX_ITEMS, Some(10), false, Some(2), false);
        assert_eq!(rendered, "a  c  e\nb  d  f\n");
    }

    #[test]
    fn fill_rows() {
        // Same grid, filled left to right.
        let rendered = run_columnate(SIX_ITEMS, Some(10), true, Some(2), false);
        assert_eq!(rendered, "a  b  c\nd  e  f\n");
    }

    #[test]
    fn empty_input() {
        let rendered = run_columnate("", Some(80), false, Some(2), false);
        assert_eq!(rendered, "");
    }

    #[test]
    fn single_item() {
        let rendered = run_columnate("hello\n", Some(80), false, Some(2), false);
        assert_eq!(rendered, "hello\n");
    }

    #[test]
    fn no_width_limit() {
        // Without a limit everything lands on one line.
        let rendered = run_columnate("a\nb\nc\nd\n", None, false, Some(2), false);
        assert_eq!(rendered, "a  b  c  d\n");
    }

    #[test]
    fn keep_empty_lines() {
        // The blank line becomes an empty item; only check that no real
        // item is lost.
        let rendered = run_columnate("a\n\nb\nc\n", Some(80), false, Some(2), true);
        for expected in ["a", "b", "c"] {
            assert!(rendered.contains(expected));
        }
    }

    #[test]
    fn uneven_items() {
        // 5 items in 3 columns => 2 rows; column-first order gives
        // row0=[a,c,e] and row1=[b,d].
        let rendered =
            run_columnate("a\nb\nc\nd\ne\n", Some(10), false, Some(2), false);
        assert_eq!(rendered, "a  c  e\nb  d\n");
    }
}