rgwml 2.0.0

Typed, local-first tabular data library with columnar in-memory storage.
Documentation
use crate::table::{Column, Table};

/// Limits that bound how much of a table [`render`] prints.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrettyOptions {
    /// Maximum number of rows included in the preview.
    pub max_rows: usize,
    /// Maximum number of leading columns included in the preview.
    pub max_columns: usize,
    /// Maximum number of characters per cell; longer text is truncated.
    /// `render` treats a value of 0 as 1.
    pub max_width: usize,
    /// Whether a row of column data types is printed below the header row.
    pub show_types: bool,
}

impl Default for PrettyOptions {
    fn default() -> Self {
        Self {
            max_rows: 20,
            max_columns: 12,
            max_width: 32,
            show_types: true,
        }
    }
}

/// Renders a bounded, human-readable text preview of `table`.
///
/// The returned string is newline-joined and consists of:
/// - `rows: N` and `columns: N` summary lines followed by `preview:`;
/// - a `|`-delimited header row, an optional data-type row (when
///   `options.show_types` is set), a dashed separator, and up to
///   `options.max_rows` data rows covering the first `options.max_columns`
///   columns;
/// - `... N more rows omitted` / `... N more columns omitted` notes when the
///   preview is smaller than the table.
///
/// Every cell is limited to `options.max_width` characters (a width of 0 is
/// treated as 1) and column widths are measured in characters.
///
/// Only the displayed prefix of the schema and of the column list is
/// borrowed; neither fields nor column data are cloned.
pub fn render(table: &Table, options: &PrettyOptions) -> String {
    let max_width = options.max_width.max(1);
    let selected_rows = table.selected_row_indices();
    let visible_columns = table.ncols().min(options.max_columns);
    let preview_rows = selected_rows
        .iter()
        .copied()
        .take(options.max_rows)
        .collect::<Vec<_>>();

    let mut lines = vec![
        format!("rows: {}", table.nrows()),
        format!("columns: {}", table.ncols()),
        "preview:".to_string(),
    ];

    if visible_columns == 0 {
        lines.push("(no columns to display)".to_string());
    } else {
        // Borrow the visible prefix instead of `to_vec()`-ing it: cloning a
        // column would copy its whole payload just to print a few rows.
        // The intermediate bindings keep the borrows valid regardless of
        // whether the accessors hand back references or owned values.
        let schema = table.schema();
        let all_fields = schema.fields();
        let fields = &all_fields[..visible_columns];
        let all_columns = table.columns();
        let columns = &all_columns[..visible_columns];

        let header_cells = fields
            .iter()
            .map(|field| truncate_text(field.name.as_ref(), max_width))
            .collect::<Vec<_>>();
        // The type row is only built when it will actually be printed.
        let type_cells = if options.show_types {
            Some(
                fields
                    .iter()
                    .map(|field| truncate_text(&field.dtype.to_string(), max_width))
                    .collect::<Vec<_>>(),
            )
        } else {
            None
        };
        let data_cells = preview_rows
            .iter()
            .map(|&row_index| {
                columns
                    .iter()
                    .map(|column| format_cell(column, row_index, max_width))
                    .collect::<Vec<_>>()
            })
            .collect::<Vec<_>>();

        // Each column is as wide as its widest printed cell (header, optional
        // type label, and every previewed value).
        let mut widths = header_cells
            .iter()
            .map(|cell| cell.chars().count())
            .collect::<Vec<_>>();
        for row in type_cells.iter().chain(data_cells.iter()) {
            for (width, cell) in widths.iter_mut().zip(row.iter()) {
                *width = (*width).max(cell.chars().count());
            }
        }

        lines.push(render_table_row(&header_cells, &widths));
        if let Some(cells) = &type_cells {
            lines.push(render_table_row(cells, &widths));
        }
        lines.push(render_separator(&widths));

        if data_cells.is_empty() {
            lines.push("(no rows)".to_string());
        } else {
            lines.extend(data_cells.iter().map(|row| render_table_row(row, &widths)));
        }
    }

    if table.nrows() > preview_rows.len() {
        lines.push(format!(
            "... {} more rows omitted",
            table.nrows() - preview_rows.len()
        ));
    }
    if table.ncols() > visible_columns {
        lines.push(format!(
            "... {} more columns omitted",
            table.ncols() - visible_columns
        ));
    }

    lines.join("\n")
}

/// Formats the value stored at `row_index` of `column` as display text.
///
/// The accessor matching the column variant is used to read the value; a
/// missing value is shown as `null`. The resulting text is then limited to
/// `max_width` characters by `truncate_text`.
fn format_cell(column: &Column, row_index: u32, max_width: usize) -> String {
    // Shared tail of every arm: stringify a present value, or fall back to
    // the literal `null` marker.
    fn text_or_null<T: ToString>(value: Option<T>) -> String {
        value.map_or_else(|| "null".to_string(), |present| present.to_string())
    }

    let text = match column {
        Column::Bool(_) => text_or_null(column.bool_value(row_index)),
        // Timestamps share the 64-bit integer accessor.
        Column::I64(_) | Column::TimestampMs(_) => text_or_null(column.i64_value(row_index)),
        Column::Date32(_) => text_or_null(column.i32_value(row_index)),
        Column::F64(_) => text_or_null(column.f64_value(row_index)),
        // Plain and dictionary-encoded strings share the string accessor.
        Column::Utf8(_) | Column::DictUtf8(_) => text_or_null(column.utf8_value(row_index)),
    };

    truncate_text(&text, max_width)
}

/// Shortens `value` to at most `max_width` characters.
///
/// Text that already fits is returned unchanged. Otherwise the result is
/// exactly `max_width` characters long: when the budget is larger than three
/// the last three characters are `...`; budgets of three or fewer get a plain
/// cut with no ellipsis. Width is counted in `char`s, not bytes.
fn truncate_text(value: &str, max_width: usize) -> String {
    // Byte offset of the first character beyond the budget; `None` means the
    // whole text fits.
    let overflow_at = match value.char_indices().nth(max_width) {
        Some((offset, _)) => offset,
        None => return value.to_string(),
    };

    if max_width <= 3 {
        // Too narrow to fit an ellipsis: keep the leading characters only.
        return value[..overflow_at].to_string();
    }

    // Reserve three characters of the budget for the ellipsis.
    let kept_chars = max_width - 3;
    let cut = value
        .char_indices()
        .nth(kept_chars)
        .map_or(value.len(), |(offset, _)| offset);

    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&value[..cut]);
    shortened.push_str("...");
    shortened
}

/// Lays out one table line as `| cell | cell |`.
///
/// Each cell is padded to the width at the same position in `widths`. Cells
/// and widths are paired positionally, so any surplus entries on either side
/// are ignored. With nothing to pair, the result is a lone `|`.
fn render_table_row(cells: &[String], widths: &[usize]) -> String {
    cells
        .iter()
        .zip(widths)
        .fold(String::from("|"), |mut row, (text, &width)| {
            row.push(' ');
            row.push_str(&pad_cell(text, width));
            row.push_str(" |");
            row
        })
}

/// Builds the dashed rule printed under the header rows.
///
/// Produces one run of `-` per column, as long as that column's width, and
/// lays it out with `render_table_row` so it lines up with the other rows.
fn render_separator(widths: &[usize]) -> String {
    let mut dashes = Vec::with_capacity(widths.len());
    for &width in widths {
        dashes.push("-".repeat(width));
    }
    render_table_row(&dashes, widths)
}

/// Right-pads `value` with spaces until it is `width` characters long.
///
/// Text that is already `width` characters or longer is returned unchanged
/// (it is never shortened). Length is counted in `char`s, not bytes.
fn pad_cell(value: &str, width: usize) -> String {
    // Zero when the text already fills (or overflows) the column.
    let missing = width.saturating_sub(value.chars().count());

    let mut padded = String::with_capacity(value.len() + missing);
    padded.push_str(value);
    padded.extend(std::iter::repeat(' ').take(missing));
    padded
}

// Unit tests for the text preview produced by `render`.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::{render, PrettyOptions};
    use crate::ops::{ColumnSelector, NullOrder, Predicate, SortKey, SortOrder};
    use crate::table::{Bitmap, Column, DataType, Field, PrimitiveCol, Schema, Table, Utf8Col};

    /// Builds a `Utf8Col` from optional strings.
    ///
    /// Bytes of present values are concatenated into one buffer, one end
    /// offset is recorded per entry (a `None` entry repeats the current
    /// offset and contributes no bytes), and a validity flag is recorded per
    /// entry.
    fn utf8_column(values: &[Option<&str>]) -> Utf8Col {
        let mut offsets = Vec::with_capacity(values.len() + 1);
        let mut bytes = Vec::new();
        let mut validity = Vec::with_capacity(values.len());
        // Leading offset: the first value starts at byte 0.
        offsets.push(0);
        for value in values {
            match value {
                Some(value) => {
                    bytes.extend_from_slice(value.as_bytes());
                    offsets.push(bytes.len() as u32);
                    validity.push(true);
                }
                None => {
                    offsets.push(bytes.len() as u32);
                    validity.push(false);
                }
            }
        }

        Utf8Col::new(offsets, bytes, Some(Bitmap::from_bools(&validity)))
            .expect("utf8 column should build")
    }

    /// Sorts by `id` descending, keeps rows with `id >= 2`, and checks that
    /// the rendered preview reports the filtered row count, shows the header
    /// and type rows, prints the invalid `score` entry as `null`, and lists
    /// the surviving rows in sorted order.
    #[test]
    fn renders_selected_rows_with_types_and_nulls() {
        let schema = Schema::new(vec![
            Field::new(Arc::<str>::from("id"), DataType::I64),
            Field::new(Arc::<str>::from("name"), DataType::Utf8),
            Field::new(Arc::<str>::from("score"), DataType::F64),
        ])
        .expect("schema should be valid");
        let table = Table::from_columns(
            schema,
            vec![
                Column::I64(PrimitiveCol::new(vec![1_i64, 2_i64, 3_i64], None)),
                Column::Utf8(utf8_column(&[Some("alpha"), Some("beta"), Some("gamma")])),
                // The second score is marked invalid by the bitmap.
                Column::F64(PrimitiveCol::new(
                    vec![1.5_f64, 0.0_f64, 9.25_f64],
                    Some(Bitmap::from_bools(&[true, false, true])),
                )),
            ],
        )
        .expect("table should build");
        let sorted = table
            .sort_by(&[SortKey {
                column: ColumnSelector::from("id"),
                order: SortOrder::Descending,
                nulls: NullOrder::Last,
            }])
            .expect("sort should succeed")
            .filter(&Predicate::Comparison {
                column: ColumnSelector::from("id"),
                op: crate::ops::CompareOp::Gte,
                value: Some(crate::ops::Literal::I64(2)),
            })
            .expect("filter should succeed");

        let output = render(
            &sorted,
            &PrettyOptions {
                max_rows: 5,
                max_columns: 5,
                max_width: 16,
                show_types: true,
            },
        );

        assert!(output.contains("rows: 2"));
        assert!(output.contains("| id "));
        assert!(output.contains("| i64"));
        assert!(output.contains("gamma"));
        assert!(output.contains("beta"));
        assert!(output.contains("null"));
        // The row with id 1 ("alpha") is removed by the filter.
        assert!(!output.contains("alpha"));
        // Descending sort: id 3 ("gamma") is printed before id 2 ("beta").
        assert!(
            output.find("gamma").expect("gamma should be present")
                < output.find("beta").expect("beta should be present")
        );
    }

    /// Renders with limits of 2 rows, 2 columns and 8-character cells (no
    /// type row) and checks cell truncation plus the omitted-row and
    /// omitted-column notes.
    #[test]
    fn truncates_cells_and_reports_omitted_rows_and_columns() {
        let schema = Schema::new(vec![
            Field::new(Arc::<str>::from("id"), DataType::I64),
            Field::new(Arc::<str>::from("description"), DataType::Utf8),
            Field::new(Arc::<str>::from("status"), DataType::Utf8),
            Field::new(Arc::<str>::from("ignored"), DataType::Utf8),
        ])
        .expect("schema should be valid");
        let table = Table::from_columns(
            schema,
            vec![
                Column::I64(PrimitiveCol::new(vec![1_i64, 2_i64, 3_i64], None)),
                Column::Utf8(utf8_column(&[
                    Some("alpha-very-long"),
                    Some("beta"),
                    Some("gamma"),
                ])),
                Column::Utf8(utf8_column(&[Some("ready"), Some("running"), Some("done")])),
                Column::Utf8(utf8_column(&[Some("x"), Some("y"), Some("z")])),
            ],
        )
        .expect("table should build");

        let output = render(
            &table,
            &PrettyOptions {
                max_rows: 2,
                max_columns: 2,
                max_width: 8,
                show_types: false,
            },
        );

        assert!(output.contains("rows: 3"));
        assert!(output.contains("columns: 4"));
        // "alpha-very-long" is cut to 8 characters including the ellipsis.
        assert!(output.contains("alpha..."));
        assert!(output.contains("... 1 more rows omitted"));
        assert!(output.contains("... 2 more columns omitted"));
        // Columns beyond `max_columns` must not appear at all.
        assert!(!output.contains("ignored"));
        // With `show_types: false` no type row is printed.
        assert!(!output.contains("| utf8 "));
    }
}