use comfy_table::{Cell, ContentArrangement, Table};
use datafusion::arrow::{
array::RecordBatch,
error::ArrowError,
util::display::{ArrayFormatter, FormatOptions},
};
use std::fmt::Display;
use unicode_width::UnicodeWidthStr;
const DEFAULT_PRESET: &str = "||--|=+||-+||++++++";
const TRUNCATED_PRESET: &str = "|…--|=+…|-+|…+++…+…";
pub fn pretty_format_compact_batch(
batch: &RecordBatch,
max_width: usize,
max_row_height: usize,
min_compacted_col_width: usize,
) -> Result<impl Display, ArrowError> {
let schema = batch.schema();
let total_fields = schema.fields().len();
let format_opts = FormatOptions::default().with_display_error(true);
let header: Vec<Cell> = schema
.fields()
.iter()
.map(|f| Cell::new(f.name()))
.collect();
let formatters: Vec<_> = batch
.columns()
.iter()
.map(|col| ArrayFormatter::try_new(col.as_ref(), &format_opts))
.collect::<Result<_, ArrowError>>()?;
let formatted_values: Vec<Vec<Cell>> = (0..batch.num_rows())
.map(|row_idx| {
formatters
.iter()
.map(|fmt| Cell::new(fmt.value(row_idx)))
.collect()
})
.collect();
let mut column_widths = vec![0; total_fields];
for row in std::iter::once(&header).chain(formatted_values.iter()) {
for (col_idx, cell) in row.iter().enumerate() {
let cell_width = cell.content().width() + 3; column_widths[col_idx] = column_widths[col_idx].max(cell_width);
}
}
let nb_displayed_columns = if max_width == 0 {
total_fields
} else {
let mut table_width = 1; let mut fit_columns = 0; for width in column_widths {
let col_width = width.min(min_compacted_col_width).max(4); if table_width + col_width > max_width {
break;
}
table_width += col_width;
fit_columns += 1;
}
fit_columns
};
let table_preset = if nb_displayed_columns == total_fields {
DEFAULT_PRESET
} else {
TRUNCATED_PRESET
};
let mut table = Table::new();
table
.force_no_tty()
.load_preset(table_preset)
.set_content_arrangement(ContentArrangement::Dynamic)
.set_header(header.into_iter().take(nb_displayed_columns))
.set_truncation_indicator("…");
for formatted_row in formatted_values {
table.add_row(formatted_row.into_iter().take(nb_displayed_columns));
}
if max_row_height > 0 {
for row in table.row_iter_mut() {
row.max_height(max_row_height);
}
}
if max_width > 0 {
table.set_width(max_width as u16);
}
Ok(table.to_string())
}
#[cfg(test)]
mod tests {
use super::pretty_format_compact_batch;
use datafusion::arrow::array::{Int64Array, StringArray};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::error::ArrowError;
use datafusion::arrow::record_batch::RecordBatch;
use insta::{Settings, assert_snapshot};
use std::sync::Arc;
fn insta_settings() -> Settings {
let mut settings = Settings::clone_current();
settings.set_prepend_module_to_snapshot(false);
settings
}
#[test]
fn test_pretty_format_no_constraints() -> Result<(), ArrowError> {
assert_formatting(0, 0, 0, "pretty_format_no_constraints")
}
#[test]
fn test_pretty_format_table_width() -> Result<(), ArrowError> {
assert_formatting(25, 3, 0, "pretty_format_table_width")
}
#[test]
fn test_pretty_format_all_constraints_narrow() -> Result<(), ArrowError> {
assert_formatting(25, 3, 12, "pretty_format_all_constraints_narrow")
}
#[test]
fn test_pretty_format_all_constraints_wide() -> Result<(), ArrowError> {
assert_formatting(76, 3, 12, "pretty_format_all_constraints_wide")
}
fn assert_formatting(
max_width: usize,
max_row_height: usize,
min_compacted_col_width: usize,
test_name: &str,
) -> Result<(), ArrowError> {
let batch = create_sample_batch();
let result = pretty_format_compact_batch(
&batch,
max_width,
max_row_height,
min_compacted_col_width,
)?
.to_string();
insta_settings().bind(|| assert_snapshot!(test_name, result));
Ok(())
}
fn create_sample_batch() -> RecordBatch {
let schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int64, false),
Field::new("country", DataType::Utf8, false),
Field::new("description", DataType::Utf8, false),
Field::new("city_name", DataType::Utf8, false),
Field::new("emojis", DataType::Utf8, false),
Field::new("chinese name", DataType::Utf8, false),
Field::new("pop_count", DataType::Int64, false),
]));
let a = Arc::new(Int64Array::from(vec![1, 3, 7]));
let country =
Arc::new(StringArray::from(vec!["France", "United Kingdom", "Spain"]));
let description = Arc::new(StringArray::from(vec![
"Paris is renowned as the City of Light, celebrated for its rich history, magnificent architecture, and vibrant arts scene. The city boasts iconic landmarks such as the Eiffel Tower and the Louvre, along with charming streets, quaint cafés, and a deep cultural heritage that continues to inspire artists, writers, and travelers from around the world.",
"London is a dynamic and cosmopolitan metropolis that seamlessly blends its storied past with modern innovation. The city offers an array of historical sites, diverse neighborhoods, and world-class museums and theaters. Its bustling markets, green parks, and ever-evolving cultural scene make London a hub of creativity, commerce, and community life.",
"Barcelona is a lively coastal city known for its striking modernist architecture, Mediterranean beaches, and eclectic cultural offerings. From the whimsical creations of Antoni Gaudí to the vibrant street life and renowned culinary delights, Barcelona captivates visitors with its unique blend of historic charm and contemporary energy.",
]));
let city_name = Arc::new(StringArray::from(vec!["Paris", "London", "Barcelona"]));
let emojis = Arc::new(StringArray::from(vec![
"🗼🥖🍷🎨🚲🏰🌟",
"🇬🇧🚕🏰🎡🎩☕",
"🌞🎨⚽🍴🎉🌊",
]));
let chinese_name = Arc::new(StringArray::from(vec!["巴黎", "倫敦", "巴塞隆納"]));
let pop_count = Arc::new(Int64Array::from(vec![321, 987654, 2]));
RecordBatch::try_new(
schema,
vec![
a,
country,
description,
city_name,
emojis,
chinese_name,
pop_count,
],
)
.unwrap()
}
}