use crate::{SeriesFormat, SeriesFormatShorthand, FormatError};
use polars::prelude::*;
/// Total number of rendered lines used when a format does not specify `render_height`.
const DEFAULT_TABLE_HEIGHT: usize = 30;
/// User-facing description of how a `DataFrame` should be rendered as a text table.
///
/// Optional fields are resolved against a concrete dataframe when the table is formatted,
/// producing a `DataFrameFormatFinal`.
#[derive(Debug)]
pub struct DataFrameFormat {
/// Per-column formats. When `None`, a format is derived for every column in the
/// dataframe's schema; when `Some`, each listed column must exist in the dataframe.
pub column_formats: Option<Vec<SeriesFormatShorthand>>,
/// String inserted between adjacent columns in header rows and data rows.
pub column_delimiter: String,
/// String inserted between adjacent columns in the header separator row.
pub header_separator_delimiter: String,
/// Character repeated across each column's width in the header separator row.
pub header_separator_char: char,
/// Whether to render the header row(s) containing the column display names.
pub include_header_row: bool,
/// Whether to render a separator row below the header; only consulted when
/// `include_header_row` is true.
pub include_header_separator_row: bool,
/// Whether to render a summary row. NOTE: rendering the summary row is not implemented
/// yet; formatting hits a `todo!` when this is true.
pub include_summary_row: bool,
/// Whether a line is reserved for a separator accompanying the summary row; only
/// consulted when `include_summary_row` is true.
pub include_summary_separator_row: bool,
/// Total number of lines available to the table; `None` falls back to
/// `DEFAULT_TABLE_HEIGHT`.
pub render_height: Option<usize>,
/// Maximum table width, counted in characters; `None` falls back to the sum of the
/// columns' maximum widths plus the delimiters between them.
pub max_render_width: Option<usize>,
}
impl Default for DataFrameFormat {
fn default() -> DataFrameFormat {
DataFrameFormat {
column_formats: None,
column_delimiter: " │ ".to_string(),
header_separator_delimiter: "──┼──".to_string(),
header_separator_char: '─',
include_header_row: true,
include_header_separator_row: true,
include_summary_row: false,
include_summary_separator_row: false,
render_height: None,
max_render_width: None,
}
}
}
/// Fully resolved table format: the counterpart of `DataFrameFormat` in which every
/// optional setting has been replaced by a concrete value.
#[derive(Debug)]
pub struct DataFrameFormatFinal {
/// Resolved format of each column, in the order the columns are rendered.
pub column_formats: Vec<SeriesFormat>,
/// String inserted between adjacent columns in header rows and data rows.
pub column_delimiter: String,
/// String inserted between adjacent columns in the header separator row.
pub header_separator_delimiter: String,
/// Character repeated across each column's width in the header separator row.
pub header_separator_char: char,
/// Whether to render the header row(s) containing the column display names.
pub include_header_row: bool,
/// Whether to render a separator row below the header; only consulted when
/// `include_header_row` is true.
pub include_header_separator_row: bool,
/// Whether to render a summary row. NOTE: rendering the summary row is not implemented
/// yet; formatting hits a `todo!` when this is true.
pub include_summary_row: bool,
/// Whether a line is reserved for a separator accompanying the summary row; only
/// consulted when `include_summary_row` is true.
pub include_summary_separator_row: bool,
/// Total number of lines available to the table (header, separators, and data rows).
pub render_height: usize,
/// Maximum table width, counted in characters.
pub max_render_width: usize,
}
impl DataFrameFormat {
    /// Render `df` as a text table according to this format.
    ///
    /// The format is first resolved against `df` (see `finalize`) and the resolved format
    /// then renders the dataframe.
    ///
    /// # Errors
    /// Returns a [`FormatError`] if the format cannot be resolved against `df` (for
    /// example, a requested column is missing) or if rendering fails.
    pub fn format(&self, df: DataFrame) -> Result<String, FormatError> {
        let fmt = self.finalize(df.clone())?;
        fmt.format(df)
    }

    /// Resolve every optional setting against `df`, producing a `DataFrameFormatFinal`.
    ///
    /// * Column formats: explicitly listed columns are looked up in the schema of `df`;
    ///   when no list is given, a format is derived for every column in the schema.
    /// * `max_render_width`: defaults to the sum of the columns' maximum widths plus the
    ///   delimiters between them, saturating at `usize::MAX`.
    /// * `render_height`: defaults to `DEFAULT_TABLE_HEIGHT`.
    ///
    /// # Errors
    /// Returns `FormatError::ColumnMissing` when a listed column is absent from `df`, and
    /// propagates any error raised while finalizing an individual column format.
    fn finalize(&self, df: DataFrame) -> Result<DataFrameFormatFinal, FormatError> {
        let schema = df.schema();
        let column_formats: Vec<SeriesFormat> = match &self.column_formats {
            Some(cols) => {
                let mut fmts = Vec::with_capacity(cols.len());
                for col in cols.iter() {
                    let dtype = match schema.get_field(col.name.as_str()) {
                        Some(field) => field.dtype,
                        None => {
                            return Err(FormatError::ColumnMissing(format!(
                                "missing column: {}",
                                col.name
                            )))
                        }
                    };
                    fmts.push(col.clone().finalize(&dtype)?);
                }
                fmts
            }
            None => schema
                .iter()
                .map(|(name, dtype)| SeriesFormatShorthand::new().name(name).finalize(dtype))
                .collect::<Result<Vec<SeriesFormat>, FormatError>>()?,
        };

        let max_render_width = match self.max_render_width {
            Some(value) => value,
            None => {
                let columns_width = safe_sum_with_max_on_overflow(
                    column_formats.iter().map(|c| c.get_max_width()).collect(),
                );
                // A table with zero columns has zero delimiters; `len() - 1` would
                // underflow there, so saturate instead.
                let n_delimiters = column_formats.len().saturating_sub(1);
                let delimiters_width = self
                    .column_delimiter
                    .chars()
                    .count()
                    .saturating_mul(n_delimiters);
                safe_sum_with_max_on_overflow(vec![columns_width, delimiters_width])
            }
        };

        Ok(DataFrameFormatFinal {
            column_formats,
            column_delimiter: self.column_delimiter.clone(),
            header_separator_delimiter: self.header_separator_delimiter.clone(),
            header_separator_char: self.header_separator_char,
            include_header_row: self.include_header_row,
            include_header_separator_row: self.include_header_separator_row,
            include_summary_row: self.include_summary_row,
            include_summary_separator_row: self.include_summary_separator_row,
            render_height: self.render_height.unwrap_or(DEFAULT_TABLE_HEIGHT),
            max_render_width,
        })
    }
}
/// Add up `numbers`, returning `usize::MAX` as soon as the running total would overflow.
///
/// An empty input sums to `0`.
fn safe_sum_with_max_on_overflow(numbers: Vec<usize>) -> usize {
    numbers
        .into_iter()
        // `checked_add` yields `None` on overflow, which stops the fold early.
        .try_fold(0usize, |total, value| total.checked_add(value))
        .unwrap_or(usize::MAX)
}
impl DataFrameFormatFinal {
    /// Number of lines the header occupies: the largest line count among the column
    /// display names (a name may contain `\n`), or `0` when there are no columns.
    fn n_header_lines(&self) -> usize {
        self.column_formats
            .iter()
            .map(|f| f.display_name.chars().filter(|&c| c == '\n').count() + 1)
            .max()
            .unwrap_or(0)
    }

    /// Number of data rows that fit in `render_height` once the header and summary lines
    /// have been accounted for.
    ///
    /// Saturates at `0` when `render_height` is too small to hold even the header and
    /// summary lines, rather than underflowing.
    fn n_data_rows(&self) -> usize {
        let header_lines = if self.include_header_row {
            self.n_header_lines() + usize::from(self.include_header_separator_row)
        } else {
            0
        };
        let summary_lines = if self.include_summary_row {
            1 + usize::from(self.include_summary_separator_row)
        } else {
            0
        };
        self.render_height
            .saturating_sub(header_lines)
            .saturating_sub(summary_lines)
    }

    /// Width, in characters, of a rendered row: the sum of `used_widths` plus one column
    /// delimiter between each pair of adjacent columns.
    fn total_rendered_width(&self, used_widths: &[usize]) -> usize {
        let n_delimiters = used_widths.len().saturating_sub(1);
        used_widths.iter().sum::<usize>() + n_delimiters * self.column_delimiter.chars().count()
    }

    /// Render the header as one string per header line.
    ///
    /// Each column's display name is split on `\n`; names with fewer lines than the
    /// tallest header are bottom-aligned, with blank padding filling the lines above.
    /// Every line is right-aligned within the column's width. `total_width` is used only
    /// as a capacity hint for the row strings.
    fn render_header_rows(&self, used_widths: &[usize], total_width: usize) -> Vec<String> {
        let n_header_lines = self.n_header_lines();
        let mut rows: Vec<String> = (0..n_header_lines)
            .map(|_| String::with_capacity(total_width))
            .collect();
        for (c, &width) in used_widths.iter().enumerate() {
            if c != 0 {
                for row in rows.iter_mut() {
                    row.push_str(self.column_delimiter.as_str());
                }
            }
            let lines: Vec<&str> = self.column_formats[c].display_name.split('\n').collect();
            // `n_header_lines` is the maximum line count over all columns, so this
            // cannot underflow.
            let bound = n_header_lines - lines.len();
            for row in rows.iter_mut().take(bound) {
                row.push_str(" ".repeat(width).as_str());
            }
            for (row, line) in rows.iter_mut().skip(bound).zip(lines) {
                row.push_str(format!("{:>width$}", line, width = width).as_str());
            }
        }
        rows
    }

    /// Render the row that separates the header from the data: `header_separator_char`
    /// repeated across each column's width, joined by `header_separator_delimiter`.
    /// `total_width` is used only as a capacity hint.
    fn render_header_separator_row(&self, used_widths: &[usize], total_width: usize) -> String {
        let mut row = String::with_capacity(total_width);
        let separator = self.header_separator_char.to_string();
        for (c, width) in used_widths.iter().enumerate() {
            if c != 0 {
                row.push_str(self.header_separator_delimiter.as_str());
            }
            row.push_str(separator.repeat(*width).as_str());
        }
        row
    }

    /// Format the cells of every column that fits within `max_render_width`.
    ///
    /// Returns `(used_widths, columns)`: the width actually occupied by each rendered
    /// column and the column's formatted cells. Columns are taken left to right; when the
    /// minimum widths of all columns do not fit, trailing columns are dropped. Any room
    /// beyond the minimum widths is offered to columns in order, each bounded by its own
    /// maximum width.
    ///
    /// # Errors
    /// * `FormatError::InvalidFormat` when a column's minimum width exceeds its maximum.
    /// * `FormatError::EmptyData` when a formatted column of a non-empty dataframe
    ///   contains no cells.
    /// * Any error raised while looking up or formatting a column.
    fn render_columns(&self, df: DataFrame) -> Result<(Vec<usize>, Vec<Vec<String>>), FormatError> {
        let delimiter_width = self.column_delimiter.chars().count();
        let n_columns = self.column_formats.len();

        // Compute the minimum and maximum width of each column.
        let mut column_min_widths: Vec<usize> = Vec::with_capacity(n_columns);
        let mut column_max_widths: Vec<usize> = Vec::with_capacity(n_columns);
        for fmt in self.column_formats.iter() {
            let min_width = fmt.header_width().max(fmt.get_min_width());
            let max_width = fmt.get_max_width();
            if min_width > max_width {
                let msg = format!("min_width > max_width for column: {}", fmt.display_name);
                return Err(FormatError::InvalidFormat(msg));
            }
            column_min_widths.push(min_width);
            column_max_widths.push(max_width);
        }

        // Decide how many leading columns fit. `saturating_sub` keeps the zero-column case
        // from underflowing.
        let total_min_width = column_min_widths.iter().sum::<usize>()
            + delimiter_width * n_columns.saturating_sub(1);
        let n_used_columns = if total_min_width >= self.max_render_width {
            let mut n_used_columns = 0;
            let mut used_width = 0;
            for min_width in column_min_widths.iter() {
                // A delimiter precedes every column but the first. Checking the column
                // count (not the accumulated width) keeps this correct when a leading
                // column has zero width.
                if n_used_columns > 0 {
                    used_width += delimiter_width;
                }
                if used_width + min_width <= self.max_render_width {
                    n_used_columns += 1;
                    used_width += min_width;
                } else {
                    break;
                }
            }
            n_used_columns
        } else {
            n_columns
        };

        // Room left over after every used column has received its minimum width.
        let mut spare_room: usize = self
            .max_render_width
            .saturating_sub(column_min_widths.iter().take(n_used_columns).sum::<usize>())
            .saturating_sub(delimiter_width * n_used_columns.saturating_sub(1));

        let (n_rows, _) = df.shape();
        let mut columns = Vec::with_capacity(n_used_columns);
        let mut used_widths = Vec::with_capacity(n_used_columns);
        for (c, column_format) in self.column_formats.iter().take(n_used_columns).enumerate() {
            let min_width = column_min_widths[c];
            if n_rows == 0 {
                // Nothing to format: the column is exactly as wide as its minimum.
                used_widths.push(min_width);
                columns.push(vec![]);
                continue;
            }
            let max_width = column_max_widths[c].min(min_width.saturating_add(spare_room));
            let column = column_format
                .clone()
                .min_width(min_width)
                .max_width(max_width)
                .format(df.column(column_format.name.as_str())?)?;
            let used_width = column
                .iter()
                .map(|s| s.chars().count())
                .max()
                .ok_or_else(|| {
                    FormatError::EmptyData(format!("empty column: {}", column_format.name))
                })?;
            columns.push(column);
            used_widths.push(used_width);
            // Charge whatever this column used beyond its minimum against the spare room.
            spare_room = spare_room.saturating_sub(used_width.saturating_sub(min_width));
        }
        Ok((used_widths, columns))
    }

    /// Append one joined line per data row to `rows`, placing `column_delimiter` between
    /// the cells of adjacent columns.
    ///
    /// The row count is taken from the first column; nothing is appended when `columns`
    /// is empty. `total_width` is used only as a capacity hint.
    fn assemble_rows(&self, columns: Vec<Vec<String>>, rows: &mut Vec<String>, total_width: usize) {
        let n_data_rows = match columns.first() {
            Some(column) => column.len(),
            None => return,
        };
        rows.reserve(n_data_rows);
        for r in 0..n_data_rows {
            let mut row = String::with_capacity(total_width);
            for (c, column) in columns.iter().enumerate() {
                if c != 0 {
                    row.push_str(self.column_delimiter.as_str());
                }
                row.push_str(column[r].as_str());
            }
            rows.push(row);
        }
    }

    /// Render `df` as a text table: optional header rows and header separator followed by
    /// as many data rows as fit in `render_height`, joined with `\n`.
    ///
    /// # Errors
    /// Propagates any [`FormatError`] raised while rendering the columns.
    ///
    /// # Panics
    /// Panics when `include_summary_row` is set, because the summary row is not
    /// implemented yet.
    pub(crate) fn format(&self, df: DataFrame) -> Result<String, FormatError> {
        let n_data_rows = self.n_data_rows();
        let df = df.slice(0, n_data_rows);
        let (used_widths, columns) = self.render_columns(df)?;
        let total_width = self.total_rendered_width(&used_widths);

        // Size the buffer from what will actually be rendered rather than from
        // `render_height`, which the caller may set arbitrarily large.
        let n_rendered_rows = columns.first().map_or(0, |column| column.len());
        let mut rows: Vec<String> =
            Vec::with_capacity(n_rendered_rows + self.n_header_lines() + 1);

        if self.include_header_row {
            rows.extend(self.render_header_rows(&used_widths, total_width));
            if self.include_header_separator_row {
                rows.push(self.render_header_separator_row(&used_widths, total_width));
            }
        }
        self.assemble_rows(columns, &mut rows, total_width);
        if self.include_summary_row {
            todo!("summary row")
        }
        Ok(rows.join("\n"))
    }
}