use super::Column;
use crate::reports::{ColData, ColumnTypeEnum, Row, RowContent, Rows};
use polars::prelude::*;
use std::num::ParseFloatError;
/// Errors that can occur while converting a report into a polars `DataFrame`.
#[derive(thiserror::Error, Debug)]
pub enum QBPolarsError {
/// An error reported by polars itself; `#[from]` lets `?` / `Into` convert it automatically.
#[error("Polars error: {0}")]
PolarsError(#[from] polars::prelude::PolarsError),
/// The report has a rows section but no column definitions to interpret it with.
#[error("No columns found in report")]
NoColumnsFound,
/// At least one data row has a different number of cells than there are leaf columns.
#[error("Invalid shape of rows in report")]
InvalidRowShape,
}
impl super::Report {
    /// Consumes the report and converts it into a polars [`DataFrame`].
    ///
    /// Nested column definitions are flattened into leaf columns (see `flatten_columns`) and
    /// nested row groups are flattened into leaf data rows (see `collect_leaf_rows`). Each leaf
    /// column becomes one series; `Money` and `Rate` columns are parsed as `f64` when every cell
    /// parses, and all other columns are kept as strings.
    ///
    /// When the report has column definitions but no data rows, the result has one empty string
    /// column per leaf column. When it has neither columns nor rows, the result is an empty frame.
    ///
    /// # Errors
    ///
    /// * [`QBPolarsError::NoColumnsFound`] if the report has a rows section but no columns.
    /// * [`QBPolarsError::InvalidRowShape`] if any leaf row's cell count differs from the number
    ///   of leaf columns.
    /// * [`QBPolarsError::PolarsError`] if polars rejects the assembled columns.
    pub fn into_dataframe(self) -> Result<DataFrame, QBPolarsError> {
        let cols = self.columns.and_then(|c| c.column);
        let leaf_rows = self.rows.map(collect_leaf_rows);
        let (cols, leaf_rows) = match (cols, leaf_rows) {
            (None, None) => return Ok(DataFrame::empty()),
            (None, Some(_)) => return Err(QBPolarsError::NoColumnsFound),
            (Some(c), None) => (c, Vec::new()),
            (Some(c), Some(lr)) => (c, lr),
        };

        let mut col_defs = Vec::new();
        flatten_columns(cols, None, &mut col_defs);

        if leaf_rows.is_empty() {
            if col_defs.is_empty() {
                return Ok(DataFrame::empty());
            }
            let columns = col_defs
                .into_iter()
                .map(|(col_name, _)| Series::new_empty(col_name.into(), &DataType::String))
                .map(polars::prelude::Column::from)
                .collect();
            // Build through the fallible constructor so that a column set polars rejects is
            // returned as an error, exactly like the non-empty path below, instead of going
            // through `FromIterator`, which has no way to report a failure.
            return DataFrame::new(columns).map_err(Into::into);
        }

        // Every row must supply exactly one cell per leaf column; the series builders index
        // rows by column position and rely on this check.
        if leaf_rows.iter().any(|row| row.len() != col_defs.len()) {
            return Err(QBPolarsError::InvalidRowShape);
        }

        let columns = col_defs
            .into_iter()
            .enumerate()
            .map(|(idx, (col_name, col_type))| {
                create_series_for_col(col_name, col_type, idx, &leaf_rows)
            })
            .map(polars::prelude::Column::from)
            .collect();
        DataFrame::new(columns).map_err(Into::into)
    }
}
/// Builds the series for the column at position `idx`, choosing the representation from
/// `col_type`.
///
/// `Money` and `Rate` columns go through `create_numeric_or_string_series`; every other column
/// type is built with `create_string_series`.
fn create_series_for_col(
    col_name: String,
    col_type: ColumnTypeEnum,
    idx: usize,
    rows: &[Vec<ColData>],
) -> Series {
    let maybe_numeric = matches!(col_type, ColumnTypeEnum::Money | ColumnTypeEnum::Rate);
    if maybe_numeric {
        create_numeric_or_string_series(col_name, idx, rows)
    } else {
        create_string_series(col_name, idx, rows)
    }
}
/// Builds a `Float64` series from the `idx`-th cell of every row, falling back to a string
/// series when any cell fails to parse as a number.
///
/// # Panics
///
/// Panics if any row has fewer than `idx + 1` cells.
fn create_numeric_or_string_series(col_name: String, idx: usize, rows: &[Vec<ColData>]) -> Series {
    let cells: Vec<Option<&str>> = rows
        .iter()
        .map(|cells_in_row| cells_in_row[idx].value.as_deref())
        .collect();

    match try_parse_as_numeric(cells) {
        Ok(parsed) => {
            Float64Chunked::from_iter_options(col_name.into(), parsed.into_iter()).into_series()
        }
        // One unparseable cell makes the whole column textual.
        Err(_) => create_string_series(col_name, idx, rows),
    }
}
/// Parses every cell of a column as an optional `f64`.
///
/// Each value is stripped of surrounding whitespace and `%` characters before parsing. `None`
/// inputs, and values that are empty once stripped (e.g. `""` or `"  "`), become `None`.
///
/// # Errors
///
/// Returns the first [`ParseFloatError`] encountered; no partial result is produced.
fn try_parse_as_numeric(values: Vec<Option<&str>>) -> Result<Vec<Option<f64>>, ParseFloatError> {
    values
        .into_iter()
        .map(|value| {
            value
                // Trim *before* the emptiness check so that blank cells count as missing
                // values instead of failing the parse for the whole column.
                .map(|s| s.trim_matches(|c: char| c.is_whitespace() || c == '%'))
                .filter(|s| !s.is_empty())
                .map(str::parse::<f64>)
                .transpose()
        })
        .collect()
}
/// Builds a string series named `col_name` from the `idx`-th cell of every row; cells without a
/// value become nulls.
///
/// # Panics
///
/// Panics if any row has fewer than `idx + 1` cells.
fn create_string_series(col_name: String, idx: usize, rows: &[Vec<ColData>]) -> Series {
    let cells = rows
        .iter()
        .map(|cells_in_row| cells_in_row[idx].value.as_deref());
    let chunked = StringChunked::from_iter_options(col_name.into(), cells);
    chunked.into_series()
}
/// Recursively flattens a tree of column definitions into `(name, type)` pairs for its leaves,
/// appending them to `out` in traversal order.
///
/// A column's own name is its title, or the string form of its type when the title is empty.
/// Names of nested columns are prefixed with their ancestors' names, joined by `" - "`. A column
/// that carries a nested column list contributes only its descendants, not itself.
fn flatten_columns(
    cols: Vec<Column>,
    prefix: Option<&str>,
    out: &mut Vec<(String, ColumnTypeEnum)>,
) {
    for column in cols {
        let label = if column.col_title.is_empty() {
            column.col_type.as_str()
        } else {
            column.col_title.as_str()
        };
        let qualified = match prefix {
            Some(parent) => format!("{} - {}", parent, label),
            None => label.to_string(),
        };
        match column.columns.and_then(|nested| nested.column) {
            Some(children) => flatten_columns(children, Some(&qualified), out),
            None => out.push((qualified, column.col_type)),
        }
    }
}
/// Flattens a (possibly nested) rows section into the list of its leaf data rows, in document
/// order. Returns an empty list when the section holds no rows.
fn collect_leaf_rows(rows: Rows) -> Vec<Vec<ColData>> {
    let mut leaves = Vec::new();
    for top_level in rows.row.into_iter().flatten() {
        collect_from_row(top_level, &mut leaves);
    }
    leaves
}
/// Appends the leaf data rows reachable from `row` to `out`.
///
/// A `Coldata` row is itself a leaf and is pushed as-is. For a `HeaderRowsSummary` row only its
/// nested rows are visited (recursively); its other fields are ignored.
fn collect_from_row(row: Row, out: &mut Vec<Vec<ColData>>) {
    match row.content {
        RowContent::Coldata { col_data } => out.push(col_data),
        RowContent::HeaderRowsSummary { rows, .. } => {
            if let Some(children) = rows.and_then(|group| group.row) {
                for child in children {
                    collect_from_row(child, out);
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    /// Smoke test: deserializes the bundled sample report and checks that it converts into a
    /// non-empty data frame.
    #[test]
    fn test_report_to_dataframe() {
        let input = include_str!("../../test/data/report2.json");
        let report: super::super::Report = serde_json::from_str(input).unwrap();
        // The frame is only inspected, so the binding does not need to be mutable.
        let df_result = report.into_dataframe().unwrap();
        assert!(!df_result.is_empty());
        println!("{:?}", df_result);
    }
}