use polars::prelude::DataFrame;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::core::df::tabular;
use crate::error::OxenError;
use crate::model::merkle_tree::node::FileNode;
use crate::model::metadata::generic_metadata::GenericMetadata;
use crate::model::{DataFrameSize, LocalRepository};
use crate::opts::DFOpts;
/// Envelope that exposes a tabular diff summary under the `tabular` field.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiffWrapper {
    /// Row/column change counts and the schema-change flag.
    pub tabular: TabularDiffSummaryImpl,
}
/// Envelope that exposes a tabular diff summary under the `summary` field.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiffSummary {
    /// Row/column change counts and the schema-change flag.
    pub summary: TabularDiffSummaryImpl,
}
/// Counts describing how a tabular file differs between a base and a head version.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiffSummaryImpl {
    /// Rows present in head beyond the base row count.
    pub num_added_rows: usize,
    /// Columns present in head beyond the base column count.
    pub num_added_cols: usize,
    /// Rows present in base beyond the head row count.
    pub num_removed_rows: usize,
    /// Columns present in base beyond the head column count.
    pub num_removed_cols: usize,
    /// Whether the schema is considered to differ between the two versions.
    pub schema_has_changed: bool,
}
impl TabularDiffSummary {
    /// Re-packages this summary as a [`TabularDiffWrapper`].
    ///
    /// The inner [`TabularDiffSummaryImpl`] is cloned, so `self` is left untouched.
    pub fn to_wrapper(&self) -> TabularDiffWrapper {
        let tabular = self.summary.clone();
        TabularDiffWrapper { tabular }
    }
}
impl TabularDiffWrapper {
pub fn from_file_nodes(
base_entry: &Option<FileNode>,
head_entry: &Option<FileNode>,
) -> Result<TabularDiffWrapper, OxenError> {
match (base_entry, head_entry) {
(Some(base_entry), Some(head_entry)) => {
let base_size = match &base_entry.metadata() {
Some(GenericMetadata::MetadataTabular(df_meta)) => DataFrameSize {
height: df_meta.tabular.height,
width: df_meta.tabular.width,
},
_ => return Err(OxenError::basic_str("Invalid metadata type")),
};
let head_size = match &head_entry.metadata() {
Some(GenericMetadata::MetadataTabular(df_meta)) => DataFrameSize {
height: df_meta.tabular.height,
width: df_meta.tabular.width,
},
_ => return Err(OxenError::basic_str("Invalid metadata type")),
};
let schema_has_changed = base_size.width != head_size.width;
let num_added_rows = head_size.height.saturating_sub(base_size.height);
let num_removed_rows = base_size.height.saturating_sub(head_size.height);
let num_added_cols = head_size.width.saturating_sub(base_size.width);
let num_removed_cols = base_size.width.saturating_sub(head_size.width);
Ok(TabularDiffWrapper {
tabular: TabularDiffSummaryImpl {
num_added_rows,
num_added_cols,
num_removed_rows,
num_removed_cols,
schema_has_changed,
},
})
}
(Some(base_entry), None) => {
let base_size = match &base_entry.metadata() {
Some(GenericMetadata::MetadataTabular(df_meta)) => DataFrameSize {
height: df_meta.tabular.height,
width: df_meta.tabular.width,
},
_ => return Err(OxenError::basic_str("Invalid metadata type")),
};
Ok(TabularDiffWrapper {
tabular: TabularDiffSummaryImpl {
num_added_rows: 0,
num_added_cols: 0,
num_removed_rows: base_size.height,
num_removed_cols: base_size.width,
schema_has_changed: false,
},
})
}
(None, Some(head_entry)) => {
let head_size = match &head_entry.metadata() {
Some(GenericMetadata::MetadataTabular(df_meta)) => DataFrameSize {
height: df_meta.tabular.height,
width: df_meta.tabular.width,
},
_ => return Err(OxenError::basic_str("Invalid metadata type")),
};
Ok(TabularDiffWrapper {
tabular: TabularDiffSummaryImpl {
num_added_rows: head_size.height,
num_added_cols: head_size.width,
num_removed_rows: 0,
num_removed_cols: 0,
schema_has_changed: false,
},
})
}
(None, None) => Ok(TabularDiffWrapper {
tabular: TabularDiffSummaryImpl {
num_added_rows: 0,
num_added_cols: 0,
num_removed_rows: 0,
num_removed_cols: 0,
schema_has_changed: false,
},
}),
}
}
pub async fn maybe_get_df_from_file_node(
repo: &LocalRepository,
node: &Option<FileNode>,
) -> Option<DataFrame> {
match node {
Some(node) => {
let version_store = repo.version_store();
let version_path = version_store
.get_version_path(&node.hash().to_string())
.await
.expect("invariant violation: version path not found in maybe_get_df_from_file_node");
tabular::read_df_with_extension(&*version_path, node.extension(), &DFOpts::empty())
.await
.ok()
}
None => None,
}
}
pub fn maybe_get_size(df: &Option<DataFrame>) -> Option<DataFrameSize> {
df.as_ref().map(|df| DataFrameSize {
height: df.height(),
width: df.width(),
})
}
pub fn schema_has_changed(base_df: &Option<DataFrame>, head_df: &Option<DataFrame>) -> bool {
if base_df.is_none() && head_df.is_none() {
return false;
}
if let Some(base_df) = base_df
&& let Some(head_df) = head_df
{
return TabularDiffWrapper::schema_has_changed_df(base_df, head_df);
}
true
}
fn schema_has_changed_df(base_df: &DataFrame, head_df: &DataFrame) -> bool {
let base_schema = base_df.schema();
let head_schema = head_df.schema();
base_schema != head_schema
}
}