liboxen 0.46.7

Oxen is a fast version control system for unstructured data, written in Rust, that helps version large machine learning datasets.
Documentation
use crate::{
    error::OxenError,
    model::data_frame::schema::{Field, Schema},
};
use polars::frame::DataFrame;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;

use super::AddRemoveModifyCounts;

/// Column-level difference between two schemas.
///
/// [`TabularSchemaDiff::from_schemas`] fills `added` with the fields found only in the
/// second schema and `removed` with the fields found only in the first.
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
pub struct TabularSchemaDiff {
    /// Fields that appear in the second schema but not in the first.
    pub added: Vec<Field>,
    /// Fields that appear in the first schema but not in the second.
    pub removed: Vec<Field>,
}

/// Modifications reported by a tabular diff: row counts plus column changes.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiffMods {
    /// Added / removed / modified counters.
    pub row_counts: AddRemoveModifyCounts,
    /// Fields added to or removed from the schema.
    pub col_changes: TabularSchemaDiff,
}

/// Summary section of a [`TabularDiff`]: modifications, schemas and duplicate counters.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiffSummary {
    /// Row counts and column changes.
    pub modifications: TabularDiffMods,
    /// The left, right and diff schemas.
    pub schemas: TabularDiffSchemas,
    /// Left and right duplicate counters.
    pub dupes: TabularDiffDupes,
}

/// Result of a tabular diff: two optional file names, a summary, the parameters and the
/// diff contents as a polars data frame.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiff {
    // NOTE(review): presumably the names of the two files that were compared
    // (filename1 = left, filename2 = right) — confirm against callers.
    /// First file name, if known.
    pub filename1: Option<String>,
    /// Second file name, if known.
    pub filename2: Option<String>,
    /// Modifications, schemas and duplicate counters for this diff.
    pub summary: TabularDiffSummary,
    /// The `keys`, `targets` and `display` lists associated with this diff.
    pub parameters: TabularDiffParameters,

    // TODO: Not sure if this is the best way to represent polars data frames
    // The `schema` attribute below makes the generated API schema describe this field as a
    // generic object instead of deriving a schema from `DataFrame`.
    #[schema(value_type = Object)]
    pub contents: DataFrame,
}

/// The three schemas attached to a tabular diff.
// NOTE(review): presumably `left`/`right` describe the two compared tables and `diff`
// describes the resulting diff data frame — confirm where these are populated.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiffSchemas {
    /// Schema of the left side.
    pub left: Schema,
    /// Schema of the right side.
    pub right: Schema,
    /// Schema of the diff.
    pub diff: Schema,
}

/// Parameters of a tabular diff, stored as three lists of strings.
// NOTE(review): presumably these are column names — `keys` to match rows on, `targets` to
// compare, and `display` to carry through for output — confirm against the diff code.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiffParameters {
    /// The `keys` list.
    pub keys: Vec<String>,
    /// The `targets` list.
    pub targets: Vec<String>,
    /// The `display` list.
    pub display: Vec<String>,
}

/// Duplicate counters for the left and right side of a tabular diff.
// NOTE(review): presumably the number of duplicated rows found on each side — confirm.
// Need to serialize here because we directly write this to disk to cache compares
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TabularDiffDupes {
    /// Counter for the left side.
    pub left: u64,
    /// Counter for the right side.
    pub right: u64,
}
impl TabularDiffDupes {
    pub fn empty() -> Self {
        TabularDiffDupes { left: 0, right: 0 }
    }
}

impl TabularDiffMods {
    /// Returns a value with every row counter at zero and the column changes taken from
    /// `TabularSchemaDiff::empty()`.
    pub fn empty() -> Self {
        let row_counts = AddRemoveModifyCounts {
            added: 0,
            removed: 0,
            modified: 0,
        };
        let col_changes = TabularSchemaDiff::empty();

        Self {
            row_counts,
            col_changes,
        }
    }
}

impl TabularDiffSchemas {
    /// Returns a value whose `left`, `right` and `diff` schemas each come from
    /// `Schema::empty()`.
    pub fn empty() -> Self {
        // Built in the same left -> right -> diff order as the struct's fields.
        let left = Schema::empty();
        let right = Schema::empty();
        let diff = Schema::empty();

        Self { left, right, diff }
    }
}

impl TabularDiffParameters {
    pub fn empty() -> Self {
        TabularDiffParameters {
            keys: Vec::new(),
            targets: Vec::new(),
            display: Vec::new(),
        }
    }
}

impl TabularSchemaDiff {
    /// Computes which fields were added and removed going from `s1` to `s2`.
    ///
    /// A field counts as added when it is in `s2.fields` but not in `s1.fields`, and as
    /// removed when it is in `s1.fields` but not in `s2.fields`. Membership uses `Field`'s
    /// equality, and each result list keeps the order of the schema it was taken from.
    ///
    /// The visible code never produces an error; the function always returns `Ok`.
    pub fn from_schemas(s1: &Schema, s2: &Schema) -> Result<TabularSchemaDiff, OxenError> {
        // Collects clones of the fields of `present` that `other` does not contain.
        let only_in = |present: &Schema, other: &Schema| -> Vec<Field> {
            let mut missing: Vec<Field> = Vec::new();
            for candidate in present.fields.iter() {
                if other.fields.contains(candidate) {
                    continue;
                }
                missing.push(candidate.clone());
            }
            missing
        };

        let added = only_in(s2, s1);
        let removed = only_in(s1, s2);

        Ok(TabularSchemaDiff { added, removed })
    }

    /// Returns a diff with no added and no removed fields.
    pub fn empty() -> Self {
        Self {
            added: Vec::new(),
            removed: Vec::new(),
        }
    }
}

impl TabularDiff {
    /// Reports whether the summary records any modification.
    ///
    /// Returns `true` when any of the added / removed / modified row counters is greater
    /// than zero, or when at least one column was added or removed.
    pub fn has_changes(&self) -> bool {
        let mods = &self.summary.modifications;
        let rows = &mods.row_counts;
        let cols = &mods.col_changes;

        let rows_changed = rows.added > 0 || rows.removed > 0 || rows.modified > 0;
        let cols_changed = !(cols.added.is_empty() && cols.removed.is_empty());

        rows_changed || cols_changed
    }

    /// Returns a diff with no file names, an empty summary, empty parameters and an empty
    /// data frame.
    pub fn empty() -> Self {
        let summary = TabularDiffSummary::empty();
        let parameters = TabularDiffParameters::empty();
        let contents = DataFrame::empty();

        Self {
            filename1: None,
            filename2: None,
            summary,
            parameters,
            contents,
        }
    }
}

impl TabularDiffSummary {
    /// Returns a summary assembled from the `empty()` value of each of its parts.
    pub fn empty() -> Self {
        let modifications = TabularDiffMods::empty();
        let schemas = TabularDiffSchemas::empty();
        let dupes = TabularDiffDupes::empty();

        Self {
            modifications,
            schemas,
            dupes,
        }
    }
}