liboxen 0.48.0

Oxen is a fast version control system for unstructured data, written in Rust, that helps version large machine learning datasets.
Documentation
//! # oxen df
//!
//! Interact with DataFrames
//!

use std::path::Path;

use crate::core::df::tabular;
use crate::error::OxenError;
use crate::model::LocalRepository;
use crate::opts::DFOpts;
use crate::{repositories, util};

/// Load the DataFrame at `input` with the given `opts`, then optionally write it out.
///
/// The frame is produced by `tabular::show_path`. Afterwards it is written to
/// `opts.write` and then to `opts.output`, for whichever of the two is set.
/// Each write is announced on stdout before it happens.
///
/// # Errors
///
/// Propagates any `OxenError` returned while loading the frame or while
/// writing it to either destination.
pub async fn df(input: impl AsRef<Path>, opts: DFOpts) -> Result<(), OxenError> {
    // Capture the destinations first so `opts` itself can be handed to the loader.
    let write_target = opts.write.clone();
    let output_target = opts.output.clone();

    let mut frame = tabular::show_path(input, opts).await?;

    if let Some(dest) = write_target {
        println!("Writing {dest:?}");
        tabular::write_df(&mut frame, dest)?;
    }

    if let Some(dest) = output_target {
        println!("Writing {dest:?}");
        tabular::write_df(&mut frame, dest)?;
    }

    Ok(())
}

/// Load the DataFrame stored at `input` as of `revision` in `repo`.
///
/// `revision` is resolved to a commit, the merkle tree node for `input` is
/// looked up in that commit, and the frame is built from that node with
/// `tabular::show_node`. When `opts.output` is set, the frame is written there
/// after a notice is printed to stdout.
///
/// # Errors
///
/// Returns an `OxenError` when `revision` does not resolve to a commit, when no
/// node is found for `input` in that commit, or when any of the underlying
/// lookups, the frame construction, or the write fails.
pub async fn df_revision(
    repo: &LocalRepository,
    input: impl AsRef<Path>,
    revision: impl AsRef<str>,
    opts: DFOpts,
) -> Result<(), OxenError> {
    let Some(commit) = repositories::revisions::get(repo, &revision)? else {
        return Err(OxenError::basic_str(format!(
            "Revision {} not found",
            revision.as_ref()
        )));
    };

    let node =
        repositories::tree::get_node_by_path_with_children(repo, &commit, input.as_ref())?
            .ok_or_else(|| {
                OxenError::basic_str(format!(
                    "Merkle tree for revision {} not found",
                    revision.as_ref()
                ))
            })?;

    let mut frame = tabular::show_node(repo.clone(), &node, opts.clone()).await?;

    if let Some(dest) = opts.output {
        println!("Writing {dest:?}");
        tabular::write_df(&mut frame, dest)?;
    }

    Ok(())
}

/// Render the schema of the DataFrame at `input` as a human-readable string.
///
/// This forwards `input`, `flatten`, and a borrow of `opts` to
/// `tabular::schema_to_string` and returns its result unchanged.
///
/// # Errors
///
/// Returns whatever `OxenError` `tabular::schema_to_string` reports.
pub fn schema<P>(input: P, flatten: bool, opts: DFOpts) -> Result<String, OxenError>
where
    P: AsRef<Path>,
{
    tabular::schema_to_string(input, flatten, &opts)
}

/// Add a column described by `data` to the tabular file at `path`.
///
/// The work is delegated to [`df`] with `add_col` set to `data` and `output`
/// set to `path`, so the result is written back to the same file.
///
/// # Errors
///
/// Returns an `OxenError` if `util::fs::is_tabular` rejects `path`, or if the
/// delegated [`df`] call fails.
pub async fn add_column(path: &Path, data: &str) -> Result<(), OxenError> {
    // Guard: only tabular files can take a new column.
    if !util::fs::is_tabular(path) {
        let msg = format!("{} is not a tabular file", path.display());
        return Err(OxenError::basic_str(msg));
    }

    let mut column_opts = DFOpts::empty();
    column_opts.add_col = Some(data.to_string());
    column_opts.output = Some(path.to_path_buf());

    df(path, column_opts).await
}