//! rgwml 2.0.0
//!
//! Typed, local-first tabular data library with columnar in-memory storage.
//! Documentation examples, exercised as tests.
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};

use rgwml::{
    read_csv, write_csv, AggregateExpr, AggregateOp, ColumnSelector, CompareOp, CsvReadOptions,
    CsvWriteOptions, GroupKey, Literal, NullOrder, Predicate, SortKey, SortOrder,
};

/// Loads a small sales CSV, keeps the rows whose `revenue` exceeds 100, sorts
/// them by `segment`, writes the result out and reads it back to verify the
/// round trip.
#[test]
fn read_filter_sort_write_example_runs() {
    let source_csv =
        write_temp_csv("segment,revenue\nwidgets,200.0\nanalytics,150.0\nwidgets,90.0\n");
    let target_csv = temp_csv_path();
    let read_options = CsvReadOptions::default();

    let sales = read_csv(&source_csv, &read_options).expect("sales csv should load");

    // Only the 200.0 and 150.0 rows pass this predicate.
    let revenue_above_100 = Predicate::Comparison {
        column: ColumnSelector::from("revenue"),
        op: CompareOp::Gt,
        value: Some(Literal::F64(100.0)),
    };
    let high_revenue = sales
        .filter(&revenue_above_100)
        .expect("filter should succeed");

    let by_segment = [SortKey {
        column: ColumnSelector::from("segment"),
        order: SortOrder::Ascending,
        nulls: NullOrder::Last,
    }];
    let ordered = high_revenue
        .sort_by(&by_segment)
        .expect("sort should succeed");
    let sorted = ordered.materialize().expect("materialize should succeed");

    let write_options = CsvWriteOptions::default();
    write_csv(&sorted, &target_csv, &write_options).expect("write should succeed");

    let written = read_csv(&target_csv, &read_options).expect("round trip should load");
    assert_eq!(written.nrows(), 2);

    let first_segment = written
        .column_by_name("segment")
        .and_then(|segments| segments.utf8_value(0));
    assert_eq!(first_segment, Some("analytics"));

    let second_segment = written
        .column_by_name("segment")
        .and_then(|segments| segments.utf8_value(1));
    assert_eq!(second_segment, Some("widgets"));

    fs::remove_file(source_csv).expect("temp input csv should be removable");
    fs::remove_file(target_csv).expect("temp output csv should be removable");
}

/// Groups a small sales CSV by `segment`, computes a row count plus the sum
/// and mean of `revenue` per group, and checks the values after sorting the
/// groups by `segment`.
#[test]
fn group_and_aggregate_example_runs() {
    let source_csv =
        write_temp_csv("segment,revenue\nwidgets,200.0\nanalytics,150.0\nwidgets,90.0\n");
    let sales = read_csv(&source_csv, &CsvReadOptions::default()).expect("sales csv should load");

    let keys = [GroupKey {
        column: ColumnSelector::from("segment"),
    }];
    let row_count = AggregateExpr {
        input: None,
        op: AggregateOp::CountRows,
        alias: Arc::from("rows"),
    };
    let revenue_sum = AggregateExpr {
        input: Some(ColumnSelector::from("revenue")),
        op: AggregateOp::Sum,
        alias: Arc::from("revenue_sum"),
    };
    let revenue_mean = AggregateExpr {
        input: Some(ColumnSelector::from("revenue")),
        op: AggregateOp::Mean,
        alias: Arc::from("revenue_mean"),
    };
    let aggregates = [row_count, revenue_sum, revenue_mean];
    let by_segment = [SortKey {
        column: ColumnSelector::from("segment"),
        order: SortOrder::Ascending,
        nulls: NullOrder::Last,
    }];

    let summary = sales
        .group_by(&keys, &aggregates)
        .expect("group by should succeed");
    let ordered = summary.sort_by(&by_segment).expect("sort should succeed");
    let grouped = ordered.materialize().expect("materialize should succeed");

    assert_eq!(grouped.nrows(), 2);

    // Row 0 is the "analytics" group, which has a single input row.
    let first_segment = grouped
        .column_by_name("segment")
        .and_then(|segments| segments.utf8_value(0));
    assert_eq!(first_segment, Some("analytics"));

    let first_count = grouped
        .column_by_name("rows")
        .and_then(|counts| counts.i64_value(0));
    assert_eq!(first_count, Some(1));

    // Row 1 is checked against the two "widgets" rows: 200.0 + 90.0.
    let second_sum = grouped
        .column_by_name("revenue_sum")
        .and_then(|sums| sums.f64_value(1));
    assert_eq!(second_sum, Some(290.0));

    let second_mean = grouped
        .column_by_name("revenue_mean")
        .and_then(|means| means.f64_value(1));
    assert_eq!(second_mean, Some(145.0));

    fs::remove_file(source_csv).expect("temp input csv should be removable");
}

/// Writes `contents` to a newly chosen temp CSV path and returns that path.
///
/// Panics if the file cannot be written.
fn write_temp_csv(contents: &str) -> PathBuf {
    let destination = temp_csv_path();
    fs::write(destination.as_path(), contents).expect("temp csv should be writable");
    destination
}

/// Returns a fresh path for a scratch CSV file inside the system temp directory.
///
/// The file itself is not created. The file name combines the process id, the
/// current wall-clock time in nanoseconds, and a process-wide sequence number,
/// so two calls never yield the same path even when the clock does not advance
/// between them or when tests run on parallel threads.
///
/// Panics if the system clock reports a time before the Unix epoch.
fn temp_csv_path() -> PathBuf {
    use std::sync::atomic::{AtomicU64, Ordering};

    // Shared by every caller in this process; guarantees distinct names even
    // when two calls observe the same timestamp.
    static NEXT_SEQUENCE: AtomicU64 = AtomicU64::new(0);

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("clock should be after epoch")
        .as_nanos();
    // The process id separates concurrently running test binaries.
    let pid = std::process::id();
    // Relaxed is enough: only the uniqueness of the returned value matters,
    // not its ordering relative to other memory operations.
    let sequence = NEXT_SEQUENCE.fetch_add(1, Ordering::Relaxed);

    std::env::temp_dir().join(format!(
        "rgwml_readme_example_{pid}_{nanos}_{sequence}.csv"
    ))
}