Skip to main content

csv_to_tensor/
csv_to_tensor.rs

//! # Companion example: CSV string -> clean -> numeric `Tensor` (matten-data)
//!
//! Run: cargo run -p matten-data --example csv_to_tensor
//!
//! ## What this shows
//! The canonical `matten-data` workflow: parse a small messy CSV, inspect it,
//! select numeric columns, fill a missing cell, and convert explicitly to a
//! `[rows, columns]` f64 `Tensor`.
//!
//! ## Teaching points
//! - `matten-data` is **Beta** and intentionally small;
//! - it is **not** a dataframe: no group-by, join, merge, pivot, or query;
//! - missing values and numeric conversion are **explicit** (`fill_missing` then
//!   `try_numeric`), never silent;
//! - the output is a plain numeric `Tensor` of shape `[rows, columns]`.
16
17use matten_data::Table;
18
19fn main() -> Result<(), matten_data::MattenDataError> {
20    // A small, messy table: a text column and one missing numeric cell.
21    let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27    let table = Table::from_csv_str(csv)?;
28
29    // Inspect what we have before converting anything.
30    println!("{}", table.schema_summary());
31
32    // Select only the numeric columns we want, fill the one missing cost with 0,
33    // convert explicitly, and produce a [rows, columns] f64 tensor.
34    let tensor = table
35        .select_columns(["sales", "cost", "quantity"])?
36        .fill_missing(0.0)?
37        .try_numeric()?
38        .to_tensor()?;
39
40    println!("tensor shape: {:?}", tensor.shape());
41    println!("tensor data : {:?}", tensor.as_slice());
42
43    // 3 rows x 3 columns; the missing south/cost became 0.0.
44    assert_eq!(tensor.shape(), &[3, 3]);
45    assert_eq!(
46        tensor.as_slice(),
47        &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48    );
49    println!("csv_to_tensor: OK");
50    Ok(())
51}