Skip to main content

data_03_missing_values/
data_03_missing_values.rs

1//! # data_03 — Missing values are explicit, never silent
2//!
3//! Run: `cargo run -p matten-data --example data_03_missing_values`
4//!
5//! ## What this shows
6//! `matten-data` never turns a missing cell into `0` behind your back. A missing
7//! value that reaches numeric conversion is an error; you must fill it first.
8//!
9//! ## Teaching points
10//! - converting a table with a missing cell fails with a precise
11//!   `MissingValue { column, row }` error (row is the 1-based CSV line);
12//! - `fill_missing` replaces only the missing cells, with a value you choose;
13//! - after an explicit fill, conversion succeeds.
14
15use matten_data::{MattenDataError, Table};
16
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24    let table = Table::from_csv_str(csv)?;
25    let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27    // Converting with a missing cell still present is rejected — no silent zero.
28    match numeric_cols.try_numeric() {
29        Err(MattenDataError::MissingValue { column, row }) => {
30            println!("missing value blocked conversion: column={column}, csv_line={row}");
31            assert_eq!(column, "cost");
32            assert_eq!(row, 3); // header is line 1, so the south row is line 3
33        }
34        other => panic!("expected MissingValue, got {other:?}"),
35    }
36
37    // Decide explicitly what a missing cost means here, then convert.
38    let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40    println!("filled shape: {:?}", tensor.shape());
41    println!("filled data : {:?}", tensor.as_slice());
42
43    assert_eq!(tensor.shape(), &[3, 2]);
44    // Only the missing south/cost was filled; the other cells are untouched.
45    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47    println!("data_03_missing_values: OK");
48    Ok(())
49}