data_03_missing_values/data_03_missing_values.rs
1//! # data_03 — Missing values are explicit, never silent
2//!
3//! Run: `cargo run -p matten-data --example data_03_missing_values`
4//!
5//! ## What this shows
6//! `matten-data` never turns a missing cell into `0` behind your back. A missing
7//! value that reaches numeric conversion is an error; you must fill it first.
8//!
9//! ## Teaching points
10//! - converting a table with a missing cell fails with a precise
11//! `MissingValue { column, row }` error (row is the 1-based CSV line);
12//! - `fill_missing` replaces only the missing cells, with a value you choose;
13//! - after an explicit fill, conversion succeeds.
14
15use matten_data::{MattenDataError, Table};
16
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24 let table = Table::from_csv_str(csv)?;
25 let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27 // Converting with a missing cell still present is rejected — no silent zero.
28 match numeric_cols.try_numeric() {
29 Err(MattenDataError::MissingValue { column, row }) => {
30 println!("missing value blocked conversion: column={column}, csv_line={row}");
31 assert_eq!(column, "cost");
32 assert_eq!(row, 3); // header is line 1, so the south row is line 3
33 }
34 other => panic!("expected MissingValue, got {other:?}"),
35 }
36
37 // Decide explicitly what a missing cost means here, then convert.
38 let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40 println!("filled shape: {:?}", tensor.shape());
41 println!("filled data : {:?}", tensor.as_slice());
42
43 assert_eq!(tensor.shape(), &[3, 2]);
44 // Only the missing south/cost was filled; the other cells are untouched.
45 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47 println!("data_03_missing_values: OK");
48 Ok(())
49}