Skip to main content

data_01_schema_summary/
data_01_schema_summary.rs

1//! # data_01 — Inspecting a table with `schema_summary`
2//!
3//! Run: `cargo run -p matten-data --example data_01_schema_summary`
4//!
5//! ## What this shows
6//! How to look at a table *before* converting it: row count, column count,
7//! column names, the number of missing cells per column, and the simple inferred
8//! kind of each column.
9//!
10//! ## Teaching points
11//! - inspection is cheap and non-destructive — nothing is converted yet;
12//! - the schema summary is a small, printable description, not a query engine;
13//! - inferred kinds (`integer`, `float`, `boolean`, `text`, `mixed`,
14//!   `missing-only`) are a hint for which columns can become numeric.
15
16use matten_data::Table;
17
18fn main() -> Result<(), matten_data::MattenDataError> {
19    let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25    let table = Table::from_csv_str(csv)?;
26
27    // Top-level shape of the table.
28    println!("rows    : {}", table.row_count());
29    println!("columns : {}", table.column_count());
30    println!("names   : {:?}", table.column_names());
31
32    // A printable, one-glance summary (Table: R rows x C columns, then a line
33    // per column with its inferred kind and missing count).
34    let summary = table.schema_summary();
35    print!("{summary}");
36
37    // The same information, per column, if you want to act on it in code.
38    println!("--- per-column ---");
39    for col in summary.column_summaries() {
40        println!(
41            "{:<8} kind={:<7} missing={}",
42            col.name, col.kind, col.missing
43        );
44    }
45
46    // The "cost" column has exactly one missing cell (south).
47    let cost = summary
48        .column_summaries()
49        .iter()
50        .find(|c| c.name == "cost")
51        .expect("cost column exists");
52    assert_eq!(cost.missing, 1);
53    assert_eq!(table.row_count(), 3);
54    assert_eq!(table.column_count(), 4);
55
56    println!("data_01_schema_summary: OK");
57    Ok(())
58}