pub struct Table { /* private fields */ }
Expand description
A small, owned, rectangular table-like data set.
External guarantees: row order is preserved; column order is preserved; column
names are stable after loading. Operations return new owned Table values; no
borrowed view lifetimes appear in normal use.
Implementations§
Source§impl Table
impl Table
Source
pub fn from_csv_str(input: &str) -> Result<Table, MattenDataError>
pub fn from_csv_str(input: &str) -> Result<Table, MattenDataError>
Parse a `Table` from a CSV string.
The first row is the header. Empty input, empty or duplicate header names,
and ragged rows are each reported as a `MattenDataError`; parsing never panics.
use matten_data::Table;
let table = Table::from_csv_str("a,b\n1,2\n3,4").unwrap();
assert_eq!(table.row_count(), 2);
assert_eq!(table.column_names(), &["a".to_string(), "b".to_string()]);
Examples found in repository?
20fn main() -> Result<(), matten_data::MattenDataError> {
21 // A small table with one text column and one missing numeric cell.
22 let csv = "\
23region,sales,cost
24north,100,40
25south,150,
26east,120,55";
27
28 let tensor = Table::from_csv_str(csv)?
29 .select_columns(["sales", "cost"])? // keep only the numeric columns
30 .fill_missing(0.0)? // the missing south/cost becomes 0.0, explicitly
31 .try_numeric()? // strict, explicit conversion to f64
32 .to_tensor()?; // a normal [rows, columns] Tensor
33
34 println!("shape: {:?}", tensor.shape());
35 println!("data : {:?}", tensor.as_slice());
36
37 assert_eq!(tensor.shape(), &[3, 2]);
38 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
39
40 println!("data_00_quickstart: OK");
41 Ok(())
42}
More examples
19fn main() -> Result<(), matten_data::MattenDataError> {
20 // A small, messy table: a text column and one missing numeric cell.
21 let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27 let table = Table::from_csv_str(csv)?;
28
29 // Inspect what we have before converting anything.
30 println!("{}", table.schema_summary());
31
32 // Select only the numeric columns we want, fill the one missing cost with 0,
33 // convert explicitly, and produce a [rows, columns] f64 tensor.
34 let tensor = table
35 .select_columns(["sales", "cost", "quantity"])?
36 .fill_missing(0.0)?
37 .try_numeric()?
38 .to_tensor()?;
39
40 println!("tensor shape: {:?}", tensor.shape());
41 println!("tensor data : {:?}", tensor.as_slice());
42
43 // 3 rows x 3 columns; the missing south/cost became 0.0.
44 assert_eq!(tensor.shape(), &[3, 3]);
45 assert_eq!(
46 tensor.as_slice(),
47 &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48 );
49 println!("csv_to_tensor: OK");
50 Ok(())
51}
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23 let table = Table::from_csv_str(csv)?;
24
25 // Ask for a subset in a deliberately different order than the CSV.
26 let reordered = table.select_columns(["quantity", "sales"])?;
27 println!("selected names: {:?}", reordered.column_names());
28
29 // The output order is exactly what was requested.
30 assert_eq!(
31 reordered.column_names(),
32 &["quantity".to_string(), "sales".to_string()]
33 );
34 assert_eq!(reordered.column_count(), 2);
35 assert_eq!(reordered.row_count(), 2);
36
37 // Asking for a column that does not exist is a structured error.
38 match table.select_columns(["sales", "profit"]) {
39 Err(MattenDataError::MissingColumn { name }) => {
40 println!("missing column reported: {name}");
41 assert_eq!(name, "profit");
42 }
43 other => panic!("expected MissingColumn, got {other:?}"),
44 }
45
46 println!("data_02_select_columns: OK");
47 Ok(())
48}
19fn main() -> Result<(), matten_data::MattenDataError> {
20 let csv = "\
21region,sales,cost
22north,100,40
23south,150,45
24east,120,55";
25
26 let tensor: Tensor = Table::from_csv_str(csv)?
27 .select_columns(["sales", "cost"])?
28 .try_numeric()?
29 .to_tensor()?;
30
31 // [rows, columns] = [3, 2].
32 assert_eq!(tensor.shape(), &[3, 2]);
33
34 // Row-major: [north.sales, north.cost, south.sales, south.cost, ...].
35 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 45.0, 120.0, 55.0]);
36
37 // It is a plain Tensor, so core `matten` operations apply. Mean over axis 0
38 // (down the rows) gives the per-column mean: [mean(sales), mean(cost)].
39 let column_means = tensor.mean_axis(0);
40 println!("shape : {:?}", tensor.shape());
41 println!("column means : {:?}", column_means.as_slice());
42
43 assert_eq!(column_means.shape(), &[2]);
44 assert_eq!(
45 column_means.as_slice(),
46 &[(100.0 + 150.0 + 120.0) / 3.0, (40.0 + 45.0 + 55.0) / 3.0]
47 );
48
49 println!("data_04_to_tensor: OK");
50 Ok(())
51}
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24 let table = Table::from_csv_str(csv)?;
25 let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27 // Converting with a missing cell still present is rejected — no silent zero.
28 match numeric_cols.try_numeric() {
29 Err(MattenDataError::MissingValue { column, row }) => {
30 println!("missing value blocked conversion: column={column}, csv_line={row}");
31 assert_eq!(column, "cost");
32 assert_eq!(row, 3); // header is line 1, so the south row is line 3
33 }
34 other => panic!("expected MissingValue, got {other:?}"),
35 }
36
37 // Decide explicitly what a missing cost means here, then convert.
38 let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40 println!("filled shape: {:?}", tensor.shape());
41 println!("filled data : {:?}", tensor.as_slice());
42
43 assert_eq!(tensor.shape(), &[3, 2]);
44 // Only the missing south/cost was filled; the other cells are untouched.
45 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47 println!("data_03_missing_values: OK");
48 Ok(())
49}
18fn main() -> Result<(), matten_data::MattenDataError> {
19 let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25 let table = Table::from_csv_str(csv)?;
26
27 // Top-level shape of the table.
28 println!("rows : {}", table.row_count());
29 println!("columns : {}", table.column_count());
30 println!("names : {:?}", table.column_names());
31
32 // A printable, one-glance summary (Table: R rows x C columns, then a line
33 // per column with its inferred kind and missing count).
34 let summary = table.schema_summary();
35 print!("{summary}");
36
37 // The same information, per column, if you want to act on it in code.
38 println!("--- per-column ---");
39 for col in summary.column_summaries() {
40 println!(
41 "{:<8} kind={:<7} missing={}",
42 col.name, col.kind, col.missing
43 );
44 }
45
46 // The "cost" column has exactly one missing cell (south).
47 let cost = summary
48 .column_summaries()
49 .iter()
50 .find(|c| c.name == "cost")
51 .expect("cost column exists");
52 assert_eq!(cost.missing, 1);
53 assert_eq!(table.row_count(), 3);
54 assert_eq!(table.column_count(), 4);
55
56 println!("data_01_schema_summary: OK");
57 Ok(())
58}
Source
pub fn from_csv_path<P: AsRef<Path>>(path: P) -> Result<Table, MattenDataError>
pub fn from_csv_path<P: AsRef<Path>>(path: P) -> Result<Table, MattenDataError>
Parse a `Table` from the CSV file at `path`.
I/O failures (for example, a missing file) are reported as
`MattenDataError::Io`, with the path and the underlying error preserved.
Source§impl Table
impl Table
Source
pub fn row_count(&self) -> usize
pub fn row_count(&self) -> usize
Number of data rows; the header row is not counted.
Examples found in repository?
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23 let table = Table::from_csv_str(csv)?;
24
25 // Ask for a subset in a deliberately different order than the CSV.
26 let reordered = table.select_columns(["quantity", "sales"])?;
27 println!("selected names: {:?}", reordered.column_names());
28
29 // The output order is exactly what was requested.
30 assert_eq!(
31 reordered.column_names(),
32 &["quantity".to_string(), "sales".to_string()]
33 );
34 assert_eq!(reordered.column_count(), 2);
35 assert_eq!(reordered.row_count(), 2);
36
37 // Asking for a column that does not exist is a structured error.
38 match table.select_columns(["sales", "profit"]) {
39 Err(MattenDataError::MissingColumn { name }) => {
40 println!("missing column reported: {name}");
41 assert_eq!(name, "profit");
42 }
43 other => panic!("expected MissingColumn, got {other:?}"),
44 }
45
46 println!("data_02_select_columns: OK");
47 Ok(())
48}
More examples
18fn main() -> Result<(), matten_data::MattenDataError> {
19 let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25 let table = Table::from_csv_str(csv)?;
26
27 // Top-level shape of the table.
28 println!("rows : {}", table.row_count());
29 println!("columns : {}", table.column_count());
30 println!("names : {:?}", table.column_names());
31
32 // A printable, one-glance summary (Table: R rows x C columns, then a line
33 // per column with its inferred kind and missing count).
34 let summary = table.schema_summary();
35 print!("{summary}");
36
37 // The same information, per column, if you want to act on it in code.
38 println!("--- per-column ---");
39 for col in summary.column_summaries() {
40 println!(
41 "{:<8} kind={:<7} missing={}",
42 col.name, col.kind, col.missing
43 );
44 }
45
46 // The "cost" column has exactly one missing cell (south).
47 let cost = summary
48 .column_summaries()
49 .iter()
50 .find(|c| c.name == "cost")
51 .expect("cost column exists");
52 assert_eq!(cost.missing, 1);
53 assert_eq!(table.row_count(), 3);
54 assert_eq!(table.column_count(), 4);
55
56 println!("data_01_schema_summary: OK");
57 Ok(())
58}
Source
pub fn column_count(&self) -> usize
pub fn column_count(&self) -> usize
Number of columns.
Examples found in repository?
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23 let table = Table::from_csv_str(csv)?;
24
25 // Ask for a subset in a deliberately different order than the CSV.
26 let reordered = table.select_columns(["quantity", "sales"])?;
27 println!("selected names: {:?}", reordered.column_names());
28
29 // The output order is exactly what was requested.
30 assert_eq!(
31 reordered.column_names(),
32 &["quantity".to_string(), "sales".to_string()]
33 );
34 assert_eq!(reordered.column_count(), 2);
35 assert_eq!(reordered.row_count(), 2);
36
37 // Asking for a column that does not exist is a structured error.
38 match table.select_columns(["sales", "profit"]) {
39 Err(MattenDataError::MissingColumn { name }) => {
40 println!("missing column reported: {name}");
41 assert_eq!(name, "profit");
42 }
43 other => panic!("expected MissingColumn, got {other:?}"),
44 }
45
46 println!("data_02_select_columns: OK");
47 Ok(())
48}
More examples
18fn main() -> Result<(), matten_data::MattenDataError> {
19 let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25 let table = Table::from_csv_str(csv)?;
26
27 // Top-level shape of the table.
28 println!("rows : {}", table.row_count());
29 println!("columns : {}", table.column_count());
30 println!("names : {:?}", table.column_names());
31
32 // A printable, one-glance summary (Table: R rows x C columns, then a line
33 // per column with its inferred kind and missing count).
34 let summary = table.schema_summary();
35 print!("{summary}");
36
37 // The same information, per column, if you want to act on it in code.
38 println!("--- per-column ---");
39 for col in summary.column_summaries() {
40 println!(
41 "{:<8} kind={:<7} missing={}",
42 col.name, col.kind, col.missing
43 );
44 }
45
46 // The "cost" column has exactly one missing cell (south).
47 let cost = summary
48 .column_summaries()
49 .iter()
50 .find(|c| c.name == "cost")
51 .expect("cost column exists");
52 assert_eq!(cost.missing, 1);
53 assert_eq!(table.row_count(), 3);
54 assert_eq!(table.column_count(), 4);
55
56 println!("data_01_schema_summary: OK");
57 Ok(())
58}
Source
pub fn column_names(&self) -> &[String]
pub fn column_names(&self) -> &[String]
Column names, in column order.
Examples found in repository?
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23 let table = Table::from_csv_str(csv)?;
24
25 // Ask for a subset in a deliberately different order than the CSV.
26 let reordered = table.select_columns(["quantity", "sales"])?;
27 println!("selected names: {:?}", reordered.column_names());
28
29 // The output order is exactly what was requested.
30 assert_eq!(
31 reordered.column_names(),
32 &["quantity".to_string(), "sales".to_string()]
33 );
34 assert_eq!(reordered.column_count(), 2);
35 assert_eq!(reordered.row_count(), 2);
36
37 // Asking for a column that does not exist is a structured error.
38 match table.select_columns(["sales", "profit"]) {
39 Err(MattenDataError::MissingColumn { name }) => {
40 println!("missing column reported: {name}");
41 assert_eq!(name, "profit");
42 }
43 other => panic!("expected MissingColumn, got {other:?}"),
44 }
45
46 println!("data_02_select_columns: OK");
47 Ok(())
48}
More examples
18fn main() -> Result<(), matten_data::MattenDataError> {
19 let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25 let table = Table::from_csv_str(csv)?;
26
27 // Top-level shape of the table.
28 println!("rows : {}", table.row_count());
29 println!("columns : {}", table.column_count());
30 println!("names : {:?}", table.column_names());
31
32 // A printable, one-glance summary (Table: R rows x C columns, then a line
33 // per column with its inferred kind and missing count).
34 let summary = table.schema_summary();
35 print!("{summary}");
36
37 // The same information, per column, if you want to act on it in code.
38 println!("--- per-column ---");
39 for col in summary.column_summaries() {
40 println!(
41 "{:<8} kind={:<7} missing={}",
42 col.name, col.kind, col.missing
43 );
44 }
45
46 // The "cost" column has exactly one missing cell (south).
47 let cost = summary
48 .column_summaries()
49 .iter()
50 .find(|c| c.name == "cost")
51 .expect("cost column exists");
52 assert_eq!(cost.missing, 1);
53 assert_eq!(table.row_count(), 3);
54 assert_eq!(table.column_count(), 4);
55
56 println!("data_01_schema_summary: OK");
57 Ok(())
58}
Source
pub fn schema_summary(&self) -> SchemaSummary
pub fn schema_summary(&self) -> SchemaSummary
A small, displayable schema summary (row/column counts, per-column missing counts and inferred kinds). Does not perform expensive analysis.
Examples found in repository?
19fn main() -> Result<(), matten_data::MattenDataError> {
20 // A small, messy table: a text column and one missing numeric cell.
21 let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27 let table = Table::from_csv_str(csv)?;
28
29 // Inspect what we have before converting anything.
30 println!("{}", table.schema_summary());
31
32 // Select only the numeric columns we want, fill the one missing cost with 0,
33 // convert explicitly, and produce a [rows, columns] f64 tensor.
34 let tensor = table
35 .select_columns(["sales", "cost", "quantity"])?
36 .fill_missing(0.0)?
37 .try_numeric()?
38 .to_tensor()?;
39
40 println!("tensor shape: {:?}", tensor.shape());
41 println!("tensor data : {:?}", tensor.as_slice());
42
43 // 3 rows x 3 columns; the missing south/cost became 0.0.
44 assert_eq!(tensor.shape(), &[3, 3]);
45 assert_eq!(
46 tensor.as_slice(),
47 &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48 );
49 println!("csv_to_tensor: OK");
50 Ok(())
51}
More examples
18fn main() -> Result<(), matten_data::MattenDataError> {
19 let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25 let table = Table::from_csv_str(csv)?;
26
27 // Top-level shape of the table.
28 println!("rows : {}", table.row_count());
29 println!("columns : {}", table.column_count());
30 println!("names : {:?}", table.column_names());
31
32 // A printable, one-glance summary (Table: R rows x C columns, then a line
33 // per column with its inferred kind and missing count).
34 let summary = table.schema_summary();
35 print!("{summary}");
36
37 // The same information, per column, if you want to act on it in code.
38 println!("--- per-column ---");
39 for col in summary.column_summaries() {
40 println!(
41 "{:<8} kind={:<7} missing={}",
42 col.name, col.kind, col.missing
43 );
44 }
45
46 // The "cost" column has exactly one missing cell (south).
47 let cost = summary
48 .column_summaries()
49 .iter()
50 .find(|c| c.name == "cost")
51 .expect("cost column exists");
52 assert_eq!(cost.missing, 1);
53 assert_eq!(table.row_count(), 3);
54 assert_eq!(table.column_count(), 4);
55
56 println!("data_01_schema_summary: OK");
57 Ok(())
58}
Source
pub fn select_columns<I, S>(&self, columns: I) -> Result<Table, MattenDataError>
pub fn select_columns<I, S>(&self, columns: I) -> Result<Table, MattenDataError>
Select columns by name, returning a new `Table`.
Behavior (RFC-034 §5.3): the result preserves the requested column order.
A requested column that does not exist is rejected with `MattenDataError::MissingColumn`;
a duplicate selection is rejected with `MattenDataError::DuplicateSelection`;
an empty selection is rejected with `MattenDataError::EmptySelection`.
Examples found in repository?
20fn main() -> Result<(), matten_data::MattenDataError> {
21 // A small table with one text column and one missing numeric cell.
22 let csv = "\
23region,sales,cost
24north,100,40
25south,150,
26east,120,55";
27
28 let tensor = Table::from_csv_str(csv)?
29 .select_columns(["sales", "cost"])? // keep only the numeric columns
30 .fill_missing(0.0)? // the missing south/cost becomes 0.0, explicitly
31 .try_numeric()? // strict, explicit conversion to f64
32 .to_tensor()?; // a normal [rows, columns] Tensor
33
34 println!("shape: {:?}", tensor.shape());
35 println!("data : {:?}", tensor.as_slice());
36
37 assert_eq!(tensor.shape(), &[3, 2]);
38 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
39
40 println!("data_00_quickstart: OK");
41 Ok(())
42}
More examples
19fn main() -> Result<(), matten_data::MattenDataError> {
20 // A small, messy table: a text column and one missing numeric cell.
21 let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27 let table = Table::from_csv_str(csv)?;
28
29 // Inspect what we have before converting anything.
30 println!("{}", table.schema_summary());
31
32 // Select only the numeric columns we want, fill the one missing cost with 0,
33 // convert explicitly, and produce a [rows, columns] f64 tensor.
34 let tensor = table
35 .select_columns(["sales", "cost", "quantity"])?
36 .fill_missing(0.0)?
37 .try_numeric()?
38 .to_tensor()?;
39
40 println!("tensor shape: {:?}", tensor.shape());
41 println!("tensor data : {:?}", tensor.as_slice());
42
43 // 3 rows x 3 columns; the missing south/cost became 0.0.
44 assert_eq!(tensor.shape(), &[3, 3]);
45 assert_eq!(
46 tensor.as_slice(),
47 &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48 );
49 println!("csv_to_tensor: OK");
50 Ok(())
51}
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23 let table = Table::from_csv_str(csv)?;
24
25 // Ask for a subset in a deliberately different order than the CSV.
26 let reordered = table.select_columns(["quantity", "sales"])?;
27 println!("selected names: {:?}", reordered.column_names());
28
29 // The output order is exactly what was requested.
30 assert_eq!(
31 reordered.column_names(),
32 &["quantity".to_string(), "sales".to_string()]
33 );
34 assert_eq!(reordered.column_count(), 2);
35 assert_eq!(reordered.row_count(), 2);
36
37 // Asking for a column that does not exist is a structured error.
38 match table.select_columns(["sales", "profit"]) {
39 Err(MattenDataError::MissingColumn { name }) => {
40 println!("missing column reported: {name}");
41 assert_eq!(name, "profit");
42 }
43 other => panic!("expected MissingColumn, got {other:?}"),
44 }
45
46 println!("data_02_select_columns: OK");
47 Ok(())
48}
19fn main() -> Result<(), matten_data::MattenDataError> {
20 let csv = "\
21region,sales,cost
22north,100,40
23south,150,45
24east,120,55";
25
26 let tensor: Tensor = Table::from_csv_str(csv)?
27 .select_columns(["sales", "cost"])?
28 .try_numeric()?
29 .to_tensor()?;
30
31 // [rows, columns] = [3, 2].
32 assert_eq!(tensor.shape(), &[3, 2]);
33
34 // Row-major: [north.sales, north.cost, south.sales, south.cost, ...].
35 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 45.0, 120.0, 55.0]);
36
37 // It is a plain Tensor, so core `matten` operations apply. Mean over axis 0
38 // (down the rows) gives the per-column mean: [mean(sales), mean(cost)].
39 let column_means = tensor.mean_axis(0);
40 println!("shape : {:?}", tensor.shape());
41 println!("column means : {:?}", column_means.as_slice());
42
43 assert_eq!(column_means.shape(), &[2]);
44 assert_eq!(
45 column_means.as_slice(),
46 &[(100.0 + 150.0 + 120.0) / 3.0, (40.0 + 45.0 + 55.0) / 3.0]
47 );
48
49 println!("data_04_to_tensor: OK");
50 Ok(())
51}
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24 let table = Table::from_csv_str(csv)?;
25 let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27 // Converting with a missing cell still present is rejected — no silent zero.
28 match numeric_cols.try_numeric() {
29 Err(MattenDataError::MissingValue { column, row }) => {
30 println!("missing value blocked conversion: column={column}, csv_line={row}");
31 assert_eq!(column, "cost");
32 assert_eq!(row, 3); // header is line 1, so the south row is line 3
33 }
34 other => panic!("expected MissingValue, got {other:?}"),
35 }
36
37 // Decide explicitly what a missing cost means here, then convert.
38 let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40 println!("filled shape: {:?}", tensor.shape());
41 println!("filled data : {:?}", tensor.as_slice());
42
43 assert_eq!(tensor.shape(), &[3, 2]);
44 // Only the missing south/cost was filled; the other cells are untouched.
45 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47 println!("data_03_missing_values: OK");
48 Ok(())
49}
20fn main() {
21 // 1. Duplicate header column.
22 match Table::from_csv_str("sales,sales\n1,2") {
23 Err(MattenDataError::DuplicateColumn { name }) => {
24 println!("duplicate header : {name}");
25 assert_eq!(name, "sales");
26 }
27 other => panic!("expected DuplicateColumn, got {other:?}"),
28 }
29
30 // 2. Ragged row: the second data row (CSV line 3) has too few cells.
31 match Table::from_csv_str("a,b,c\n1,2,3\n4,5") {
32 Err(MattenDataError::RaggedRow {
33 row,
34 expected,
35 actual,
36 }) => {
37 println!("ragged row : line {row}, expected {expected}, got {actual}");
38 assert_eq!((row, expected, actual), (3, 3, 2));
39 }
40 other => panic!("expected RaggedRow, got {other:?}"),
41 }
42
43 // 3. Non-numeric value during conversion.
44 let with_text = Table::from_csv_str("label,value\nok,10\nbad,oops")
45 .expect("parses fine; the text only fails at numeric conversion");
46 match with_text
47 .select_columns(["value"])
48 .and_then(|t| t.try_numeric())
49 {
50 Err(MattenDataError::NonNumericValue { column, row, value }) => {
51 println!("non-numeric value: column={column}, line={row}, value={value:?}");
52 assert_eq!((column.as_str(), row, value.as_str()), ("value", 3, "oops"));
53 }
54 other => panic!("expected NonNumericValue, got {other:?}"),
55 }
56
57 // 4. Missing value during conversion (fill it first to proceed).
58 let with_missing =
59 Table::from_csv_str("a,b\n1,2\n3,").expect("missing cell is allowed in a Table");
60 match with_missing.try_numeric() {
61 Err(MattenDataError::MissingValue { column, row }) => {
62 println!("missing value : column={column}, line={row}");
63 assert_eq!((column.as_str(), row), ("b", 3));
64 }
65 other => panic!("expected MissingValue, got {other:?}"),
66 }
67
68 println!("data_05_errors: OK");
69}
Source
pub fn fill_missing(
&self,
value: impl Into<CellValue>,
) -> Result<Table, MattenDataError>
pub fn fill_missing(&self, value: impl Into<CellValue>) -> Result<Table, MattenDataError>
Fill every missing cell with `value`, returning a new `Table`.
Missing values are never silently turned into zero; filling is always explicit (RFC-035 §6). Non-missing cells and the table's shape are unchanged.
Examples found in repository?
20fn main() -> Result<(), matten_data::MattenDataError> {
21 // A small table with one text column and one missing numeric cell.
22 let csv = "\
23region,sales,cost
24north,100,40
25south,150,
26east,120,55";
27
28 let tensor = Table::from_csv_str(csv)?
29 .select_columns(["sales", "cost"])? // keep only the numeric columns
30 .fill_missing(0.0)? // the missing south/cost becomes 0.0, explicitly
31 .try_numeric()? // strict, explicit conversion to f64
32 .to_tensor()?; // a normal [rows, columns] Tensor
33
34 println!("shape: {:?}", tensor.shape());
35 println!("data : {:?}", tensor.as_slice());
36
37 assert_eq!(tensor.shape(), &[3, 2]);
38 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
39
40 println!("data_00_quickstart: OK");
41 Ok(())
42}
More examples
19fn main() -> Result<(), matten_data::MattenDataError> {
20 // A small, messy table: a text column and one missing numeric cell.
21 let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27 let table = Table::from_csv_str(csv)?;
28
29 // Inspect what we have before converting anything.
30 println!("{}", table.schema_summary());
31
32 // Select only the numeric columns we want, fill the one missing cost with 0,
33 // convert explicitly, and produce a [rows, columns] f64 tensor.
34 let tensor = table
35 .select_columns(["sales", "cost", "quantity"])?
36 .fill_missing(0.0)?
37 .try_numeric()?
38 .to_tensor()?;
39
40 println!("tensor shape: {:?}", tensor.shape());
41 println!("tensor data : {:?}", tensor.as_slice());
42
43 // 3 rows x 3 columns; the missing south/cost became 0.0.
44 assert_eq!(tensor.shape(), &[3, 3]);
45 assert_eq!(
46 tensor.as_slice(),
47 &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48 );
49 println!("csv_to_tensor: OK");
50 Ok(())
51}
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24 let table = Table::from_csv_str(csv)?;
25 let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27 // Converting with a missing cell still present is rejected — no silent zero.
28 match numeric_cols.try_numeric() {
29 Err(MattenDataError::MissingValue { column, row }) => {
30 println!("missing value blocked conversion: column={column}, csv_line={row}");
31 assert_eq!(column, "cost");
32 assert_eq!(row, 3); // header is line 1, so the south row is line 3
33 }
34 other => panic!("expected MissingValue, got {other:?}"),
35 }
36
37 // Decide explicitly what a missing cost means here, then convert.
38 let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40 println!("filled shape: {:?}", tensor.shape());
41 println!("filled data : {:?}", tensor.as_slice());
42
43 assert_eq!(tensor.shape(), &[3, 2]);
44 // Only the missing south/cost was filled; the other cells are untouched.
45 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47 println!("data_03_missing_values: OK");
48 Ok(())
49}
Source
pub fn try_numeric(&self) -> Result<NumericTable, MattenDataError>
pub fn try_numeric(&self) -> Result<NumericTable, MattenDataError>
Convert the table to an explicit numeric table (RFC-035 §7).
Strict conversion: `Int` and `Float` cells become `f64`; `Bool` and `Text` cells are
rejected (`MattenDataError::NonNumericValue`); a remaining `Missing` cell
is rejected (`MattenDataError::MissingValue`). `Text` is never parsed as a
number by default.
Examples found in repository?
20fn main() -> Result<(), matten_data::MattenDataError> {
21 // A small table with one text column and one missing numeric cell.
22 let csv = "\
23region,sales,cost
24north,100,40
25south,150,
26east,120,55";
27
28 let tensor = Table::from_csv_str(csv)?
29 .select_columns(["sales", "cost"])? // keep only the numeric columns
30 .fill_missing(0.0)? // the missing south/cost becomes 0.0, explicitly
31 .try_numeric()? // strict, explicit conversion to f64
32 .to_tensor()?; // a normal [rows, columns] Tensor
33
34 println!("shape: {:?}", tensor.shape());
35 println!("data : {:?}", tensor.as_slice());
36
37 assert_eq!(tensor.shape(), &[3, 2]);
38 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
39
40 println!("data_00_quickstart: OK");
41 Ok(())
42}
More examples
19fn main() -> Result<(), matten_data::MattenDataError> {
20 // A small, messy table: a text column and one missing numeric cell.
21 let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27 let table = Table::from_csv_str(csv)?;
28
29 // Inspect what we have before converting anything.
30 println!("{}", table.schema_summary());
31
32 // Select only the numeric columns we want, fill the one missing cost with 0,
33 // convert explicitly, and produce a [rows, columns] f64 tensor.
34 let tensor = table
35 .select_columns(["sales", "cost", "quantity"])?
36 .fill_missing(0.0)?
37 .try_numeric()?
38 .to_tensor()?;
39
40 println!("tensor shape: {:?}", tensor.shape());
41 println!("tensor data : {:?}", tensor.as_slice());
42
43 // 3 rows x 3 columns; the missing south/cost became 0.0.
44 assert_eq!(tensor.shape(), &[3, 3]);
45 assert_eq!(
46 tensor.as_slice(),
47 &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48 );
49 println!("csv_to_tensor: OK");
50 Ok(())
51}
19fn main() -> Result<(), matten_data::MattenDataError> {
20 let csv = "\
21region,sales,cost
22north,100,40
23south,150,45
24east,120,55";
25
26 let tensor: Tensor = Table::from_csv_str(csv)?
27 .select_columns(["sales", "cost"])?
28 .try_numeric()?
29 .to_tensor()?;
30
31 // [rows, columns] = [3, 2].
32 assert_eq!(tensor.shape(), &[3, 2]);
33
34 // Row-major: [north.sales, north.cost, south.sales, south.cost, ...].
35 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 45.0, 120.0, 55.0]);
36
37 // It is a plain Tensor, so core `matten` operations apply. Mean over axis 0
38 // (down the rows) gives the per-column mean: [mean(sales), mean(cost)].
39 let column_means = tensor.mean_axis(0);
40 println!("shape : {:?}", tensor.shape());
41 println!("column means : {:?}", column_means.as_slice());
42
43 assert_eq!(column_means.shape(), &[2]);
44 assert_eq!(
45 column_means.as_slice(),
46 &[(100.0 + 150.0 + 120.0) / 3.0, (40.0 + 45.0 + 55.0) / 3.0]
47 );
48
49 println!("data_04_to_tensor: OK");
50 Ok(())
51}
17fn main() -> Result<(), MattenDataError> {
18 let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24 let table = Table::from_csv_str(csv)?;
25 let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27 // Converting with a missing cell still present is rejected — no silent zero.
28 match numeric_cols.try_numeric() {
29 Err(MattenDataError::MissingValue { column, row }) => {
30 println!("missing value blocked conversion: column={column}, csv_line={row}");
31 assert_eq!(column, "cost");
32 assert_eq!(row, 3); // header is line 1, so the south row is line 3
33 }
34 other => panic!("expected MissingValue, got {other:?}"),
35 }
36
37 // Decide explicitly what a missing cost means here, then convert.
38 let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40 println!("filled shape: {:?}", tensor.shape());
41 println!("filled data : {:?}", tensor.as_slice());
42
43 assert_eq!(tensor.shape(), &[3, 2]);
44 // Only the missing south/cost was filled; the other cells are untouched.
45 assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47 println!("data_03_missing_values: OK");
48 Ok(())
49}
20fn main() {
21 // 1. Duplicate header column.
22 match Table::from_csv_str("sales,sales\n1,2") {
23 Err(MattenDataError::DuplicateColumn { name }) => {
24 println!("duplicate header : {name}");
25 assert_eq!(name, "sales");
26 }
27 other => panic!("expected DuplicateColumn, got {other:?}"),
28 }
29
30 // 2. Ragged row: the second data row (CSV line 3) has too few cells.
31 match Table::from_csv_str("a,b,c\n1,2,3\n4,5") {
32 Err(MattenDataError::RaggedRow {
33 row,
34 expected,
35 actual,
36 }) => {
37 println!("ragged row : line {row}, expected {expected}, got {actual}");
38 assert_eq!((row, expected, actual), (3, 3, 2));
39 }
40 other => panic!("expected RaggedRow, got {other:?}"),
41 }
42
43 // 3. Non-numeric value during conversion.
44 let with_text = Table::from_csv_str("label,value\nok,10\nbad,oops")
45 .expect("parses fine; the text only fails at numeric conversion");
46 match with_text
47 .select_columns(["value"])
48 .and_then(|t| t.try_numeric())
49 {
50 Err(MattenDataError::NonNumericValue { column, row, value }) => {
51 println!("non-numeric value: column={column}, line={row}, value={value:?}");
52 assert_eq!((column.as_str(), row, value.as_str()), ("value", 3, "oops"));
53 }
54 other => panic!("expected NonNumericValue, got {other:?}"),
55 }
56
57 // 4. Missing value during conversion (fill it first to proceed).
58 let with_missing =
59 Table::from_csv_str("a,b\n1,2\n3,").expect("missing cell is allowed in a Table");
60 match with_missing.try_numeric() {
61 Err(MattenDataError::MissingValue { column, row }) => {
62 println!("missing value : column={column}, line={row}");
63 assert_eq!((column.as_str(), row), ("b", 3));
64 }
65 other => panic!("expected MissingValue, got {other:?}"),
66 }
67
68 println!("data_05_errors: OK");
69}