Skip to main content

Table

Struct Table 

Source
pub struct Table { /* private fields */ }
Expand description

A small, owned, rectangular table-like data set.

External guarantees: row order is preserved; column order is preserved; column names are stable after loading. Operations return new owned Table values; no borrowed view lifetimes appear in normal use.

Implementations§

Source§

impl Table

Source

pub fn from_csv_str(input: &str) -> Result<Table, MattenDataError>

Parse a Table from a CSV string.

The first row is the header. Empty input, empty header names, duplicate header names, and ragged rows are each reported as a MattenDataError; none of them causes a panic.

use matten_data::Table;
let table = Table::from_csv_str("a,b\n1,2\n3,4").unwrap();
assert_eq!(table.row_count(), 2);
assert_eq!(table.column_names(), &["a".to_string(), "b".to_string()]);
Examples found in repository?
examples/data_00_quickstart.rs (line 28)
20fn main() -> Result<(), matten_data::MattenDataError> {
21    // A small table with one text column and one missing numeric cell.
22    let csv = "\
23region,sales,cost
24north,100,40
25south,150,
26east,120,55";
27
28    let tensor = Table::from_csv_str(csv)?
29        .select_columns(["sales", "cost"])? // keep only the numeric columns
30        .fill_missing(0.0)? // the missing south/cost becomes 0.0, explicitly
31        .try_numeric()? // strict, explicit conversion to f64
32        .to_tensor()?; // a normal [rows, columns] Tensor
33
34    println!("shape: {:?}", tensor.shape());
35    println!("data : {:?}", tensor.as_slice());
36
37    assert_eq!(tensor.shape(), &[3, 2]);
38    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
39
40    println!("data_00_quickstart: OK");
41    Ok(())
42}
More examples
Hide additional examples
examples/csv_to_tensor.rs (line 27)
19fn main() -> Result<(), matten_data::MattenDataError> {
20    // A small, messy table: a text column and one missing numeric cell.
21    let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27    let table = Table::from_csv_str(csv)?;
28
29    // Inspect what we have before converting anything.
30    println!("{}", table.schema_summary());
31
32    // Select only the numeric columns we want, fill the one missing cost with 0,
33    // convert explicitly, and produce a [rows, columns] f64 tensor.
34    let tensor = table
35        .select_columns(["sales", "cost", "quantity"])?
36        .fill_missing(0.0)?
37        .try_numeric()?
38        .to_tensor()?;
39
40    println!("tensor shape: {:?}", tensor.shape());
41    println!("tensor data : {:?}", tensor.as_slice());
42
43    // 3 rows x 3 columns; the missing south/cost became 0.0.
44    assert_eq!(tensor.shape(), &[3, 3]);
45    assert_eq!(
46        tensor.as_slice(),
47        &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48    );
49    println!("csv_to_tensor: OK");
50    Ok(())
51}
examples/data_02_select_columns.rs (line 23)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23    let table = Table::from_csv_str(csv)?;
24
25    // Ask for a subset in a deliberately different order than the CSV.
26    let reordered = table.select_columns(["quantity", "sales"])?;
27    println!("selected names: {:?}", reordered.column_names());
28
29    // The output order is exactly what was requested.
30    assert_eq!(
31        reordered.column_names(),
32        &["quantity".to_string(), "sales".to_string()]
33    );
34    assert_eq!(reordered.column_count(), 2);
35    assert_eq!(reordered.row_count(), 2);
36
37    // Asking for a column that does not exist is a structured error.
38    match table.select_columns(["sales", "profit"]) {
39        Err(MattenDataError::MissingColumn { name }) => {
40            println!("missing column reported: {name}");
41            assert_eq!(name, "profit");
42        }
43        other => panic!("expected MissingColumn, got {other:?}"),
44    }
45
46    println!("data_02_select_columns: OK");
47    Ok(())
48}
examples/data_04_to_tensor.rs (line 26)
19fn main() -> Result<(), matten_data::MattenDataError> {
20    let csv = "\
21region,sales,cost
22north,100,40
23south,150,45
24east,120,55";
25
26    let tensor: Tensor = Table::from_csv_str(csv)?
27        .select_columns(["sales", "cost"])?
28        .try_numeric()?
29        .to_tensor()?;
30
31    // [rows, columns] = [3, 2].
32    assert_eq!(tensor.shape(), &[3, 2]);
33
34    // Row-major: [north.sales, north.cost, south.sales, south.cost, ...].
35    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 45.0, 120.0, 55.0]);
36
37    // It is a plain Tensor, so core `matten` operations apply. Mean over axis 0
38    // (down the rows) gives the per-column mean: [mean(sales), mean(cost)].
39    let column_means = tensor.mean_axis(0);
40    println!("shape        : {:?}", tensor.shape());
41    println!("column means : {:?}", column_means.as_slice());
42
43    assert_eq!(column_means.shape(), &[2]);
44    assert_eq!(
45        column_means.as_slice(),
46        &[(100.0 + 150.0 + 120.0) / 3.0, (40.0 + 45.0 + 55.0) / 3.0]
47    );
48
49    println!("data_04_to_tensor: OK");
50    Ok(())
51}
examples/data_03_missing_values.rs (line 24)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24    let table = Table::from_csv_str(csv)?;
25    let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27    // Converting with a missing cell still present is rejected — no silent zero.
28    match numeric_cols.try_numeric() {
29        Err(MattenDataError::MissingValue { column, row }) => {
30            println!("missing value blocked conversion: column={column}, csv_line={row}");
31            assert_eq!(column, "cost");
32            assert_eq!(row, 3); // header is line 1, so the south row is line 3
33        }
34        other => panic!("expected MissingValue, got {other:?}"),
35    }
36
37    // Decide explicitly what a missing cost means here, then convert.
38    let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40    println!("filled shape: {:?}", tensor.shape());
41    println!("filled data : {:?}", tensor.as_slice());
42
43    assert_eq!(tensor.shape(), &[3, 2]);
44    // Only the missing south/cost was filled; the other cells are untouched.
45    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47    println!("data_03_missing_values: OK");
48    Ok(())
49}
examples/data_01_schema_summary.rs (line 25)
18fn main() -> Result<(), matten_data::MattenDataError> {
19    let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25    let table = Table::from_csv_str(csv)?;
26
27    // Top-level shape of the table.
28    println!("rows    : {}", table.row_count());
29    println!("columns : {}", table.column_count());
30    println!("names   : {:?}", table.column_names());
31
32    // A printable, one-glance summary (Table: R rows x C columns, then a line
33    // per column with its inferred kind and missing count).
34    let summary = table.schema_summary();
35    print!("{summary}");
36
37    // The same information, per column, if you want to act on it in code.
38    println!("--- per-column ---");
39    for col in summary.column_summaries() {
40        println!(
41            "{:<8} kind={:<7} missing={}",
42            col.name, col.kind, col.missing
43        );
44    }
45
46    // The "cost" column has exactly one missing cell (south).
47    let cost = summary
48        .column_summaries()
49        .iter()
50        .find(|c| c.name == "cost")
51        .expect("cost column exists");
52    assert_eq!(cost.missing, 1);
53    assert_eq!(table.row_count(), 3);
54    assert_eq!(table.column_count(), 4);
55
56    println!("data_01_schema_summary: OK");
57    Ok(())
58}
Source

pub fn from_csv_path<P: AsRef<Path>>(path: P) -> Result<Table, MattenDataError>

Parse a Table from a CSV file at path.

I/O failures (for example, a missing file) are reported as MattenDataError::Io with the path and underlying error preserved.

Source§

impl Table

Source

pub fn row_count(&self) -> usize

Number of data rows.

Examples found in repository?
examples/data_02_select_columns.rs (line 35)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23    let table = Table::from_csv_str(csv)?;
24
25    // Ask for a subset in a deliberately different order than the CSV.
26    let reordered = table.select_columns(["quantity", "sales"])?;
27    println!("selected names: {:?}", reordered.column_names());
28
29    // The output order is exactly what was requested.
30    assert_eq!(
31        reordered.column_names(),
32        &["quantity".to_string(), "sales".to_string()]
33    );
34    assert_eq!(reordered.column_count(), 2);
35    assert_eq!(reordered.row_count(), 2);
36
37    // Asking for a column that does not exist is a structured error.
38    match table.select_columns(["sales", "profit"]) {
39        Err(MattenDataError::MissingColumn { name }) => {
40            println!("missing column reported: {name}");
41            assert_eq!(name, "profit");
42        }
43        other => panic!("expected MissingColumn, got {other:?}"),
44    }
45
46    println!("data_02_select_columns: OK");
47    Ok(())
48}
More examples
Hide additional examples
examples/data_01_schema_summary.rs (line 28)
18fn main() -> Result<(), matten_data::MattenDataError> {
19    let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25    let table = Table::from_csv_str(csv)?;
26
27    // Top-level shape of the table.
28    println!("rows    : {}", table.row_count());
29    println!("columns : {}", table.column_count());
30    println!("names   : {:?}", table.column_names());
31
32    // A printable, one-glance summary (Table: R rows x C columns, then a line
33    // per column with its inferred kind and missing count).
34    let summary = table.schema_summary();
35    print!("{summary}");
36
37    // The same information, per column, if you want to act on it in code.
38    println!("--- per-column ---");
39    for col in summary.column_summaries() {
40        println!(
41            "{:<8} kind={:<7} missing={}",
42            col.name, col.kind, col.missing
43        );
44    }
45
46    // The "cost" column has exactly one missing cell (south).
47    let cost = summary
48        .column_summaries()
49        .iter()
50        .find(|c| c.name == "cost")
51        .expect("cost column exists");
52    assert_eq!(cost.missing, 1);
53    assert_eq!(table.row_count(), 3);
54    assert_eq!(table.column_count(), 4);
55
56    println!("data_01_schema_summary: OK");
57    Ok(())
58}
Source

pub fn column_count(&self) -> usize

Number of columns.

Examples found in repository?
examples/data_02_select_columns.rs (line 34)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23    let table = Table::from_csv_str(csv)?;
24
25    // Ask for a subset in a deliberately different order than the CSV.
26    let reordered = table.select_columns(["quantity", "sales"])?;
27    println!("selected names: {:?}", reordered.column_names());
28
29    // The output order is exactly what was requested.
30    assert_eq!(
31        reordered.column_names(),
32        &["quantity".to_string(), "sales".to_string()]
33    );
34    assert_eq!(reordered.column_count(), 2);
35    assert_eq!(reordered.row_count(), 2);
36
37    // Asking for a column that does not exist is a structured error.
38    match table.select_columns(["sales", "profit"]) {
39        Err(MattenDataError::MissingColumn { name }) => {
40            println!("missing column reported: {name}");
41            assert_eq!(name, "profit");
42        }
43        other => panic!("expected MissingColumn, got {other:?}"),
44    }
45
46    println!("data_02_select_columns: OK");
47    Ok(())
48}
More examples
Hide additional examples
examples/data_01_schema_summary.rs (line 29)
18fn main() -> Result<(), matten_data::MattenDataError> {
19    let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25    let table = Table::from_csv_str(csv)?;
26
27    // Top-level shape of the table.
28    println!("rows    : {}", table.row_count());
29    println!("columns : {}", table.column_count());
30    println!("names   : {:?}", table.column_names());
31
32    // A printable, one-glance summary (Table: R rows x C columns, then a line
33    // per column with its inferred kind and missing count).
34    let summary = table.schema_summary();
35    print!("{summary}");
36
37    // The same information, per column, if you want to act on it in code.
38    println!("--- per-column ---");
39    for col in summary.column_summaries() {
40        println!(
41            "{:<8} kind={:<7} missing={}",
42            col.name, col.kind, col.missing
43        );
44    }
45
46    // The "cost" column has exactly one missing cell (south).
47    let cost = summary
48        .column_summaries()
49        .iter()
50        .find(|c| c.name == "cost")
51        .expect("cost column exists");
52    assert_eq!(cost.missing, 1);
53    assert_eq!(table.row_count(), 3);
54    assert_eq!(table.column_count(), 4);
55
56    println!("data_01_schema_summary: OK");
57    Ok(())
58}
Source

pub fn column_names(&self) -> &[String]

Column names, in column order.

Examples found in repository?
examples/data_02_select_columns.rs (line 27)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23    let table = Table::from_csv_str(csv)?;
24
25    // Ask for a subset in a deliberately different order than the CSV.
26    let reordered = table.select_columns(["quantity", "sales"])?;
27    println!("selected names: {:?}", reordered.column_names());
28
29    // The output order is exactly what was requested.
30    assert_eq!(
31        reordered.column_names(),
32        &["quantity".to_string(), "sales".to_string()]
33    );
34    assert_eq!(reordered.column_count(), 2);
35    assert_eq!(reordered.row_count(), 2);
36
37    // Asking for a column that does not exist is a structured error.
38    match table.select_columns(["sales", "profit"]) {
39        Err(MattenDataError::MissingColumn { name }) => {
40            println!("missing column reported: {name}");
41            assert_eq!(name, "profit");
42        }
43        other => panic!("expected MissingColumn, got {other:?}"),
44    }
45
46    println!("data_02_select_columns: OK");
47    Ok(())
48}
More examples
Hide additional examples
examples/data_01_schema_summary.rs (line 30)
18fn main() -> Result<(), matten_data::MattenDataError> {
19    let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25    let table = Table::from_csv_str(csv)?;
26
27    // Top-level shape of the table.
28    println!("rows    : {}", table.row_count());
29    println!("columns : {}", table.column_count());
30    println!("names   : {:?}", table.column_names());
31
32    // A printable, one-glance summary (Table: R rows x C columns, then a line
33    // per column with its inferred kind and missing count).
34    let summary = table.schema_summary();
35    print!("{summary}");
36
37    // The same information, per column, if you want to act on it in code.
38    println!("--- per-column ---");
39    for col in summary.column_summaries() {
40        println!(
41            "{:<8} kind={:<7} missing={}",
42            col.name, col.kind, col.missing
43        );
44    }
45
46    // The "cost" column has exactly one missing cell (south).
47    let cost = summary
48        .column_summaries()
49        .iter()
50        .find(|c| c.name == "cost")
51        .expect("cost column exists");
52    assert_eq!(cost.missing, 1);
53    assert_eq!(table.row_count(), 3);
54    assert_eq!(table.column_count(), 4);
55
56    println!("data_01_schema_summary: OK");
57    Ok(())
58}
Source

pub fn schema_summary(&self) -> SchemaSummary

A small, displayable schema summary (row/column counts, per-column missing counts and inferred kinds). Does not perform expensive analysis.

Examples found in repository?
examples/csv_to_tensor.rs (line 30)
19fn main() -> Result<(), matten_data::MattenDataError> {
20    // A small, messy table: a text column and one missing numeric cell.
21    let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27    let table = Table::from_csv_str(csv)?;
28
29    // Inspect what we have before converting anything.
30    println!("{}", table.schema_summary());
31
32    // Select only the numeric columns we want, fill the one missing cost with 0,
33    // convert explicitly, and produce a [rows, columns] f64 tensor.
34    let tensor = table
35        .select_columns(["sales", "cost", "quantity"])?
36        .fill_missing(0.0)?
37        .try_numeric()?
38        .to_tensor()?;
39
40    println!("tensor shape: {:?}", tensor.shape());
41    println!("tensor data : {:?}", tensor.as_slice());
42
43    // 3 rows x 3 columns; the missing south/cost became 0.0.
44    assert_eq!(tensor.shape(), &[3, 3]);
45    assert_eq!(
46        tensor.as_slice(),
47        &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48    );
49    println!("csv_to_tensor: OK");
50    Ok(())
51}
More examples
Hide additional examples
examples/data_01_schema_summary.rs (line 34)
18fn main() -> Result<(), matten_data::MattenDataError> {
19    let csv = "\
20region,sales,cost,active
21north,100,40.5,true
22south,150,,true
23east,120,55.0,false";
24
25    let table = Table::from_csv_str(csv)?;
26
27    // Top-level shape of the table.
28    println!("rows    : {}", table.row_count());
29    println!("columns : {}", table.column_count());
30    println!("names   : {:?}", table.column_names());
31
32    // A printable, one-glance summary (Table: R rows x C columns, then a line
33    // per column with its inferred kind and missing count).
34    let summary = table.schema_summary();
35    print!("{summary}");
36
37    // The same information, per column, if you want to act on it in code.
38    println!("--- per-column ---");
39    for col in summary.column_summaries() {
40        println!(
41            "{:<8} kind={:<7} missing={}",
42            col.name, col.kind, col.missing
43        );
44    }
45
46    // The "cost" column has exactly one missing cell (south).
47    let cost = summary
48        .column_summaries()
49        .iter()
50        .find(|c| c.name == "cost")
51        .expect("cost column exists");
52    assert_eq!(cost.missing, 1);
53    assert_eq!(table.row_count(), 3);
54    assert_eq!(table.column_count(), 4);
55
56    println!("data_01_schema_summary: OK");
57    Ok(())
58}
Source

pub fn select_columns<I, S>(&self, columns: I) -> Result<Table, MattenDataError>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,

Select columns by name, returning a new Table.

Behavior (RFC-034 §5.3): the output preserves the requested column order. Requesting a column that does not exist returns MattenDataError::MissingColumn; requesting the same column more than once returns MattenDataError::DuplicateSelection; an empty selection returns MattenDataError::EmptySelection.

Examples found in repository?
examples/data_00_quickstart.rs (line 29)
20fn main() -> Result<(), matten_data::MattenDataError> {
21    // A small table with one text column and one missing numeric cell.
22    let csv = "\
23region,sales,cost
24north,100,40
25south,150,
26east,120,55";
27
28    let tensor = Table::from_csv_str(csv)?
29        .select_columns(["sales", "cost"])? // keep only the numeric columns
30        .fill_missing(0.0)? // the missing south/cost becomes 0.0, explicitly
31        .try_numeric()? // strict, explicit conversion to f64
32        .to_tensor()?; // a normal [rows, columns] Tensor
33
34    println!("shape: {:?}", tensor.shape());
35    println!("data : {:?}", tensor.as_slice());
36
37    assert_eq!(tensor.shape(), &[3, 2]);
38    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
39
40    println!("data_00_quickstart: OK");
41    Ok(())
42}
More examples
Hide additional examples
examples/csv_to_tensor.rs (line 35)
19fn main() -> Result<(), matten_data::MattenDataError> {
20    // A small, messy table: a text column and one missing numeric cell.
21    let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27    let table = Table::from_csv_str(csv)?;
28
29    // Inspect what we have before converting anything.
30    println!("{}", table.schema_summary());
31
32    // Select only the numeric columns we want, fill the one missing cost with 0,
33    // convert explicitly, and produce a [rows, columns] f64 tensor.
34    let tensor = table
35        .select_columns(["sales", "cost", "quantity"])?
36        .fill_missing(0.0)?
37        .try_numeric()?
38        .to_tensor()?;
39
40    println!("tensor shape: {:?}", tensor.shape());
41    println!("tensor data : {:?}", tensor.as_slice());
42
43    // 3 rows x 3 columns; the missing south/cost became 0.0.
44    assert_eq!(tensor.shape(), &[3, 3]);
45    assert_eq!(
46        tensor.as_slice(),
47        &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48    );
49    println!("csv_to_tensor: OK");
50    Ok(())
51}
examples/data_02_select_columns.rs (line 26)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost,quantity
20north,100,40,5
21south,150,45,7";
22
23    let table = Table::from_csv_str(csv)?;
24
25    // Ask for a subset in a deliberately different order than the CSV.
26    let reordered = table.select_columns(["quantity", "sales"])?;
27    println!("selected names: {:?}", reordered.column_names());
28
29    // The output order is exactly what was requested.
30    assert_eq!(
31        reordered.column_names(),
32        &["quantity".to_string(), "sales".to_string()]
33    );
34    assert_eq!(reordered.column_count(), 2);
35    assert_eq!(reordered.row_count(), 2);
36
37    // Asking for a column that does not exist is a structured error.
38    match table.select_columns(["sales", "profit"]) {
39        Err(MattenDataError::MissingColumn { name }) => {
40            println!("missing column reported: {name}");
41            assert_eq!(name, "profit");
42        }
43        other => panic!("expected MissingColumn, got {other:?}"),
44    }
45
46    println!("data_02_select_columns: OK");
47    Ok(())
48}
examples/data_04_to_tensor.rs (line 27)
19fn main() -> Result<(), matten_data::MattenDataError> {
20    let csv = "\
21region,sales,cost
22north,100,40
23south,150,45
24east,120,55";
25
26    let tensor: Tensor = Table::from_csv_str(csv)?
27        .select_columns(["sales", "cost"])?
28        .try_numeric()?
29        .to_tensor()?;
30
31    // [rows, columns] = [3, 2].
32    assert_eq!(tensor.shape(), &[3, 2]);
33
34    // Row-major: [north.sales, north.cost, south.sales, south.cost, ...].
35    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 45.0, 120.0, 55.0]);
36
37    // It is a plain Tensor, so core `matten` operations apply. Mean over axis 0
38    // (down the rows) gives the per-column mean: [mean(sales), mean(cost)].
39    let column_means = tensor.mean_axis(0);
40    println!("shape        : {:?}", tensor.shape());
41    println!("column means : {:?}", column_means.as_slice());
42
43    assert_eq!(column_means.shape(), &[2]);
44    assert_eq!(
45        column_means.as_slice(),
46        &[(100.0 + 150.0 + 120.0) / 3.0, (40.0 + 45.0 + 55.0) / 3.0]
47    );
48
49    println!("data_04_to_tensor: OK");
50    Ok(())
51}
examples/data_03_missing_values.rs (line 25)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24    let table = Table::from_csv_str(csv)?;
25    let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27    // Converting with a missing cell still present is rejected — no silent zero.
28    match numeric_cols.try_numeric() {
29        Err(MattenDataError::MissingValue { column, row }) => {
30            println!("missing value blocked conversion: column={column}, csv_line={row}");
31            assert_eq!(column, "cost");
32            assert_eq!(row, 3); // header is line 1, so the south row is line 3
33        }
34        other => panic!("expected MissingValue, got {other:?}"),
35    }
36
37    // Decide explicitly what a missing cost means here, then convert.
38    let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40    println!("filled shape: {:?}", tensor.shape());
41    println!("filled data : {:?}", tensor.as_slice());
42
43    assert_eq!(tensor.shape(), &[3, 2]);
44    // Only the missing south/cost was filled; the other cells are untouched.
45    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47    println!("data_03_missing_values: OK");
48    Ok(())
49}
examples/data_05_errors.rs (line 47)
20fn main() {
21    // 1. Duplicate header column.
22    match Table::from_csv_str("sales,sales\n1,2") {
23        Err(MattenDataError::DuplicateColumn { name }) => {
24            println!("duplicate header : {name}");
25            assert_eq!(name, "sales");
26        }
27        other => panic!("expected DuplicateColumn, got {other:?}"),
28    }
29
30    // 2. Ragged row: the second data row (CSV line 3) has too few cells.
31    match Table::from_csv_str("a,b,c\n1,2,3\n4,5") {
32        Err(MattenDataError::RaggedRow {
33            row,
34            expected,
35            actual,
36        }) => {
37            println!("ragged row       : line {row}, expected {expected}, got {actual}");
38            assert_eq!((row, expected, actual), (3, 3, 2));
39        }
40        other => panic!("expected RaggedRow, got {other:?}"),
41    }
42
43    // 3. Non-numeric value during conversion.
44    let with_text = Table::from_csv_str("label,value\nok,10\nbad,oops")
45        .expect("parses fine; the text only fails at numeric conversion");
46    match with_text
47        .select_columns(["value"])
48        .and_then(|t| t.try_numeric())
49    {
50        Err(MattenDataError::NonNumericValue { column, row, value }) => {
51            println!("non-numeric value: column={column}, line={row}, value={value:?}");
52            assert_eq!((column.as_str(), row, value.as_str()), ("value", 3, "oops"));
53        }
54        other => panic!("expected NonNumericValue, got {other:?}"),
55    }
56
57    // 4. Missing value during conversion (fill it first to proceed).
58    let with_missing =
59        Table::from_csv_str("a,b\n1,2\n3,").expect("missing cell is allowed in a Table");
60    match with_missing.try_numeric() {
61        Err(MattenDataError::MissingValue { column, row }) => {
62            println!("missing value    : column={column}, line={row}");
63            assert_eq!((column.as_str(), row), ("b", 3));
64        }
65        other => panic!("expected MissingValue, got {other:?}"),
66    }
67
68    println!("data_05_errors: OK");
69}
Source

pub fn fill_missing(&self, value: impl Into<CellValue>) -> Result<Table, MattenDataError>

Fill every missing cell with value, returning a new Table.

Missing values are never silently turned into zero; filling is always explicit (RFC-035 §6). Non-missing cells and the shape are unchanged.

Examples found in repository?
examples/data_00_quickstart.rs (line 30)
20fn main() -> Result<(), matten_data::MattenDataError> {
21    // A small table with one text column and one missing numeric cell.
22    let csv = "\
23region,sales,cost
24north,100,40
25south,150,
26east,120,55";
27
28    let tensor = Table::from_csv_str(csv)?
29        .select_columns(["sales", "cost"])? // keep only the numeric columns
30        .fill_missing(0.0)? // the missing south/cost becomes 0.0, explicitly
31        .try_numeric()? // strict, explicit conversion to f64
32        .to_tensor()?; // a normal [rows, columns] Tensor
33
34    println!("shape: {:?}", tensor.shape());
35    println!("data : {:?}", tensor.as_slice());
36
37    assert_eq!(tensor.shape(), &[3, 2]);
38    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
39
40    println!("data_00_quickstart: OK");
41    Ok(())
42}
More examples
Hide additional examples
examples/csv_to_tensor.rs (line 36)
19fn main() -> Result<(), matten_data::MattenDataError> {
20    // A small, messy table: a text column and one missing numeric cell.
21    let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27    let table = Table::from_csv_str(csv)?;
28
29    // Inspect what we have before converting anything.
30    println!("{}", table.schema_summary());
31
32    // Select only the numeric columns we want, fill the one missing cost with 0,
33    // convert explicitly, and produce a [rows, columns] f64 tensor.
34    let tensor = table
35        .select_columns(["sales", "cost", "quantity"])?
36        .fill_missing(0.0)?
37        .try_numeric()?
38        .to_tensor()?;
39
40    println!("tensor shape: {:?}", tensor.shape());
41    println!("tensor data : {:?}", tensor.as_slice());
42
43    // 3 rows x 3 columns; the missing south/cost became 0.0.
44    assert_eq!(tensor.shape(), &[3, 3]);
45    assert_eq!(
46        tensor.as_slice(),
47        &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48    );
49    println!("csv_to_tensor: OK");
50    Ok(())
51}
examples/data_03_missing_values.rs (line 38)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24    let table = Table::from_csv_str(csv)?;
25    let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27    // Converting with a missing cell still present is rejected — no silent zero.
28    match numeric_cols.try_numeric() {
29        Err(MattenDataError::MissingValue { column, row }) => {
30            println!("missing value blocked conversion: column={column}, csv_line={row}");
31            assert_eq!(column, "cost");
32            assert_eq!(row, 3); // header is line 1, so the south row is line 3
33        }
34        other => panic!("expected MissingValue, got {other:?}"),
35    }
36
37    // Decide explicitly what a missing cost means here, then convert.
38    let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40    println!("filled shape: {:?}", tensor.shape());
41    println!("filled data : {:?}", tensor.as_slice());
42
43    assert_eq!(tensor.shape(), &[3, 2]);
44    // Only the missing south/cost was filled; the other cells are untouched.
45    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47    println!("data_03_missing_values: OK");
48    Ok(())
49}
Source

pub fn try_numeric(&self) -> Result<NumericTable, MattenDataError>

Convert the table to an explicit numeric table (RFC-035 §7).

Strict conversion: Int/Float become f64; Bool and Text are rejected (MattenDataError::NonNumericValue); a remaining Missing cell is rejected (MattenDataError::MissingValue). Text is never parsed as a number by default.

Examples found in repository?
examples/data_00_quickstart.rs (line 31)
20fn main() -> Result<(), matten_data::MattenDataError> {
21    // A small table with one text column and one missing numeric cell.
22    let csv = "\
23region,sales,cost
24north,100,40
25south,150,
26east,120,55";
27
28    let tensor = Table::from_csv_str(csv)?
29        .select_columns(["sales", "cost"])? // keep only the numeric columns
30        .fill_missing(0.0)? // the missing south/cost becomes 0.0, explicitly
31        .try_numeric()? // strict, explicit conversion to f64
32        .to_tensor()?; // a normal [rows, columns] Tensor
33
34    println!("shape: {:?}", tensor.shape());
35    println!("data : {:?}", tensor.as_slice());
36
37    assert_eq!(tensor.shape(), &[3, 2]);
38    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
39
40    println!("data_00_quickstart: OK");
41    Ok(())
42}
More examples
Hide additional examples
examples/csv_to_tensor.rs (line 37)
19fn main() -> Result<(), matten_data::MattenDataError> {
20    // A small, messy table: a text column and one missing numeric cell.
21    let csv = "\
22region,sales,cost,quantity
23north,100,40,5
24south,150,,7
25east,120,55,6";
26
27    let table = Table::from_csv_str(csv)?;
28
29    // Inspect what we have before converting anything.
30    println!("{}", table.schema_summary());
31
32    // Select only the numeric columns we want, fill the one missing cost with 0,
33    // convert explicitly, and produce a [rows, columns] f64 tensor.
34    let tensor = table
35        .select_columns(["sales", "cost", "quantity"])?
36        .fill_missing(0.0)?
37        .try_numeric()?
38        .to_tensor()?;
39
40    println!("tensor shape: {:?}", tensor.shape());
41    println!("tensor data : {:?}", tensor.as_slice());
42
43    // 3 rows x 3 columns; the missing south/cost became 0.0.
44    assert_eq!(tensor.shape(), &[3, 3]);
45    assert_eq!(
46        tensor.as_slice(),
47        &[100.0, 40.0, 5.0, 150.0, 0.0, 7.0, 120.0, 55.0, 6.0]
48    );
49    println!("csv_to_tensor: OK");
50    Ok(())
51}
examples/data_04_to_tensor.rs (line 28)
19fn main() -> Result<(), matten_data::MattenDataError> {
20    let csv = "\
21region,sales,cost
22north,100,40
23south,150,45
24east,120,55";
25
26    let tensor: Tensor = Table::from_csv_str(csv)?
27        .select_columns(["sales", "cost"])?
28        .try_numeric()?
29        .to_tensor()?;
30
31    // [rows, columns] = [3, 2].
32    assert_eq!(tensor.shape(), &[3, 2]);
33
34    // Row-major: [north.sales, north.cost, south.sales, south.cost, ...].
35    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 45.0, 120.0, 55.0]);
36
37    // It is a plain Tensor, so core `matten` operations apply. Mean over axis 0
38    // (down the rows) gives the per-column mean: [mean(sales), mean(cost)].
39    let column_means = tensor.mean_axis(0);
40    println!("shape        : {:?}", tensor.shape());
41    println!("column means : {:?}", column_means.as_slice());
42
43    assert_eq!(column_means.shape(), &[2]);
44    assert_eq!(
45        column_means.as_slice(),
46        &[(100.0 + 150.0 + 120.0) / 3.0, (40.0 + 45.0 + 55.0) / 3.0]
47    );
48
49    println!("data_04_to_tensor: OK");
50    Ok(())
51}
examples/data_03_missing_values.rs (line 28)
17fn main() -> Result<(), MattenDataError> {
18    let csv = "\
19region,sales,cost
20north,100,40
21south,150,
22east,120,55";
23
24    let table = Table::from_csv_str(csv)?;
25    let numeric_cols = table.select_columns(["sales", "cost"])?;
26
27    // Converting with a missing cell still present is rejected — no silent zero.
28    match numeric_cols.try_numeric() {
29        Err(MattenDataError::MissingValue { column, row }) => {
30            println!("missing value blocked conversion: column={column}, csv_line={row}");
31            assert_eq!(column, "cost");
32            assert_eq!(row, 3); // header is line 1, so the south row is line 3
33        }
34        other => panic!("expected MissingValue, got {other:?}"),
35    }
36
37    // Decide explicitly what a missing cost means here, then convert.
38    let tensor = numeric_cols.fill_missing(0.0)?.try_numeric()?.to_tensor()?;
39
40    println!("filled shape: {:?}", tensor.shape());
41    println!("filled data : {:?}", tensor.as_slice());
42
43    assert_eq!(tensor.shape(), &[3, 2]);
44    // Only the missing south/cost was filled; the other cells are untouched.
45    assert_eq!(tensor.as_slice(), &[100.0, 40.0, 150.0, 0.0, 120.0, 55.0]);
46
47    println!("data_03_missing_values: OK");
48    Ok(())
49}
examples/data_05_errors.rs (line 48)
20fn main() {
21    // 1. Duplicate header column.
22    match Table::from_csv_str("sales,sales\n1,2") {
23        Err(MattenDataError::DuplicateColumn { name }) => {
24            println!("duplicate header : {name}");
25            assert_eq!(name, "sales");
26        }
27        other => panic!("expected DuplicateColumn, got {other:?}"),
28    }
29
30    // 2. Ragged row: the second data row (CSV line 3) has too few cells.
31    match Table::from_csv_str("a,b,c\n1,2,3\n4,5") {
32        Err(MattenDataError::RaggedRow {
33            row,
34            expected,
35            actual,
36        }) => {
37            println!("ragged row       : line {row}, expected {expected}, got {actual}");
38            assert_eq!((row, expected, actual), (3, 3, 2));
39        }
40        other => panic!("expected RaggedRow, got {other:?}"),
41    }
42
43    // 3. Non-numeric value during conversion.
44    let with_text = Table::from_csv_str("label,value\nok,10\nbad,oops")
45        .expect("parses fine; the text only fails at numeric conversion");
46    match with_text
47        .select_columns(["value"])
48        .and_then(|t| t.try_numeric())
49    {
50        Err(MattenDataError::NonNumericValue { column, row, value }) => {
51            println!("non-numeric value: column={column}, line={row}, value={value:?}");
52            assert_eq!((column.as_str(), row, value.as_str()), ("value", 3, "oops"));
53        }
54        other => panic!("expected NonNumericValue, got {other:?}"),
55    }
56
57    // 4. Missing value during conversion (fill it first to proceed).
58    let with_missing =
59        Table::from_csv_str("a,b\n1,2\n3,").expect("missing cell is allowed in a Table");
60    match with_missing.try_numeric() {
61        Err(MattenDataError::MissingValue { column, row }) => {
62            println!("missing value    : column={column}, line={row}");
63            assert_eq!((column.as_str(), row), ("b", 3));
64        }
65        other => panic!("expected MissingValue, got {other:?}"),
66    }
67
68    println!("data_05_errors: OK");
69}

Trait Implementations§

Source§

impl Clone for Table

Source§

fn clone(&self) -> Table

Returns a duplicate of the value. Read more
1.0.0 (const: unstable) · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for Table

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl Freeze for Table

§

impl RefUnwindSafe for Table

§

impl Send for Table

§

impl Sync for Table

§

impl Unpin for Table

§

impl UnsafeUnpin for Table

§

impl UnwindSafe for Table

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.