matten_data/numeric.rs
1//! Explicit numeric conversion to a [`matten::Tensor`] (RFC-035 §7–8).
2//!
3//! Conversion is always explicit (`try_numeric()` then `to_tensor()`); there is no
4//! silent coercion. Missing values never become zero, booleans never become 1/0,
5//! and non-numeric text is rejected.
6
7use crate::error::MattenDataError;
8use crate::table::{CellValue, Table};
9
/// Fully validated numeric view of a table (RFC-034 §4.4).
///
/// Instances are created by [`Table::try_numeric`]; every cell has already been
/// checked and stored as an `f64`. Call [`NumericTable::to_tensor`] to turn the
/// buffer into a [`matten::Tensor`].
#[derive(Clone, Debug)]
pub struct NumericTable {
    /// Column names, in column order.
    headers: Vec<String>,
    /// Cell values flattened row by row (row-major).
    data: Vec<f64>,
    /// Number of data rows.
    rows: usize,
    /// Number of columns.
    cols: usize,
}
21
22impl NumericTable {
23 /// Number of rows.
24 pub fn row_count(&self) -> usize {
25 self.rows
26 }
27
28 /// Number of columns.
29 pub fn column_count(&self) -> usize {
30 self.cols
31 }
32
33 /// Column names, in column order.
34 pub fn column_names(&self) -> &[String] {
35 &self.headers
36 }
37
38 /// Build a numeric [`matten::Tensor`] with shape `[rows, columns]`, row-major.
39 ///
40 /// Errors with [`MattenDataError::EmptySelection`] if there are no columns, and
41 /// wraps any core construction failure (for example a zero-length dimension
42 /// when there are no rows) as [`MattenDataError::Matten`].
43 pub fn to_tensor(&self) -> Result<matten::Tensor, MattenDataError> {
44 if self.cols == 0 {
45 return Err(MattenDataError::EmptySelection);
46 }
47 matten::Tensor::try_new(self.data.clone(), &[self.rows, self.cols])
48 .map_err(MattenDataError::Matten)
49 }
50}
51
52/// Strictly convert a table's cells to `f64` (RFC-035 §7.1–7.4).
53pub(crate) fn try_numeric(table: &Table) -> Result<NumericTable, MattenDataError> {
54 let headers: Vec<String> = table.headers().to_vec();
55 let cols = headers.len();
56 let table_rows = table.rows();
57 let rows = table_rows.len();
58
59 let mut data = Vec::with_capacity(rows * cols);
60 for (r, row) in table_rows.iter().enumerate() {
61 // One-based CSV line number: header is line 1, first data row is line 2.
62 let line = r + 2;
63 for (c, cell) in row.iter().enumerate() {
64 let value = match cell {
65 CellValue::Int(i) => *i as f64,
66 CellValue::Float(f) => *f,
67 CellValue::Missing => {
68 return Err(MattenDataError::MissingValue {
69 column: headers[c].clone(),
70 row: line,
71 });
72 }
73 CellValue::Bool(b) => {
74 return Err(MattenDataError::NonNumericValue {
75 column: headers[c].clone(),
76 row: line,
77 value: b.to_string(),
78 });
79 }
80 CellValue::Text(s) => {
81 return Err(MattenDataError::NonNumericValue {
82 column: headers[c].clone(),
83 row: line,
84 value: s.clone(),
85 });
86 }
87 };
88 data.push(value);
89 }
90 }
91
92 Ok(NumericTable {
93 headers,
94 data,
95 rows,
96 cols,
97 })
98}