Skip to main content

matten_data/
table.rs

//! The `Table` model and `CellValue` (RFC-034 §4–5).
//!
//! `Table` is a small, owned, rectangular table-like data set whose end goal is a
//! numeric [`matten::Tensor`]. It is **not** a dataframe (RFC-033, RFC-042): there
//! are no joins, group-by, pivot, query, lazy execution, or indexing APIs.
6
7use std::collections::HashSet;
8
9use crate::error::MattenDataError;
10use crate::schema::SchemaSummary;
11use crate::{numeric, schema};
12
/// The value held by one cell of a table (RFC-034 §4.2).
///
/// This type is deliberately **crate-local** (architect ruling from the
/// RFC-033–042 review, Q4). It describes cells produced by table ingestion, not
/// the dynamic values of a core `Tensor`, and it is a separate type from
/// `matten::Element` rather than an alias of it. The `Text` variant owns a
/// `String`, whereas core `Element::Text` uses `Arc<str>`; that representation is
/// a pragmatic local choice and may change without affecting the public model.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum CellValue {
    /// Free-form text: a cell that parsed as neither a number nor a boolean.
    Text(String),
    /// A floating-point number.
    Float(f64),
    /// A signed integer.
    Int(i64),
    /// A boolean, either `true` or `false`.
    Bool(bool),
    /// An empty cell, i.e. a missing value.
    Missing,
}
34
35impl From<f64> for CellValue {
36    fn from(v: f64) -> Self {
37        CellValue::Float(v)
38    }
39}
40impl From<i64> for CellValue {
41    fn from(v: i64) -> Self {
42        CellValue::Int(v)
43    }
44}
45impl From<bool> for CellValue {
46    fn from(v: bool) -> Self {
47        CellValue::Bool(v)
48    }
49}
50impl From<&str> for CellValue {
51    fn from(v: &str) -> Self {
52        CellValue::Text(v.to_string())
53    }
54}
55impl From<String> for CellValue {
56    fn from(v: String) -> Self {
57        CellValue::Text(v)
58    }
59}
60
61/// A small, owned, rectangular table-like data set.
62///
63/// External guarantees: row order is preserved; column order is preserved; column
64/// names are stable after loading. Operations return new owned `Table` values; no
65/// borrowed view lifetimes appear in normal use.
66#[derive(Debug, Clone)]
67pub struct Table {
68    headers: Vec<String>,
69    rows: Vec<Vec<CellValue>>,
70}
71
72impl Table {
73    /// Construct a `Table` from validated parts. Internal: callers (CSV ingestion,
74    /// selection, fill) guarantee that every row has `headers.len()` cells.
75    pub(crate) fn from_parts(headers: Vec<String>, rows: Vec<Vec<CellValue>>) -> Self {
76        Table { headers, rows }
77    }
78
79    /// Number of data rows.
80    pub fn row_count(&self) -> usize {
81        self.rows.len()
82    }
83
84    /// Number of columns.
85    pub fn column_count(&self) -> usize {
86        self.headers.len()
87    }
88
89    /// Column names, in column order.
90    pub fn column_names(&self) -> &[String] {
91        &self.headers
92    }
93
94    /// A small, displayable schema summary (row/column counts, per-column missing
95    /// counts and inferred kinds). Does not perform expensive analysis.
96    pub fn schema_summary(&self) -> SchemaSummary {
97        schema::summarize(self)
98    }
99
100    /// Select columns by name, returning a new `Table`.
101    ///
102    /// Behavior (RFC-034 §5.3): preserves the requested column order; errors with
103    /// [`MattenDataError::MissingColumn`] if a requested column does not exist;
104    /// rejects duplicate selections with [`MattenDataError::DuplicateSelection`];
105    /// an empty selection is [`MattenDataError::EmptySelection`].
106    pub fn select_columns<I, S>(&self, columns: I) -> Result<Table, MattenDataError>
107    where
108        I: IntoIterator<Item = S>,
109        S: AsRef<str>,
110    {
111        let requested: Vec<String> = columns
112            .into_iter()
113            .map(|s| s.as_ref().to_string())
114            .collect();
115
116        if requested.is_empty() {
117            return Err(MattenDataError::EmptySelection);
118        }
119
120        let mut seen = HashSet::with_capacity(requested.len());
121        for name in &requested {
122            if !seen.insert(name.as_str()) {
123                return Err(MattenDataError::DuplicateSelection { name: name.clone() });
124            }
125        }
126
127        let mut indices = Vec::with_capacity(requested.len());
128        for name in &requested {
129            match self.headers.iter().position(|h| h == name) {
130                Some(idx) => indices.push(idx),
131                None => return Err(MattenDataError::MissingColumn { name: name.clone() }),
132            }
133        }
134
135        let rows = self
136            .rows
137            .iter()
138            .map(|row| indices.iter().map(|&i| row[i].clone()).collect())
139            .collect();
140
141        Ok(Table::from_parts(requested, rows))
142    }
143
144    /// Fill every missing cell with `value`, returning a new `Table`.
145    ///
146    /// Missing values are never silently turned into zero; filling is always
147    /// explicit (RFC-035 §6). Non-missing cells and the shape are unchanged.
148    pub fn fill_missing(&self, value: impl Into<CellValue>) -> Result<Table, MattenDataError> {
149        let fill = value.into();
150        let rows = self
151            .rows
152            .iter()
153            .map(|row| {
154                row.iter()
155                    .map(|cell| {
156                        if matches!(cell, CellValue::Missing) {
157                            fill.clone()
158                        } else {
159                            cell.clone()
160                        }
161                    })
162                    .collect()
163            })
164            .collect();
165        Ok(Table::from_parts(self.headers.clone(), rows))
166    }
167
168    /// Convert the table to an explicit numeric table (RFC-035 §7).
169    ///
170    /// Strict conversion: `Int`/`Float` become `f64`; `Bool` and `Text` are
171    /// rejected ([`MattenDataError::NonNumericValue`]); a remaining `Missing` cell
172    /// is rejected ([`MattenDataError::MissingValue`]). Text is never parsed as a
173    /// number by default.
174    pub fn try_numeric(&self) -> Result<numeric::NumericTable, MattenDataError> {
175        numeric::try_numeric(self)
176    }
177
178    // --- crate-internal accessors used by sibling modules ---
179
180    pub(crate) fn headers(&self) -> &[String] {
181        &self.headers
182    }
183
184    pub(crate) fn rows(&self) -> &[Vec<CellValue>] {
185        &self.rows
186    }
187}