proof_of_sql/base/database/
table.rs

1use super::{Column, ColumnField};
2use crate::base::{map::IndexMap, scalar::Scalar};
3use alloc::vec::Vec;
4use bumpalo::Bump;
5use snafu::Snafu;
6use sqlparser::ast::Ident;
7
/// Optional settings that can be supplied when constructing a table.
///
/// Modelled after arrow's
/// [`RecordBatchOptions`](https://docs.rs/arrow/latest/arrow/record_batch/struct.RecordBatchOptions.html).
#[derive(Clone, Copy, Debug, Default)]
pub struct TableOptions {
    /// Explicit number of rows, if any.
    ///
    /// This is mainly needed for tables that have no columns, where the row count
    /// cannot be read off a column. The default is `None`.
    pub row_count: Option<usize>,
}
15
16impl TableOptions {
17    /// Creates a new [`TableOptions`].
18    #[must_use]
19    pub fn new(row_count: Option<usize>) -> Self {
20        Self { row_count }
21    }
22}
23
24/// An error that occurs when working with tables.
25#[derive(Snafu, Debug, PartialEq, Eq)]
26pub enum TableError {
27    /// The columns have different lengths.
28    #[snafu(display("Columns have different lengths"))]
29    ColumnLengthMismatch,
30
31    /// At least one column has length different from the provided row count.
32    #[snafu(display("Column has length different from the provided row count"))]
33    ColumnLengthMismatchWithSpecifiedRowCount,
34
35    /// The table is empty and there is no specified row count.
36    #[snafu(display("Table is empty and no row count is specified"))]
37    EmptyTableWithoutSpecifiedRowCount,
38}
39/// A table of data, with schema included. This is simply a map from `Ident` to `Column`,
40/// where columns order matters.
41/// This is primarily used as an internal result that is used before
42/// converting to the final result in either Arrow format or JSON.
43/// This is the analog of an arrow [`RecordBatch`](arrow::record_batch::RecordBatch).
44#[derive(Debug, Clone, Eq)]
45pub struct Table<'a, S: Scalar> {
46    table: IndexMap<Ident, Column<'a, S>>,
47    row_count: usize,
48}
49impl<'a, S: Scalar> Table<'a, S> {
50    /// Creates a new [`Table`] with the given columns and default [`TableOptions`].
51    pub fn try_new(table: IndexMap<Ident, Column<'a, S>>) -> Result<Self, TableError> {
52        Self::try_new_with_options(table, TableOptions::default())
53    }
54
55    /// Creates a new [`Table`] with the given columns and with [`TableOptions`].
56    pub fn try_new_with_options(
57        table: IndexMap<Ident, Column<'a, S>>,
58        options: TableOptions,
59    ) -> Result<Self, TableError> {
60        match (table.is_empty(), options.row_count) {
61            (true, None) => Err(TableError::EmptyTableWithoutSpecifiedRowCount),
62            (true, Some(row_count)) => Ok(Self { table, row_count }),
63            (false, None) => {
64                let row_count = table[0].len();
65                if table.values().any(|column| column.len() != row_count) {
66                    Err(TableError::ColumnLengthMismatch)
67                } else {
68                    Ok(Self { table, row_count })
69                }
70            }
71            (false, Some(row_count)) => {
72                if table.values().any(|column| column.len() != row_count) {
73                    Err(TableError::ColumnLengthMismatchWithSpecifiedRowCount)
74                } else {
75                    Ok(Self { table, row_count })
76                }
77            }
78        }
79    }
80
81    /// Creates a new [`Table`] from an iterator of `(Ident, Column)` pairs with default [`TableOptions`].
82    pub fn try_from_iter<T: IntoIterator<Item = (Ident, Column<'a, S>)>>(
83        iter: T,
84    ) -> Result<Self, TableError> {
85        Self::try_from_iter_with_options(iter, TableOptions::default())
86    }
87
88    /// Creates a new [`Table`] from an iterator of `(Ident, Column)` pairs with [`TableOptions`].
89    pub fn try_from_iter_with_options<T: IntoIterator<Item = (Ident, Column<'a, S>)>>(
90        iter: T,
91        options: TableOptions,
92    ) -> Result<Self, TableError> {
93        Self::try_new_with_options(IndexMap::from_iter(iter), options)
94    }
95
96    /// Number of columns in the table.
97    #[must_use]
98    pub fn num_columns(&self) -> usize {
99        self.table.len()
100    }
101    /// Number of rows in the table.
102    #[must_use]
103    pub fn num_rows(&self) -> usize {
104        self.row_count
105    }
106    /// Whether the table has no columns.
107    #[must_use]
108    pub fn is_empty(&self) -> bool {
109        self.table.is_empty()
110    }
111    /// Returns the columns of this table as an `IndexMap`
112    #[must_use]
113    pub fn into_inner(self) -> IndexMap<Ident, Column<'a, S>> {
114        self.table
115    }
116    /// Returns the columns of this table as an `IndexMap`
117    #[must_use]
118    pub fn inner_table(&self) -> &IndexMap<Ident, Column<'a, S>> {
119        &self.table
120    }
121    /// Return the schema of this table as a `Vec` of `ColumnField`s
122    #[must_use]
123    pub fn schema(&self) -> Vec<ColumnField> {
124        self.table
125            .iter()
126            .map(|(name, column)| ColumnField::new(name.clone(), column.column_type()))
127            .collect()
128    }
129    /// Returns the columns of this table as an Iterator
130    pub fn column_names(&self) -> impl Iterator<Item = &Ident> {
131        self.table.keys()
132    }
133    /// Returns the columns of this table as an Iterator
134    pub fn columns(&self) -> impl Iterator<Item = &Column<'a, S>> {
135        self.table.values()
136    }
137    /// Returns the column with the given position.
138    #[must_use]
139    pub fn column(&self, index: usize) -> Option<&Column<'a, S>> {
140        self.table.values().nth(index)
141    }
142    /// Add the `rho` column as the last column to the table.
143    #[must_use]
144    pub fn add_rho_column(mut self, alloc: &'a Bump) -> Self {
145        self.table
146            .insert(Ident::new("rho"), Column::rho(self.row_count, alloc));
147        self
148    }
149}
150
151// Note: we modify the default PartialEq for IndexMap to also check for column ordering.
152// This is to align with the behaviour of a `RecordBatch`.
153impl<S: Scalar> PartialEq for Table<'_, S> {
154    fn eq(&self, other: &Self) -> bool {
155        self.table == other.table
156            && self
157                .table
158                .keys()
159                .zip(other.table.keys())
160                .all(|(a, b)| a == b)
161    }
162}
163
/// Test-only convenience: look a column up by name with `table["name"]`.
///
/// Panics if the table has no column with that name.
#[cfg(test)]
impl<'a, S: Scalar> core::ops::Index<&str> for Table<'a, S> {
    type Output = Column<'a, S>;

    fn index(&self, index: &str) -> &Self::Output {
        let name = Ident::new(index);
        self.table.get(&name).unwrap()
    }
}