proof_of_sql/base/database/
column.rs

1use super::{ColumnType, LiteralValue, OwnedColumn};
2use crate::base::{
3    math::decimal::Precision,
4    posql_time::{PoSQLTimeUnit, PoSQLTimeZone},
5    scalar::{Scalar, ScalarExt},
6    slice_ops::slice_cast_with,
7};
8use alloc::vec::Vec;
9use bumpalo::Bump;
10
/// Represents a read-only view of a column in an in-memory,
/// column-oriented database.
///
/// Every variant only holds shared slices borrowed for `'a` (plus small
/// type parameters), so the view itself is `Copy`.
///
/// Note: The types here should correspond to native SQL database types.
/// See `<https://ignite.apache.org/docs/latest/sql-reference/data-types>` for
/// a description of the native types used by Apache Ignite.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
#[non_exhaustive]
pub enum Column<'a, S: Scalar> {
    /// Boolean columns
    Boolean(&'a [bool]),
    /// u8 columns
    Uint8(&'a [u8]),
    /// i8 columns
    TinyInt(&'a [i8]),
    /// i16 columns
    SmallInt(&'a [i16]),
    /// i32 columns
    Int(&'a [i32]),
    /// i64 columns
    BigInt(&'a [i64]),
    /// i128 columns
    Int128(&'a [i128]),
    /// String columns
    ///  - the first element maps to the str values.
    ///  - the second element maps to the str hashes (see [`crate::base::scalar::Scalar`]).
    ///
    /// Both slices are expected to have the same length; `Column::len` asserts this.
    VarChar((&'a [&'a str], &'a [S])),
    /// Decimal columns with a max width of 252 bits
    ///  - the first element is the precision
    ///  - the second element is the scale
    ///  - the backing store maps to the type `S`
    Decimal75(Precision, i8, &'a [S]),
    /// Timestamp columns with timezone
    /// - the first element maps to the stored `TimeUnit`
    /// - the second element maps to a timezone
    /// - the third element maps to columns of timeunits since unix epoch
    TimestampTZ(PoSQLTimeUnit, PoSQLTimeZone, &'a [i64]),
    /// Scalar columns
    Scalar(&'a [S]),
    /// Variable length binary columns
    ///  - the first element maps to the raw byte values.
    ///  - the second element maps to one scalar per value (the constructors in this
    ///    file derive it with `from_byte_slice_via_hash`).
    ///
    /// Both slices are expected to have the same length; `Column::len` asserts this.
    VarBinary((&'a [&'a [u8]], &'a [S])),
}
51
52impl<'a, S: Scalar> Column<'a, S> {
53    /// Provides the column type associated with the column
54    #[must_use]
55    pub fn column_type(&self) -> ColumnType {
56        match self {
57            Self::Boolean(_) => ColumnType::Boolean,
58            Self::Uint8(_) => ColumnType::Uint8,
59            Self::TinyInt(_) => ColumnType::TinyInt,
60            Self::SmallInt(_) => ColumnType::SmallInt,
61            Self::Int(_) => ColumnType::Int,
62            Self::BigInt(_) => ColumnType::BigInt,
63            Self::VarChar(_) => ColumnType::VarChar,
64            Self::Int128(_) => ColumnType::Int128,
65            Self::Scalar(_) => ColumnType::Scalar,
66            Self::Decimal75(precision, scale, _) => ColumnType::Decimal75(*precision, *scale),
67            Self::TimestampTZ(time_unit, timezone, _) => {
68                ColumnType::TimestampTZ(*time_unit, *timezone)
69            }
70            Self::VarBinary(..) => ColumnType::VarBinary,
71        }
72    }
73    /// Returns the length of the column.
74    /// # Panics
75    /// this function requires that `col` and `scals` have the same length.
76    #[must_use]
77    pub fn len(&self) -> usize {
78        match self {
79            Self::Boolean(col) => col.len(),
80            Self::Uint8(col) => col.len(),
81            Self::TinyInt(col) => col.len(),
82            Self::SmallInt(col) => col.len(),
83            Self::Int(col) => col.len(),
84            Self::BigInt(col) | Self::TimestampTZ(_, _, col) => col.len(),
85            Self::VarChar((col, scals)) => {
86                assert_eq!(col.len(), scals.len());
87                col.len()
88            }
89            Self::VarBinary((col, scals)) => {
90                assert_eq!(col.len(), scals.len());
91                col.len()
92            }
93            Self::Int128(col) => col.len(),
94            Self::Scalar(col) | Self::Decimal75(_, _, col) => col.len(),
95        }
96    }
97    /// Returns `true` if the column has no elements.
98    #[must_use]
99    pub fn is_empty(&self) -> bool {
100        self.len() == 0
101    }
102
103    /// Generate a constant column from a literal value with a given length
104    pub fn from_literal_with_length(
105        literal: &LiteralValue,
106        length: usize,
107        alloc: &'a Bump,
108    ) -> Self {
109        match literal {
110            LiteralValue::Boolean(value) => {
111                Column::Boolean(alloc.alloc_slice_fill_copy(length, *value))
112            }
113            LiteralValue::Uint8(value) => {
114                Column::Uint8(alloc.alloc_slice_fill_copy(length, *value))
115            }
116            LiteralValue::TinyInt(value) => {
117                Column::TinyInt(alloc.alloc_slice_fill_copy(length, *value))
118            }
119            LiteralValue::SmallInt(value) => {
120                Column::SmallInt(alloc.alloc_slice_fill_copy(length, *value))
121            }
122            LiteralValue::Int(value) => Column::Int(alloc.alloc_slice_fill_copy(length, *value)),
123            LiteralValue::BigInt(value) => {
124                Column::BigInt(alloc.alloc_slice_fill_copy(length, *value))
125            }
126            LiteralValue::Int128(value) => {
127                Column::Int128(alloc.alloc_slice_fill_copy(length, *value))
128            }
129            LiteralValue::Scalar(value) => {
130                Column::Scalar(alloc.alloc_slice_fill_copy(length, (*value).into()))
131            }
132            LiteralValue::Decimal75(precision, scale, value) => Column::Decimal75(
133                *precision,
134                *scale,
135                alloc.alloc_slice_fill_copy(length, value.into_scalar()),
136            ),
137            LiteralValue::TimeStampTZ(tu, tz, value) => {
138                Column::TimestampTZ(*tu, *tz, alloc.alloc_slice_fill_copy(length, *value))
139            }
140            LiteralValue::VarChar(string) => Column::VarChar((
141                alloc.alloc_slice_fill_with(length, |_| alloc.alloc_str(string) as &str),
142                alloc.alloc_slice_fill_copy(length, S::from(string)),
143            )),
144            LiteralValue::VarBinary(bytes) => {
145                // Convert the bytes to a slice of bytes references
146                let bytes_slice = alloc
147                    .alloc_slice_fill_with(length, |_| alloc.alloc_slice_copy(bytes) as &[_]);
148
149                // Convert the bytes to scalars using from_byte_slice_via_hash
150                let scalars =
151                    alloc.alloc_slice_fill_copy(length, S::from_byte_slice_via_hash(bytes));
152
153                Column::VarBinary((bytes_slice, scalars))
154            }
155        }
156    }
157
158    /// Generate a `Int128` `rho` column [0, 1, 2, ..., length - 1]
159    pub fn rho(length: usize, alloc: &'a Bump) -> Self {
160        let raw_rho = (0..length as i128).collect::<Vec<_>>();
161        let rho = alloc.alloc_slice_copy(raw_rho.as_slice());
162        Column::<S>::Int128(rho as &[_])
163    }
164
165    /// Convert an `OwnedColumn` to a `Column`
166    pub fn from_owned_column(owned_column: &'a OwnedColumn<S>, alloc: &'a Bump) -> Self {
167        match owned_column {
168            OwnedColumn::Boolean(col) => Column::Boolean(col.as_slice()),
169            OwnedColumn::Uint8(col) => Column::Uint8(col.as_slice()),
170            OwnedColumn::TinyInt(col) => Column::TinyInt(col.as_slice()),
171            OwnedColumn::SmallInt(col) => Column::SmallInt(col.as_slice()),
172            OwnedColumn::Int(col) => Column::Int(col.as_slice()),
173            OwnedColumn::BigInt(col) => Column::BigInt(col.as_slice()),
174            OwnedColumn::Int128(col) => Column::Int128(col.as_slice()),
175            OwnedColumn::Decimal75(precision, scale, col) => {
176                Column::Decimal75(*precision, *scale, col.as_slice())
177            }
178            OwnedColumn::Scalar(col) => Column::Scalar(col.as_slice()),
179            OwnedColumn::VarChar(col) => {
180                let scalars = col.iter().map(S::from).collect::<Vec<_>>();
181                let strs = col
182                    .iter()
183                    .map(|s| s.as_str() as &'a str)
184                    .collect::<Vec<_>>();
185                Column::VarChar((
186                    alloc.alloc_slice_clone(strs.as_slice()),
187                    alloc.alloc_slice_copy(scalars.as_slice()),
188                ))
189            }
190            OwnedColumn::VarBinary(col) => {
191                let scalars = col
192                    .iter()
193                    .map(|b| S::from_byte_slice_via_hash(b))
194                    .collect::<Vec<_>>();
195                let bytes = col.iter().map(|s| s as &'a [u8]).collect::<Vec<_>>();
196                Column::VarBinary((
197                    alloc.alloc_slice_clone(&bytes),
198                    alloc.alloc_slice_copy(scalars.as_slice()),
199                ))
200            }
201            OwnedColumn::TimestampTZ(tu, tz, col) => Column::TimestampTZ(*tu, *tz, col.as_slice()),
202        }
203    }
204
205    /// Returns the column as a slice of booleans if it is a boolean column. Otherwise, returns None.
206    pub(crate) fn as_boolean(&self) -> Option<&'a [bool]> {
207        match self {
208            Self::Boolean(col) => Some(col),
209            _ => None,
210        }
211    }
212
213    /// Returns the column as a slice of u8 if it is a uint8 column. Otherwise, returns None.
214    pub(crate) fn as_uint8(&self) -> Option<&'a [u8]> {
215        match self {
216            Self::Uint8(col) => Some(col),
217            _ => None,
218        }
219    }
220
221    /// Returns the column as a slice of i8 if it is a tinyint column. Otherwise, returns None.
222    pub(crate) fn as_tinyint(&self) -> Option<&'a [i8]> {
223        match self {
224            Self::TinyInt(col) => Some(col),
225            _ => None,
226        }
227    }
228
229    /// Returns the column as a slice of i16 if it is a smallint column. Otherwise, returns None.
230    pub(crate) fn as_smallint(&self) -> Option<&'a [i16]> {
231        match self {
232            Self::SmallInt(col) => Some(col),
233            _ => None,
234        }
235    }
236
237    /// Returns the column as a slice of i32 if it is an int column. Otherwise, returns None.
238    pub(crate) fn as_int(&self) -> Option<&'a [i32]> {
239        match self {
240            Self::Int(col) => Some(col),
241            _ => None,
242        }
243    }
244
245    /// Returns the column as a slice of i64 if it is a bigint column. Otherwise, returns None.
246    pub(crate) fn as_bigint(&self) -> Option<&'a [i64]> {
247        match self {
248            Self::BigInt(col) => Some(col),
249            _ => None,
250        }
251    }
252
253    /// Returns the column as a slice of i128 if it is an int128 column. Otherwise, returns None.
254    pub(crate) fn as_int128(&self) -> Option<&'a [i128]> {
255        match self {
256            Self::Int128(col) => Some(col),
257            _ => None,
258        }
259    }
260
261    /// Returns the column as a slice of scalars if it is a scalar column. Otherwise, returns None.
262    pub(crate) fn as_scalar(&self) -> Option<&'a [S]> {
263        match self {
264            Self::Scalar(col) => Some(col),
265            _ => None,
266        }
267    }
268
269    /// Returns the column as a slice of scalars if it is a decimal75 column. Otherwise, returns None.
270    pub(crate) fn as_decimal75(&self) -> Option<&'a [S]> {
271        match self {
272            Self::Decimal75(_, _, col) => Some(col),
273            _ => None,
274        }
275    }
276
277    /// Returns the column as a slice of strings and a slice of scalars if it is a varchar column. Otherwise, returns None.
278    pub(crate) fn as_varchar(&self) -> Option<(&'a [&'a str], &'a [S])> {
279        match self {
280            Self::VarChar((col, scals)) => Some((col, scals)),
281            _ => None,
282        }
283    }
284
285    /// Returns the column as a slice of strings and a slice of scalars if it is a varchar column. Otherwise, returns None.
286    pub(crate) fn as_varbinary(&self) -> Option<(&'a [&'a [u8]], &'a [S])> {
287        match self {
288            Self::VarBinary((col, scals)) => Some((col, scals)),
289            _ => None,
290        }
291    }
292
293    /// Returns the column as a slice of i64 if it is a timestamp column. Otherwise, returns None.
294    pub(crate) fn as_timestamptz(&self) -> Option<&'a [i64]> {
295        match self {
296            Self::TimestampTZ(_, _, col) => Some(col),
297            _ => None,
298        }
299    }
300
301    /// Returns element at index as scalar
302    ///
303    /// Note that if index is out of bounds, this function will return None
304    pub(crate) fn scalar_at(&self, index: usize) -> Option<S> {
305        (index < self.len()).then_some(match self {
306            Self::Boolean(col) => S::from(col[index]),
307            Self::Uint8(col) => S::from(col[index]),
308            Self::TinyInt(col) => S::from(col[index]),
309            Self::SmallInt(col) => S::from(col[index]),
310            Self::Int(col) => S::from(col[index]),
311            Self::BigInt(col) | Self::TimestampTZ(_, _, col) => S::from(col[index]),
312            Self::Int128(col) => S::from(col[index]),
313            Self::Scalar(col) | Self::Decimal75(_, _, col) => col[index],
314            Self::VarChar((_, scals)) | Self::VarBinary((_, scals)) => scals[index],
315        })
316    }
317
318    /// Convert a column to a vector of Scalar values
319    #[tracing::instrument(name = "Column::to_scalar", level = "debug", skip_all)]
320    pub(crate) fn to_scalar(self) -> Vec<S> {
321        match self {
322            Self::Boolean(col) => slice_cast_with(col, |b| S::from(b)),
323            Self::Decimal75(_, _, col) => slice_cast_with(col, |s| *s),
324            Self::VarChar((_, values)) => slice_cast_with(values, |s| *s),
325            Self::VarBinary((_, values)) => slice_cast_with(values, |s| *s),
326            Self::Uint8(col) => slice_cast_with(col, |i| S::from(i)),
327            Self::TinyInt(col) => slice_cast_with(col, |i| S::from(i)),
328            Self::SmallInt(col) => slice_cast_with(col, |i| S::from(i)),
329            Self::Int(col) => slice_cast_with(col, |i| S::from(i)),
330            Self::BigInt(col) => slice_cast_with(col, |i| S::from(i)),
331            Self::Int128(col) => slice_cast_with(col, |i| S::from(i)),
332            Self::Scalar(col) => slice_cast_with(col, |i| S::from(i)),
333            Self::TimestampTZ(_, _, col) => slice_cast_with(col, |i| S::from(i)),
334        }
335    }
336}
337
338#[cfg(test)]
339mod tests {
340    use super::*;
341    use crate::{base::scalar::test_scalar::TestScalar, proof_primitive::dory::DoryScalar};
342    use alloc::{string::String, vec};
343
344    #[test]
345    fn we_can_get_the_len_of_a_column() {
346        let precision = 10;
347        let scale = 2;
348
349        let scalar_values = [
350            TestScalar::from(1),
351            TestScalar::from(2),
352            TestScalar::from(3),
353        ];
354
355        // Test non-empty columns
356        let column = Column::<DoryScalar>::Boolean(&[true, false, true]);
357        assert_eq!(column.len(), 3);
358        assert!(!column.is_empty());
359
360        let column = Column::<DoryScalar>::TinyInt(&[1, 2, 3]);
361        assert_eq!(column.len(), 3);
362        assert!(!column.is_empty());
363
364        let column = Column::<TestScalar>::SmallInt(&[1, 2, 3]);
365        assert_eq!(column.len(), 3);
366        assert!(!column.is_empty());
367
368        let column = Column::<TestScalar>::Int(&[1, 2, 3]);
369        assert_eq!(column.len(), 3);
370        assert!(!column.is_empty());
371
372        let column = Column::<TestScalar>::BigInt(&[1, 2, 3]);
373        assert_eq!(column.len(), 3);
374        assert!(!column.is_empty());
375
376        let column = Column::VarChar((&["a", "b", "c"], &scalar_values));
377        assert_eq!(column.len(), 3);
378        assert!(!column.is_empty());
379
380        let column = Column::<DoryScalar>::Int128(&[1, 2, 3]);
381        assert_eq!(column.len(), 3);
382        assert!(!column.is_empty());
383
384        let column = Column::Scalar(&scalar_values);
385        assert_eq!(column.len(), 3);
386        assert!(!column.is_empty());
387
388        let decimal_data = [
389            TestScalar::from(1),
390            TestScalar::from(2),
391            TestScalar::from(3),
392        ];
393
394        let precision = Precision::new(precision).unwrap();
395        let column = Column::Decimal75(precision, scale, &decimal_data);
396        assert_eq!(column.len(), 3);
397        assert!(!column.is_empty());
398
399        // Test empty columns
400        let column = Column::<DoryScalar>::Boolean(&[]);
401        assert_eq!(column.len(), 0);
402        assert!(column.is_empty());
403
404        let column = Column::<DoryScalar>::TinyInt(&[]);
405        assert_eq!(column.len(), 0);
406        assert!(column.is_empty());
407
408        let column = Column::<TestScalar>::SmallInt(&[]);
409        assert_eq!(column.len(), 0);
410        assert!(column.is_empty());
411
412        let column = Column::<TestScalar>::Int(&[]);
413        assert_eq!(column.len(), 0);
414        assert!(column.is_empty());
415
416        let column = Column::<TestScalar>::BigInt(&[]);
417        assert_eq!(column.len(), 0);
418        assert!(column.is_empty());
419
420        let column = Column::<DoryScalar>::VarChar((&[], &[]));
421        assert_eq!(column.len(), 0);
422        assert!(column.is_empty());
423
424        let column = Column::<TestScalar>::Int128(&[]);
425        assert_eq!(column.len(), 0);
426        assert!(column.is_empty());
427
428        let column = Column::<DoryScalar>::Scalar(&[]);
429        assert_eq!(column.len(), 0);
430        assert!(column.is_empty());
431
432        let column: Column<'_, TestScalar> = Column::Decimal75(precision, scale, &[]);
433        assert_eq!(column.len(), 0);
434        assert!(column.is_empty());
435    }
436
437    #[test]
438    fn we_can_convert_owned_columns_to_columns_round_trip() {
439        let alloc = Bump::new();
440        // Integers
441        let owned_col: OwnedColumn<TestScalar> = OwnedColumn::Int128(vec![1, 2, 3, 4, 5]);
442        let col = Column::<TestScalar>::from_owned_column(&owned_col, &alloc);
443        assert_eq!(col, Column::Int128(&[1, 2, 3, 4, 5]));
444        let new_owned_col = (&col).into();
445        assert_eq!(owned_col, new_owned_col);
446
447        // Booleans
448        let owned_col: OwnedColumn<TestScalar> =
449            OwnedColumn::Boolean(vec![true, false, true, false, true]);
450        let col = Column::<TestScalar>::from_owned_column(&owned_col, &alloc);
451        assert_eq!(col, Column::Boolean(&[true, false, true, false, true]));
452        let new_owned_col = (&col).into();
453        assert_eq!(owned_col, new_owned_col);
454
455        // Strings
456        let strs = [
457            "Space and Time",
458            "Tér és Idő",
459            "Пространство и время",
460            "Spațiu și Timp",
461            "Spazju u Ħin",
462        ];
463        let scalars = strs.iter().map(TestScalar::from).collect::<Vec<_>>();
464        let owned_col = OwnedColumn::VarChar(
465            strs.iter()
466                .map(ToString::to_string)
467                .collect::<Vec<String>>(),
468        );
469        let col = Column::<TestScalar>::from_owned_column(&owned_col, &alloc);
470        assert_eq!(col, Column::VarChar((&strs, &scalars)));
471        let new_owned_col = (&col).into();
472        assert_eq!(owned_col, new_owned_col);
473
474        // Decimals
475        let scalars: Vec<TestScalar> = [1, 2, 3, 4, 5].iter().map(TestScalar::from).collect();
476        let owned_col: OwnedColumn<TestScalar> =
477            OwnedColumn::Decimal75(Precision::new(75).unwrap(), 127, scalars.clone());
478        let col = Column::<TestScalar>::from_owned_column(&owned_col, &alloc);
479        assert_eq!(
480            col,
481            Column::Decimal75(Precision::new(75).unwrap(), 127, &scalars)
482        );
483        let new_owned_col = (&col).into();
484        assert_eq!(owned_col, new_owned_col);
485    }
486
487    #[test]
488    fn we_can_get_the_data_size_of_a_column() {
489        let column = Column::<DoryScalar>::Boolean(&[true, false, true]);
490        assert_eq!(column.column_type().byte_size(), 1);
491        assert_eq!(column.column_type().bit_size(), 8);
492
493        let column = Column::<TestScalar>::TinyInt(&[1, 2, 3, 4]);
494        assert_eq!(column.column_type().byte_size(), 1);
495        assert_eq!(column.column_type().bit_size(), 8);
496
497        let column = Column::<TestScalar>::SmallInt(&[1, 2, 3, 4]);
498        assert_eq!(column.column_type().byte_size(), 2);
499        assert_eq!(column.column_type().bit_size(), 16);
500
501        let column = Column::<TestScalar>::Int(&[1, 2, 3]);
502        assert_eq!(column.column_type().byte_size(), 4);
503        assert_eq!(column.column_type().bit_size(), 32);
504
505        let column = Column::<TestScalar>::BigInt(&[1]);
506        assert_eq!(column.column_type().byte_size(), 8);
507        assert_eq!(column.column_type().bit_size(), 64);
508
509        let column = Column::<DoryScalar>::Int128(&[1, 2]);
510        assert_eq!(column.column_type().byte_size(), 16);
511        assert_eq!(column.column_type().bit_size(), 128);
512
513        let scalar_values = [
514            TestScalar::from(1),
515            TestScalar::from(2),
516            TestScalar::from(3),
517        ];
518
519        let column = Column::VarChar((&["a", "b", "c", "d", "e"], &scalar_values));
520        assert_eq!(column.column_type().byte_size(), 32);
521        assert_eq!(column.column_type().bit_size(), 256);
522
523        let column = Column::Scalar(&scalar_values);
524        assert_eq!(column.column_type().byte_size(), 32);
525        assert_eq!(column.column_type().bit_size(), 256);
526
527        let precision = 10;
528        let scale = 2;
529        let decimal_data = [
530            TestScalar::from(1),
531            TestScalar::from(2),
532            TestScalar::from(3),
533        ];
534
535        let precision = Precision::new(precision).unwrap();
536        let column = Column::Decimal75(precision, scale, &decimal_data);
537        assert_eq!(column.column_type().byte_size(), 32);
538        assert_eq!(column.column_type().bit_size(), 256);
539
540        let column: Column<'_, DoryScalar> =
541            Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::utc(), &[1, 2, 3]);
542        assert_eq!(column.column_type().byte_size(), 8);
543        assert_eq!(column.column_type().bit_size(), 64);
544    }
545
546    #[test]
547    fn we_can_get_length_of_varbinary_column() {
548        let raw_bytes: &[&[u8]] = &[b"foo", b"bar", b""];
549        let scalars: Vec<TestScalar> = raw_bytes
550            .iter()
551            .map(|b| TestScalar::from_le_bytes_mod_order(b))
552            .collect();
553
554        let column = Column::VarBinary((raw_bytes, &scalars));
555        assert_eq!(column.len(), 3);
556        assert!(!column.is_empty());
557        assert_eq!(column.column_type(), ColumnType::VarBinary);
558    }
559
560    #[test]
561    fn we_can_convert_varbinary_owned_column_to_column_and_back() {
562        use bumpalo::Bump;
563        let alloc = Bump::new();
564
565        let owned_varbinary = OwnedColumn::VarBinary(vec![b"abc".to_vec(), b"xyz".to_vec()]);
566
567        let column = Column::<TestScalar>::from_owned_column(&owned_varbinary, &alloc);
568        match column {
569            Column::VarBinary((bytes, scalars)) => {
570                assert_eq!(bytes.len(), 2);
571                assert_eq!(scalars.len(), 2);
572                assert_eq!(bytes[0], b"abc");
573                assert_eq!(bytes[1], b"xyz");
574            }
575            _ => panic!("Expected VarBinary column"),
576        }
577
578        let round_trip_owned: OwnedColumn<TestScalar> = (&column).into();
579        assert_eq!(owned_varbinary, round_trip_owned);
580    }
581}