Skip to main content

vortex_array/scalar/
scalar_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Core [`ScalarValue`] type definition.
5
6use std::cmp::Ordering;
7use std::fmt::Display;
8use std::fmt::Formatter;
9
10use itertools::Itertools;
11use vortex_buffer::BufferString;
12use vortex_buffer::ByteBuffer;
13use vortex_dtype::DType;
14use vortex_error::vortex_panic;
15
16use crate::scalar::DecimalValue;
17// use crate::scalar::ExtScalarValueRef;
18use crate::scalar::PValue;
19
/// The value stored in a [`Scalar`][crate::scalar::Scalar].
///
/// This enum represents the possible non-null values that can be stored in a scalar. When the
/// scalar is null, the value is represented as `None` in the `Option<ScalarValue>` field.
///
/// There are no dedicated variants for struct or fixed-size list values: the constructors in this
/// file ([`ScalarValue::zero_value`] and [`ScalarValue::default_value`]) represent both as a
/// [`ScalarValue::List`] holding one entry per field / element.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ScalarValue {
    /// A boolean value.
    Bool(bool),
    /// A primitive numeric value.
    Primitive(PValue),
    /// A decimal value.
    Decimal(DecimalValue),
    /// A UTF-8 encoded string value.
    Utf8(BufferString),
    /// A binary (byte array) value.
    Binary(ByteBuffer),
    /// A list of potentially null scalar values (`None` entries are null elements).
    List(Vec<Option<ScalarValue>>),
    // NOTE: the extension variant below is currently disabled (kept commented out).
    //
    // /// An extension value reference.
    // ///
    // /// This internally contains a `ScalarValue` and a vtable that implements
    // /// [`ExtScalarVTable`](crate::scalar::ExtScalarVTable)
    // Extension(ExtScalarValueRef),
}
44
45impl ScalarValue {
46    /// Returns the zero / identity value for the given [`DType`].
47    ///
48    /// # Zero Values
49    ///
50    /// Here is the list of zero values for each [`DType`] (when the [`DType`] is non-nullable):
51    ///
52    /// - `Null`: Does not have a "zero" value
53    /// - `Bool`: `false`
54    /// - `Primitive`: `0`
55    /// - `Decimal`: `0`
56    /// - `Utf8`: `""`
57    /// - `Binary`: An empty buffer
58    /// - `List`: An empty list
59    /// - `FixedSizeList`: A list (with correct size) of zero values, which is determined by the
60    ///   element [`DType`]
61    /// - `Struct`: A struct where each field has a zero value, which is determined by the field
62    ///   [`DType`]
63    ///
64    /// - `Extension`: TODO(connor): Is this right?
65    ///   The zero value of the storage [`DType`]
66    pub fn zero_value(dtype: &DType) -> Self {
67        match dtype {
68            DType::Null => vortex_panic!("Null dtype has no zero value"),
69            DType::Bool(_) => Self::Bool(false),
70            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
71            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
72            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
73            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
74            DType::List(..) => Self::List(vec![]),
75            DType::FixedSizeList(edt, size, _) => {
76                let elements = (0..*size).map(|_| Some(Self::zero_value(edt))).collect();
77                Self::List(elements)
78            }
79            DType::Struct(fields, _) => {
80                let field_values = fields
81                    .fields()
82                    .map(|f| Some(Self::zero_value(&f)))
83                    .collect();
84                Self::List(field_values)
85            }
86            DType::Extension(ext_dtype) => Self::zero_value(ext_dtype.storage_dtype()), // TODO(connor): Fix this!
87        }
88    }
89
90    /// A similar function to [`ScalarValue::zero_value`], but for nullable [`DType`]s, this returns
91    /// `None` instead.
92    ///
93    /// For non-nullable and nested types that may need null values in their children (as of right
94    /// now, that is _only_ `FixedSizeList` and `Struct`), this function will provide `None` as the
95    /// default child values (whereas [`ScalarValue::zero_value`] would provide `Some(_)`).
96    pub fn default_value(dtype: &DType) -> Option<Self> {
97        if dtype.is_nullable() {
98            return None;
99        }
100
101        Some(match dtype {
102            DType::Null => vortex_panic!("Null dtype has no zero value"),
103            DType::Bool(_) => Self::Bool(false),
104            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
105            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
106            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
107            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
108            DType::List(..) => Self::List(vec![]),
109            DType::FixedSizeList(edt, size, _) => {
110                let elements = (0..*size).map(|_| Self::default_value(edt)).collect();
111                Self::List(elements)
112            }
113            DType::Struct(fields, _) => {
114                let field_values = fields.fields().map(|f| Self::default_value(&f)).collect();
115                Self::List(field_values)
116            }
117            DType::Extension(ext_dtype) => Self::default_value(ext_dtype.storage_dtype())?, // TODO(connor): Fix this!
118        })
119    }
120}
121
122impl PartialOrd for ScalarValue {
123    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
124        match (self, other) {
125            (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a.partial_cmp(b),
126            (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a.partial_cmp(b),
127            (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a.partial_cmp(b),
128            (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b),
129            (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b),
130            (ScalarValue::List(a), ScalarValue::List(b)) => a.partial_cmp(b),
131            // (ScalarValue::Extension(a), ScalarValue::Extension(b)) => a.partial_cmp(b),
132            _ => None,
133        }
134    }
135}
136
137impl Display for ScalarValue {
138    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
139        match self {
140            ScalarValue::Bool(b) => write!(f, "{}", b),
141            ScalarValue::Primitive(p) => write!(f, "{}", p),
142            ScalarValue::Decimal(d) => write!(f, "{}", d),
143            ScalarValue::Utf8(s) => {
144                let bufstr = s.as_str();
145                let str_len = bufstr.chars().count();
146
147                if str_len > 10 {
148                    let prefix = String::from_iter(bufstr.chars().take(5));
149                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
150
151                    write!(f, "\"{prefix}..{suffix}\"")
152                } else {
153                    write!(f, "\"{bufstr}\"")
154                }
155            }
156            ScalarValue::Binary(b) => {
157                if b.len() > 10 {
158                    write!(
159                        f,
160                        "{}..{}",
161                        to_hex(&b[0..5]),
162                        to_hex(&b[b.len() - 5..b.len()]),
163                    )
164                } else {
165                    write!(f, "{}", to_hex(b))
166                }
167            }
168            ScalarValue::List(elements) => {
169                write!(f, "[")?;
170                for (i, element) in elements.iter().enumerate() {
171                    if i > 0 {
172                        write!(f, ", ")?;
173                    }
174                    match element {
175                        None => write!(f, "null")?,
176                        Some(e) => write!(f, "{}", e)?,
177                    }
178                }
179                write!(f, "]")
180            } //
181              // ScalarValue::Extension(e) => write!(f, "{}", e),
182        }
183    }
184}
185
186/// Formats a byte slice as a hexadecimal string.
187fn to_hex(slice: &[u8]) -> String {
188    slice
189        .iter()
190        .format_with("", |f, b| b(&format_args!("{f:02x}")))
191        .to_string()
192}