Skip to main content

vortex_array/scalar/
scalar_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Core [`ScalarValue`] type definition.
5
6use std::fmt::Display;
7use std::fmt::Formatter;
8
9use itertools::Itertools;
10use vortex_buffer::BufferString;
11use vortex_buffer::ByteBuffer;
12use vortex_error::vortex_panic;
13
14use crate::dtype::DType;
15use crate::scalar::DecimalValue;
16use crate::scalar::PValue;
17use crate::scalar::Scalar;
18
/// The value stored in a [`Scalar`][crate::scalar::Scalar].
///
/// This enum represents the possible non-null values that can be stored in a scalar. When the
/// scalar is null, the value is represented as `None` in the `Option<ScalarValue>` field.
///
/// The enum carries no type information of its own: several logical types share the same variant
/// (see [`ScalarValue::Tuple`]), so a value is only fully interpretable together with the
/// [`DType`] it was created for.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ScalarValue {
    /// A boolean value.
    Bool(bool),
    /// A primitive numeric value.
    Primitive(PValue),
    /// A decimal value.
    Decimal(DecimalValue),
    /// A UTF-8 encoded string value.
    Utf8(BufferString),
    /// A binary (byte array) value.
    Binary(ByteBuffer),
    /// A tuple of potentially null scalar values.
    ///
    /// Used as the underlying representation for list, fixed-size list, and struct scalars.
    /// Each entry is `None` when the corresponding child value is null and `Some(_)` otherwise.
    Tuple(Vec<Option<ScalarValue>>),
    /// A row-specific scalar wrapped by `DType::Variant`.
    ///
    /// The inner scalar is boxed, which keeps this recursive variant at pointer size.
    Variant(Box<Scalar>),
}
42
43impl ScalarValue {
44    /// Returns the zero / identity value for the given [`DType`].
45    pub(super) fn zero_value(dtype: &DType) -> Self {
46        match dtype {
47            DType::Null => vortex_panic!("Null dtype has no zero value"),
48            DType::Bool(_) => Self::Bool(false),
49            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
50            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
51            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
52            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
53            DType::List(..) => Self::Tuple(vec![]),
54            DType::FixedSizeList(edt, size, _) => {
55                let elements = (0..*size).map(|_| Some(Self::zero_value(edt))).collect();
56                Self::Tuple(elements)
57            }
58            DType::Struct(fields, _) => {
59                let field_values = fields
60                    .fields()
61                    .map(|f| Some(Self::zero_value(&f)))
62                    .collect();
63                Self::Tuple(field_values)
64            }
65            DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
66            DType::Variant(_) => Self::Variant(Box::new(Scalar::null(DType::Null))),
67            DType::Extension(ext_dtype) => {
68                // Since we have no way to define a "zero" extension value (since we have no idea
69                // what the semantics of the extension is), a best effort attempt is to just use the
70                // zero storage value and try to make an extension scalar from that.
71                Self::zero_value(ext_dtype.storage_dtype())
72            }
73        }
74    }
75
76    /// A similar function to [`ScalarValue::zero_value`], but for nullable [`DType`]s, this returns
77    /// `None` instead.
78    ///
79    /// For non-nullable and nested types that may need null values in their children (as of right
80    /// now, that is _only_ `FixedSizeList` and `Struct`), this function will provide `None` as the
81    /// default child values (whereas [`ScalarValue::zero_value`] would provide `Some(_)`).
82    pub(super) fn default_value(dtype: &DType) -> Option<Self> {
83        if dtype.is_nullable() {
84            return None;
85        }
86
87        Some(match dtype {
88            DType::Null => vortex_panic!("Null dtype has no zero value"),
89            DType::Bool(_) => Self::Bool(false),
90            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
91            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
92            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
93            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
94            DType::List(..) => Self::Tuple(vec![]),
95            DType::FixedSizeList(edt, size, _) => {
96                let elements = (0..*size).map(|_| Self::default_value(edt)).collect();
97                Self::Tuple(elements)
98            }
99            DType::Struct(fields, _) => {
100                let field_values = fields.fields().map(|f| Self::default_value(&f)).collect();
101                Self::Tuple(field_values)
102            }
103            DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
104            DType::Variant(_) => Self::Variant(Box::new(Scalar::null(DType::Null))),
105            DType::Extension(ext_dtype) => {
106                // Since we have no way to define a "default" extension value (since we have no idea
107                // what the semantics of the extension is), a best effort attempt is to just use the
108                // default storage value and try to make an extension scalar from that.
109                Self::default_value(ext_dtype.storage_dtype())?
110            }
111        })
112    }
113}
114
115impl Display for ScalarValue {
116    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
117        match self {
118            ScalarValue::Bool(b) => write!(f, "{b}"),
119            ScalarValue::Primitive(p) => write!(f, "{p}"),
120            ScalarValue::Decimal(d) => write!(f, "{d}"),
121            ScalarValue::Utf8(s) => {
122                let bufstr = s.as_str();
123                let str_len = bufstr.chars().count();
124
125                if str_len > 10 {
126                    let prefix = String::from_iter(bufstr.chars().take(5));
127                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
128
129                    write!(f, "\"{prefix}..{suffix}\"")
130                } else {
131                    write!(f, "\"{bufstr}\"")
132                }
133            }
134            ScalarValue::Binary(b) => {
135                if b.len() > 10 {
136                    write!(
137                        f,
138                        "{}..{}",
139                        to_hex(&b[0..5]),
140                        to_hex(&b[b.len() - 5..b.len()]),
141                    )
142                } else {
143                    write!(f, "{}", to_hex(b))
144                }
145            }
146            ScalarValue::Tuple(elements) => {
147                write!(f, "[")?;
148                for (i, element) in elements.iter().enumerate() {
149                    if i > 0 {
150                        write!(f, ", ")?;
151                    }
152                    match element {
153                        None => write!(f, "null")?,
154                        Some(e) => write!(f, "{}", e)?,
155                    }
156                }
157                write!(f, "]")
158            }
159            ScalarValue::Variant(value) => write!(f, "{value}"),
160        }
161    }
162}
163
164/// Formats a byte slice as a hexadecimal string.
165fn to_hex(slice: &[u8]) -> String {
166    slice
167        .iter()
168        .format_with("", |f, b| b(&format_args!("{f:02x}")))
169        .to_string()
170}