Skip to main content

vortex_array/scalar/
scalar_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Core [`ScalarValue`] type definition.
5
6use std::fmt::Display;
7use std::fmt::Formatter;
8
9use itertools::Itertools;
10use vortex_buffer::BufferString;
11use vortex_buffer::ByteBuffer;
12use vortex_error::vortex_panic;
13
14use crate::dtype::DType;
15use crate::scalar::DecimalValue;
16use crate::scalar::PValue;
17use crate::scalar::Scalar;
18
19/// The value stored in a [`Scalar`][crate::scalar::Scalar].
20///
21/// This enum represents the possible non-null values that can be stored in a scalar. When the
22/// scalar is null, the value is represented as `None` in the `Option<ScalarValue>` field.
23#[derive(Debug, Clone, PartialEq, Eq, Hash)]
24pub enum ScalarValue {
25    /// A boolean value.
26    Bool(bool),
27    /// A primitive numeric value.
28    Primitive(PValue),
29    /// A decimal value.
30    Decimal(DecimalValue),
31    /// A UTF-8 encoded string value.
32    Utf8(BufferString),
33    /// A binary (byte array) value.
34    Binary(ByteBuffer),
35    /// A tuple of potentially null scalar values.
36    ///
37    /// Used as the underlying representation for list, fixed-size list, and struct scalars.
38    Tuple(Vec<Option<ScalarValue>>),
39    /// A row-specific scalar wrapped by `DType::Variant`.
40    Variant(Box<Scalar>),
41}
42
43impl ScalarValue {
44    /// Returns the zero / identity value for the given [`DType`].
45    pub(super) fn zero_value(dtype: &DType) -> Self {
46        match dtype {
47            DType::Null => vortex_panic!("Null dtype has no zero value"),
48            DType::Bool(_) => Self::Bool(false),
49            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
50            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
51            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
52            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
53            DType::List(..) => Self::Tuple(vec![]),
54            DType::FixedSizeList(edt, size, _) => {
55                let elements = (0..*size).map(|_| Some(Self::zero_value(edt))).collect();
56                Self::Tuple(elements)
57            }
58            DType::Struct(fields, _) => {
59                let field_values = fields
60                    .fields()
61                    .map(|f| Some(Self::zero_value(&f)))
62                    .collect();
63                Self::Tuple(field_values)
64            }
65            DType::Extension(ext_dtype) => {
66                // Since we have no way to define a "zero" extension value (since we have no idea
67                // what the semantics of the extension is), a best effort attempt is to just use the
68                // zero storage value and try to make an extension scalar from that.
69                Self::zero_value(ext_dtype.storage_dtype())
70            }
71            DType::Variant(_) => Self::Variant(Box::new(Scalar::null(DType::Null))),
72        }
73    }
74
75    /// A similar function to [`ScalarValue::zero_value`], but for nullable [`DType`]s, this returns
76    /// `None` instead.
77    ///
78    /// For non-nullable and nested types that may need null values in their children (as of right
79    /// now, that is _only_ `FixedSizeList` and `Struct`), this function will provide `None` as the
80    /// default child values (whereas [`ScalarValue::zero_value`] would provide `Some(_)`).
81    pub(super) fn default_value(dtype: &DType) -> Option<Self> {
82        if dtype.is_nullable() {
83            return None;
84        }
85
86        Some(match dtype {
87            DType::Null => vortex_panic!("Null dtype has no zero value"),
88            DType::Bool(_) => Self::Bool(false),
89            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
90            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
91            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
92            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
93            DType::List(..) => Self::Tuple(vec![]),
94            DType::FixedSizeList(edt, size, _) => {
95                let elements = (0..*size).map(|_| Self::default_value(edt)).collect();
96                Self::Tuple(elements)
97            }
98            DType::Struct(fields, _) => {
99                let field_values = fields.fields().map(|f| Self::default_value(&f)).collect();
100                Self::Tuple(field_values)
101            }
102            DType::Extension(ext_dtype) => {
103                // Since we have no way to define a "default" extension value (since we have no idea
104                // what the semantics of the extension is), a best effort attempt is to just use the
105                // default storage value and try to make an extension scalar from that.
106                Self::default_value(ext_dtype.storage_dtype())?
107            }
108            DType::Variant(_) => Self::Variant(Box::new(Scalar::null(DType::Null))),
109        })
110    }
111}
112
113impl Display for ScalarValue {
114    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
115        match self {
116            ScalarValue::Bool(b) => write!(f, "{b}"),
117            ScalarValue::Primitive(p) => write!(f, "{p}"),
118            ScalarValue::Decimal(d) => write!(f, "{d}"),
119            ScalarValue::Utf8(s) => {
120                let bufstr = s.as_str();
121                let str_len = bufstr.chars().count();
122
123                if str_len > 10 {
124                    let prefix = String::from_iter(bufstr.chars().take(5));
125                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
126
127                    write!(f, "\"{prefix}..{suffix}\"")
128                } else {
129                    write!(f, "\"{bufstr}\"")
130                }
131            }
132            ScalarValue::Binary(b) => {
133                if b.len() > 10 {
134                    write!(
135                        f,
136                        "{}..{}",
137                        to_hex(&b[0..5]),
138                        to_hex(&b[b.len() - 5..b.len()]),
139                    )
140                } else {
141                    write!(f, "{}", to_hex(b))
142                }
143            }
144            ScalarValue::Tuple(elements) => {
145                write!(f, "[")?;
146                for (i, element) in elements.iter().enumerate() {
147                    if i > 0 {
148                        write!(f, ", ")?;
149                    }
150                    match element {
151                        None => write!(f, "null")?,
152                        Some(e) => write!(f, "{}", e)?,
153                    }
154                }
155                write!(f, "]")
156            }
157            ScalarValue::Variant(value) => write!(f, "{value}"),
158        }
159    }
160}
161
162/// Formats a byte slice as a hexadecimal string.
163fn to_hex(slice: &[u8]) -> String {
164    slice
165        .iter()
166        .format_with("", |f, b| b(&format_args!("{f:02x}")))
167        .to_string()
168}