Skip to main content

vortex_array/scalar/
scalar_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Core [`ScalarValue`] type definition.
5
6use std::cmp::Ordering;
7use std::fmt::Display;
8use std::fmt::Formatter;
9
10use itertools::Itertools;
11use vortex_buffer::BufferString;
12use vortex_buffer::ByteBuffer;
13use vortex_error::vortex_panic;
14
15use crate::dtype::DType;
16use crate::scalar::DecimalValue;
17use crate::scalar::PValue;
18use crate::scalar::Scalar;
19
20/// The value stored in a [`Scalar`][crate::scalar::Scalar].
21///
22/// This enum represents the possible non-null values that can be stored in a scalar. When the
23/// scalar is null, the value is represented as `None` in the `Option<ScalarValue>` field.
24#[derive(Debug, Clone, PartialEq, Eq, Hash)]
25pub enum ScalarValue {
26    /// A boolean value.
27    Bool(bool),
28    /// A primitive numeric value.
29    Primitive(PValue),
30    /// A decimal value.
31    Decimal(DecimalValue),
32    /// A UTF-8 encoded string value.
33    Utf8(BufferString),
34    /// A binary (byte array) value.
35    Binary(ByteBuffer),
36    /// A list of potentially null scalar values.
37    List(Vec<Option<ScalarValue>>),
38    /// A row-specific scalar wrapped by `DType::Variant`.
39    Variant(Box<Scalar>),
40}
41
42impl ScalarValue {
43    /// Returns the zero / identity value for the given [`DType`].
44    pub(super) fn zero_value(dtype: &DType) -> Self {
45        match dtype {
46            DType::Null => vortex_panic!("Null dtype has no zero value"),
47            DType::Bool(_) => Self::Bool(false),
48            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
49            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
50            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
51            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
52            DType::List(..) => Self::List(vec![]),
53            DType::FixedSizeList(edt, size, _) => {
54                let elements = (0..*size).map(|_| Some(Self::zero_value(edt))).collect();
55                Self::List(elements)
56            }
57            DType::Struct(fields, _) => {
58                let field_values = fields
59                    .fields()
60                    .map(|f| Some(Self::zero_value(&f)))
61                    .collect();
62                Self::List(field_values)
63            }
64            DType::Extension(ext_dtype) => {
65                // Since we have no way to define a "zero" extension value (since we have no idea
66                // what the semantics of the extension is), a best effort attempt is to just use the
67                // zero storage value and try to make an extension scalar from that.
68                Self::zero_value(ext_dtype.storage_dtype())
69            }
70            DType::Variant(_) => Self::Variant(Box::new(Scalar::null(DType::Null))),
71        }
72    }
73
74    /// A similar function to [`ScalarValue::zero_value`], but for nullable [`DType`]s, this returns
75    /// `None` instead.
76    ///
77    /// For non-nullable and nested types that may need null values in their children (as of right
78    /// now, that is _only_ `FixedSizeList` and `Struct`), this function will provide `None` as the
79    /// default child values (whereas [`ScalarValue::zero_value`] would provide `Some(_)`).
80    pub(super) fn default_value(dtype: &DType) -> Option<Self> {
81        if dtype.is_nullable() {
82            return None;
83        }
84
85        Some(match dtype {
86            DType::Null => vortex_panic!("Null dtype has no zero value"),
87            DType::Bool(_) => Self::Bool(false),
88            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
89            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
90            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
91            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
92            DType::List(..) => Self::List(vec![]),
93            DType::FixedSizeList(edt, size, _) => {
94                let elements = (0..*size).map(|_| Self::default_value(edt)).collect();
95                Self::List(elements)
96            }
97            DType::Struct(fields, _) => {
98                let field_values = fields.fields().map(|f| Self::default_value(&f)).collect();
99                Self::List(field_values)
100            }
101            DType::Extension(ext_dtype) => {
102                // Since we have no way to define a "default" extension value (since we have no idea
103                // what the semantics of the extension is), a best effort attempt is to just use the
104                // default storage value and try to make an extension scalar from that.
105                Self::default_value(ext_dtype.storage_dtype())?
106            }
107            DType::Variant(_) => Self::Variant(Box::new(Scalar::null(DType::Null))),
108        })
109    }
110}
111
112impl PartialOrd for ScalarValue {
113    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
114        match (self, other) {
115            (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a.partial_cmp(b),
116            (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a.partial_cmp(b),
117            (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a.partial_cmp(b),
118            (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b),
119            (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b),
120            (ScalarValue::List(a), ScalarValue::List(b)) => a.partial_cmp(b),
121            (ScalarValue::Variant(a), ScalarValue::Variant(b)) => a.partial_cmp(b),
122            // (ScalarValue::Extension(a), ScalarValue::Extension(b)) => a.partial_cmp(b),
123            _ => None,
124        }
125    }
126}
127
128impl Display for ScalarValue {
129    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
130        match self {
131            ScalarValue::Bool(b) => write!(f, "{b}"),
132            ScalarValue::Primitive(p) => write!(f, "{p}"),
133            ScalarValue::Decimal(d) => write!(f, "{d}"),
134            ScalarValue::Utf8(s) => {
135                let bufstr = s.as_str();
136                let str_len = bufstr.chars().count();
137
138                if str_len > 10 {
139                    let prefix = String::from_iter(bufstr.chars().take(5));
140                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
141
142                    write!(f, "\"{prefix}..{suffix}\"")
143                } else {
144                    write!(f, "\"{bufstr}\"")
145                }
146            }
147            ScalarValue::Binary(b) => {
148                if b.len() > 10 {
149                    write!(
150                        f,
151                        "{}..{}",
152                        to_hex(&b[0..5]),
153                        to_hex(&b[b.len() - 5..b.len()]),
154                    )
155                } else {
156                    write!(f, "{}", to_hex(b))
157                }
158            }
159            ScalarValue::List(elements) => {
160                write!(f, "[")?;
161                for (i, element) in elements.iter().enumerate() {
162                    if i > 0 {
163                        write!(f, ", ")?;
164                    }
165                    match element {
166                        None => write!(f, "null")?,
167                        Some(e) => write!(f, "{}", e)?,
168                    }
169                }
170                write!(f, "]")
171            }
172            ScalarValue::Variant(value) => write!(f, "{value}"),
173        }
174    }
175}
176
177/// Formats a byte slice as a hexadecimal string.
178fn to_hex(slice: &[u8]) -> String {
179    slice
180        .iter()
181        .format_with("", |f, b| b(&format_args!("{f:02x}")))
182        .to_string()
183}