Skip to main content

vortex_array/scalar/
scalar_value.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Core [`ScalarValue`] type definition.

6use std::cmp::Ordering;
7use std::fmt::Display;
8use std::fmt::Formatter;
9
10use itertools::Itertools;
11use vortex_buffer::BufferString;
12use vortex_buffer::ByteBuffer;
13use vortex_error::vortex_panic;
14
15use crate::dtype::DType;
16use crate::scalar::DecimalValue;
17use crate::scalar::PValue;
18
19/// The value stored in a [`Scalar`][crate::scalar::Scalar].
20///
21/// This enum represents the possible non-null values that can be stored in a scalar. When the
22/// scalar is null, the value is represented as `None` in the `Option<ScalarValue>` field.
23#[derive(Debug, Clone, PartialEq, Eq, Hash)]
24pub enum ScalarValue {
25    /// A boolean value.
26    Bool(bool),
27    /// A primitive numeric value.
28    Primitive(PValue),
29    /// A decimal value.
30    Decimal(DecimalValue),
31    /// A UTF-8 encoded string value.
32    Utf8(BufferString),
33    /// A binary (byte array) value.
34    Binary(ByteBuffer),
35    /// A list of potentially null scalar values.
36    List(Vec<Option<ScalarValue>>),
37}
38
39impl ScalarValue {
40    /// Returns the zero / identity value for the given [`DType`].
41    pub(super) fn zero_value(dtype: &DType) -> Self {
42        match dtype {
43            DType::Null => vortex_panic!("Null dtype has no zero value"),
44            DType::Bool(_) => Self::Bool(false),
45            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
46            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
47            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
48            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
49            DType::List(..) => Self::List(vec![]),
50            DType::FixedSizeList(edt, size, _) => {
51                let elements = (0..*size).map(|_| Some(Self::zero_value(edt))).collect();
52                Self::List(elements)
53            }
54            DType::Struct(fields, _) => {
55                let field_values = fields
56                    .fields()
57                    .map(|f| Some(Self::zero_value(&f)))
58                    .collect();
59                Self::List(field_values)
60            }
61            DType::Extension(ext_dtype) => {
62                // Since we have no way to define a "zero" extension value (since we have no idea
63                // what the semantics of the extension is), a best effort attempt is to just use the
64                // zero storage value and try to make an extension scalar from that.
65                Self::zero_value(ext_dtype.storage_dtype())
66            }
67        }
68    }
69
70    /// A similar function to [`ScalarValue::zero_value`], but for nullable [`DType`]s, this returns
71    /// `None` instead.
72    ///
73    /// For non-nullable and nested types that may need null values in their children (as of right
74    /// now, that is _only_ `FixedSizeList` and `Struct`), this function will provide `None` as the
75    /// default child values (whereas [`ScalarValue::zero_value`] would provide `Some(_)`).
76    pub(super) fn default_value(dtype: &DType) -> Option<Self> {
77        if dtype.is_nullable() {
78            return None;
79        }
80
81        Some(match dtype {
82            DType::Null => vortex_panic!("Null dtype has no zero value"),
83            DType::Bool(_) => Self::Bool(false),
84            DType::Primitive(ptype, _) => Self::Primitive(PValue::zero(ptype)),
85            DType::Decimal(dt, ..) => Self::Decimal(DecimalValue::zero(dt)),
86            DType::Utf8(_) => Self::Utf8(BufferString::empty()),
87            DType::Binary(_) => Self::Binary(ByteBuffer::empty()),
88            DType::List(..) => Self::List(vec![]),
89            DType::FixedSizeList(edt, size, _) => {
90                let elements = (0..*size).map(|_| Self::default_value(edt)).collect();
91                Self::List(elements)
92            }
93            DType::Struct(fields, _) => {
94                let field_values = fields.fields().map(|f| Self::default_value(&f)).collect();
95                Self::List(field_values)
96            }
97            DType::Extension(ext_dtype) => {
98                // Since we have no way to define a "default" extension value (since we have no idea
99                // what the semantics of the extension is), a best effort attempt is to just use the
100                // default storage value and try to make an extension scalar from that.
101                Self::default_value(ext_dtype.storage_dtype())?
102            }
103        })
104    }
105}
106
107impl PartialOrd for ScalarValue {
108    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
109        match (self, other) {
110            (ScalarValue::Bool(a), ScalarValue::Bool(b)) => a.partial_cmp(b),
111            (ScalarValue::Primitive(a), ScalarValue::Primitive(b)) => a.partial_cmp(b),
112            (ScalarValue::Decimal(a), ScalarValue::Decimal(b)) => a.partial_cmp(b),
113            (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b),
114            (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b),
115            (ScalarValue::List(a), ScalarValue::List(b)) => a.partial_cmp(b),
116            // (ScalarValue::Extension(a), ScalarValue::Extension(b)) => a.partial_cmp(b),
117            _ => None,
118        }
119    }
120}
121
122impl Display for ScalarValue {
123    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
124        match self {
125            ScalarValue::Bool(b) => write!(f, "{b}"),
126            ScalarValue::Primitive(p) => write!(f, "{p}"),
127            ScalarValue::Decimal(d) => write!(f, "{d}"),
128            ScalarValue::Utf8(s) => {
129                let bufstr = s.as_str();
130                let str_len = bufstr.chars().count();
131
132                if str_len > 10 {
133                    let prefix = String::from_iter(bufstr.chars().take(5));
134                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
135
136                    write!(f, "\"{prefix}..{suffix}\"")
137                } else {
138                    write!(f, "\"{bufstr}\"")
139                }
140            }
141            ScalarValue::Binary(b) => {
142                if b.len() > 10 {
143                    write!(
144                        f,
145                        "{}..{}",
146                        to_hex(&b[0..5]),
147                        to_hex(&b[b.len() - 5..b.len()]),
148                    )
149                } else {
150                    write!(f, "{}", to_hex(b))
151                }
152            }
153            ScalarValue::List(elements) => {
154                write!(f, "[")?;
155                for (i, element) in elements.iter().enumerate() {
156                    if i > 0 {
157                        write!(f, ", ")?;
158                    }
159                    match element {
160                        None => write!(f, "null")?,
161                        Some(e) => write!(f, "{}", e)?,
162                    }
163                }
164                write!(f, "]")
165            }
166        }
167    }
168}
169
170/// Formats a byte slice as a hexadecimal string.
171fn to_hex(slice: &[u8]) -> String {
172    slice
173        .iter()
174        .format_with("", |f, b| b(&format_args!("{f:02x}")))
175        .to_string()
176}