Skip to main content

vortex_array/scalar/
scalar_impl.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Core [`Scalar`] type definition.
5
6use std::cmp::Ordering;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use vortex_error::VortexResult;
11use vortex_error::vortex_ensure_eq;
12use vortex_error::vortex_panic;
13
14use crate::dtype::DType;
15use crate::dtype::NativeDType;
16use crate::dtype::PType;
17use crate::scalar::Scalar;
18use crate::scalar::ScalarValue;
19
20impl Scalar {
21    // Constructors for null scalars.
22
23    /// Creates a new null [`Scalar`] with the given [`DType`].
24    ///
25    /// # Panics
26    ///
27    /// Panics if the given [`DType`] is non-nullable.
28    pub fn null(dtype: DType) -> Self {
29        assert!(
30            dtype.is_nullable(),
31            "Cannot create null scalar with non-nullable dtype {dtype}"
32        );
33
34        Self { dtype, value: None }
35    }
36
37    // TODO(connor): This method arguably shouldn't exist...
38    /// Creates a new null [`Scalar`] for the given scalar type.
39    ///
40    /// The resulting scalar will have a nullable version of the type's data type.
41    pub fn null_native<T: NativeDType>() -> Self {
42        Self {
43            dtype: T::dtype().as_nullable(),
44            value: None,
45        }
46    }
47
48    // Constructors for potentially null scalars.
49
50    /// Creates a new [`Scalar`] with the given [`DType`] and potentially null [`ScalarValue`].
51    ///
52    /// This is just a helper function for tests.
53    ///
54    /// # Panics
55    ///
56    /// Panics if the given [`DType`] and [`ScalarValue`] are incompatible.
57    #[cfg(test)]
58    pub fn new(dtype: DType, value: Option<ScalarValue>) -> Self {
59        use vortex_error::VortexExpect;
60
61        Self::try_new(dtype, value).vortex_expect("Failed to create Scalar")
62    }
63
64    /// Attempts to create a new [`Scalar`] with the given [`DType`] and potentially null
65    /// [`ScalarValue`].
66    ///
67    /// # Errors
68    ///
69    /// Returns an error if the given [`DType`] and [`ScalarValue`] are incompatible.
70    pub fn try_new(dtype: DType, value: Option<ScalarValue>) -> VortexResult<Self> {
71        Self::validate(&dtype, value.as_ref())?;
72
73        Ok(Self { dtype, value })
74    }
75
76    /// Creates a new [`Scalar`] with the given [`DType`] and potentially null [`ScalarValue`]
77    /// without checking compatibility.
78    ///
79    /// # Safety
80    ///
81    /// The caller must ensure that the given [`DType`] and [`ScalarValue`] are compatible per the
82    /// rules defined in [`Self::validate`].
83    pub unsafe fn new_unchecked(dtype: DType, value: Option<ScalarValue>) -> Self {
84        #[cfg(debug_assertions)]
85        {
86            use vortex_error::VortexExpect;
87
88            Self::validate(&dtype, value.as_ref())
89                .vortex_expect("Scalar::new_unchecked called with incompatible dtype and value");
90        }
91
92        Self { dtype, value }
93    }
94
95    /// Returns a default value for the given [`DType`].
96    ///
97    /// For nullable types, this returns a null scalar. For non-nullable and non-nested types, this
98    /// returns the zero value for the type.
99    ///
100    /// See [`Scalar::zero_value`] for more details about "zero" values.
101    ///
102    /// For non-nullable and nested types that may need null values in their children (as of right
103    /// now, that is _only_ `FixedSizeList` and `Struct`), this function will provide null default
104    /// children.
105    pub fn default_value(dtype: &DType) -> Self {
106        let value = ScalarValue::default_value(dtype);
107
108        // SAFETY: We assume that `default_value` creates a valid `ScalarValue` for the `DType`.
109        unsafe { Self::new_unchecked(dtype.clone(), value) }
110    }
111
112    /// Returns a non-null zero / identity value for the given [`DType`].
113    ///
114    /// # Zero Values
115    ///
116    /// Here is the list of zero values for each [`DType`] (when the [`DType`] is non-nullable):
117    ///
118    /// - `Null`: Does not have a "zero" value
119    /// - `Bool`: `false`
120    /// - `Primitive`: `0`
121    /// - `Decimal`: `0`
122    /// - `Utf8`: `""`
123    /// - `Binary`: An empty buffer
124    /// - `List`: An empty list
125    /// - `FixedSizeList`: A list (with correct size) of zero values, which is determined by the
126    ///   element [`DType`]
127    /// - `Struct`: A struct where each field has a zero value, which is determined by the field
128    ///   [`DType`]
129    /// - `Extension`: The zero value of the storage [`DType`]
130    pub fn zero_value(dtype: &DType) -> Self {
131        let value = ScalarValue::zero_value(dtype);
132
133        // SAFETY: We assume that `zero_value` creates a valid `ScalarValue` for the `DType`.
134        unsafe { Self::new_unchecked(dtype.clone(), Some(value)) }
135    }
136
137    // Other methods.
138
139    /// Check if two scalars are equal, ignoring nullability of the [`DType`].
140    pub fn eq_ignore_nullability(&self, other: &Self) -> bool {
141        self.dtype.eq_ignore_nullability(&other.dtype) && self.value == other.value
142    }
143
144    /// Returns the parts of the [`Scalar`].
145    pub fn into_parts(self) -> (DType, Option<ScalarValue>) {
146        (self.dtype, self.value)
147    }
148
149    /// Returns the [`DType`] of the [`Scalar`].
150    pub fn dtype(&self) -> &DType {
151        &self.dtype
152    }
153
154    /// Returns an optional [`ScalarValue`] of the [`Scalar`], where `None` means the value is null.
155    pub fn value(&self) -> Option<&ScalarValue> {
156        self.value.as_ref()
157    }
158
159    /// Returns the internal optional [`ScalarValue`], where `None` means the value is null,
160    /// consuming the [`Scalar`].
161    pub fn into_value(self) -> Option<ScalarValue> {
162        self.value
163    }
164
165    /// Returns `true` if the [`Scalar`] has a non-null value.
166    pub fn is_valid(&self) -> bool {
167        self.value.is_some()
168    }
169
170    /// Returns `true` if the [`Scalar`] is null.
171    pub fn is_null(&self) -> bool {
172        self.value.is_none()
173    }
174
175    /// Returns `true` if the [`Scalar`] has a non-null zero value.
176    ///
177    /// Returns `None` if the scalar is null, otherwise returns `Some(true)` if the value is zero
178    /// and `Some(false)` otherwise.
179    pub fn is_zero(&self) -> Option<bool> {
180        let value = self.value()?;
181
182        let is_zero = match self.dtype() {
183            DType::Null => vortex_panic!("non-null value somehow had `DType::Null`"),
184            DType::Bool(_) => !value.as_bool(),
185            DType::Primitive(..) => value.as_primitive().is_zero(),
186            DType::Decimal(..) => value.as_decimal().is_zero(),
187            DType::Utf8(_) => value.as_utf8().is_empty(),
188            DType::Binary(_) => value.as_binary().is_empty(),
189            DType::List(..) => value.as_list().is_empty(),
190            DType::FixedSizeList(_, list_size, _) => value.as_list().len() == *list_size as usize,
191            DType::Struct(struct_fields, _) => value.as_list().len() == struct_fields.nfields(),
192            DType::Extension(_) => self.as_extension().to_storage_scalar().is_zero()?,
193        };
194
195        Some(is_zero)
196    }
197
198    /// Reinterprets the bytes of this scalar as a different primitive type.
199    ///
200    /// # Errors
201    ///
202    /// Panics if the scalar is not a primitive type or if the types have different byte widths.
203    pub fn primitive_reinterpret_cast(&self, ptype: PType) -> VortexResult<Self> {
204        let primitive = self.as_primitive();
205        if primitive.ptype() == ptype {
206            return Ok(self.clone());
207        }
208
209        vortex_ensure_eq!(
210            primitive.ptype().byte_width(),
211            ptype.byte_width(),
212            "can't reinterpret cast between integers of two different widths"
213        );
214
215        Scalar::try_new(
216            DType::Primitive(ptype, self.dtype().nullability()),
217            primitive
218                .pvalue()
219                .map(|p| p.reinterpret_cast(ptype))
220                .map(ScalarValue::Primitive),
221        )
222    }
223
224    /// Returns an **ESTIMATE** of the size of the scalar in bytes, uncompressed.
225    ///
226    /// Note that the protobuf serialization of scalars will likely have a different (but roughly
227    /// similar) length.
228    pub fn approx_nbytes(&self) -> usize {
229        use crate::dtype::NativeDecimalType;
230        use crate::dtype::i256;
231
232        match self.dtype() {
233            DType::Null => 0,
234            DType::Bool(_) => 1,
235            DType::Primitive(ptype, _) => ptype.byte_width(),
236            DType::Decimal(dt, _) => {
237                if dt.precision() <= i128::MAX_PRECISION {
238                    size_of::<i128>()
239                } else {
240                    size_of::<i256>()
241                }
242            }
243            DType::Utf8(_) => self
244                .value()
245                .map_or_else(|| 0, |value| value.as_utf8().len()),
246            DType::Binary(_) => self
247                .value()
248                .map_or_else(|| 0, |value| value.as_binary().len()),
249            DType::Struct(..) => self
250                .as_struct()
251                .fields_iter()
252                .map(|fields| fields.into_iter().map(|f| f.approx_nbytes()).sum::<usize>())
253                .unwrap_or_default(),
254            DType::List(..) | DType::FixedSizeList(..) => self
255                .as_list()
256                .elements()
257                .map(|fields| fields.into_iter().map(|f| f.approx_nbytes()).sum::<usize>())
258                .unwrap_or_default(),
259            DType::Extension(_) => self.as_extension().to_storage_scalar().approx_nbytes(),
260        }
261    }
262}
263
264/// We implement `PartialEq` manually because we want to ignore nullability when comparing scalars.
265/// Two scalars with the same value but different nullability should be considered equal.
266impl PartialEq for Scalar {
267    fn eq(&self, other: &Self) -> bool {
268        self.dtype.eq_ignore_nullability(&other.dtype) && self.value == other.value
269    }
270}
271
272impl PartialOrd for Scalar {
273    /// Compares two scalar values for ordering.
274    ///
275    /// # Returns
276    /// - `Some(Ordering)` if both scalars have the same data type (ignoring nullability)
277    /// - `None` if the scalars have different data types
278    ///
279    /// # Ordering Rules
280    /// When types match, the ordering follows these rules:
281    /// - Null values are considered less than all non-null values
282    /// - Non-null values are compared according to their natural ordering
283    ///
284    /// # Examples
285    /// ```ignore
286    /// // Same types compare successfully
287    /// let a = Scalar::primitive(10i32, Nullability::NonNullable);
288    /// let b = Scalar::primitive(20i32, Nullability::NonNullable);
289    /// assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
290    ///
291    /// // Different types return None
292    /// let int_scalar = Scalar::primitive(10i32, Nullability::NonNullable);
293    /// let str_scalar = Scalar::utf8("hello", Nullability::NonNullable);
294    /// assert_eq!(int_scalar.partial_cmp(&str_scalar), None);
295    ///
296    /// // Nulls are less than non-nulls
297    /// let null = Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable));
298    /// let value = Scalar::primitive(0i32, Nullability::Nullable);
299    /// assert_eq!(null.partial_cmp(&value), Some(Ordering::Less));
300    /// ```
301    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
302        if !self.dtype().eq_ignore_nullability(other.dtype()) {
303            return None;
304        }
305        self.value().partial_cmp(&other.value())
306    }
307}
308
309/// We implement `Hash` manually to be consistent with `PartialEq`. Since we ignore nullability
310/// in equality comparisons, we must also ignore it when hashing to maintain the invariant that
311/// equal values have equal hashes.
312impl Hash for Scalar {
313    fn hash<H: Hasher>(&self, state: &mut H) {
314        self.dtype.as_nonnullable().hash(state);
315        self.value.hash(state);
316    }
317}