1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
//! Core [`Scalar`] type definition.
use std::cmp::Ordering;
use std::hash::Hash;
use std::hash::Hasher;
use vortex_error::VortexResult;
use vortex_error::vortex_ensure_eq;
use vortex_error::vortex_panic;
use crate::dtype::DType;
use crate::dtype::NativeDType;
use crate::dtype::PType;
use crate::scalar::Scalar;
use crate::scalar::ScalarValue;
impl Scalar {
// Constructors for null scalars.
/// Builds a null [`Scalar`] of the given [`DType`].
///
/// The resulting scalar carries `dtype` unchanged and has no value.
///
/// # Panics
///
/// Panics if `dtype` is not nullable, since a non-nullable type cannot hold a null.
pub fn null(dtype: DType) -> Self {
    let nullable = dtype.is_nullable();
    assert!(
        nullable,
        "Cannot create null scalar with non-nullable dtype {dtype}"
    );

    Self { value: None, dtype }
}
// TODO(connor): This method arguably shouldn't exist...
/// Builds a null [`Scalar`] whose type is derived from the native type `T`.
///
/// The [`DType`] of the result is the nullable form of `T::dtype()`, so this never panics on
/// nullability.
pub fn null_native<T: NativeDType>() -> Self {
    let dtype = T::dtype().as_nullable();

    Self { dtype, value: None }
}
// Constructors for potentially null scalars.
/// Builds a [`Scalar`] from a [`DType`] and an optional [`ScalarValue`] (`None` meaning null).
///
/// This is a test-only convenience wrapper around [`Self::try_new`] that unwraps the result.
///
/// # Panics
///
/// Panics if the given [`DType`] and [`ScalarValue`] are incompatible.
#[cfg(test)]
pub fn new(dtype: DType, value: Option<ScalarValue>) -> Self {
    use vortex_error::VortexExpect;

    let scalar = Self::try_new(dtype, value);
    scalar.vortex_expect("Failed to create Scalar")
}
/// Fallibly builds a [`Scalar`] from a [`DType`] and an optional [`ScalarValue`], where `None`
/// means the scalar is null.
///
/// The pair is checked with [`Self::validate`] before the scalar is constructed.
///
/// # Errors
///
/// Returns an error if the given [`DType`] and [`ScalarValue`] are incompatible.
pub fn try_new(dtype: DType, value: Option<ScalarValue>) -> VortexResult<Self> {
    let value_ref = value.as_ref();
    Self::validate(&dtype, value_ref)?;

    Ok(Self { dtype, value })
}
/// Builds a [`Scalar`] from a [`DType`] and an optional [`ScalarValue`] without verifying that
/// the two are compatible.
///
/// In builds with debug assertions enabled the pair is still validated, and a mismatch panics.
///
/// # Safety
///
/// The caller must guarantee that the [`DType`] and [`ScalarValue`] are compatible according to
/// the rules enforced by [`Self::validate`].
pub unsafe fn new_unchecked(dtype: DType, value: Option<ScalarValue>) -> Self {
    // Debug-only sanity check of the caller's safety contract.
    #[cfg(debug_assertions)]
    {
        use vortex_error::VortexExpect;

        let checked = Self::validate(&dtype, value.as_ref());
        checked.vortex_expect("Scalar::new_unchecked called with incompatible dtype and value");
    }

    Self { dtype, value }
}
/// Produces the default [`Scalar`] for the given [`DType`].
///
/// Nullable types default to a null scalar. Non-nullable, non-nested types default to their
/// zero value; see [`Scalar::zero_value`] for what "zero" means for each type.
///
/// Non-nullable nested types that may need null values in their children (currently _only_
/// `FixedSizeList` and `Struct`) get null default children.
pub fn default_value(dtype: &DType) -> Self {
    let default = ScalarValue::default_value(dtype);
    let dtype = dtype.clone();

    // SAFETY: We assume that `default_value` creates a valid `ScalarValue` for the `DType`.
    unsafe { Self::new_unchecked(dtype, default) }
}
/// Produces the non-null zero / identity [`Scalar`] for the given [`DType`].
///
/// # Zero Values
///
/// The zero value of each [`DType`] (when the [`DType`] is non-nullable) is:
///
/// - `Null`: has no "zero" value
/// - `Bool`: `false`
/// - `Primitive`: `0`
/// - `Decimal`: `0`
/// - `Utf8`: `""`
/// - `Binary`: an empty buffer
/// - `List`: an empty list
/// - `FixedSizeList`: a list of the correct size whose elements are the zero value of the
///   element [`DType`]
/// - `Struct`: a struct in which every field holds the zero value of that field's [`DType`]
/// - `Extension`: the zero value of the storage [`DType`]
pub fn zero_value(dtype: &DType) -> Self {
    let zero = Some(ScalarValue::zero_value(dtype));
    let dtype = dtype.clone();

    // SAFETY: We assume that `zero_value` creates a valid `ScalarValue` for the `DType`.
    unsafe { Self::new_unchecked(dtype, zero) }
}
// Other methods.
/// Returns `true` when both scalars have the same [`DType`] (disregarding nullability) and the
/// same value.
pub fn eq_ignore_nullability(&self, other: &Self) -> bool {
    let same_dtype = self.dtype.eq_ignore_nullability(&other.dtype);

    // Values are only compared once the types are known to line up.
    same_dtype && self.value == other.value
}
/// Returns the parts of the [`Scalar`].
pub fn into_parts(self) -> (DType, Option<ScalarValue>) {
(self.dtype, self.value)
}
/// Returns the [`DType`] of the [`Scalar`].
pub fn dtype(&self) -> &DType {
&self.dtype
}
/// Returns an optional [`ScalarValue`] of the [`Scalar`], where `None` means the value is null.
pub fn value(&self) -> Option<&ScalarValue> {
self.value.as_ref()
}
/// Returns the internal optional [`ScalarValue`], where `None` means the value is null,
/// consuming the [`Scalar`].
pub fn into_value(self) -> Option<ScalarValue> {
self.value
}
/// Reports whether the [`Scalar`] holds a value, i.e. is not null.
pub fn is_valid(&self) -> bool {
    self.value().is_some()
}
/// Reports whether the [`Scalar`] is null, i.e. holds no value.
pub fn is_null(&self) -> bool {
    self.value().is_none()
}
/// Reports whether the [`Scalar`] holds the zero value of its [`DType`].
///
/// Returns `None` if the scalar is null, otherwise returns `Some(true)` if the value is zero
/// and `Some(false)` otherwise.
///
/// "Zero" follows the definition documented on [`Scalar::zero_value`]: `false`, numeric `0`,
/// an empty string / buffer / list, and for `FixedSizeList` and `Struct` a value whose every
/// element or field is itself a (non-null) zero value. Extension scalars are judged by their
/// storage scalar.
///
/// # Panics
///
/// Panics if a non-null value is paired with `DType::Null`.
pub fn is_zero(&self) -> Option<bool> {
    let value = self.value()?;

    let is_zero = match self.dtype() {
        DType::Null => vortex_panic!("non-null value somehow had `DType::Null`"),
        DType::Bool(_) => !value.as_bool(),
        DType::Primitive(..) => value.as_primitive().is_zero(),
        DType::Decimal(..) => value.as_decimal().is_zero(),
        DType::Utf8(_) => value.as_utf8().is_empty(),
        DType::Binary(_) => value.as_binary().is_empty(),
        DType::List(..) => value.as_list().is_empty(),
        // A valid fixed-size list always has exactly `list_size` elements, so its length says
        // nothing about the contents: every element must itself be a non-null zero value.
        DType::FixedSizeList(..) => self.as_list().elements().is_some_and(|elements| {
            elements
                .into_iter()
                .all(|element| element.is_zero() == Some(true))
        }),
        // Likewise a valid struct always has one value per field, so inspect each field.
        DType::Struct(..) => self.as_struct().fields_iter().is_some_and(|fields| {
            fields
                .into_iter()
                .all(|field| field.is_zero() == Some(true))
        }),
        DType::Extension(_) => self.as_extension().to_storage_scalar().is_zero()?,
    };

    Some(is_zero)
}
/// Reinterprets this primitive scalar as another primitive type of the same byte width.
///
/// If the scalar already has the requested [`PType`], a clone of `self` is returned. Otherwise
/// the result has `ptype` with the nullability of `self`, and a missing primitive value is
/// passed through as a null value.
///
/// # Errors
///
/// Returns an error if the current [`PType`] and `ptype` have different byte widths.
///
/// # Panics
///
/// Per this method's existing contract, panics if the scalar is not a primitive type
/// (presumably raised by `as_primitive`).
pub fn primitive_reinterpret_cast(&self, ptype: PType) -> VortexResult<Self> {
    let source = self.as_primitive();
    let source_ptype = source.ptype();

    // Nothing to do when the type already matches.
    if source_ptype == ptype {
        return Ok(self.clone());
    }

    vortex_ensure_eq!(
        source_ptype.byte_width(),
        ptype.byte_width(),
        "can't reinterpret cast between integers of two different widths"
    );

    let target_dtype = DType::Primitive(ptype, self.dtype().nullability());
    let target_value = source
        .pvalue()
        .map(|p| ScalarValue::Primitive(p.reinterpret_cast(ptype)));

    Scalar::try_new(target_dtype, target_value)
}
/// Returns an **ESTIMATE** of the uncompressed size of the scalar, in bytes.
///
/// Fixed-width types are counted by their width, variable-length types by the length of their
/// value (or `0` when null), and nested types by the sum of their children.
///
/// Note that the protobuf serialization of scalars will likely have a different (but roughly
/// similar) length.
pub fn approx_nbytes(&self) -> usize {
    use crate::dtype::NativeDecimalType;
    use crate::dtype::i256;

    match self.dtype() {
        DType::Null => 0,
        DType::Bool(_) => 1,
        DType::Primitive(ptype, _) => ptype.byte_width(),
        // Decimals within the `i128` precision are counted as an `i128`, wider ones as `i256`.
        DType::Decimal(decimal, _) if decimal.precision() <= i128::MAX_PRECISION => {
            size_of::<i128>()
        }
        DType::Decimal(..) => size_of::<i256>(),
        DType::Utf8(_) => self.value().map_or(0, |v| v.as_utf8().len()),
        DType::Binary(_) => self.value().map_or(0, |v| v.as_binary().len()),
        DType::Struct(..) => match self.as_struct().fields_iter() {
            Some(fields) => fields
                .into_iter()
                .map(|field| field.approx_nbytes())
                .sum::<usize>(),
            None => 0,
        },
        DType::List(..) | DType::FixedSizeList(..) => match self.as_list().elements() {
            Some(elements) => elements
                .into_iter()
                .map(|element| element.approx_nbytes())
                .sum::<usize>(),
            None => 0,
        },
        DType::Extension(_) => self.as_extension().to_storage_scalar().approx_nbytes(),
    }
}
}
/// `PartialEq` is written by hand so that nullability is ignored: two scalars with the same
/// value whose types differ only in nullability compare as equal.
impl PartialEq for Scalar {
    fn eq(&self, other: &Self) -> bool {
        // Same comparison as the inherent helper: dtype (ignoring nullability), then value.
        self.eq_ignore_nullability(other)
    }
}
impl PartialOrd for Scalar {
    /// Orders two scalars, provided their data types are comparable.
    ///
    /// # Returns
    /// - `Some(Ordering)` when both scalars share a data type (nullability is ignored)
    /// - `None` when the data types differ
    ///
    /// # Ordering Rules
    /// For matching types:
    /// - A null value sorts before every non-null value
    /// - Non-null values use the natural ordering of their values
    ///
    /// # Examples
    /// ```ignore
    /// // Scalars of the same type can be ordered
    /// let a = Scalar::primitive(10i32, Nullability::NonNullable);
    /// let b = Scalar::primitive(20i32, Nullability::NonNullable);
    /// assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
    ///
    /// // Scalars of different types cannot
    /// let int_scalar = Scalar::primitive(10i32, Nullability::NonNullable);
    /// let str_scalar = Scalar::utf8("hello", Nullability::NonNullable);
    /// assert_eq!(int_scalar.partial_cmp(&str_scalar), None);
    ///
    /// // A null sorts before a non-null value
    /// let null = Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable));
    /// let value = Scalar::primitive(0i32, Nullability::Nullable);
    /// assert_eq!(null.partial_cmp(&value), Some(Ordering::Less));
    /// ```
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let comparable = self.dtype().eq_ignore_nullability(other.dtype());

        if comparable {
            // `Option` ordering places `None` (null) before any `Some` value.
            self.value().partial_cmp(&other.value())
        } else {
            None
        }
    }
}
/// `Hash` is written by hand to stay consistent with `PartialEq`: equality ignores nullability,
/// so hashing must ignore it too, otherwise equal scalars could produce different hashes.
impl Hash for Scalar {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let Self { dtype, value } = self;

        // Hash the non-nullable form of the dtype so nullability never affects the result.
        dtype.as_nonnullable().hash(state);
        value.hash(state);
    }
}