vortex_scalar/
scalar_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::sync::Arc;
6
7use bytes::BufMut;
8use itertools::Itertools;
9use prost::Message;
10use vortex_buffer::{BufferString, ByteBuffer};
11use vortex_dtype::DType;
12use vortex_error::{VortexResult, VortexUnwrap, vortex_bail, vortex_err};
13use vortex_proto::scalar as pb;
14
15use crate::decimal::DecimalValue;
16use crate::pvalue::PValue;
17use crate::{ScalarType, i256};
18
/// The untyped payload of a scalar.
///
/// A `ScalarValue` carries no type information of its own: it must be wrapped up with a
/// [`DType`] to make a [`super::Scalar`] before it can be interpreted.
///
/// Note that these values can be deserialized from JSON or other formats, so a [`PValue`] may
/// not have the correct width for what the [`DType`] expects. Primitive values should therefore
/// be read using [`super::PrimitiveScalar`], which will handle the conversion.
///
/// The wrapped `InnerScalarValue` representation is only visible inside this crate.
#[derive(Debug, Clone)]
pub struct ScalarValue(pub(crate) InnerScalarValue);
27
/// Crate-internal representation backing [`ScalarValue`].
///
/// A variant does not pin down a logical type by itself; `is_instance_of` decides which
/// [`DType`]s each variant is accepted for.
#[derive(Debug, Clone)]
pub(crate) enum InnerScalarValue {
    /// The null value. Accepted for any dtype whose `is_nullable()` returns `true`.
    Null,
    /// A boolean value.
    Bool(bool),
    /// A primitive value. Its width may differ from the one the dtype expects.
    Primitive(PValue),
    /// A decimal value.
    Decimal(DecimalValue),
    /// Raw bytes. Accepted for binary dtypes and also for decimal dtypes, in which case
    /// `as_decimal` decodes the bytes as a little-endian integer.
    Buffer(Arc<ByteBuffer>),
    /// String data. Accepted for `Utf8` dtypes.
    BufferString(Arc<BufferString>),
    /// Child values. Used both for the elements of a list and for the fields of a struct
    /// (matched positionally against the struct's fields).
    List(Arc<[ScalarValue]>),
}
38
39impl ScalarValue {
40    /// Serializes the scalar value to Protocol Buffers format.
41    pub fn to_protobytes<B: BufMut + Default>(&self) -> B {
42        let pb_scalar = pb::ScalarValue::from(self);
43
44        let mut buf = B::default();
45        pb_scalar
46            .encode(&mut buf)
47            .map_err(|e| vortex_err!("Failed to serialize protobuf {e}"))
48            .vortex_unwrap();
49        buf
50    }
51
52    /// Deserializes a scalar value from Protocol Buffers format.
53    pub fn from_protobytes(buf: &[u8]) -> VortexResult<Self> {
54        ScalarValue::try_from(
55            &pb::ScalarValue::decode(buf)
56                .map_err(|e| vortex_err!("Failed to deserialize protobuf {e}"))?,
57        )
58    }
59}
60
/// Renders `slice` as a lowercase hexadecimal string.
///
/// Every byte becomes exactly two digits (zero-padded) and no separator is inserted, so the
/// result is always `2 * slice.len()` characters long. An empty slice yields an empty string.
fn to_hex(slice: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    // Two output characters per input byte: size the buffer once so it never reallocates.
    let mut hex = String::with_capacity(slice.len() * 2);
    for &byte in slice {
        // High nibble first, then low nibble.
        hex.push(char::from(DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
67
68impl Display for ScalarValue {
69    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
70        write!(f, "{}", self.0)
71    }
72}
73
74impl Display for InnerScalarValue {
75    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
76        match self {
77            Self::Bool(b) => write!(f, "{b}"),
78            Self::Primitive(pvalue) => write!(f, "{pvalue}"),
79            Self::Decimal(value) => write!(f, "{value}"),
80            Self::Buffer(buf) => {
81                if buf.len() > 10 {
82                    write!(
83                        f,
84                        "{}..{}",
85                        to_hex(&buf[0..5]),
86                        to_hex(&buf[buf.len() - 5..buf.len()]),
87                    )
88                } else {
89                    write!(f, "{}", to_hex(buf))
90                }
91            }
92            Self::BufferString(bufstr) => {
93                let bufstr = bufstr.as_str();
94                let str_len = bufstr.chars().count();
95
96                if str_len > 10 {
97                    let prefix = String::from_iter(bufstr.chars().take(5));
98                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
99
100                    write!(f, "\"{prefix}..{suffix}\"")
101                } else {
102                    write!(f, "\"{bufstr}\"")
103                }
104            }
105            Self::List(elems) => {
106                write!(f, "[{}]", elems.iter().format(","))
107            }
108            Self::Null => write!(f, "null"),
109        }
110    }
111}
112
113impl ScalarValue {
114    /// Creates a null scalar value.
115    pub const fn null() -> Self {
116        ScalarValue(InnerScalarValue::Null)
117    }
118
119    /// Returns true if this is a null value.
120    pub fn is_null(&self) -> bool {
121        self.0.is_null()
122    }
123
124    /// Returns true if this value is compatible with the given data type.
125    pub fn is_instance_of(&self, dtype: &DType) -> bool {
126        self.0.is_instance_of(dtype)
127    }
128
129    /// Returns scalar as a null value
130    pub(crate) fn as_null(&self) -> VortexResult<()> {
131        self.0.as_null()
132    }
133
134    /// Returns scalar as a boolean value
135    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
136        self.0.as_bool()
137    }
138
139    /// Return scalar as a primitive value. PValues don't match dtypes but will be castable to the scalars dtype
140    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
141        self.0.as_pvalue()
142    }
143
144    /// Returns scalar as a decimal value
145    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
146        self.0.as_decimal()
147    }
148
149    /// Returns scalar as a binary buffer
150    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
151        self.0.as_buffer()
152    }
153
154    /// Returns scalar as a string buffer
155    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
156        self.0.as_buffer_string()
157    }
158
159    /// Returns scalar as a list value
160    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
161        self.0.as_list()
162    }
163}
164
165impl InnerScalarValue {
166    pub(crate) fn is_null(&self) -> bool {
167        matches!(self, InnerScalarValue::Null)
168    }
169
170    pub(crate) fn is_instance_of(&self, dtype: &DType) -> bool {
171        match (&self, dtype) {
172            (InnerScalarValue::Bool(_), DType::Bool(_)) => true,
173            (InnerScalarValue::Primitive(pvalue), DType::Primitive(ptype, _)) => {
174                pvalue.is_instance_of(ptype)
175            }
176            (InnerScalarValue::Decimal(_) | InnerScalarValue::Buffer(_), DType::Decimal(..)) => {
177                true
178            }
179            (InnerScalarValue::Buffer(_), DType::Binary(_)) => true,
180            (InnerScalarValue::BufferString(_), DType::Utf8(_)) => true,
181            (InnerScalarValue::List(values), DType::List(dtype, _)) => {
182                values.iter().all(|v| v.is_instance_of(dtype))
183            }
184            (InnerScalarValue::List(values), DType::Struct(structdt, _)) => values
185                .iter()
186                .zip(structdt.fields())
187                .all(|(v, dt)| v.is_instance_of(&dt)),
188            (InnerScalarValue::Null, dtype) => dtype.is_nullable(),
189            (_, DType::Extension(ext_dtype)) => self.is_instance_of(ext_dtype.storage_dtype()),
190            _ => false,
191        }
192    }
193
194    pub(crate) fn as_null(&self) -> VortexResult<()> {
195        match self {
196            InnerScalarValue::Null => Ok(()),
197            _ => Err(vortex_err!("Expected a Null scalar, found {:?}", self)),
198        }
199    }
200
201    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
202        match &self {
203            InnerScalarValue::Null => Ok(None),
204            InnerScalarValue::Bool(b) => Ok(Some(*b)),
205            _ => Err(vortex_err!("Expected a bool scalar, found {:?}", self)),
206        }
207    }
208
209    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
210    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
211    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
212    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
213        match &self {
214            InnerScalarValue::Null => Ok(None),
215            InnerScalarValue::Primitive(p) => Ok(Some(*p)),
216            _ => Err(vortex_err!("Expected a primitive scalar, found {:?}", self)),
217        }
218    }
219
220    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
221        match self {
222            InnerScalarValue::Null => Ok(None),
223            InnerScalarValue::Decimal(v) => Ok(Some(*v)),
224            InnerScalarValue::Buffer(b) => Ok(Some(match b.len() {
225                1 => DecimalValue::I8(b[0] as i8),
226                2 => DecimalValue::I16(i16::from_le_bytes(b.as_slice().try_into()?)),
227                4 => DecimalValue::I32(i32::from_le_bytes(b.as_slice().try_into()?)),
228                8 => DecimalValue::I64(i64::from_le_bytes(b.as_slice().try_into()?)),
229                16 => DecimalValue::I128(i128::from_le_bytes(b.as_slice().try_into()?)),
230                32 => DecimalValue::I256(i256::from_le_bytes(b.as_slice().try_into()?)),
231                l => vortex_bail!("Buffer is not a decimal value length {l}"),
232            })),
233            _ => vortex_bail!("Expected a decimal scalar, found {:?}", self),
234        }
235    }
236
237    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
238        match &self {
239            InnerScalarValue::Null => Ok(None),
240            InnerScalarValue::Buffer(b) => Ok(Some(b.clone())),
241            InnerScalarValue::BufferString(b) => {
242                Ok(Some(Arc::new(b.as_ref().clone().into_inner())))
243            }
244            _ => Err(vortex_err!("Expected a binary scalar, found {:?}", self)),
245        }
246    }
247
248    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
249        match &self {
250            InnerScalarValue::Null => Ok(None),
251            InnerScalarValue::Buffer(b) => {
252                Ok(Some(Arc::new(BufferString::try_from(b.as_ref().clone())?)))
253            }
254            InnerScalarValue::BufferString(b) => Ok(Some(b.clone())),
255            _ => Err(vortex_err!("Expected a string scalar, found {:?}", self)),
256        }
257    }
258
259    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
260        match &self {
261            InnerScalarValue::Null => Ok(None),
262            InnerScalarValue::List(l) => Ok(Some(l)),
263            _ => Err(vortex_err!("Expected a list scalar, found {:?}", self)),
264        }
265    }
266}
267
268impl<T> From<Option<T>> for ScalarValue
269where
270    T: ScalarType,
271    ScalarValue: From<T>,
272{
273    fn from(value: Option<T>) -> Self {
274        value
275            .map(ScalarValue::from)
276            .unwrap_or_else(|| ScalarValue(InnerScalarValue::Null))
277    }
278}
279
#[cfg(test)]
mod test {
    use vortex_dtype::{DType, FieldNames, Nullability, PType, StructFields};

    use crate::{InnerScalarValue, PValue, ScalarValue};

    /// A boolean value is an instance of `Bool` regardless of nullability.
    #[test]
    pub fn test_is_instance_of_bool() {
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
    }

    /// A primitive value is an instance of the primitive dtype with the matching ptype.
    #[test]
    pub fn test_is_instance_of_primitive() {
        assert!(
            ScalarValue(InnerScalarValue::Primitive(PValue::F64(0.0)))
                .is_instance_of(&DType::Primitive(PType::F64, Nullability::NonNullable))
        );
    }

    /// `List` values are checked element-wise against list dtypes and field-wise against
    /// struct dtypes.
    #[test]
    pub fn test_is_instance_of_list_and_struct() {
        let tbool = DType::Bool(Nullability::NonNullable);
        let tboolnull = DType::Bool(Nullability::Nullable);
        let tnull = DType::Null;

        let bool_null = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Null),
            ]
            .into(),
        ));
        let bool_bool = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Bool(false)),
            ]
            .into(),
        ));

        fn tlist(element: &DType) -> DType {
            DType::List(element.clone().into(), Nullability::NonNullable)
        }

        assert!(bool_null.is_instance_of(&tlist(&tboolnull)));
        assert!(!bool_null.is_instance_of(&tlist(&tbool)));
        // Non-null elements fit both the nullable and the non-nullable element type.
        assert!(bool_bool.is_instance_of(&tlist(&tboolnull)));
        assert!(bool_bool.is_instance_of(&tlist(&tbool)));

        fn tstruct(left: &DType, right: &DType) -> DType {
            DType::Struct(
                StructFields::new(
                    vec!["left".into(), "right".into()].into(),
                    vec![left.clone(), right.clone()],
                ),
                Nullability::NonNullable,
            )
        }

        assert!(bool_null.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(!bool_null.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tnull, &tbool)));

        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(!bool_bool.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_bool.is_instance_of(&tstruct(&tnull, &tbool)));
    }

    /// A null value is an instance of exactly the nullable dtypes (including `DType::Null`).
    #[test]
    pub fn test_is_instance_of_null() {
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            !ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );

        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Primitive(PType::U8, Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Utf8(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Binary(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Struct(
                StructFields::new(FieldNames::default(), [].into()),
                Nullability::Nullable,
            ))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::List(
                DType::Utf8(Nullability::NonNullable).into(),
                Nullability::Nullable
            ))
        );
        assert!(ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Null));
    }
}
406}