vortex_scalar/scalar_value/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod binary;
5mod bool;
6mod decimal;
7mod list;
8mod primitive;
9mod utf8;
10
11use std::fmt::Display;
12use std::sync::Arc;
13
14use bytes::BufMut;
15use itertools::Itertools;
16use prost::Message;
17use vortex_buffer::{BufferString, ByteBuffer};
18use vortex_dtype::DType;
19use vortex_error::{VortexResult, VortexUnwrap, vortex_bail, vortex_err};
20use vortex_proto::scalar as pb;
21
22use crate::decimal::DecimalValue;
23use crate::pvalue::PValue;
24use crate::{ScalarType, i256};
25
26/// Represents the internal data of a scalar value. Must be interpreted by wrapping up with a
27/// [`DType`] to make a [`super::Scalar`].
28///
29/// Note that these values can be deserialized from JSON or other formats. So a [`PValue`] may not
30/// have the correct width for what the [`DType`] expects. Primitive values should therefore be
31/// read using [`super::PrimitiveScalar`] which will handle the conversion.
32#[derive(Debug, Clone)]
33pub struct ScalarValue(pub(crate) InnerScalarValue);
34
35#[derive(Debug, Clone)]
36pub(crate) enum InnerScalarValue {
37    Null,
38    Bool(bool),
39    Primitive(PValue),
40    Decimal(DecimalValue),
41    Buffer(Arc<ByteBuffer>),
42    BufferString(Arc<BufferString>),
43    List(Arc<[ScalarValue]>),
44}
45
46impl ScalarValue {
47    /// Serializes the scalar value to Protocol Buffers format.
48    pub fn to_protobytes<B: BufMut + Default>(&self) -> B {
49        let pb_scalar = pb::ScalarValue::from(self);
50
51        let mut buf = B::default();
52        pb_scalar
53            .encode(&mut buf)
54            .map_err(|e| vortex_err!("Failed to serialize protobuf {e}"))
55            .vortex_unwrap();
56        buf
57    }
58
59    /// Deserializes a scalar value from Protocol Buffers format.
60    pub fn from_protobytes(buf: &[u8]) -> VortexResult<Self> {
61        ScalarValue::try_from(
62            &pb::ScalarValue::decode(buf)
63                .map_err(|e| vortex_err!("Failed to deserialize protobuf {e}"))?,
64        )
65    }
66}
67
/// Renders `slice` as lowercase hexadecimal, two zero-padded digits per byte and no separators.
///
/// An empty slice yields an empty string.
fn to_hex(slice: &[u8]) -> String {
    use std::fmt::Write as _;

    // Every byte becomes exactly two characters, so the output size is known up front.
    let mut hex = String::with_capacity(slice.len() * 2);
    for byte in slice {
        // Writing into a `String` cannot fail, so the result carries no information.
        let _ = write!(hex, "{byte:02x}");
    }
    hex
}
74
75impl Display for ScalarValue {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        write!(f, "{}", self.0)
78    }
79}
80
81impl Display for InnerScalarValue {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        match self {
84            Self::Bool(b) => write!(f, "{b}"),
85            Self::Primitive(pvalue) => write!(f, "{pvalue}"),
86            Self::Decimal(value) => write!(f, "{value}"),
87            Self::Buffer(buf) => {
88                if buf.len() > 10 {
89                    write!(
90                        f,
91                        "{}..{}",
92                        to_hex(&buf[0..5]),
93                        to_hex(&buf[buf.len() - 5..buf.len()]),
94                    )
95                } else {
96                    write!(f, "{}", to_hex(buf))
97                }
98            }
99            Self::BufferString(bufstr) => {
100                let bufstr = bufstr.as_str();
101                let str_len = bufstr.chars().count();
102
103                if str_len > 10 {
104                    let prefix = String::from_iter(bufstr.chars().take(5));
105                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
106
107                    write!(f, "\"{prefix}..{suffix}\"")
108                } else {
109                    write!(f, "\"{bufstr}\"")
110                }
111            }
112            Self::List(elems) => {
113                write!(f, "[{}]", elems.iter().format(","))
114            }
115            Self::Null => write!(f, "null"),
116        }
117    }
118}
119
120impl ScalarValue {
121    /// Creates a null scalar value.
122    pub const fn null() -> Self {
123        ScalarValue(InnerScalarValue::Null)
124    }
125
126    /// Returns true if this is a null value.
127    pub fn is_null(&self) -> bool {
128        self.0.is_null()
129    }
130
131    /// Returns true if this value is compatible with the given data type.
132    pub fn is_instance_of(&self, dtype: &DType) -> bool {
133        self.0.is_instance_of(dtype)
134    }
135
136    /// Returns scalar as a null value
137    pub fn as_null(&self) -> VortexResult<()> {
138        self.0.as_null()
139    }
140
141    /// Returns scalar as a boolean value
142    pub fn as_bool(&self) -> VortexResult<Option<bool>> {
143        self.0.as_bool()
144    }
145
146    /// Return scalar as a primitive value. PValues don't match dtypes but will be castable to the scalars dtype
147    pub fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
148        self.0.as_pvalue()
149    }
150
151    /// Returns scalar as a decimal value
152    pub fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
153        self.0.as_decimal()
154    }
155
156    /// Returns scalar as a binary buffer
157    pub fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
158        self.0.as_buffer()
159    }
160
161    /// Returns scalar as a string buffer
162    pub fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
163        self.0.as_buffer_string()
164    }
165
166    /// Returns scalar as a list value
167    pub fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
168        self.0.as_list()
169    }
170}
171
172impl InnerScalarValue {
173    pub(crate) fn is_null(&self) -> bool {
174        matches!(self, InnerScalarValue::Null)
175    }
176
177    pub(crate) fn is_instance_of(&self, dtype: &DType) -> bool {
178        match (&self, dtype) {
179            (InnerScalarValue::Bool(_), DType::Bool(_)) => true,
180            (InnerScalarValue::Primitive(pvalue), DType::Primitive(ptype, _)) => {
181                pvalue.is_instance_of(ptype)
182            }
183            (InnerScalarValue::Decimal(_) | InnerScalarValue::Buffer(_), DType::Decimal(..)) => {
184                true
185            }
186            (InnerScalarValue::Buffer(_), DType::Binary(_)) => true,
187            (InnerScalarValue::BufferString(_), DType::Utf8(_)) => true,
188            (InnerScalarValue::List(values), DType::List(dtype, _)) => {
189                values.iter().all(|v| v.is_instance_of(dtype))
190            }
191            (InnerScalarValue::List(values), DType::Struct(structdt, _)) => values
192                .iter()
193                .zip(structdt.fields())
194                .all(|(v, dt)| v.is_instance_of(&dt)),
195            (InnerScalarValue::Null, dtype) => dtype.is_nullable(),
196            (_, DType::Extension(ext_dtype)) => self.is_instance_of(ext_dtype.storage_dtype()),
197            _ => false,
198        }
199    }
200
201    pub(crate) fn as_null(&self) -> VortexResult<()> {
202        match self {
203            InnerScalarValue::Null => Ok(()),
204            _ => Err(vortex_err!("Expected a Null scalar, found {:?}", self)),
205        }
206    }
207
208    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
209        match &self {
210            InnerScalarValue::Null => Ok(None),
211            InnerScalarValue::Bool(b) => Ok(Some(*b)),
212            _ => Err(vortex_err!("Expected a bool scalar, found {:?}", self)),
213        }
214    }
215
216    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
217    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
218    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
219    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
220        match &self {
221            InnerScalarValue::Null => Ok(None),
222            InnerScalarValue::Primitive(p) => Ok(Some(*p)),
223            _ => Err(vortex_err!("Expected a primitive scalar, found {:?}", self)),
224        }
225    }
226
227    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
228        match self {
229            InnerScalarValue::Null => Ok(None),
230            InnerScalarValue::Decimal(v) => Ok(Some(*v)),
231            InnerScalarValue::Buffer(b) => Ok(Some(match b.len() {
232                1 => DecimalValue::I8(b[0] as i8),
233                2 => DecimalValue::I16(i16::from_le_bytes(b.as_slice().try_into()?)),
234                4 => DecimalValue::I32(i32::from_le_bytes(b.as_slice().try_into()?)),
235                8 => DecimalValue::I64(i64::from_le_bytes(b.as_slice().try_into()?)),
236                16 => DecimalValue::I128(i128::from_le_bytes(b.as_slice().try_into()?)),
237                32 => DecimalValue::I256(i256::from_le_bytes(b.as_slice().try_into()?)),
238                l => vortex_bail!("Buffer is not a decimal value length {l}"),
239            })),
240            _ => vortex_bail!("Expected a decimal scalar, found {:?}", self),
241        }
242    }
243
244    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
245        match &self {
246            InnerScalarValue::Null => Ok(None),
247            InnerScalarValue::Buffer(b) => Ok(Some(b.clone())),
248            InnerScalarValue::BufferString(b) => {
249                Ok(Some(Arc::new(b.as_ref().clone().into_inner())))
250            }
251            _ => Err(vortex_err!("Expected a binary scalar, found {:?}", self)),
252        }
253    }
254
255    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
256        match &self {
257            InnerScalarValue::Null => Ok(None),
258            InnerScalarValue::Buffer(b) => {
259                Ok(Some(Arc::new(BufferString::try_from(b.as_ref().clone())?)))
260            }
261            InnerScalarValue::BufferString(b) => Ok(Some(b.clone())),
262            _ => Err(vortex_err!("Expected a string scalar, found {:?}", self)),
263        }
264    }
265
266    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
267        match &self {
268            InnerScalarValue::Null => Ok(None),
269            InnerScalarValue::List(l) => Ok(Some(l)),
270            _ => Err(vortex_err!("Expected a list scalar, found {:?}", self)),
271        }
272    }
273}
274
275impl<T> From<Option<T>> for ScalarValue
276where
277    T: ScalarType,
278    ScalarValue: From<T>,
279{
280    fn from(value: Option<T>) -> Self {
281        value
282            .map(ScalarValue::from)
283            .unwrap_or_else(|| ScalarValue(InnerScalarValue::Null))
284    }
285}
286
#[cfg(test)]
mod test {
    use vortex_dtype::{DType, FieldNames, Nullability, PType, StructFields};

    use crate::{InnerScalarValue, PValue, ScalarValue};

    /// Boolean values match a boolean dtype regardless of its nullability.
    #[test]
    pub fn test_is_instance_of_bool() {
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
    }

    /// A primitive value matches a primitive dtype of the same type.
    #[test]
    pub fn test_is_instance_of_primitive() {
        assert!(
            ScalarValue(InnerScalarValue::Primitive(PValue::F64(0.0)))
                .is_instance_of(&DType::Primitive(PType::F64, Nullability::NonNullable))
        );
    }

    /// List values are checked element-wise against list dtypes and field-wise against structs.
    #[test]
    pub fn test_is_instance_of_list_and_struct() {
        let tbool = DType::Bool(Nullability::NonNullable);
        let tboolnull = DType::Bool(Nullability::Nullable);
        let tnull = DType::Null;

        // [true, null]
        let bool_null = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Null),
            ]
            .into(),
        ));
        // [true, false]
        let bool_bool = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Bool(false)),
            ]
            .into(),
        ));

        fn tlist(element: &DType) -> DType {
            DType::List(element.clone().into(), Nullability::NonNullable)
        }

        assert!(bool_null.is_instance_of(&tlist(&tboolnull)));
        assert!(!bool_null.is_instance_of(&tlist(&tbool)));
        // Non-null elements fit both the nullable and the non-nullable element type.
        assert!(bool_bool.is_instance_of(&tlist(&tboolnull)));
        assert!(bool_bool.is_instance_of(&tlist(&tbool)));

        fn tstruct(left: &DType, right: &DType) -> DType {
            DType::Struct(
                StructFields::new(
                    vec!["left".into(), "right".into()].into(),
                    vec![left.clone(), right.clone()],
                ),
                Nullability::NonNullable,
            )
        }

        assert!(bool_null.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(!bool_null.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tnull, &tbool)));

        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(!bool_bool.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_bool.is_instance_of(&tstruct(&tnull, &tbool)));
    }

    /// Null matches exactly the nullable dtypes (and `DType::Null` itself).
    #[test]
    pub fn test_is_instance_of_null() {
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            !ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );

        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Primitive(PType::U8, Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Utf8(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Binary(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Struct(
                StructFields::new(FieldNames::default(), [].into()),
                Nullability::Nullable,
            ))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::List(
                DType::Utf8(Nullability::NonNullable).into(),
                Nullability::Nullable
            ))
        );
        assert!(ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Null));
    }
}
413}