vortex_scalar/scalar_value/
mod.rs

1mod binary;
2mod bool;
3mod decimal;
4mod list;
5mod primitive;
6mod utf8;
7
8use std::fmt::Display;
9use std::sync::Arc;
10
11use bytes::BufMut;
12use itertools::Itertools;
13use prost::Message;
14use vortex_buffer::{BufferString, ByteBuffer};
15use vortex_dtype::DType;
16use vortex_error::{VortexResult, VortexUnwrap, vortex_bail, vortex_err};
17use vortex_proto::scalar as pb;
18
19use crate::decimal::DecimalValue;
20use crate::pvalue::PValue;
21use crate::{ScalarType, i256};
22
/// Represents the internal data of a scalar value. A `ScalarValue` carries no type
/// information of its own: it must be interpreted by pairing it with a DType to make a Scalar.
///
/// This is a thin newtype around the crate-private `InnerScalarValue` enum, so the concrete
/// representation is only visible inside this crate.
///
/// Note that these values can be deserialized from JSON or other formats, so a PValue may not
/// have the correct width for what the DType expects. Primitive values should therefore be
/// read using [crate::PrimitiveScalar], which will handle the conversion.
#[derive(Debug, Clone)]
pub struct ScalarValue(pub(crate) InnerScalarValue);
31
/// The crate-private representation wrapped by `ScalarValue`.
///
/// The variants are deliberately coarser than `DType`: several dtypes share a variant, and
/// `InnerScalarValue::is_instance_of` defines which pairings are accepted.
#[derive(Debug, Clone)]
pub(crate) enum InnerScalarValue {
    /// The null value; only an instance of nullable dtypes.
    Null,
    /// A boolean value.
    Bool(bool),
    /// A primitive value; its width may differ from what the dtype expects.
    Primitive(PValue),
    /// A decimal value.
    Decimal(DecimalValue),
    /// Raw bytes. Accepted for binary dtypes and also for decimal dtypes, where `as_decimal`
    /// decodes them as a little-endian integer.
    Buffer(Arc<ByteBuffer>),
    /// A string buffer, accepted for UTF-8 dtypes.
    BufferString(Arc<BufferString>),
    /// An ordered sequence of child values, used for both list and struct dtypes.
    List(Arc<[ScalarValue]>),
}
42
43impl ScalarValue {
44    pub fn to_protobytes<B: BufMut + Default>(&self) -> B {
45        let pb_scalar = pb::ScalarValue::from(self);
46
47        let mut buf = B::default();
48        pb_scalar
49            .encode(&mut buf)
50            .map_err(|e| vortex_err!("Failed to serialize protobuf {e}"))
51            .vortex_unwrap();
52        buf
53    }
54
55    pub fn from_protobytes(buf: &[u8]) -> VortexResult<Self> {
56        ScalarValue::try_from(
57            &pb::ScalarValue::decode(buf)
58                .map_err(|e| vortex_err!("Failed to deserialize protobuf {e}"))?,
59        )
60    }
61}
62
/// Renders `slice` as a lowercase hexadecimal string: two digits per byte, no separators and
/// no prefix. An empty slice yields an empty string.
fn to_hex(slice: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    // Every byte expands to exactly two characters, so the output can be sized up front.
    let mut hex = String::with_capacity(slice.len() * 2);
    for &byte in slice {
        // High nibble first, then low nibble.
        hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
69
70impl Display for ScalarValue {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        write!(f, "{}", self.0)
73    }
74}
75
76impl Display for InnerScalarValue {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        match self {
79            Self::Bool(b) => write!(f, "{b}"),
80            Self::Primitive(pvalue) => write!(f, "{pvalue}"),
81            Self::Decimal(value) => write!(f, "{value}"),
82            Self::Buffer(buf) => {
83                if buf.len() > 10 {
84                    write!(
85                        f,
86                        "{}..{}",
87                        to_hex(&buf[0..5]),
88                        to_hex(&buf[buf.len() - 5..buf.len()]),
89                    )
90                } else {
91                    write!(f, "{}", to_hex(buf))
92                }
93            }
94            Self::BufferString(bufstr) => {
95                let bufstr = bufstr.as_str();
96                let str_len = bufstr.chars().count();
97
98                if str_len > 10 {
99                    let prefix = String::from_iter(bufstr.chars().take(5));
100                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
101
102                    write!(f, "\"{prefix}..{suffix}\"")
103                } else {
104                    write!(f, "\"{bufstr}\"")
105                }
106            }
107            Self::List(elems) => {
108                write!(f, "[{}]", elems.iter().format(","))
109            }
110            Self::Null => write!(f, "null"),
111        }
112    }
113}
114
115impl ScalarValue {
116    pub const fn null() -> Self {
117        ScalarValue(InnerScalarValue::Null)
118    }
119
120    pub fn is_null(&self) -> bool {
121        self.0.is_null()
122    }
123
124    pub fn is_instance_of(&self, dtype: &DType) -> bool {
125        self.0.is_instance_of(dtype)
126    }
127
128    pub(crate) fn as_null(&self) -> VortexResult<()> {
129        self.0.as_null()
130    }
131
132    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
133        self.0.as_bool()
134    }
135
136    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
137    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
138    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
139    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
140        self.0.as_pvalue()
141    }
142
143    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
144        self.0.as_decimal()
145    }
146
147    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
148        self.0.as_buffer()
149    }
150
151    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
152        self.0.as_buffer_string()
153    }
154
155    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
156        self.0.as_list()
157    }
158}
159
160impl InnerScalarValue {
161    pub(crate) fn is_null(&self) -> bool {
162        matches!(self, InnerScalarValue::Null)
163    }
164
165    pub(crate) fn is_instance_of(&self, dtype: &DType) -> bool {
166        match (&self, dtype) {
167            (InnerScalarValue::Bool(_), DType::Bool(_)) => true,
168            (InnerScalarValue::Primitive(pvalue), DType::Primitive(ptype, _)) => {
169                pvalue.is_instance_of(ptype)
170            }
171            (InnerScalarValue::Decimal(_) | InnerScalarValue::Buffer(_), DType::Decimal(..)) => {
172                true
173            }
174            (InnerScalarValue::Buffer(_), DType::Binary(_)) => true,
175            (InnerScalarValue::BufferString(_), DType::Utf8(_)) => true,
176            (InnerScalarValue::List(values), DType::List(dtype, _)) => {
177                values.iter().all(|v| v.is_instance_of(dtype))
178            }
179            (InnerScalarValue::List(values), DType::Struct(structdt, _)) => values
180                .iter()
181                .zip(structdt.fields())
182                .all(|(v, dt)| v.is_instance_of(&dt)),
183            (InnerScalarValue::Null, dtype) => dtype.is_nullable(),
184            (_, DType::Extension(ext_dtype)) => self.is_instance_of(ext_dtype.storage_dtype()),
185            _ => false,
186        }
187    }
188
189    pub(crate) fn as_null(&self) -> VortexResult<()> {
190        match self {
191            InnerScalarValue::Null => Ok(()),
192            _ => Err(vortex_err!("Expected a Null scalar, found {:?}", self)),
193        }
194    }
195
196    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
197        match &self {
198            InnerScalarValue::Null => Ok(None),
199            InnerScalarValue::Bool(b) => Ok(Some(*b)),
200            _ => Err(vortex_err!("Expected a bool scalar, found {:?}", self)),
201        }
202    }
203
204    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
205    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
206    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
207    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
208        match &self {
209            InnerScalarValue::Null => Ok(None),
210            InnerScalarValue::Primitive(p) => Ok(Some(*p)),
211            _ => Err(vortex_err!("Expected a primitive scalar, found {:?}", self)),
212        }
213    }
214
215    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
216        match self {
217            InnerScalarValue::Null => Ok(None),
218            InnerScalarValue::Decimal(v) => Ok(Some(*v)),
219            InnerScalarValue::Buffer(b) => Ok(Some(match b.len() {
220                1 => DecimalValue::I8(b[0] as i8),
221                2 => DecimalValue::I16(i16::from_le_bytes(b.as_slice().try_into()?)),
222                4 => DecimalValue::I32(i32::from_le_bytes(b.as_slice().try_into()?)),
223                8 => DecimalValue::I64(i64::from_le_bytes(b.as_slice().try_into()?)),
224                16 => DecimalValue::I128(i128::from_le_bytes(b.as_slice().try_into()?)),
225                32 => DecimalValue::I256(i256::from_le_bytes(b.as_slice().try_into()?)),
226                l => vortex_bail!("Buffer is not a decimal value length {l}"),
227            })),
228            _ => vortex_bail!("Expected a decimal scalar, found {:?}", self),
229        }
230    }
231
232    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
233        match &self {
234            InnerScalarValue::Null => Ok(None),
235            InnerScalarValue::Buffer(b) => Ok(Some(b.clone())),
236            InnerScalarValue::BufferString(b) => {
237                Ok(Some(Arc::new(b.as_ref().clone().into_inner())))
238            }
239            _ => Err(vortex_err!("Expected a binary scalar, found {:?}", self)),
240        }
241    }
242
243    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
244        match &self {
245            InnerScalarValue::Null => Ok(None),
246            InnerScalarValue::Buffer(b) => {
247                Ok(Some(Arc::new(BufferString::try_from(b.as_ref().clone())?)))
248            }
249            InnerScalarValue::BufferString(b) => Ok(Some(b.clone())),
250            _ => Err(vortex_err!("Expected a string scalar, found {:?}", self)),
251        }
252    }
253
254    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
255        match &self {
256            InnerScalarValue::Null => Ok(None),
257            InnerScalarValue::List(l) => Ok(Some(l)),
258            _ => Err(vortex_err!("Expected a list scalar, found {:?}", self)),
259        }
260    }
261}
262
263impl<T> From<Option<T>> for ScalarValue
264where
265    T: ScalarType,
266    ScalarValue: From<T>,
267{
268    fn from(value: Option<T>) -> Self {
269        value
270            .map(ScalarValue::from)
271            .unwrap_or_else(|| ScalarValue(InnerScalarValue::Null))
272    }
273}
274
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use vortex_dtype::{DType, Nullability, PType, StructFields};

    use crate::{InnerScalarValue, PValue, ScalarValue};

    /// Bool values match bool dtypes regardless of nullability.
    #[test]
    pub fn test_is_instance_of_bool() {
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
    }

    /// A primitive value matches a primitive dtype of the same ptype.
    #[test]
    pub fn test_is_instance_of_primitive() {
        assert!(
            ScalarValue(InnerScalarValue::Primitive(PValue::F64(0.0)))
                .is_instance_of(&DType::Primitive(PType::F64, Nullability::NonNullable))
        );
    }

    /// List-shaped values are checked element-wise against list dtypes and field-wise against
    /// struct dtypes.
    #[test]
    pub fn test_is_instance_of_list_and_struct() {
        let tbool = DType::Bool(Nullability::NonNullable);
        let tboolnull = DType::Bool(Nullability::Nullable);
        let tnull = DType::Null;

        let bool_null = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Null),
            ]
            .into(),
        ));
        let bool_bool = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Bool(false)),
            ]
            .into(),
        ));

        fn tlist(element: &DType) -> DType {
            DType::List(element.clone().into(), Nullability::NonNullable)
        }

        assert!(bool_null.is_instance_of(&tlist(&tboolnull)));
        assert!(!bool_null.is_instance_of(&tlist(&tbool)));
        // Non-null elements fit both the nullable and the non-nullable element dtype.
        assert!(bool_bool.is_instance_of(&tlist(&tboolnull)));
        assert!(bool_bool.is_instance_of(&tlist(&tbool)));

        fn tstruct(left: &DType, right: &DType) -> DType {
            DType::Struct(
                Arc::new(StructFields::new(
                    vec!["left".into(), "right".into()].into(),
                    vec![left.clone(), right.clone()],
                )),
                Nullability::NonNullable,
            )
        }

        assert!(bool_null.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(!bool_null.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tnull, &tbool)));

        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(!bool_bool.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_bool.is_instance_of(&tstruct(&tnull, &tbool)));
    }

    /// Null matches exactly the nullable dtypes, including `DType::Null` itself.
    #[test]
    pub fn test_is_instance_of_null() {
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            !ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );

        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Primitive(PType::U8, Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Utf8(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Binary(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Struct(
                Arc::new(StructFields::new([].into(), [].into())),
                Nullability::Nullable,
            ))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::List(
                DType::Utf8(Nullability::NonNullable).into(),
                Nullability::Nullable
            ))
        );
        assert!(ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Null));
    }
}
403}