vortex_scalar/scalarvalue/
mod.rs

1mod binary;
2mod bool;
3mod list;
4mod primitive;
5mod utf8;
6
7use std::fmt::{Display, Write};
8use std::sync::Arc;
9
10use itertools::Itertools;
11use vortex_buffer::{BufferString, ByteBuffer};
12use vortex_dtype::DType;
13use vortex_error::{VortexResult, vortex_err};
14
15use crate::ScalarType;
16use crate::pvalue::PValue;
17
18/// Represents the internal data of a scalar value. Must be interpreted by wrapping
19/// up with a DType to make a Scalar.
20///
21/// Note that these values can be deserialized from JSON or other formats. So a PValue may not
22/// have the correct width for what the DType expects. Primitive values should therefore be
23/// read using [crate::PrimitiveScalar] which will handle the conversion.
24#[derive(Debug, Clone)]
25pub struct ScalarValue(pub(crate) InnerScalarValue);
26
27#[derive(Debug, Clone)]
28pub(crate) enum InnerScalarValue {
29    Null,
30    Bool(bool),
31    Primitive(PValue),
32    Buffer(Arc<ByteBuffer>),
33    BufferString(Arc<BufferString>),
34    List(Arc<[ScalarValue]>),
35}
36
#[cfg(feature = "flatbuffers")]
impl ScalarValue {
    /// Serializes this value with a flexbuffers serializer and copies the resulting bytes
    /// into a freshly defaulted `B`.
    ///
    /// A serialization failure is routed through `vortex_expect` (presumably a panic —
    /// confirm against `vortex_error`).
    pub fn to_flexbytes<B: Default + for<'a> Extend<&'a u8>>(&self) -> B {
        use serde::Serialize;
        use vortex_error::VortexExpect;

        let mut serializer = flexbuffers::FlexbufferSerializer::new();
        self.serialize(&mut serializer)
            .vortex_expect("Failed to serialize ScalarValue");

        let mut out = B::default();
        out.extend(serializer.view());
        out
    }

    /// Decodes a value from flexbuffer-encoded bytes.
    ///
    /// # Errors
    /// Returns an error if `buf` has no readable flexbuffer root or if the root cannot be
    /// deserialized into a `ScalarValue`.
    pub fn from_flexbytes(buf: &[u8]) -> VortexResult<Self> {
        use serde::Deserialize;

        let root = flexbuffers::Reader::get_root(buf)?;
        let value = ScalarValue::deserialize(root)?;
        Ok(value)
    }
}
61
/// Renders `slice` as lowercase hexadecimal, two digits per byte, with no separators.
fn to_hex(slice: &[u8]) -> Result<String, std::fmt::Error> {
    slice.iter().try_fold(
        String::new(),
        |mut hex, byte| -> Result<String, std::fmt::Error> {
            write!(hex, "{byte:02x}")?;
            Ok(hex)
        },
    )
}
69
70impl Display for ScalarValue {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        write!(f, "{}", self.0)
73    }
74}
75
76impl Display for InnerScalarValue {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        match self {
79            Self::Bool(b) => write!(f, "{}", b),
80            Self::Primitive(pvalue) => write!(f, "{}", pvalue),
81            Self::Buffer(buf) => {
82                if buf.len() > 10 {
83                    write!(
84                        f,
85                        "{}..{}",
86                        to_hex(&buf[0..5])?,
87                        to_hex(&buf[buf.len() - 5..buf.len()])?,
88                    )
89                } else {
90                    write!(f, "{}", to_hex(buf)?)
91                }
92            }
93            Self::BufferString(bufstr) => {
94                let bufstr = bufstr.as_str();
95                let str_len = bufstr.chars().count();
96
97                if str_len > 10 {
98                    let prefix = String::from_iter(bufstr.chars().take(5));
99                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
100
101                    write!(f, "\"{prefix}..{suffix}\"")
102                } else {
103                    write!(f, "\"{}\"", bufstr)
104                }
105            }
106            Self::List(elems) => {
107                write!(f, "[{}]", elems.iter().format(","))
108            }
109            Self::Null => write!(f, "null"),
110        }
111    }
112}
113
114impl ScalarValue {
115    pub const fn null() -> Self {
116        ScalarValue(InnerScalarValue::Null)
117    }
118
119    pub fn is_null(&self) -> bool {
120        self.0.is_null()
121    }
122
123    pub fn is_instance_of(&self, dtype: &DType) -> bool {
124        self.0.is_instance_of(dtype)
125    }
126
127    pub(crate) fn as_null(&self) -> VortexResult<()> {
128        self.0.as_null()
129    }
130
131    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
132        self.0.as_bool()
133    }
134
135    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
136    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
137    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
138    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
139        self.0.as_pvalue()
140    }
141
142    pub(crate) fn as_buffer(&self) -> VortexResult<Option<ByteBuffer>> {
143        self.0.as_buffer()
144    }
145
146    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<BufferString>> {
147        self.0.as_buffer_string()
148    }
149
150    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
151        self.0.as_list()
152    }
153}
154
155impl InnerScalarValue {
156    pub(crate) fn is_null(&self) -> bool {
157        matches!(self, InnerScalarValue::Null)
158    }
159
160    pub fn is_instance_of(&self, dtype: &DType) -> bool {
161        match (&self, dtype) {
162            (InnerScalarValue::Bool(_), DType::Bool(_)) => true,
163            (InnerScalarValue::Primitive(pvalue), DType::Primitive(ptype, _)) => {
164                pvalue.is_instance_of(ptype)
165            }
166            (InnerScalarValue::Buffer(_), DType::Binary(_)) => true,
167            (InnerScalarValue::BufferString(_), DType::Utf8(_)) => true,
168            (InnerScalarValue::List(values), DType::List(dtype, _)) => {
169                values.iter().all(|v| v.is_instance_of(dtype))
170            }
171            (InnerScalarValue::List(values), DType::Struct(structdt, _)) => values
172                .iter()
173                .zip(structdt.fields())
174                .all(|(v, dt)| v.is_instance_of(&dt)),
175            (InnerScalarValue::Null, dtype) => dtype.is_nullable(),
176            (_, DType::Extension(ext_dtype)) => self.is_instance_of(ext_dtype.storage_dtype()),
177            _ => false,
178        }
179    }
180
181    pub(crate) fn as_null(&self) -> VortexResult<()> {
182        match self {
183            InnerScalarValue::Null => Ok(()),
184            _ => Err(vortex_err!("Expected a Null scalar, found {:?}", self)),
185        }
186    }
187
188    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
189        match &self {
190            InnerScalarValue::Null => Ok(None),
191            InnerScalarValue::Bool(b) => Ok(Some(*b)),
192            _ => Err(vortex_err!("Expected a bool scalar, found {:?}", self)),
193        }
194    }
195
196    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
197    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
198    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
199    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
200        match &self {
201            InnerScalarValue::Null => Ok(None),
202            InnerScalarValue::Primitive(p) => Ok(Some(*p)),
203            _ => Err(vortex_err!("Expected a primitive scalar, found {:?}", self)),
204        }
205    }
206
207    pub(crate) fn as_buffer(&self) -> VortexResult<Option<ByteBuffer>> {
208        match &self {
209            InnerScalarValue::Null => Ok(None),
210            InnerScalarValue::Buffer(b) => Ok(Some(b.as_ref().clone())),
211            InnerScalarValue::BufferString(b) => {
212                let buffer_string = b.as_ref();
213                let buffer_string = buffer_string.clone();
214                Ok(Some(buffer_string.into_inner()))
215            }
216            _ => Err(vortex_err!("Expected a binary scalar, found {:?}", self)),
217        }
218    }
219
220    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<BufferString>> {
221        match &self {
222            InnerScalarValue::Null => Ok(None),
223            InnerScalarValue::Buffer(b) => Ok(Some(BufferString::try_from(b.as_ref().clone())?)),
224            InnerScalarValue::BufferString(b) => Ok(Some(b.as_ref().clone())),
225            _ => Err(vortex_err!("Expected a string scalar, found {:?}", self)),
226        }
227    }
228
229    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
230        match &self {
231            InnerScalarValue::Null => Ok(None),
232            InnerScalarValue::List(l) => Ok(Some(l)),
233            _ => Err(vortex_err!("Expected a list scalar, found {:?}", self)),
234        }
235    }
236}
237
238impl<T> From<Option<T>> for ScalarValue
239where
240    T: ScalarType,
241    ScalarValue: From<T>,
242{
243    fn from(value: Option<T>) -> Self {
244        value
245            .map(ScalarValue::from)
246            .unwrap_or_else(|| ScalarValue(InnerScalarValue::Null))
247    }
248}
249
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use vortex_dtype::{DType, Nullability, PType, StructDType};

    use crate::{InnerScalarValue, PValue, ScalarValue};

    /// Bool values are instances of bool dtypes regardless of nullability.
    #[test]
    fn test_is_instance_of_bool() {
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
    }

    #[test]
    fn test_is_instance_of_primitive() {
        assert!(
            ScalarValue(InnerScalarValue::Primitive(PValue::F64(0.0)))
                .is_instance_of(&DType::Primitive(PType::F64, Nullability::NonNullable))
        );
    }

    /// The same two-element value lists are checked against both list and struct dtypes.
    #[test]
    fn test_is_instance_of_list_and_struct() {
        let tbool = DType::Bool(Nullability::NonNullable);
        let tboolnull = DType::Bool(Nullability::Nullable);
        let tnull = DType::Null;

        let bool_null = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Null),
            ]
            .into(),
        ));
        let bool_bool = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Bool(false)),
            ]
            .into(),
        ));

        fn tlist(element: &DType) -> DType {
            DType::List(element.clone().into(), Nullability::NonNullable)
        }

        assert!(bool_null.is_instance_of(&tlist(&tboolnull)));
        assert!(!bool_null.is_instance_of(&tlist(&tbool)));
        // Non-null elements must satisfy both the nullable and non-nullable element type.
        assert!(bool_bool.is_instance_of(&tlist(&tboolnull)));
        assert!(bool_bool.is_instance_of(&tlist(&tbool)));

        fn tstruct(left: &DType, right: &DType) -> DType {
            DType::Struct(
                Arc::new(StructDType::new(
                    vec!["left".into(), "right".into()].into(),
                    vec![left.clone(), right.clone()],
                )),
                Nullability::NonNullable,
            )
        }

        assert!(bool_null.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(!bool_null.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tnull, &tbool)));

        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(!bool_bool.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_bool.is_instance_of(&tstruct(&tnull, &tbool)));
    }

    /// Null is an instance of every nullable dtype (and of `DType::Null`), but not of a
    /// non-nullable one.
    #[test]
    fn test_is_instance_of_null() {
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            !ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );

        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Primitive(PType::U8, Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Utf8(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Binary(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Struct(
                Arc::new(StructDType::new([].into(), [].into())),
                Nullability::Nullable,
            ))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::List(
                DType::Utf8(Nullability::NonNullable).into(),
                Nullability::Nullable
            ))
        );
        assert!(ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Null));
    }
}
378}