vortex_scalar/scalarvalue/
mod.rs

1mod binary;
2mod bool;
3mod list;
4mod primitive;
5mod utf8;
6
7use std::fmt::{Display, Write};
8use std::sync::Arc;
9
10use itertools::Itertools;
11use vortex_buffer::{BufferString, ByteBuffer};
12use vortex_dtype::DType;
13use vortex_error::{VortexResult, vortex_err};
14
15use crate::ScalarType;
16use crate::pvalue::PValue;
17
/// Represents the internal data of a scalar value. It must be paired with a [`DType`] to be
/// interpreted as a `Scalar`.
///
/// Note that these values can be deserialized from JSON or other formats, so a [`PValue`] may
/// not have the width that the [`DType`] expects. Primitive values should therefore be read
/// through [`crate::PrimitiveScalar`], which handles the conversion.
#[derive(Debug, Clone)]
pub struct ScalarValue(pub(crate) InnerScalarValue);
26
/// The crate-private payload wrapped by [`ScalarValue`].
///
/// A variant carries no type information of its own; `is_instance_of` decides whether a
/// variant is compatible with a given [`DType`].
#[derive(Debug, Clone)]
pub(crate) enum InnerScalarValue {
    /// The null value; an instance of any nullable dtype.
    Null,
    /// A boolean; matched against `DType::Bool`.
    Bool(bool),
    /// A primitive value; matched against `DType::Primitive`.
    Primitive(PValue),
    /// A shared byte buffer; matched against `DType::Binary`.
    Buffer(Arc<ByteBuffer>),
    /// A shared string buffer; matched against `DType::Utf8`.
    BufferString(Arc<BufferString>),
    /// A shared sequence of child values; matched against both `DType::List` (the elements)
    /// and `DType::Struct` (one value per field).
    List(Arc<[ScalarValue]>),
}
36
#[cfg(feature = "flatbuffers")]
impl ScalarValue {
    /// Serializes this value with a flexbuffers serializer and copies the resulting bytes
    /// into a freshly defaulted `B`.
    ///
    /// A serialization failure is passed to `vortex_expect` with the message
    /// `"Failed to serialize ScalarValue"` (presumably a panic; confirm against `VortexExpect`).
    pub fn to_flexbytes<B: Default + for<'a> Extend<&'a u8>>(&self) -> B {
        use serde::Serialize;
        use vortex_error::VortexExpect;

        let mut serializer = flexbuffers::FlexbufferSerializer::new();
        self.0
            .serialize(&mut serializer)
            .vortex_expect("Failed to serialize ScalarValue");

        // Copy the serializer's byte view into the caller-selected container.
        let mut out = B::default();
        out.extend(serializer.view());
        out
    }

    /// Deserializes a value from flexbuffer-encoded bytes.
    ///
    /// # Errors
    ///
    /// Returns an error if the root reader cannot be obtained from `buf` or if
    /// deserialization of the value fails.
    pub fn from_flexbytes(buf: &[u8]) -> VortexResult<Self> {
        use serde::Deserialize;

        let root = flexbuffers::Reader::get_root(buf)?;
        let value = ScalarValue::deserialize(root)?;
        Ok(value)
    }
}
62
/// Renders `slice` as lowercase hexadecimal, two digits per byte and no separators.
///
/// # Errors
///
/// Propagates any formatting error reported while writing into the output string.
fn to_hex(slice: &[u8]) -> Result<String, std::fmt::Error> {
    let mut hex = String::new();
    slice
        .iter()
        .try_for_each(|byte| write!(hex, "{byte:02x}"))?;
    Ok(hex)
}
70
71impl Display for ScalarValue {
72    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73        write!(f, "{}", self.0)
74    }
75}
76
77impl Display for InnerScalarValue {
78    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79        match self {
80            Self::Bool(b) => write!(f, "{}", b),
81            Self::Primitive(pvalue) => write!(f, "{}", pvalue),
82            Self::Buffer(buf) => {
83                if buf.len() > 10 {
84                    write!(
85                        f,
86                        "{}..{}",
87                        to_hex(&buf[0..5])?,
88                        to_hex(&buf[buf.len() - 5..buf.len()])?,
89                    )
90                } else {
91                    write!(f, "{}", to_hex(buf)?)
92                }
93            }
94            Self::BufferString(bufstr) => {
95                let bufstr = bufstr.as_str();
96                let str_len = bufstr.chars().count();
97
98                if str_len > 10 {
99                    let prefix = String::from_iter(bufstr.chars().take(5));
100                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
101
102                    write!(f, "\"{prefix}..{suffix}\"")
103                } else {
104                    write!(f, "\"{}\"", bufstr)
105                }
106            }
107            Self::List(elems) => {
108                write!(f, "[{}]", elems.iter().format(","))
109            }
110            Self::Null => write!(f, "null"),
111        }
112    }
113}
114
115impl ScalarValue {
116    pub const fn null() -> Self {
117        ScalarValue(InnerScalarValue::Null)
118    }
119
120    pub fn is_null(&self) -> bool {
121        self.0.is_null()
122    }
123
124    pub fn is_instance_of(&self, dtype: &DType) -> bool {
125        self.0.is_instance_of(dtype)
126    }
127
128    pub(crate) fn as_null(&self) -> VortexResult<()> {
129        self.0.as_null()
130    }
131
132    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
133        self.0.as_bool()
134    }
135
136    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
137    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
138    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
139    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
140        self.0.as_pvalue()
141    }
142
143    pub(crate) fn as_buffer(&self) -> VortexResult<Option<ByteBuffer>> {
144        self.0.as_buffer()
145    }
146
147    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<BufferString>> {
148        self.0.as_buffer_string()
149    }
150
151    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
152        self.0.as_list()
153    }
154}
155
156impl InnerScalarValue {
157    pub(crate) fn is_null(&self) -> bool {
158        matches!(self, InnerScalarValue::Null)
159    }
160
161    pub fn is_instance_of(&self, dtype: &DType) -> bool {
162        match (&self, dtype) {
163            (InnerScalarValue::Bool(_), DType::Bool(_)) => true,
164            (InnerScalarValue::Primitive(pvalue), DType::Primitive(ptype, _)) => {
165                pvalue.is_instance_of(ptype)
166            }
167            (InnerScalarValue::Buffer(_), DType::Binary(_)) => true,
168            (InnerScalarValue::BufferString(_), DType::Utf8(_)) => true,
169            (InnerScalarValue::List(values), DType::List(dtype, _)) => {
170                values.iter().all(|v| v.is_instance_of(dtype))
171            }
172            (InnerScalarValue::List(values), DType::Struct(structdt, _)) => values
173                .iter()
174                .zip(structdt.fields())
175                .all(|(v, dt)| v.is_instance_of(&dt)),
176            (InnerScalarValue::Null, dtype) => dtype.is_nullable(),
177            (_, DType::Extension(ext_dtype)) => self.is_instance_of(ext_dtype.storage_dtype()),
178            _ => false,
179        }
180    }
181
182    pub(crate) fn as_null(&self) -> VortexResult<()> {
183        match self {
184            InnerScalarValue::Null => Ok(()),
185            _ => Err(vortex_err!("Expected a Null scalar, found {:?}", self)),
186        }
187    }
188
189    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
190        match &self {
191            InnerScalarValue::Null => Ok(None),
192            InnerScalarValue::Bool(b) => Ok(Some(*b)),
193            _ => Err(vortex_err!("Expected a bool scalar, found {:?}", self)),
194        }
195    }
196
197    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
198    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
199    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
200    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
201        match &self {
202            InnerScalarValue::Null => Ok(None),
203            InnerScalarValue::Primitive(p) => Ok(Some(*p)),
204            _ => Err(vortex_err!("Expected a primitive scalar, found {:?}", self)),
205        }
206    }
207
208    pub(crate) fn as_buffer(&self) -> VortexResult<Option<ByteBuffer>> {
209        match &self {
210            InnerScalarValue::Null => Ok(None),
211            InnerScalarValue::Buffer(b) => Ok(Some(b.as_ref().clone())),
212            InnerScalarValue::BufferString(b) => {
213                let buffer_string = b.as_ref();
214                let buffer_string = buffer_string.clone();
215                Ok(Some(buffer_string.into_inner()))
216            }
217            _ => Err(vortex_err!("Expected a binary scalar, found {:?}", self)),
218        }
219    }
220
221    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<BufferString>> {
222        match &self {
223            InnerScalarValue::Null => Ok(None),
224            InnerScalarValue::Buffer(b) => Ok(Some(BufferString::try_from(b.as_ref().clone())?)),
225            InnerScalarValue::BufferString(b) => Ok(Some(b.as_ref().clone())),
226            _ => Err(vortex_err!("Expected a string scalar, found {:?}", self)),
227        }
228    }
229
230    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
231        match &self {
232            InnerScalarValue::Null => Ok(None),
233            InnerScalarValue::List(l) => Ok(Some(l)),
234            _ => Err(vortex_err!("Expected a list scalar, found {:?}", self)),
235        }
236    }
237}
238
239impl<T> From<Option<T>> for ScalarValue
240where
241    T: ScalarType,
242    ScalarValue: From<T>,
243{
244    fn from(value: Option<T>) -> Self {
245        value
246            .map(ScalarValue::from)
247            .unwrap_or_else(|| ScalarValue(InnerScalarValue::Null))
248    }
249}
250
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use vortex_dtype::{DType, Nullability, PType, StructDType};

    use crate::{InnerScalarValue, PValue, ScalarValue};

    /// Bool values are instances of bool dtypes regardless of nullability.
    #[test]
    pub fn test_is_instance_of_bool() {
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(true))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Bool(false))
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );
    }

    /// A primitive value is an instance of the primitive dtype with a matching type.
    #[test]
    pub fn test_is_instance_of_primitive() {
        assert!(
            ScalarValue(InnerScalarValue::Primitive(PValue::F64(0.0)))
                .is_instance_of(&DType::Primitive(PType::F64, Nullability::NonNullable))
        );
    }

    /// The same `List` payload is checked element-wise against list dtypes and field-wise
    /// against struct dtypes.
    #[test]
    pub fn test_is_instance_of_list_and_struct() {
        let tbool = DType::Bool(Nullability::NonNullable);
        let tboolnull = DType::Bool(Nullability::Nullable);
        let tnull = DType::Null;

        let bool_null = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Null),
            ]
            .into(),
        ));
        let bool_bool = ScalarValue(InnerScalarValue::List(
            vec![
                ScalarValue(InnerScalarValue::Bool(true)),
                ScalarValue(InnerScalarValue::Bool(false)),
            ]
            .into(),
        ));

        fn tlist(element: &DType) -> DType {
            DType::List(element.clone().into(), Nullability::NonNullable)
        }

        assert!(bool_null.is_instance_of(&tlist(&tboolnull)));
        assert!(!bool_null.is_instance_of(&tlist(&tbool)));
        // Non-null elements satisfy both the nullable and the non-nullable element dtype.
        assert!(bool_bool.is_instance_of(&tlist(&tbool)));
        assert!(bool_bool.is_instance_of(&tlist(&tboolnull)));

        fn tstruct(left: &DType, right: &DType) -> DType {
            DType::Struct(
                Arc::new(StructDType::new(
                    vec!["left".into(), "right".into()].into(),
                    vec![left.clone(), right.clone()],
                )),
                Nullability::NonNullable,
            )
        }

        assert!(bool_null.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(!bool_null.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(bool_null.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_null.is_instance_of(&tstruct(&tnull, &tbool)));

        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tboolnull)));
        assert!(bool_bool.is_instance_of(&tstruct(&tboolnull, &tbool)));
        assert!(bool_bool.is_instance_of(&tstruct(&tbool, &tbool)));

        assert!(!bool_bool.is_instance_of(&tstruct(&tbool, &tnull)));
        assert!(!bool_bool.is_instance_of(&tstruct(&tnull, &tbool)));
    }

    /// Null is an instance of every nullable dtype (and of `DType::Null`), but not of a
    /// non-nullable one.
    #[test]
    pub fn test_is_instance_of_null() {
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Bool(Nullability::Nullable))
        );
        assert!(
            !ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Bool(Nullability::NonNullable))
        );

        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Primitive(PType::U8, Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Utf8(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null)
                .is_instance_of(&DType::Binary(Nullability::Nullable))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Struct(
                Arc::new(StructDType::new([].into(), [].into())),
                Nullability::Nullable,
            ))
        );
        assert!(
            ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::List(
                DType::Utf8(Nullability::NonNullable).into(),
                Nullability::Nullable
            ))
        );
        assert!(ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Null));
    }
}
379}