vortex_scalar/
scalar_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::sync::Arc;
6
7use bytes::BufMut;
8use itertools::Itertools;
9use prost::Message;
10use vortex_buffer::{BufferString, ByteBuffer};
11use vortex_dtype::{NativeDType, i256};
12use vortex_error::{VortexResult, VortexUnwrap, vortex_bail, vortex_err};
13use vortex_proto::scalar as pb;
14
15use crate::Scalar;
16use crate::decimal::DecimalValue;
17use crate::pvalue::PValue;
18
/// Represents the internal data of a scalar value. Must be interpreted by wrapping up with a
/// [`vortex_dtype::DType`] to make a [`super::Scalar`].
///
/// Note that these values can be deserialized from JSON or other formats. So a [`PValue`] may not
/// have the correct width for what the [`vortex_dtype::DType`] expects. Primitive values should therefore be
/// read using [`super::PrimitiveScalar`] which will handle the conversion.
///
/// The wrapped `InnerScalarValue` is crate-private, so code outside this crate builds values
/// through the `From` conversions or [`ScalarValue::null`] rather than by naming a variant.
#[derive(Debug, Clone)]
pub struct ScalarValue(pub(crate) InnerScalarValue);
27
28/// It is common to represent a nullable type `T` as an `Option<T>`, so we implement a blanket
29/// implementation for all `Option<T>` to simply be a nullable `T`.
30impl<T> From<Option<T>> for ScalarValue
31where
32    T: NativeDType,
33    ScalarValue: From<T>,
34{
35    fn from(value: Option<T>) -> Self {
36        value
37            .map(ScalarValue::from)
38            .unwrap_or_else(|| ScalarValue(InnerScalarValue::Null))
39    }
40}
41
42impl<T> From<Vec<T>> for ScalarValue
43where
44    T: NativeDType,
45    Scalar: From<T>,
46{
47    /// Converts a vector into a `ScalarValue` (specifically a `ListScalar`).
48    fn from(value: Vec<T>) -> Self {
49        ScalarValue(InnerScalarValue::List(
50            value
51                .into_iter()
52                .map(|x| {
53                    let scalar: Scalar = T::into(x);
54                    scalar.into_value()
55                })
56                .collect::<Arc<[ScalarValue]>>(),
57        ))
58    }
59}
60
/// Crate-private representation behind [`ScalarValue`].
///
/// A variant does not carry a dtype; the `as_*` accessors on this type decide which variants
/// are acceptable for a given kind of read.
#[derive(Debug, Clone)]
pub(crate) enum InnerScalarValue {
    /// The null value. Every `as_*` accessor except `as_null` maps it to `Ok(None)`.
    Null,
    /// A boolean value.
    Bool(bool),
    /// A primitive value. Its width may differ from what the dtype expects.
    Primitive(PValue),
    /// A decimal value.
    Decimal(DecimalValue),
    /// A byte buffer. Besides `as_buffer`, it is also accepted by `as_buffer_string` and by
    /// `as_decimal` (for lengths of 1, 2, 4, 8, 16 or 32 bytes, read as little-endian).
    Buffer(Arc<ByteBuffer>),
    /// A string buffer. Also accepted by `as_buffer`, which yields its underlying bytes.
    BufferString(Arc<BufferString>),
    /// The element values of a list, in order.
    List(Arc<[ScalarValue]>),
}
71
72impl ScalarValue {
73    /// Serializes the scalar value to Protocol Buffers format.
74    pub fn to_protobytes<B: BufMut + Default>(&self) -> B {
75        let pb_scalar = pb::ScalarValue::from(self);
76
77        let mut buf = B::default();
78        pb_scalar
79            .encode(&mut buf)
80            .map_err(|e| vortex_err!("Failed to serialize protobuf {e}"))
81            .vortex_unwrap();
82        buf
83    }
84
85    /// Deserializes a scalar value from Protocol Buffers format.
86    pub fn from_protobytes(buf: &[u8]) -> VortexResult<Self> {
87        ScalarValue::try_from(
88            &pb::ScalarValue::decode(buf)
89                .map_err(|e| vortex_err!("Failed to deserialize protobuf {e}"))?,
90        )
91    }
92}
93
/// Renders `slice` as lowercase hexadecimal: two digits per byte, no separators or prefix.
fn to_hex(slice: &[u8]) -> String {
    use std::fmt::Write as _;

    // Every byte expands to exactly two hex digits.
    let mut hex = String::with_capacity(slice.len() * 2);
    for byte in slice {
        // Formatting into a `String` cannot fail, so the result carries no information.
        let _ = write!(hex, "{byte:02x}");
    }
    hex
}
100
101impl Display for ScalarValue {
102    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103        write!(f, "{}", self.0)
104    }
105}
106
107impl Display for InnerScalarValue {
108    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
109        match self {
110            Self::Bool(b) => write!(f, "{b}"),
111            Self::Primitive(pvalue) => write!(f, "{pvalue}"),
112            Self::Decimal(value) => write!(f, "{value}"),
113            Self::Buffer(buf) => {
114                if buf.len() > 10 {
115                    write!(
116                        f,
117                        "{}..{}",
118                        to_hex(&buf[0..5]),
119                        to_hex(&buf[buf.len() - 5..buf.len()]),
120                    )
121                } else {
122                    write!(f, "{}", to_hex(buf))
123                }
124            }
125            Self::BufferString(bufstr) => {
126                let bufstr = bufstr.as_str();
127                let str_len = bufstr.chars().count();
128
129                if str_len > 10 {
130                    let prefix = String::from_iter(bufstr.chars().take(5));
131                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
132
133                    write!(f, "\"{prefix}..{suffix}\"")
134                } else {
135                    write!(f, "\"{bufstr}\"")
136                }
137            }
138            Self::List(elems) => {
139                write!(f, "[{}]", elems.iter().format(","))
140            }
141            Self::Null => write!(f, "null"),
142        }
143    }
144}
145
146impl ScalarValue {
147    /// Creates a null scalar value.
148    pub const fn null() -> Self {
149        ScalarValue(InnerScalarValue::Null)
150    }
151
152    /// Returns true if this is a null value.
153    #[inline]
154    pub fn is_null(&self) -> bool {
155        self.0.is_null()
156    }
157
158    /// Returns scalar as a null value
159    #[inline]
160    pub(crate) fn as_null(&self) -> VortexResult<()> {
161        self.0.as_null()
162    }
163
164    /// Returns scalar as a boolean value
165    #[inline]
166    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
167        self.0.as_bool()
168    }
169
170    /// Return scalar as a primitive value. PValues don't match dtypes but will be castable to the scalars dtype
171    #[inline]
172    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
173        self.0.as_pvalue()
174    }
175
176    /// Returns scalar as a decimal value
177    #[inline]
178    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
179        self.0.as_decimal()
180    }
181
182    /// Returns scalar as a binary buffer
183    #[inline]
184    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
185        self.0.as_buffer()
186    }
187
188    /// Returns scalar as a string buffer
189    #[inline]
190    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
191        self.0.as_buffer_string()
192    }
193
194    /// Returns scalar as a list value
195    #[inline]
196    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
197        self.0.as_list()
198    }
199}
200
201impl InnerScalarValue {
202    #[inline]
203    pub(crate) fn is_null(&self) -> bool {
204        matches!(self, InnerScalarValue::Null)
205    }
206
207    #[inline]
208    pub(crate) fn as_null(&self) -> VortexResult<()> {
209        if matches!(self, InnerScalarValue::Null) {
210            Ok(())
211        } else {
212            Err(vortex_err!("Expected a Null scalar, found {self}"))
213        }
214    }
215
216    #[inline]
217    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
218        match self {
219            InnerScalarValue::Null => Ok(None),
220            InnerScalarValue::Bool(b) => Ok(Some(*b)),
221            other => Err(vortex_err!("Expected a bool scalar, found {other}",)),
222        }
223    }
224
225    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
226    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
227    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
228    #[inline]
229    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
230        match self {
231            InnerScalarValue::Null => Ok(None),
232            InnerScalarValue::Primitive(pvalue) => Ok(Some(*pvalue)),
233            other => Err(vortex_err!("Expected a primitive scalar, found {other}")),
234        }
235    }
236
237    #[inline]
238    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
239        match self {
240            InnerScalarValue::Null => Ok(None),
241            InnerScalarValue::Decimal(v) => Ok(Some(*v)),
242            InnerScalarValue::Buffer(b) => Ok(Some(match b.len() {
243                1 => DecimalValue::I8(b[0] as i8),
244                2 => DecimalValue::I16(i16::from_le_bytes(b.as_slice().try_into()?)),
245                4 => DecimalValue::I32(i32::from_le_bytes(b.as_slice().try_into()?)),
246                8 => DecimalValue::I64(i64::from_le_bytes(b.as_slice().try_into()?)),
247                16 => DecimalValue::I128(i128::from_le_bytes(b.as_slice().try_into()?)),
248                32 => DecimalValue::I256(i256::from_le_bytes(b.as_slice().try_into()?)),
249                l => vortex_bail!("Buffer is not a decimal value length {l}"),
250            })),
251            _ => vortex_bail!("Expected a decimal scalar, found {:?}", self),
252        }
253    }
254
255    #[inline]
256    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
257        match &self {
258            InnerScalarValue::Null => Ok(None),
259            InnerScalarValue::Buffer(b) => Ok(Some(b.clone())),
260            InnerScalarValue::BufferString(b) => {
261                Ok(Some(Arc::new(b.as_ref().clone().into_inner())))
262            }
263            _ => Err(vortex_err!("Expected a binary scalar, found {:?}", self)),
264        }
265    }
266
267    #[inline]
268    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
269        match &self {
270            InnerScalarValue::Null => Ok(None),
271            InnerScalarValue::Buffer(b) => {
272                Ok(Some(Arc::new(BufferString::try_from(b.as_ref().clone())?)))
273            }
274            InnerScalarValue::BufferString(b) => Ok(Some(b.clone())),
275            _ => Err(vortex_err!("Expected a string scalar, found {:?}", self)),
276        }
277    }
278
279    #[inline]
280    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
281        match &self {
282            InnerScalarValue::Null => Ok(None),
283            InnerScalarValue::List(l) => Ok(Some(l)),
284            _ => Err(vortex_err!("Expected a list scalar, found {:?}", self)),
285        }
286    }
287}