vortex_scalar/
scalar_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::sync::Arc;
6
7use bytes::BufMut;
8use itertools::Itertools;
9use prost::Message;
10use vortex_buffer::{BufferString, ByteBuffer};
11use vortex_error::{VortexResult, VortexUnwrap, vortex_bail, vortex_err};
12use vortex_proto::scalar as pb;
13
14use crate::decimal::DecimalValue;
15use crate::pvalue::PValue;
16use crate::{Scalar, ScalarType, i256};
17
18/// Represents the internal data of a scalar value. Must be interpreted by wrapping up with a
19/// [`vortex_dtype::DType`] to make a [`super::Scalar`].
20///
21/// Note that these values can be deserialized from JSON or other formats. So a [`PValue`] may not
22/// have the correct width for what the [`vortex_dtype::DType`] expects. Primitive values should therefore be
23/// read using [`super::PrimitiveScalar`] which will handle the conversion.
24#[derive(Debug, Clone)]
25pub struct ScalarValue(pub(crate) InnerScalarValue);
26
27/// It is common to represent a nullable type `T` as an `Option<T>`, so we implement a blanket
28/// implementation for all `Option<T>` to simply be a nullable `T`.
29impl<T> From<Option<T>> for ScalarValue
30where
31    T: ScalarType,
32    ScalarValue: From<T>,
33{
34    fn from(value: Option<T>) -> Self {
35        value
36            .map(ScalarValue::from)
37            .unwrap_or_else(|| ScalarValue(InnerScalarValue::Null))
38    }
39}
40
41impl<T> From<Vec<T>> for ScalarValue
42where
43    T: ScalarType,
44    Scalar: From<T>,
45{
46    /// Converts a vector into a `ScalarValue` (specifically a `ListScalar`).
47    fn from(value: Vec<T>) -> Self {
48        ScalarValue(InnerScalarValue::List(
49            value
50                .into_iter()
51                .map(|x| {
52                    let scalar: Scalar = T::into(x);
53                    scalar.into_value()
54                })
55                .collect::<Arc<[ScalarValue]>>(),
56        ))
57    }
58}
59
60#[derive(Debug, Clone)]
61pub(crate) enum InnerScalarValue {
62    Null,
63    Bool(bool),
64    Primitive(PValue),
65    Decimal(DecimalValue),
66    Buffer(Arc<ByteBuffer>),
67    BufferString(Arc<BufferString>),
68    List(Arc<[ScalarValue]>),
69}
70
71impl ScalarValue {
72    /// Serializes the scalar value to Protocol Buffers format.
73    pub fn to_protobytes<B: BufMut + Default>(&self) -> B {
74        let pb_scalar = pb::ScalarValue::from(self);
75
76        let mut buf = B::default();
77        pb_scalar
78            .encode(&mut buf)
79            .map_err(|e| vortex_err!("Failed to serialize protobuf {e}"))
80            .vortex_unwrap();
81        buf
82    }
83
84    /// Deserializes a scalar value from Protocol Buffers format.
85    pub fn from_protobytes(buf: &[u8]) -> VortexResult<Self> {
86        ScalarValue::try_from(
87            &pb::ScalarValue::decode(buf)
88                .map_err(|e| vortex_err!("Failed to deserialize protobuf {e}"))?,
89        )
90    }
91}
92
/// Renders `slice` as a lowercase hexadecimal string.
///
/// Each byte becomes exactly two digits (zero-padded), with no separators and no prefix. An empty
/// slice yields an empty string.
fn to_hex(slice: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    // Every byte expands to exactly two characters, so the output size is known up front.
    let mut hex = String::with_capacity(slice.len() * 2);
    for &byte in slice {
        // High nibble first, then low nibble.
        hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
99
100impl Display for ScalarValue {
101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        write!(f, "{}", self.0)
103    }
104}
105
106impl Display for InnerScalarValue {
107    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108        match self {
109            Self::Bool(b) => write!(f, "{b}"),
110            Self::Primitive(pvalue) => write!(f, "{pvalue}"),
111            Self::Decimal(value) => write!(f, "{value}"),
112            Self::Buffer(buf) => {
113                if buf.len() > 10 {
114                    write!(
115                        f,
116                        "{}..{}",
117                        to_hex(&buf[0..5]),
118                        to_hex(&buf[buf.len() - 5..buf.len()]),
119                    )
120                } else {
121                    write!(f, "{}", to_hex(buf))
122                }
123            }
124            Self::BufferString(bufstr) => {
125                let bufstr = bufstr.as_str();
126                let str_len = bufstr.chars().count();
127
128                if str_len > 10 {
129                    let prefix = String::from_iter(bufstr.chars().take(5));
130                    let suffix = String::from_iter(bufstr.chars().skip(str_len - 5));
131
132                    write!(f, "\"{prefix}..{suffix}\"")
133                } else {
134                    write!(f, "\"{bufstr}\"")
135                }
136            }
137            Self::List(elems) => {
138                write!(f, "[{}]", elems.iter().format(","))
139            }
140            Self::Null => write!(f, "null"),
141        }
142    }
143}
144
145impl ScalarValue {
146    /// Creates a null scalar value.
147    pub const fn null() -> Self {
148        ScalarValue(InnerScalarValue::Null)
149    }
150
151    /// Returns true if this is a null value.
152    pub fn is_null(&self) -> bool {
153        self.0.is_null()
154    }
155
156    /// Returns scalar as a null value
157    pub(crate) fn as_null(&self) -> VortexResult<()> {
158        self.0.as_null()
159    }
160
161    /// Returns scalar as a boolean value
162    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
163        self.0.as_bool()
164    }
165
166    /// Return scalar as a primitive value. PValues don't match dtypes but will be castable to the scalars dtype
167    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
168        self.0.as_pvalue()
169    }
170
171    /// Returns scalar as a decimal value
172    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
173        self.0.as_decimal()
174    }
175
176    /// Returns scalar as a binary buffer
177    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
178        self.0.as_buffer()
179    }
180
181    /// Returns scalar as a string buffer
182    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
183        self.0.as_buffer_string()
184    }
185
186    /// Returns scalar as a list value
187    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
188        self.0.as_list()
189    }
190}
191
192impl InnerScalarValue {
193    pub(crate) fn is_null(&self) -> bool {
194        matches!(self, InnerScalarValue::Null)
195    }
196
197    pub(crate) fn as_null(&self) -> VortexResult<()> {
198        if matches!(self, InnerScalarValue::Null) {
199            Ok(())
200        } else {
201            Err(vortex_err!("Expected a Null scalar, found {:?}", self))
202        }
203    }
204
205    pub(crate) fn as_bool(&self) -> VortexResult<Option<bool>> {
206        if matches!(&self, InnerScalarValue::Null) {
207            Ok(None)
208        } else if let InnerScalarValue::Bool(b) = &self {
209            Ok(Some(*b))
210        } else {
211            Err(vortex_err!("Expected a bool scalar, found {:?}", self))
212        }
213    }
214
215    /// FIXME(ngates): PValues are such a footgun... we should probably remove this.
216    ///  But the other accessors can sometimes be useful? e.g. as_buffer. But maybe we just force
217    ///  the user to switch over Utf8 and Binary and use the correct Scalar wrapper?
218    pub(crate) fn as_pvalue(&self) -> VortexResult<Option<PValue>> {
219        if matches!(&self, InnerScalarValue::Null) {
220            Ok(None)
221        } else if let InnerScalarValue::Primitive(p) = &self {
222            Ok(Some(*p))
223        } else {
224            Err(vortex_err!("Expected a primitive scalar, found {:?}", self))
225        }
226    }
227
228    pub(crate) fn as_decimal(&self) -> VortexResult<Option<DecimalValue>> {
229        match self {
230            InnerScalarValue::Null => Ok(None),
231            InnerScalarValue::Decimal(v) => Ok(Some(*v)),
232            InnerScalarValue::Buffer(b) => Ok(Some(match b.len() {
233                1 => DecimalValue::I8(b[0] as i8),
234                2 => DecimalValue::I16(i16::from_le_bytes(b.as_slice().try_into()?)),
235                4 => DecimalValue::I32(i32::from_le_bytes(b.as_slice().try_into()?)),
236                8 => DecimalValue::I64(i64::from_le_bytes(b.as_slice().try_into()?)),
237                16 => DecimalValue::I128(i128::from_le_bytes(b.as_slice().try_into()?)),
238                32 => DecimalValue::I256(i256::from_le_bytes(b.as_slice().try_into()?)),
239                l => vortex_bail!("Buffer is not a decimal value length {l}"),
240            })),
241            _ => vortex_bail!("Expected a decimal scalar, found {:?}", self),
242        }
243    }
244
245    pub(crate) fn as_buffer(&self) -> VortexResult<Option<Arc<ByteBuffer>>> {
246        match &self {
247            InnerScalarValue::Null => Ok(None),
248            InnerScalarValue::Buffer(b) => Ok(Some(b.clone())),
249            InnerScalarValue::BufferString(b) => {
250                Ok(Some(Arc::new(b.as_ref().clone().into_inner())))
251            }
252            _ => Err(vortex_err!("Expected a binary scalar, found {:?}", self)),
253        }
254    }
255
256    pub(crate) fn as_buffer_string(&self) -> VortexResult<Option<Arc<BufferString>>> {
257        match &self {
258            InnerScalarValue::Null => Ok(None),
259            InnerScalarValue::Buffer(b) => {
260                Ok(Some(Arc::new(BufferString::try_from(b.as_ref().clone())?)))
261            }
262            InnerScalarValue::BufferString(b) => Ok(Some(b.clone())),
263            _ => Err(vortex_err!("Expected a string scalar, found {:?}", self)),
264        }
265    }
266
267    pub(crate) fn as_list(&self) -> VortexResult<Option<&Arc<[ScalarValue]>>> {
268        match &self {
269            InnerScalarValue::Null => Ok(None),
270            InnerScalarValue::List(l) => Ok(Some(l)),
271            _ => Err(vortex_err!("Expected a list scalar, found {:?}", self)),
272        }
273    }
274}