vortex_scalar/
binary.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Display, Formatter};
5use std::sync::Arc;
6
7use itertools::Itertools;
8use vortex_buffer::ByteBuffer;
9use vortex_dtype::{DType, Nullability};
10use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_bail, vortex_err};
11
12use crate::{InnerScalarValue, Scalar, ScalarValue};
13
14/// A scalar value representing binary data.
15///
16/// This type provides a view into a binary scalar value, which can be either
17/// a valid byte buffer or null.
18#[derive(Debug, Hash)]
19pub struct BinaryScalar<'a> {
20    dtype: &'a DType,
21    value: Option<Arc<ByteBuffer>>,
22}
23
24impl Display for BinaryScalar<'_> {
25    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26        match &self.value {
27            None => write!(f, "null"),
28            Some(v) => write!(
29                f,
30                "\"{}\"",
31                v.as_slice().iter().map(|b| format!("{b:x}")).format(" ")
32            ),
33        }
34    }
35}
36
37impl PartialEq for BinaryScalar<'_> {
38    fn eq(&self, other: &Self) -> bool {
39        self.dtype.eq_ignore_nullability(other.dtype) && self.value == other.value
40    }
41}
42
43impl Eq for BinaryScalar<'_> {}
44
45impl PartialOrd for BinaryScalar<'_> {
46    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
47        Some(self.value.cmp(&other.value))
48    }
49}
50
51impl Ord for BinaryScalar<'_> {
52    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
53        self.value.cmp(&other.value)
54    }
55}
56
57impl<'a> BinaryScalar<'a> {
58    /// Creates a binary scalar from a data type and scalar value.
59    ///
60    /// # Errors
61    ///
62    /// Returns an error if the data type is not a binary type.
63    pub fn from_scalar_value(dtype: &'a DType, value: ScalarValue) -> VortexResult<Self> {
64        if !matches!(dtype, DType::Binary(..)) {
65            vortex_bail!("Can only construct binary scalar from binary dtype, found {dtype}")
66        }
67        Ok(Self {
68            dtype,
69            value: value.as_buffer()?,
70        })
71    }
72
73    /// Returns the data type of this binary scalar.
74    #[inline]
75    pub fn dtype(&self) -> &'a DType {
76        self.dtype
77    }
78
79    /// Returns the binary value as a byte buffer, or None if null.
80    pub fn value(&self) -> Option<ByteBuffer> {
81        self.value.as_ref().map(|v| v.as_ref().clone())
82    }
83
84    /// Constructs a value at most `max_length` in size that's greater than this value.
85    ///
86    /// Returns None if constructing a greater value would overflow.
87    pub fn upper_bound(self, max_length: usize) -> Option<Self> {
88        if let Some(value) = self.value {
89            if value.len() > max_length {
90                let sliced = value.slice(0..max_length);
91                drop(value);
92                let mut sliced_mut = sliced.into_mut();
93                for b in sliced_mut.iter_mut().rev() {
94                    let (incr, overflow) = b.overflowing_add(1);
95                    *b = incr;
96                    if !overflow {
97                        return Some(Self {
98                            dtype: self.dtype,
99                            value: Some(Arc::new(sliced_mut.freeze())),
100                        });
101                    }
102                }
103                None
104            } else {
105                Some(Self {
106                    dtype: self.dtype,
107                    value: Some(value),
108                })
109            }
110        } else {
111            Some(self)
112        }
113    }
114
115    /// Construct a value at most `max_length` in size that's less than ourselves.
116    pub fn lower_bound(self, max_length: usize) -> Self {
117        if let Some(value) = self.value {
118            if value.len() > max_length {
119                Self {
120                    dtype: self.dtype,
121                    value: Some(Arc::new(value.slice(0..max_length))),
122                }
123            } else {
124                Self {
125                    dtype: self.dtype,
126                    value: Some(value),
127                }
128            }
129        } else {
130            self
131        }
132    }
133
134    pub(crate) fn cast(&self, dtype: &DType) -> VortexResult<Scalar> {
135        if !matches!(dtype, DType::Binary(..)) {
136            vortex_bail!("Can't cast binary to {}", dtype)
137        }
138        Ok(Scalar::new(
139            dtype.clone(),
140            ScalarValue(InnerScalarValue::Buffer(
141                self.value
142                    .as_ref()
143                    .vortex_expect("nullness handled in Scalar::cast")
144                    .clone(),
145            )),
146        ))
147    }
148
149    /// Length of the scalar value or None if value is null
150    pub fn len(&self) -> Option<usize> {
151        self.value.as_ref().map(|v| v.len())
152    }
153
154    /// Returns whether its value is non-null and empty, otherwise `None`.
155    pub fn is_empty(&self) -> Option<bool> {
156        self.value.as_ref().map(|v| v.is_empty())
157    }
158
159    /// Extract value as a ScalarValue
160    pub fn into_value(self) -> ScalarValue {
161        ScalarValue(
162            self.value
163                .map(InnerScalarValue::Buffer)
164                .unwrap_or_else(|| InnerScalarValue::Null),
165        )
166    }
167}
168
169impl Scalar {
170    /// Creates a new binary scalar from a byte buffer.
171    pub fn binary(buffer: impl Into<ByteBuffer>, nullability: Nullability) -> Self {
172        Self {
173            dtype: DType::Binary(nullability),
174            value: ScalarValue(InnerScalarValue::Buffer(Arc::new(buffer.into()))),
175        }
176    }
177}
178
179impl<'a> TryFrom<&'a Scalar> for BinaryScalar<'a> {
180    type Error = VortexError;
181
182    fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
183        if !matches!(value.dtype(), DType::Binary(_)) {
184            vortex_bail!("Expected binary scalar, found {}", value.dtype())
185        }
186        Ok(Self {
187            dtype: value.dtype(),
188            value: value.value.as_buffer()?,
189        })
190    }
191}
192
193impl<'a> TryFrom<&'a Scalar> for ByteBuffer {
194    type Error = VortexError;
195
196    fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
197        let binary = scalar
198            .as_binary_opt()
199            .ok_or_else(|| vortex_err!("Cannot extract buffer from non-buffer scalar"))?;
200
201        binary
202            .value()
203            .ok_or_else(|| vortex_err!("Cannot extract present value from null scalar"))
204    }
205}
206
207impl<'a> TryFrom<&'a Scalar> for Option<ByteBuffer> {
208    type Error = VortexError;
209
210    fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
211        Ok(scalar
212            .as_binary_opt()
213            .ok_or_else(|| vortex_err!("Cannot extract buffer from non-buffer scalar"))?
214            .value())
215    }
216}
217
218impl TryFrom<Scalar> for ByteBuffer {
219    type Error = VortexError;
220
221    fn try_from(scalar: Scalar) -> VortexResult<Self> {
222        Self::try_from(&scalar)
223    }
224}
225
226impl TryFrom<Scalar> for Option<ByteBuffer> {
227    type Error = VortexError;
228
229    fn try_from(scalar: Scalar) -> VortexResult<Self> {
230        Self::try_from(&scalar)
231    }
232}
233
234impl From<&[u8]> for Scalar {
235    fn from(value: &[u8]) -> Self {
236        Scalar::from(ByteBuffer::from(value.to_vec()))
237    }
238}
239
240impl From<ByteBuffer> for Scalar {
241    fn from(value: ByteBuffer) -> Self {
242        Self {
243            dtype: DType::Binary(Nullability::NonNullable),
244            value: ScalarValue(InnerScalarValue::Buffer(Arc::new(value))),
245        }
246    }
247}
248
249impl From<Arc<ByteBuffer>> for Scalar {
250    fn from(value: Arc<ByteBuffer>) -> Self {
251        Self {
252            dtype: DType::Binary(Nullability::NonNullable),
253            value: ScalarValue(InnerScalarValue::Buffer(value)),
254        }
255    }
256}
257
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_dtype::Nullability;
    use vortex_error::{VortexExpect, VortexUnwrap};

    use crate::{BinaryScalar, Scalar};

    /// Truncating to two bytes keeps the leading prefix unchanged.
    #[test]
    fn lower_bound() {
        let input = Scalar::binary(buffer![0u8, 5, 47, 33, 129], Nullability::NonNullable);
        let want = Scalar::binary(buffer![0u8, 5], Nullability::NonNullable);

        let actual = BinaryScalar::try_from(&input)
            .vortex_unwrap()
            .lower_bound(2);
        let expected = BinaryScalar::try_from(&want).vortex_unwrap();

        assert_eq!(actual, expected);
    }

    /// The 0xff in the last kept position wraps to zero and carries into the
    /// preceding byte.
    #[test]
    fn upper_bound() {
        let input = Scalar::binary(buffer![0u8, 5, 255, 234, 23], Nullability::NonNullable);
        let want = Scalar::binary(buffer![0u8, 6, 0], Nullability::NonNullable);

        let actual = BinaryScalar::try_from(&input)
            .vortex_unwrap()
            .upper_bound(3)
            .vortex_expect("must have upper bound");
        let expected = BinaryScalar::try_from(&want).vortex_unwrap();

        assert_eq!(actual, expected);
    }

    /// A prefix made solely of 0xff bytes has no upper bound of that length.
    #[test]
    fn upper_bound_overflow() {
        let input = Scalar::binary(buffer![255u8, 255, 255], Nullability::NonNullable);

        let bound = BinaryScalar::try_from(&input)
            .vortex_unwrap()
            .upper_bound(2);

        assert!(bound.is_none());
    }
}
301}