vortex_scalar/
binary.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Display, Formatter};
5use std::sync::Arc;
6
7use itertools::Itertools;
8use vortex_buffer::ByteBuffer;
9use vortex_dtype::{DType, Nullability};
10use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_bail, vortex_err};
11
12use crate::{InnerScalarValue, Scalar, ScalarValue};
13
14/// A scalar value representing binary data.
15///
16/// This type provides a view into a binary scalar value, which can be either
17/// a valid byte buffer or null.
18#[derive(Debug, Hash)]
19pub struct BinaryScalar<'a> {
20    dtype: &'a DType,
21    value: Option<Arc<ByteBuffer>>,
22}
23
24impl Display for BinaryScalar<'_> {
25    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26        match &self.value {
27            None => write!(f, "null"),
28            Some(v) => write!(
29                f,
30                "\"{}\"",
31                v.as_slice().iter().map(|b| format!("{b:x}")).format(" ")
32            ),
33        }
34    }
35}
36
37impl PartialEq for BinaryScalar<'_> {
38    fn eq(&self, other: &Self) -> bool {
39        self.dtype.eq_ignore_nullability(other.dtype) && self.value == other.value
40    }
41}
42
43impl Eq for BinaryScalar<'_> {}
44
45impl PartialOrd for BinaryScalar<'_> {
46    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
47        Some(self.cmp(other))
48    }
49}
50
51impl Ord for BinaryScalar<'_> {
52    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
53        self.value.cmp(&other.value)
54    }
55}
56
57impl<'a> BinaryScalar<'a> {
58    /// Creates a binary scalar from a data type and scalar value.
59    ///
60    /// # Errors
61    ///
62    /// Returns an error if the data type is not a binary type.
63    pub fn from_scalar_value(dtype: &'a DType, value: ScalarValue) -> VortexResult<Self> {
64        if !matches!(dtype, DType::Binary(..)) {
65            vortex_bail!("Can only construct binary scalar from binary dtype, found {dtype}")
66        }
67        Ok(Self {
68            dtype,
69            value: value.as_buffer()?,
70        })
71    }
72
73    /// Returns the data type of this binary scalar.
74    #[inline]
75    pub fn dtype(&self) -> &'a DType {
76        self.dtype
77    }
78
79    /// Returns the binary value as a byte buffer, or None if null.
80    pub fn value(&self) -> Option<ByteBuffer> {
81        self.value.as_ref().map(|v| v.as_ref().clone())
82    }
83
84    /// Returns a reference to the binary value, or None if null.
85    /// This avoids cloning the underlying ByteBuffer.
86    pub fn value_ref(&self) -> Option<&ByteBuffer> {
87        self.value.as_ref().map(|v| v.as_ref())
88    }
89
90    /// Constructs a value at most `max_length` in size that's greater than this value.
91    ///
92    /// Returns None if constructing a greater value would overflow.
93    pub fn upper_bound(self, max_length: usize) -> Option<Self> {
94        if let Some(value) = self.value {
95            if value.len() > max_length {
96                let sliced = value.slice(0..max_length);
97                drop(value);
98                let mut sliced_mut = sliced.into_mut();
99                for b in sliced_mut.iter_mut().rev() {
100                    let (incr, overflow) = b.overflowing_add(1);
101                    *b = incr;
102                    if !overflow {
103                        return Some(Self {
104                            dtype: self.dtype,
105                            value: Some(Arc::new(sliced_mut.freeze())),
106                        });
107                    }
108                }
109                None
110            } else {
111                Some(Self {
112                    dtype: self.dtype,
113                    value: Some(value),
114                })
115            }
116        } else {
117            Some(self)
118        }
119    }
120
121    /// Construct a value at most `max_length` in size that's less than ourselves.
122    pub fn lower_bound(self, max_length: usize) -> Self {
123        if let Some(value) = self.value {
124            if value.len() > max_length {
125                Self {
126                    dtype: self.dtype,
127                    value: Some(Arc::new(value.slice(0..max_length))),
128                }
129            } else {
130                Self {
131                    dtype: self.dtype,
132                    value: Some(value),
133                }
134            }
135        } else {
136            self
137        }
138    }
139
140    pub(crate) fn cast(&self, dtype: &DType) -> VortexResult<Scalar> {
141        if !matches!(dtype, DType::Binary(..)) {
142            vortex_bail!(
143                "Cannot cast binary to {dtype}: binary scalars can only be cast to binary types with different nullability"
144            )
145        }
146        Ok(Scalar::new(
147            dtype.clone(),
148            ScalarValue(InnerScalarValue::Buffer(
149                self.value
150                    .as_ref()
151                    .vortex_expect("nullness handled in Scalar::cast")
152                    .clone(),
153            )),
154        ))
155    }
156
157    /// Length of the scalar value or None if value is null
158    pub fn len(&self) -> Option<usize> {
159        self.value.as_ref().map(|v| v.len())
160    }
161
162    /// Returns whether its value is non-null and empty, otherwise `None`.
163    pub fn is_empty(&self) -> Option<bool> {
164        self.value.as_ref().map(|v| v.is_empty())
165    }
166}
167
168impl Scalar {
169    /// Creates a new binary scalar from a byte buffer.
170    pub fn binary(buffer: impl Into<ByteBuffer>, nullability: Nullability) -> Self {
171        Self {
172            dtype: DType::Binary(nullability),
173            value: ScalarValue(InnerScalarValue::Buffer(Arc::new(buffer.into()))),
174        }
175    }
176}
177
178impl<'a> TryFrom<&'a Scalar> for BinaryScalar<'a> {
179    type Error = VortexError;
180
181    fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
182        if !matches!(value.dtype(), DType::Binary(_)) {
183            vortex_bail!("Expected binary scalar, found {}", value.dtype())
184        }
185        Ok(Self {
186            dtype: value.dtype(),
187            value: value.value.as_buffer()?,
188        })
189    }
190}
191
192impl<'a> TryFrom<&'a Scalar> for ByteBuffer {
193    type Error = VortexError;
194
195    fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
196        let binary = scalar
197            .as_binary_opt()
198            .ok_or_else(|| vortex_err!("Cannot extract buffer from non-buffer scalar"))?;
199
200        binary
201            .value()
202            .ok_or_else(|| vortex_err!("Cannot extract present value from null scalar"))
203    }
204}
205
206impl<'a> TryFrom<&'a Scalar> for Option<ByteBuffer> {
207    type Error = VortexError;
208
209    fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
210        Ok(scalar
211            .as_binary_opt()
212            .ok_or_else(|| vortex_err!("Cannot extract buffer from non-buffer scalar"))?
213            .value())
214    }
215}
216
217impl TryFrom<Scalar> for ByteBuffer {
218    type Error = VortexError;
219
220    fn try_from(scalar: Scalar) -> VortexResult<Self> {
221        Self::try_from(&scalar)
222    }
223}
224
225impl TryFrom<Scalar> for Option<ByteBuffer> {
226    type Error = VortexError;
227
228    fn try_from(scalar: Scalar) -> VortexResult<Self> {
229        Self::try_from(&scalar)
230    }
231}
232
233impl From<&[u8]> for Scalar {
234    fn from(value: &[u8]) -> Self {
235        Scalar::from(ByteBuffer::from(value.to_vec()))
236    }
237}
238
239impl From<ByteBuffer> for Scalar {
240    fn from(value: ByteBuffer) -> Self {
241        Self {
242            dtype: DType::Binary(Nullability::NonNullable),
243            value: value.into(),
244        }
245    }
246}
247
248impl From<Arc<ByteBuffer>> for Scalar {
249    fn from(value: Arc<ByteBuffer>) -> Self {
250        Self {
251            dtype: DType::Binary(Nullability::NonNullable),
252            value: ScalarValue(InnerScalarValue::Buffer(value)),
253        }
254    }
255}
256
257impl From<&[u8]> for ScalarValue {
258    fn from(value: &[u8]) -> Self {
259        ScalarValue::from(ByteBuffer::from(value.to_vec()))
260    }
261}
262
263impl From<ByteBuffer> for ScalarValue {
264    fn from(value: ByteBuffer) -> Self {
265        ScalarValue(InnerScalarValue::Buffer(Arc::new(value)))
266    }
267}
268
#[cfg(test)]
mod tests {
    use std::cmp::Ordering;
    use std::sync::Arc;

    use rstest::rstest;
    use vortex_buffer::{ByteBuffer, buffer};
    use vortex_dtype::{DType, Nullability, PType};
    use vortex_error::{VortexExpect, VortexUnwrap};

    use crate::{BinaryScalar, InnerScalarValue, PValue, Scalar, ScalarValue};

    // A value longer than the limit is truncated to its prefix.
    #[test]
    fn lower_bound() {
        let source = Scalar::binary(buffer![0u8, 5, 47, 33, 129], Nullability::NonNullable);
        let wanted = Scalar::binary(buffer![0u8, 5], Nullability::NonNullable);

        let truncated = BinaryScalar::try_from(&source)
            .vortex_unwrap()
            .lower_bound(2);
        assert_eq!(truncated, BinaryScalar::try_from(&wanted).vortex_unwrap());
    }

    // Incrementing the truncated prefix carries past a 0xFF byte.
    #[test]
    fn upper_bound() {
        let source = Scalar::binary(buffer![0u8, 5, 255, 234, 23], Nullability::NonNullable);
        let wanted = Scalar::binary(buffer![0u8, 6, 0], Nullability::NonNullable);

        let bumped = BinaryScalar::try_from(&source)
            .vortex_unwrap()
            .upper_bound(3)
            .vortex_expect("must have upper bound");
        assert_eq!(bumped, BinaryScalar::try_from(&wanted).vortex_unwrap());
    }

    // A prefix made only of 0xFF bytes has no upper bound of that length.
    #[test]
    fn upper_bound_overflow() {
        let source = Scalar::binary(buffer![255u8, 255, 255], Nullability::NonNullable);

        let bound = BinaryScalar::try_from(&source)
            .vortex_unwrap()
            .upper_bound(2);
        assert!(bound.is_none());
    }

    #[rstest]
    #[case(&[1u8, 2, 3], &[1u8, 2, 3], true)]
    #[case(&[1u8, 2, 3], &[1u8, 2, 4], false)]
    #[case(&[], &[], true)]
    #[case(&[255u8], &[255u8], true)]
    fn test_binary_scalar_equality(
        #[case] data1: &[u8],
        #[case] data2: &[u8],
        #[case] expected: bool,
    ) {
        let left = Scalar::binary(data1.to_vec(), Nullability::NonNullable);
        let right = Scalar::binary(data2.to_vec(), Nullability::NonNullable);

        let lhs = BinaryScalar::try_from(&left).unwrap();
        let rhs = BinaryScalar::try_from(&right).unwrap();

        assert_eq!(lhs == rhs, expected);
    }

    #[rstest]
    #[case(&[1u8, 2, 3], &[1u8, 2, 4], Ordering::Less)]
    #[case(&[1u8, 2, 4], &[1u8, 2, 3], Ordering::Greater)]
    #[case(&[1u8, 2, 3], &[1u8, 2, 3], Ordering::Equal)]
    #[case(&[], &[1u8], Ordering::Less)]
    #[case(&[2u8, 0, 0], &[1u8, 255, 255], Ordering::Greater)]
    fn test_binary_scalar_ordering(
        #[case] data1: &[u8],
        #[case] data2: &[u8],
        #[case] expected: Ordering,
    ) {
        let left = Scalar::binary(data1.to_vec(), Nullability::NonNullable);
        let right = Scalar::binary(data2.to_vec(), Nullability::NonNullable);

        let lhs = BinaryScalar::try_from(&left).unwrap();
        let rhs = BinaryScalar::try_from(&right).unwrap();

        assert_eq!(lhs.partial_cmp(&rhs), Some(expected));
    }

    // Every accessor reports "no value" for a null scalar.
    #[test]
    fn test_binary_null_value() {
        let null_scalar = Scalar::null(DType::Binary(Nullability::Nullable));
        let view = BinaryScalar::try_from(&null_scalar).unwrap();

        assert!(view.value().is_none());
        assert!(view.value_ref().is_none());
        assert!(view.len().is_none());
        assert!(view.is_empty().is_none());
    }

    #[test]
    fn test_binary_len_and_empty() {
        let empty = Scalar::binary(ByteBuffer::empty(), Nullability::NonNullable);
        let empty_view = BinaryScalar::try_from(&empty).unwrap();
        assert_eq!(empty_view.len(), Some(0));
        assert_eq!(empty_view.is_empty(), Some(true));

        let non_empty = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);
        let non_empty_view = BinaryScalar::try_from(&non_empty).unwrap();
        assert_eq!(non_empty_view.len(), Some(3));
        assert_eq!(non_empty_view.is_empty(), Some(false));
    }

    #[test]
    fn test_binary_value_ref() {
        let data = vec![1u8, 2, 3, 4, 5];
        let source = Scalar::binary(ByteBuffer::from(data.clone()), Nullability::NonNullable);
        let view = BinaryScalar::try_from(&source).unwrap();

        // The borrowed accessor exposes the stored bytes.
        let borrowed = view.value_ref().unwrap();
        assert_eq!(borrowed.as_slice(), data.as_slice());

        // The owning accessor yields the same bytes.
        let owned = view.value().unwrap();
        assert_eq!(owned.as_slice(), data.as_slice());
    }

    #[test]
    fn test_binary_cast_to_binary() {
        let source = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);
        let view = BinaryScalar::try_from(&source).unwrap();

        // Casting to nullable binary changes the dtype and keeps the bytes.
        let target = DType::Binary(Nullability::Nullable);
        let casted = view.cast(&target).unwrap();
        assert_eq!(casted.dtype(), &DType::Binary(Nullability::Nullable));

        let casted_view = BinaryScalar::try_from(&casted).unwrap();
        assert_eq!(casted_view.value().unwrap().as_slice(), &[1, 2, 3]);
    }

    #[test]
    fn test_binary_cast_to_non_binary_fails() {
        let source = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);
        let view = BinaryScalar::try_from(&source).unwrap();

        let target = DType::Primitive(PType::I32, Nullability::NonNullable);
        assert!(view.cast(&target).is_err());
    }

    #[test]
    fn test_from_scalar_value_non_binary_dtype() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let value = ScalarValue(InnerScalarValue::Primitive(PValue::I32(42)));

        let outcome = BinaryScalar::from_scalar_value(&dtype, value);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_try_from_non_binary_scalar() {
        let primitive = Scalar::primitive(42i32, Nullability::NonNullable);

        let outcome = BinaryScalar::try_from(&primitive);
        assert!(outcome.is_err());
    }

    // A null scalar passes through `upper_bound` as a null scalar.
    #[test]
    fn test_upper_bound_null() {
        let null_scalar = Scalar::null(DType::Binary(Nullability::Nullable));
        let view = BinaryScalar::try_from(&null_scalar).unwrap();

        let bound = view.upper_bound(10);
        assert!(bound.is_some());
        assert!(bound.unwrap().value().is_none());
    }

    // A null scalar passes through `lower_bound` as a null scalar.
    #[test]
    fn test_lower_bound_null() {
        let null_scalar = Scalar::null(DType::Binary(Nullability::Nullable));
        let view = BinaryScalar::try_from(&null_scalar).unwrap();

        let bound = view.lower_bound(10);
        assert!(bound.value().is_none());
    }

    // A value exactly at the limit is returned as-is, not incremented.
    #[test]
    fn test_upper_bound_exact_length() {
        let source = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);
        let view = BinaryScalar::try_from(&source).unwrap();

        let bound = view.upper_bound(3);
        assert!(bound.is_some());
        let bound = bound.unwrap();
        assert_eq!(bound.value().unwrap().as_slice(), &[1, 2, 3]);
    }

    // A value exactly at the limit is returned as-is, not truncated.
    #[test]
    fn test_lower_bound_exact_length() {
        let source = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);
        let view = BinaryScalar::try_from(&source).unwrap();

        let bound = view.lower_bound(3);
        assert_eq!(bound.value().unwrap().as_slice(), &[1, 2, 3]);
    }

    #[test]
    fn test_from_slice() {
        let data: &[u8] = &[1u8, 2, 3, 4];
        let scalar = Scalar::from(data);

        assert_eq!(scalar.dtype(), &DType::Binary(Nullability::NonNullable));
        let view = BinaryScalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_slice(), data);
    }

    #[test]
    fn test_try_from_scalar_to_bytebuffer() {
        // Conversion from a borrowed scalar.
        let data = vec![5u8, 6, 7];
        let borrowed = Scalar::binary(ByteBuffer::from(data.clone()), Nullability::NonNullable);
        let from_ref = ByteBuffer::try_from(&borrowed).unwrap();
        assert_eq!(from_ref.as_slice(), data.as_slice());

        // Conversion from an owned scalar.
        let data2 = vec![5u8, 6, 7];
        let owned = Scalar::binary(ByteBuffer::from(data2.clone()), Nullability::NonNullable);
        let from_owned = ByteBuffer::try_from(owned).unwrap();
        assert_eq!(from_owned.as_slice(), data2.as_slice());
    }

    #[test]
    fn test_try_from_scalar_to_option_bytebuffer() {
        // Non-null case
        let data = vec![5u8, 6, 7];
        let present = Scalar::binary(ByteBuffer::from(data.clone()), Nullability::Nullable);
        let extracted = Option::<ByteBuffer>::try_from(&present).unwrap();
        assert_eq!(extracted.unwrap().as_slice(), data.as_slice());

        // Null case
        let null_scalar = Scalar::null(DType::Binary(Nullability::Nullable));
        let extracted_null = Option::<ByteBuffer>::try_from(&null_scalar).unwrap();
        assert!(extracted_null.is_none());
    }

    #[test]
    fn test_try_from_non_binary_to_bytebuffer() {
        let primitive = Scalar::primitive(42i32, Nullability::NonNullable);

        let as_buffer = ByteBuffer::try_from(&primitive);
        assert!(as_buffer.is_err());

        let as_optional = Option::<ByteBuffer>::try_from(&primitive);
        assert!(as_optional.is_err());
    }

    #[test]
    fn test_from_arc_bytebuffer() {
        let data = vec![10u8, 20, 30];
        let shared = Arc::new(ByteBuffer::from(data.clone()));
        let scalar = Scalar::from(shared);

        assert_eq!(scalar.dtype(), &DType::Binary(Nullability::NonNullable));
        let view = BinaryScalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_slice(), data.as_slice());
    }

    #[test]
    fn test_scalar_value_from_slice() {
        let data: &[u8] = &[100u8, 200];
        let value = ScalarValue::from(data);

        let scalar = Scalar::new(DType::Binary(Nullability::NonNullable), value);
        let view = BinaryScalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_slice(), data);
    }

    #[test]
    fn test_scalar_value_from_bytebuffer() {
        let data = vec![111u8, 222];
        let bytes = ByteBuffer::from(data.clone());
        let value = ScalarValue::from(bytes);

        let scalar = Scalar::new(DType::Binary(Nullability::NonNullable), value);
        let view = BinaryScalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_slice(), data.as_slice());
    }
}