vortex_scalar/
binary.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Display, Formatter};
5use std::sync::Arc;
6
7use itertools::Itertools;
8use vortex_buffer::ByteBuffer;
9use vortex_dtype::{DType, Nullability};
10use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_bail, vortex_err};
11
12use crate::{InnerScalarValue, Scalar, ScalarValue};
13
14/// A scalar value representing binary data.
15///
16/// This type provides a view into a binary scalar value, which can be either
17/// a valid byte buffer or null.
18#[derive(Debug, Clone, Hash)]
19pub struct BinaryScalar<'a> {
20    dtype: &'a DType,
21    value: Option<Arc<ByteBuffer>>,
22}
23
24impl Display for BinaryScalar<'_> {
25    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26        match &self.value {
27            None => write!(f, "null"),
28            Some(v) => write!(
29                f,
30                "\"{}\"",
31                v.as_slice().iter().map(|b| format!("{b:x}")).format(" ")
32            ),
33        }
34    }
35}
36
37impl PartialEq for BinaryScalar<'_> {
38    fn eq(&self, other: &Self) -> bool {
39        self.dtype.eq_ignore_nullability(other.dtype) && self.value == other.value
40    }
41}
42
43impl Eq for BinaryScalar<'_> {}
44
45impl PartialOrd for BinaryScalar<'_> {
46    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
47        Some(self.cmp(other))
48    }
49}
50
51impl Ord for BinaryScalar<'_> {
52    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
53        self.value.cmp(&other.value)
54    }
55}
56
57impl<'a> BinaryScalar<'a> {
58    /// Creates a binary scalar from a data type and scalar value.
59    ///
60    /// # Errors
61    ///
62    /// Returns an error if the data type is not a binary type.
63    pub fn from_scalar_value(dtype: &'a DType, value: ScalarValue) -> VortexResult<Self> {
64        if !matches!(dtype, DType::Binary(..)) {
65            vortex_bail!("Can only construct binary scalar from binary dtype, found {dtype}")
66        }
67        Ok(Self {
68            dtype,
69            value: value.as_buffer()?,
70        })
71    }
72
73    /// Returns the data type of this binary scalar.
74    #[inline]
75    pub fn dtype(&self) -> &'a DType {
76        self.dtype
77    }
78
79    /// Returns the binary value as a byte buffer, or None if null.
80    pub fn value(&self) -> Option<ByteBuffer> {
81        self.value.as_ref().map(|v| v.as_ref().clone())
82    }
83
84    /// Returns a reference to the binary value, or None if null.
85    /// This avoids cloning the underlying ByteBuffer.
86    pub fn value_ref(&self) -> Option<&ByteBuffer> {
87        self.value.as_ref().map(|v| v.as_ref())
88    }
89
90    /// Constructs a value at most `max_length` in size that's greater than this value.
91    ///
92    /// Returns None if constructing a greater value would overflow.
93    pub fn upper_bound(self, max_length: usize) -> Option<Self> {
94        if let Some(value) = self.value {
95            if value.len() > max_length {
96                let sliced = value.slice(0..max_length);
97                drop(value);
98                let mut sliced_mut = sliced.into_mut();
99                for b in sliced_mut.iter_mut().rev() {
100                    let (incr, overflow) = b.overflowing_add(1);
101                    *b = incr;
102                    if !overflow {
103                        return Some(Self {
104                            dtype: self.dtype,
105                            value: Some(Arc::new(sliced_mut.freeze())),
106                        });
107                    }
108                }
109                None
110            } else {
111                Some(Self {
112                    dtype: self.dtype,
113                    value: Some(value),
114                })
115            }
116        } else {
117            Some(self)
118        }
119    }
120
121    /// Construct a value at most `max_length` in size that's less than ourselves.
122    pub fn lower_bound(self, max_length: usize) -> Self {
123        if let Some(value) = self.value {
124            if value.len() > max_length {
125                Self {
126                    dtype: self.dtype,
127                    value: Some(Arc::new(value.slice(0..max_length))),
128                }
129            } else {
130                Self {
131                    dtype: self.dtype,
132                    value: Some(value),
133                }
134            }
135        } else {
136            self
137        }
138    }
139
140    pub(crate) fn cast(&self, dtype: &DType) -> VortexResult<Scalar> {
141        if !matches!(dtype, DType::Binary(..)) {
142            vortex_bail!(
143                "Cannot cast binary to {dtype}: binary scalars can only be cast to binary types with different nullability"
144            )
145        }
146        Ok(Scalar::new(
147            dtype.clone(),
148            ScalarValue(InnerScalarValue::Buffer(
149                self.value
150                    .as_ref()
151                    .vortex_expect("nullness handled in Scalar::cast")
152                    .clone(),
153            )),
154        ))
155    }
156
157    /// Length of the scalar value or None if value is null
158    pub fn len(&self) -> Option<usize> {
159        self.value.as_ref().map(|v| v.len())
160    }
161
162    /// Returns whether its value is non-null and empty, otherwise `None`.
163    pub fn is_empty(&self) -> Option<bool> {
164        self.value.as_ref().map(|v| v.is_empty())
165    }
166}
167
168impl Scalar {
169    /// Creates a new binary scalar from a byte buffer.
170    pub fn binary(buffer: impl Into<ByteBuffer>, nullability: Nullability) -> Self {
171        Self::new(
172            DType::Binary(nullability),
173            ScalarValue(InnerScalarValue::Buffer(Arc::new(buffer.into()))),
174        )
175    }
176}
177
178impl<'a> TryFrom<&'a Scalar> for BinaryScalar<'a> {
179    type Error = VortexError;
180
181    fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
182        if !matches!(value.dtype(), DType::Binary(_)) {
183            vortex_bail!("Expected binary scalar, found {}", value.dtype())
184        }
185        Ok(Self {
186            dtype: value.dtype(),
187            value: value.value().as_buffer()?,
188        })
189    }
190}
191
192impl<'a> TryFrom<&'a Scalar> for ByteBuffer {
193    type Error = VortexError;
194
195    fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
196        let binary = scalar
197            .as_binary_opt()
198            .ok_or_else(|| vortex_err!("Cannot extract buffer from non-buffer scalar"))?;
199
200        binary
201            .value()
202            .ok_or_else(|| vortex_err!("Cannot extract present value from null scalar"))
203    }
204}
205
206impl<'a> TryFrom<&'a Scalar> for Option<ByteBuffer> {
207    type Error = VortexError;
208
209    fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
210        Ok(scalar
211            .as_binary_opt()
212            .ok_or_else(|| vortex_err!("Cannot extract buffer from non-buffer scalar"))?
213            .value())
214    }
215}
216
217impl TryFrom<Scalar> for ByteBuffer {
218    type Error = VortexError;
219
220    fn try_from(scalar: Scalar) -> VortexResult<Self> {
221        Self::try_from(&scalar)
222    }
223}
224
225impl TryFrom<Scalar> for Option<ByteBuffer> {
226    type Error = VortexError;
227
228    fn try_from(scalar: Scalar) -> VortexResult<Self> {
229        Self::try_from(&scalar)
230    }
231}
232
233impl From<&[u8]> for Scalar {
234    fn from(value: &[u8]) -> Self {
235        Scalar::from(ByteBuffer::from(value.to_vec()))
236    }
237}
238
239impl From<ByteBuffer> for Scalar {
240    fn from(value: ByteBuffer) -> Self {
241        Self::new(DType::Binary(Nullability::NonNullable), value.into())
242    }
243}
244
245impl From<Arc<ByteBuffer>> for Scalar {
246    fn from(value: Arc<ByteBuffer>) -> Self {
247        Self::new(
248            DType::Binary(Nullability::NonNullable),
249            ScalarValue(InnerScalarValue::Buffer(value)),
250        )
251    }
252}
253
254impl From<&[u8]> for ScalarValue {
255    fn from(value: &[u8]) -> Self {
256        ScalarValue::from(ByteBuffer::from(value.to_vec()))
257    }
258}
259
260impl From<ByteBuffer> for ScalarValue {
261    fn from(value: ByteBuffer) -> Self {
262        ScalarValue(InnerScalarValue::Buffer(Arc::new(value)))
263    }
264}
265
#[cfg(test)]
mod tests {
    use std::cmp::Ordering;
    use std::sync::Arc;

    use rstest::rstest;
    use vortex_buffer::{ByteBuffer, buffer};
    use vortex_dtype::{DType, Nullability, PType};
    use vortex_error::{VortexExpect, VortexUnwrap};

    use crate::{BinaryScalar, InnerScalarValue, PValue, Scalar, ScalarValue};

    /// Views `scalar` as a binary scalar, panicking if it is not one.
    fn view(scalar: &Scalar) -> BinaryScalar<'_> {
        BinaryScalar::try_from(scalar).vortex_unwrap()
    }

    /// A null scalar of nullable binary type.
    fn null_binary() -> Scalar {
        Scalar::null(DType::Binary(Nullability::Nullable))
    }

    /// The dtype every non-nullable binary scalar is expected to carry.
    fn non_nullable_binary() -> DType {
        DType::Binary(Nullability::NonNullable)
    }

    // Truncating to two bytes keeps the leading prefix untouched.
    #[test]
    fn lower_bound() {
        let source = Scalar::binary(buffer![0u8, 5, 47, 33, 129], Nullability::NonNullable);
        let truncated = Scalar::binary(buffer![0u8, 5], Nullability::NonNullable);

        let actual = view(&source).lower_bound(2);

        assert_eq!(actual, view(&truncated));
    }

    // The 0xFF in the third byte carries into the second one.
    #[test]
    fn upper_bound() {
        let source = Scalar::binary(buffer![0u8, 5, 255, 234, 23], Nullability::NonNullable);
        let bumped = Scalar::binary(buffer![0u8, 6, 0], Nullability::NonNullable);

        let actual = view(&source)
            .upper_bound(3)
            .vortex_expect("must have upper bound");

        assert_eq!(actual, view(&bumped));
    }

    // An all-0xFF prefix cannot be incremented.
    #[test]
    fn upper_bound_overflow() {
        let saturated = Scalar::binary(buffer![255u8, 255, 255], Nullability::NonNullable);

        let bound = view(&saturated).upper_bound(2);

        assert!(bound.is_none());
    }

    #[rstest]
    #[case(&[1u8, 2, 3], &[1u8, 2, 3], true)]
    #[case(&[1u8, 2, 3], &[1u8, 2, 4], false)]
    #[case(&[], &[], true)]
    #[case(&[255u8], &[255u8], true)]
    fn test_binary_scalar_equality(
        #[case] data1: &[u8],
        #[case] data2: &[u8],
        #[case] expected: bool,
    ) {
        let lhs = Scalar::binary(data1.to_vec(), Nullability::NonNullable);
        let rhs = Scalar::binary(data2.to_vec(), Nullability::NonNullable);

        let are_equal = view(&lhs) == view(&rhs);

        assert_eq!(are_equal, expected);
    }

    #[rstest]
    #[case(&[1u8, 2, 3], &[1u8, 2, 4], Ordering::Less)]
    #[case(&[1u8, 2, 4], &[1u8, 2, 3], Ordering::Greater)]
    #[case(&[1u8, 2, 3], &[1u8, 2, 3], Ordering::Equal)]
    #[case(&[], &[1u8], Ordering::Less)]
    #[case(&[2u8, 0, 0], &[1u8, 255, 255], Ordering::Greater)]
    fn test_binary_scalar_ordering(
        #[case] data1: &[u8],
        #[case] data2: &[u8],
        #[case] expected: Ordering,
    ) {
        let lhs = Scalar::binary(data1.to_vec(), Nullability::NonNullable);
        let rhs = Scalar::binary(data2.to_vec(), Nullability::NonNullable);

        let ordering = view(&lhs).partial_cmp(&view(&rhs));

        assert_eq!(ordering, Some(expected));
    }

    // Every accessor reports `None` for a null scalar.
    #[test]
    fn test_binary_null_value() {
        let null = null_binary();
        let scalar = view(&null);

        assert!(scalar.value().is_none());
        assert!(scalar.value_ref().is_none());
        assert!(scalar.len().is_none());
        assert!(scalar.is_empty().is_none());
    }

    #[test]
    fn test_binary_len_and_empty() {
        let empty = Scalar::binary(ByteBuffer::empty(), Nullability::NonNullable);
        let empty_view = view(&empty);
        assert_eq!(empty_view.len(), Some(0));
        assert_eq!(empty_view.is_empty(), Some(true));

        let non_empty = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);
        let non_empty_view = view(&non_empty);
        assert_eq!(non_empty_view.len(), Some(3));
        assert_eq!(non_empty_view.is_empty(), Some(false));
    }

    #[test]
    fn test_binary_value_ref() {
        let data = vec![1u8, 2, 3, 4, 5];
        let binary = Scalar::binary(ByteBuffer::from(data.clone()), Nullability::NonNullable);
        let scalar = view(&binary);

        // Borrowed access.
        let borrowed = scalar.value_ref().unwrap();
        assert_eq!(borrowed.as_slice(), data.as_slice());

        // Owned (cloned) access.
        let owned = scalar.value().unwrap();
        assert_eq!(owned.as_slice(), data.as_slice());
    }

    #[test]
    fn test_binary_cast_to_binary() {
        let binary = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);
        let nullable = DType::Binary(Nullability::Nullable);

        // Only the nullability changes; the bytes are preserved.
        let result = view(&binary).cast(&nullable).unwrap();
        assert_eq!(result.dtype(), &nullable);

        let bytes = view(&result).value().unwrap();
        assert_eq!(bytes.as_slice(), &[1u8, 2, 3]);
    }

    #[test]
    fn test_binary_cast_to_non_binary_fails() {
        let binary = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);
        let target = DType::Primitive(PType::I32, Nullability::NonNullable);

        let result = view(&binary).cast(&target);

        assert!(result.is_err());
    }

    #[test]
    fn test_from_scalar_value_non_binary_dtype() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let value = ScalarValue(InnerScalarValue::Primitive(PValue::I32(42)));

        let result = BinaryScalar::from_scalar_value(&dtype, value);

        assert!(result.is_err());
    }

    #[test]
    fn test_try_from_non_binary_scalar() {
        let scalar = Scalar::primitive(42i32, Nullability::NonNullable);

        assert!(BinaryScalar::try_from(&scalar).is_err());
    }

    #[test]
    fn test_upper_bound_null() {
        let null = null_binary();

        let bound = view(&null).upper_bound(10);

        assert!(bound.is_some());
        assert!(bound.unwrap().value().is_none());
    }

    #[test]
    fn test_lower_bound_null() {
        let null = null_binary();

        let bound = view(&null).lower_bound(10);

        assert!(bound.value().is_none());
    }

    // A value that already fits is returned as-is.
    #[test]
    fn test_upper_bound_exact_length() {
        let binary = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);

        let bound = view(&binary).upper_bound(3);

        assert!(bound.is_some());
        let bytes = bound.unwrap().value().unwrap();
        assert_eq!(bytes.as_slice(), &[1u8, 2, 3]);
    }

    // A value that already fits is returned as-is.
    #[test]
    fn test_lower_bound_exact_length() {
        let binary = Scalar::binary(buffer![1u8, 2, 3], Nullability::NonNullable);

        let bytes = view(&binary).lower_bound(3).value().unwrap();

        assert_eq!(bytes.as_slice(), &[1u8, 2, 3]);
    }

    #[test]
    fn test_from_slice() {
        let data: &[u8] = &[1u8, 2, 3, 4];

        let scalar = Scalar::from(data);

        assert_eq!(scalar.dtype(), &non_nullable_binary());
        let bytes = view(&scalar).value().unwrap();
        assert_eq!(bytes.as_slice(), data);
    }

    #[test]
    fn test_try_from_scalar_to_bytebuffer() {
        let data = vec![5u8, 6, 7];

        // Borrowed conversion.
        let scalar = Scalar::binary(ByteBuffer::from(data.clone()), Nullability::NonNullable);
        let from_ref = ByteBuffer::try_from(&scalar).unwrap();
        assert_eq!(from_ref.as_slice(), data.as_slice());

        // Owned conversion.
        let data2 = vec![5u8, 6, 7];
        let scalar2 = Scalar::binary(ByteBuffer::from(data2.clone()), Nullability::NonNullable);
        let from_owned = ByteBuffer::try_from(scalar2).unwrap();
        assert_eq!(from_owned.as_slice(), data2.as_slice());
    }

    #[test]
    fn test_try_from_scalar_to_option_bytebuffer() {
        // A present value converts to `Some`.
        let data = vec![5u8, 6, 7];
        let scalar = Scalar::binary(ByteBuffer::from(data.clone()), Nullability::Nullable);
        let present = Option::<ByteBuffer>::try_from(&scalar).unwrap();
        assert_eq!(present.unwrap().as_slice(), data.as_slice());

        // A null value converts to `None`.
        let null = null_binary();
        let absent = Option::<ByteBuffer>::try_from(&null).unwrap();
        assert!(absent.is_none());
    }

    #[test]
    fn test_try_from_non_binary_to_bytebuffer() {
        let scalar = Scalar::primitive(42i32, Nullability::NonNullable);

        assert!(ByteBuffer::try_from(&scalar).is_err());
        assert!(Option::<ByteBuffer>::try_from(&scalar).is_err());
    }

    #[test]
    fn test_from_arc_bytebuffer() {
        let data = vec![10u8, 20, 30];
        let shared = Arc::new(ByteBuffer::from(data.clone()));

        let scalar = Scalar::from(shared);

        assert_eq!(scalar.dtype(), &non_nullable_binary());
        let bytes = view(&scalar).value().unwrap();
        assert_eq!(bytes.as_slice(), data.as_slice());
    }

    #[test]
    fn test_scalar_value_from_slice() {
        let data: &[u8] = &[100u8, 200];

        let value = ScalarValue::from(data);
        let scalar = Scalar::new(non_nullable_binary(), value);

        let bytes = view(&scalar).value().unwrap();
        assert_eq!(bytes.as_slice(), data);
    }

    #[test]
    fn test_scalar_value_from_bytebuffer() {
        let data = vec![111u8, 222];

        let value = ScalarValue::from(ByteBuffer::from(data.clone()));
        let scalar = Scalar::new(non_nullable_binary(), value);

        let bytes = view(&scalar).value().unwrap();
        assert_eq!(bytes.as_slice(), data.as_slice());
    }
}