vortex_scalar/
utf8.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::{Display, Formatter};
5use std::sync::Arc;
6
7use vortex_buffer::BufferString;
8use vortex_dtype::Nullability::NonNullable;
9use vortex_dtype::{DType, Nullability};
10use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_bail, vortex_err};
11
12use crate::{InnerScalarValue, Scalar, ScalarValue};
13
14/// A scalar value representing a UTF-8 encoded string.
15///
16/// This type provides a view into a UTF-8 string scalar value, which can be either
17/// a valid UTF-8 string or null.
18#[derive(Debug, Clone, Hash, Eq)]
19pub struct Utf8Scalar<'a> {
20    dtype: &'a DType,
21    value: Option<Arc<BufferString>>,
22}
23
24impl Display for Utf8Scalar<'_> {
25    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26        match &self.value {
27            None => write!(f, "null"),
28            Some(v) => write!(f, "\"{}\"", v.as_str()),
29        }
30    }
31}
32
33impl PartialEq for Utf8Scalar<'_> {
34    fn eq(&self, other: &Self) -> bool {
35        self.dtype.eq_ignore_nullability(other.dtype) && self.value == other.value
36    }
37}
38
39impl PartialOrd for Utf8Scalar<'_> {
40    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
41        Some(self.cmp(other))
42    }
43}
44
45impl Ord for Utf8Scalar<'_> {
46    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
47        self.value.cmp(&other.value)
48    }
49}
50
51impl<'a> Utf8Scalar<'a> {
52    /// Creates a UTF-8 scalar from a data type and scalar value.
53    ///
54    /// # Errors
55    ///
56    /// Returns an error if the data type is not a UTF-8 type.
57    pub fn from_scalar_value(dtype: &'a DType, value: ScalarValue) -> VortexResult<Self> {
58        if !matches!(dtype, DType::Utf8(..)) {
59            vortex_bail!("Can only construct utf8 scalar from utf8 dtype, found {dtype}")
60        }
61        Ok(Self {
62            dtype,
63            value: value.as_buffer_string()?,
64        })
65    }
66
67    /// Returns the data type of this UTF-8 scalar.
68    #[inline]
69    pub fn dtype(&self) -> &'a DType {
70        self.dtype
71    }
72
73    /// Returns the string value, or None if null.
74    pub fn value(&self) -> Option<BufferString> {
75        self.value.as_ref().map(|v| v.as_ref().clone())
76    }
77
78    /// Returns a reference to the string value, or None if null.
79    /// This avoids cloning the underlying BufferString.
80    pub fn value_ref(&self) -> Option<&BufferString> {
81        self.value.as_ref().map(|v| v.as_ref())
82    }
83
84    /// Constructs a value at most `max_length` in size that's greater than this value.
85    ///
86    /// Returns None if constructing a greater value would overflow.
87    pub fn upper_bound(self, max_length: usize) -> Option<Self> {
88        if let Some(value) = self.value {
89            if value.len() > max_length {
90                let utf8_split_pos = (max_length.saturating_sub(3)..=max_length)
91                    .rfind(|p| value.is_char_boundary(*p))
92                    .vortex_expect("Failed to find utf8 character boundary");
93
94                let utf8_mut = value
95                    .get(..utf8_split_pos)
96                    .vortex_expect("Slicing with existing index");
97
98                for (idx, original_char) in utf8_mut.char_indices().rev() {
99                    let original_len = original_char.len_utf8();
100                    if let Some(next_char) = char::from_u32(original_char as u32 + 1) {
101                        // do not allow increasing byte width of incremented char
102                        if next_char.len_utf8() == original_len {
103                            let sliced = value.inner().slice(0..idx + original_len);
104                            drop(value);
105                            let mut result = sliced.into_mut();
106                            next_char.encode_utf8(&mut result[idx..]);
107                            return Some(Self {
108                                dtype: self.dtype,
109                                value: Some(Arc::new(unsafe {
110                                    BufferString::new_unchecked(result.freeze())
111                                })),
112                            });
113                        }
114                    }
115                }
116                None
117            } else {
118                Some(Self {
119                    dtype: self.dtype,
120                    value: Some(value),
121                })
122            }
123        } else {
124            Some(self)
125        }
126    }
127
128    /// Construct a value at most `max_length` in size that's less than ourselves.
129    pub fn lower_bound(self, max_length: usize) -> Self {
130        if let Some(value) = self.value {
131            if value.len() > max_length {
132                // UTF8 characters are at most 4 bytes, since we know that BufferString is UTF8 we must have a valid character boundary
133                let utf8_split_pos = (max_length.saturating_sub(3)..=max_length)
134                    .rfind(|p| value.is_char_boundary(*p))
135                    .vortex_expect("Failed to find utf8 character boundary");
136
137                Self {
138                    dtype: self.dtype,
139                    value: Some(Arc::new(unsafe {
140                        BufferString::new_unchecked(value.inner().slice(0..utf8_split_pos))
141                    })),
142                }
143            } else {
144                Self {
145                    dtype: self.dtype,
146                    value: Some(value),
147                }
148            }
149        } else {
150            self
151        }
152    }
153
154    pub(crate) fn cast(&self, dtype: &DType) -> VortexResult<Scalar> {
155        if !matches!(dtype, DType::Utf8(..)) {
156            vortex_bail!(
157                "Cannot cast utf8 to {dtype}: UTF-8 scalars can only be cast to UTF-8 types with different nullability"
158            )
159        }
160        Ok(Scalar::new(
161            dtype.clone(),
162            ScalarValue(InnerScalarValue::BufferString(
163                self.value
164                    .as_ref()
165                    .vortex_expect("nullness handled in Scalar::cast")
166                    .clone(),
167            )),
168        ))
169    }
170
171    /// Length of the scalar value or None if value is null
172    pub fn len(&self) -> Option<usize> {
173        self.value.as_ref().map(|v| v.len())
174    }
175
176    /// Returns whether its value is non-null and empty, otherwise `None`.
177    pub fn is_empty(&self) -> Option<bool> {
178        self.value.as_ref().map(|v| v.is_empty())
179    }
180}
181
182impl Scalar {
183    /// Creates a new UTF-8 scalar from a string-like value.
184    ///
185    /// # Panics
186    ///
187    /// Panics if the input cannot be converted to a valid UTF-8 string.
188    pub fn utf8<B>(str: B, nullability: Nullability) -> Self
189    where
190        B: Into<BufferString>,
191    {
192        Self::try_utf8(str, nullability).unwrap()
193    }
194
195    /// Tries to create a new UTF-8 scalar from a string-like value.
196    ///
197    /// # Errors
198    ///
199    /// Returns an error if the input cannot be converted to a valid UTF-8 string.
200    pub fn try_utf8<B>(
201        str: B,
202        nullability: Nullability,
203    ) -> Result<Self, <B as TryInto<BufferString>>::Error>
204    where
205        B: TryInto<BufferString>,
206    {
207        Ok(Self::new(
208            DType::Utf8(nullability),
209            ScalarValue(InnerScalarValue::BufferString(Arc::new(str.try_into()?))),
210        ))
211    }
212}
213
214impl<'a> TryFrom<&'a Scalar> for Utf8Scalar<'a> {
215    type Error = VortexError;
216
217    fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
218        if !matches!(value.dtype(), DType::Utf8(_)) {
219            vortex_bail!("Expected utf8 scalar, found {}", value.dtype())
220        }
221        Ok(Self {
222            dtype: value.dtype(),
223            value: value.value().as_buffer_string()?,
224        })
225    }
226}
227
228impl<'a> TryFrom<&'a Scalar> for String {
229    type Error = VortexError;
230
231    fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
232        Ok(BufferString::try_from(value)?.to_string())
233    }
234}
235
236impl TryFrom<Scalar> for String {
237    type Error = VortexError;
238
239    fn try_from(value: Scalar) -> Result<Self, Self::Error> {
240        Ok(BufferString::try_from(value)?.to_string())
241    }
242}
243
244impl From<&str> for Scalar {
245    fn from(value: &str) -> Self {
246        Self::new(
247            DType::Utf8(NonNullable),
248            ScalarValue(InnerScalarValue::BufferString(Arc::new(
249                value.to_string().into(),
250            ))),
251        )
252    }
253}
254
255impl From<String> for Scalar {
256    fn from(value: String) -> Self {
257        Self::new(
258            DType::Utf8(NonNullable),
259            ScalarValue(InnerScalarValue::BufferString(Arc::new(value.into()))),
260        )
261    }
262}
263
264impl From<BufferString> for Scalar {
265    fn from(value: BufferString) -> Self {
266        Self::new(
267            DType::Utf8(NonNullable),
268            ScalarValue(InnerScalarValue::BufferString(Arc::new(value))),
269        )
270    }
271}
272
273impl From<Arc<BufferString>> for Scalar {
274    fn from(value: Arc<BufferString>) -> Self {
275        Self::new(
276            DType::Utf8(NonNullable),
277            ScalarValue(InnerScalarValue::BufferString(value)),
278        )
279    }
280}
281
282impl<'a> TryFrom<&'a Scalar> for BufferString {
283    type Error = VortexError;
284
285    fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
286        <Option<BufferString>>::try_from(scalar)?
287            .ok_or_else(|| vortex_err!("Can't extract present value from null scalar"))
288    }
289}
290
291impl TryFrom<Scalar> for BufferString {
292    type Error = VortexError;
293
294    fn try_from(scalar: Scalar) -> Result<Self, Self::Error> {
295        Self::try_from(&scalar)
296    }
297}
298
299impl<'a> TryFrom<&'a Scalar> for Option<BufferString> {
300    type Error = VortexError;
301
302    fn try_from(scalar: &'a Scalar) -> Result<Self, Self::Error> {
303        Ok(Utf8Scalar::try_from(scalar)?.value())
304    }
305}
306
307impl TryFrom<Scalar> for Option<BufferString> {
308    type Error = VortexError;
309
310    fn try_from(scalar: Scalar) -> Result<Self, Self::Error> {
311        Self::try_from(&scalar)
312    }
313}
314
315impl From<&str> for ScalarValue {
316    fn from(value: &str) -> Self {
317        ScalarValue(InnerScalarValue::BufferString(Arc::new(
318            value.to_string().into(),
319        )))
320    }
321}
322
323impl From<String> for ScalarValue {
324    fn from(value: String) -> Self {
325        ScalarValue(InnerScalarValue::BufferString(Arc::new(value.into())))
326    }
327}
328
329impl From<BufferString> for ScalarValue {
330    fn from(value: BufferString) -> Self {
331        ScalarValue(InnerScalarValue::BufferString(Arc::new(value)))
332    }
333}
334
#[cfg(test)]
mod tests {
    use std::cmp::Ordering;
    use std::sync::Arc;

    use rstest::rstest;
    use vortex_buffer::BufferString;
    use vortex_dtype::{DType, Nullability, PType};
    use vortex_error::{VortexExpect, VortexUnwrap};

    use crate::{InnerScalarValue, PValue, Scalar, ScalarValue, Utf8Scalar};

    /// Builds a non-nullable UTF-8 scalar holding `text`.
    fn non_nullable(text: &str) -> Scalar {
        Scalar::utf8(text, Nullability::NonNullable)
    }

    /// Builds a null scalar of nullable UTF-8 type.
    fn null_utf8() -> Scalar {
        Scalar::null(DType::Utf8(Nullability::Nullable))
    }

    /// The data type produced by the `From` conversions.
    fn non_nullable_dtype() -> DType {
        DType::Utf8(Nullability::NonNullable)
    }

    #[test]
    fn lower_bound() {
        let input = non_nullable("snowman⛄️snowman");
        let want = non_nullable("snowman");

        let truncated = Utf8Scalar::try_from(&input).vortex_unwrap().lower_bound(9);
        assert_eq!(truncated, Utf8Scalar::try_from(&want).vortex_unwrap());
    }

    #[test]
    fn upper_bound() {
        let input = non_nullable("char🪩");
        let want = non_nullable("chas");

        let bound = Utf8Scalar::try_from(&input)
            .vortex_unwrap()
            .upper_bound(5)
            .vortex_expect("must have upper bound");
        assert_eq!(bound, Utf8Scalar::try_from(&want).vortex_unwrap());
    }

    #[test]
    fn upper_bound_overflow() {
        let input = non_nullable("🂑🂒🂓");
        let bound = Utf8Scalar::try_from(&input).vortex_unwrap().upper_bound(2);
        assert!(bound.is_none());
    }

    #[rstest]
    #[case("hello", "hello", true)]
    #[case("hello", "world", false)]
    #[case("", "", true)]
    #[case("abc", "ABC", false)]
    fn test_utf8_scalar_equality(#[case] lhs: &str, #[case] rhs: &str, #[case] expected: bool) {
        let left_scalar = non_nullable(lhs);
        let right_scalar = non_nullable(rhs);

        let left = Utf8Scalar::try_from(&left_scalar).unwrap();
        let right = Utf8Scalar::try_from(&right_scalar).unwrap();

        assert_eq!(left == right, expected);
    }

    #[rstest]
    #[case("apple", "banana", Ordering::Less)]
    #[case("banana", "apple", Ordering::Greater)]
    #[case("apple", "apple", Ordering::Equal)]
    #[case("", "a", Ordering::Less)]
    #[case("z", "aa", Ordering::Greater)]
    fn test_utf8_scalar_ordering(
        #[case] lhs: &str,
        #[case] rhs: &str,
        #[case] expected: Ordering,
    ) {
        let left_scalar = non_nullable(lhs);
        let right_scalar = non_nullable(rhs);

        let left = Utf8Scalar::try_from(&left_scalar).unwrap();
        let right = Utf8Scalar::try_from(&right_scalar).unwrap();

        assert_eq!(left.partial_cmp(&right), Some(expected));
    }

    #[test]
    fn test_utf8_null_value() {
        let null = null_utf8();
        let view = Utf8Scalar::try_from(&null).unwrap();

        assert!(view.value().is_none());
        assert!(view.value_ref().is_none());
        assert!(view.len().is_none());
        assert!(view.is_empty().is_none());
    }

    #[test]
    fn test_utf8_len_and_empty() {
        let empty = non_nullable("");
        let empty_view = Utf8Scalar::try_from(&empty).unwrap();
        assert_eq!(empty_view.len(), Some(0));
        assert_eq!(empty_view.is_empty(), Some(true));

        let filled = non_nullable("hello");
        let filled_view = Utf8Scalar::try_from(&filled).unwrap();
        assert_eq!(filled_view.len(), Some(5));
        assert_eq!(filled_view.is_empty(), Some(false));
    }

    #[test]
    fn test_utf8_value_ref() {
        let text = "test string";
        let scalar = non_nullable(text);
        let view = Utf8Scalar::try_from(&scalar).unwrap();

        // Borrowed access to the stored string.
        let borrowed = view.value_ref().unwrap();
        assert_eq!(borrowed.as_str(), text);

        // Owned access to the stored string.
        let owned = view.value().unwrap();
        assert_eq!(owned.as_str(), text);
    }

    #[test]
    fn test_utf8_cast_to_utf8() {
        let scalar = non_nullable("test");
        let view = Utf8Scalar::try_from(&scalar).unwrap();

        // Casting to the nullable UTF-8 type keeps the value and changes the dtype.
        let nullable = DType::Utf8(Nullability::Nullable);
        let cast = view.cast(&nullable).unwrap();
        assert_eq!(cast.dtype(), &nullable);

        let cast_view = Utf8Scalar::try_from(&cast).unwrap();
        assert_eq!(cast_view.value().unwrap().as_str(), "test");
    }

    #[test]
    fn test_utf8_cast_to_non_utf8_fails() {
        let scalar = non_nullable("test");
        let view = Utf8Scalar::try_from(&scalar).unwrap();

        let target = DType::Primitive(PType::I32, Nullability::NonNullable);
        assert!(view.cast(&target).is_err());
    }

    #[test]
    fn test_from_scalar_value_non_utf8_dtype() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let value = ScalarValue(InnerScalarValue::Primitive(PValue::I32(42)));

        assert!(Utf8Scalar::from_scalar_value(&dtype, value).is_err());
    }

    #[test]
    fn test_try_from_non_utf8_scalar() {
        let scalar = Scalar::primitive(42i32, Nullability::NonNullable);
        assert!(Utf8Scalar::try_from(&scalar).is_err());
    }

    #[test]
    fn test_upper_bound_null() {
        let null = null_utf8();
        let view = Utf8Scalar::try_from(&null).unwrap();

        let bound = view.upper_bound(10);
        assert!(bound.is_some());
        assert!(bound.unwrap().value().is_none());
    }

    #[test]
    fn test_lower_bound_null() {
        let null = null_utf8();
        let view = Utf8Scalar::try_from(&null).unwrap();

        let bound = view.lower_bound(10);
        assert!(bound.value().is_none());
    }

    #[test]
    fn test_upper_bound_exact_length() {
        let scalar = non_nullable("abc");
        let view = Utf8Scalar::try_from(&scalar).unwrap();

        let bound = view.upper_bound(3);
        assert!(bound.is_some());
        let upper = bound.unwrap();
        assert_eq!(upper.value().unwrap().as_str(), "abc");
    }

    #[test]
    fn test_lower_bound_exact_length() {
        let scalar = non_nullable("abc");
        let view = Utf8Scalar::try_from(&scalar).unwrap();

        let lower = view.lower_bound(3);
        assert_eq!(lower.value().unwrap().as_str(), "abc");
    }

    #[test]
    fn test_from_str() {
        let text = "hello world";
        let scalar = Scalar::from(text);

        assert_eq!(scalar.dtype(), &non_nullable_dtype());
        let view = Utf8Scalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_str(), text);
    }

    #[test]
    fn test_from_string() {
        let owned = String::from("hello world");
        let scalar = Scalar::from(owned);

        assert_eq!(scalar.dtype(), &non_nullable_dtype());
        let view = Utf8Scalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_str(), "hello world");
    }

    #[test]
    fn test_from_buffer_string() {
        let buffer = BufferString::from("test");
        let scalar = Scalar::from(buffer);

        assert_eq!(scalar.dtype(), &non_nullable_dtype());
        let view = Utf8Scalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_str(), "test");
    }

    #[test]
    fn test_from_arc_buffer_string() {
        let shared = Arc::new(BufferString::from("test"));
        let scalar = Scalar::from(shared);

        assert_eq!(scalar.dtype(), &non_nullable_dtype());
        let view = Utf8Scalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_str(), "test");
    }

    #[test]
    fn test_try_from_scalar_to_string() {
        let text = "test string";
        let scalar = non_nullable(text);

        // Conversion from a borrowed scalar.
        let converted = String::try_from(&scalar).unwrap();
        assert_eq!(converted, text);
    }

    #[test]
    fn test_try_from_scalar_to_buffer_string() {
        let text = "test data";

        // Conversion from a borrowed scalar.
        let borrowed_source = non_nullable(text);
        let from_ref = BufferString::try_from(&borrowed_source).unwrap();
        assert_eq!(from_ref.as_str(), text);

        // Conversion from an owned scalar.
        let owned_source = non_nullable(text);
        let from_owned = BufferString::try_from(owned_source).unwrap();
        assert_eq!(from_owned.as_str(), text);
    }

    #[test]
    fn test_try_from_scalar_to_option_buffer_string() {
        // A present value in a nullable scalar.
        let text = "test";
        let present = Scalar::utf8(text, Nullability::Nullable);
        let extracted = <Option<BufferString>>::try_from(&present).unwrap();
        assert_eq!(extracted.unwrap().as_str(), text);

        // A null scalar.
        let null = null_utf8();
        let extracted_null = <Option<BufferString>>::try_from(&null).unwrap();
        assert!(extracted_null.is_none());
    }

    #[test]
    fn test_try_from_non_utf8_to_buffer_string() {
        let scalar = Scalar::primitive(42i32, Nullability::NonNullable);

        assert!(BufferString::try_from(&scalar).is_err());
        assert!(<Option<BufferString>>::try_from(&scalar).is_err());
    }

    #[test]
    fn test_scalar_value_from_str() {
        let text = "test";
        let value = ScalarValue::from(text);

        let scalar = Scalar::new(non_nullable_dtype(), value);
        let view = Utf8Scalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_str(), text);
    }

    #[test]
    fn test_scalar_value_from_string() {
        let owned = String::from("test");
        let value = ScalarValue::from(owned.clone());

        let scalar = Scalar::new(non_nullable_dtype(), value);
        let view = Utf8Scalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_str(), &owned);
    }

    #[test]
    fn test_scalar_value_from_buffer_string() {
        let buffer = BufferString::from("test");
        let value = ScalarValue::from(buffer);

        let scalar = Scalar::new(non_nullable_dtype(), value);
        let view = Utf8Scalar::try_from(&scalar).unwrap();
        assert_eq!(view.value().unwrap().as_str(), "test");
    }

    #[test]
    fn test_utf8_with_emoji() {
        let text = "Hello 👋 World 🌍!";
        let scalar = non_nullable(text);
        let view = Utf8Scalar::try_from(&scalar).unwrap();

        assert_eq!(view.value().unwrap().as_str(), text);
        // Multi-byte characters make the byte length exceed the character count.
        assert!(view.len().unwrap() > text.chars().count());
    }

    #[test]
    fn test_partial_ord_null() {
        let null_scalar = null_utf8();
        let present_scalar = Scalar::utf8("test", Nullability::Nullable);

        let null = Utf8Scalar::try_from(&null_scalar).unwrap();
        let present = Utf8Scalar::try_from(&present_scalar).unwrap();

        // A null value sorts before any present value.
        assert!(null < present);
        assert!(present > null);
    }
}