1use std::fmt::{Display, Formatter};
5use std::sync::Arc;
6
7use vortex_buffer::BufferString;
8use vortex_dtype::Nullability::NonNullable;
9use vortex_dtype::{DType, Nullability};
10use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_bail, vortex_err};
11
12use crate::{InnerScalarValue, Scalar, ScalarValue};
13
14#[derive(Debug, Clone, Hash, Eq)]
19pub struct Utf8Scalar<'a> {
20 dtype: &'a DType,
21 value: Option<Arc<BufferString>>,
22}
23
24impl Display for Utf8Scalar<'_> {
25 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26 match &self.value {
27 None => write!(f, "null"),
28 Some(v) => write!(f, "\"{}\"", v.as_str()),
29 }
30 }
31}
32
33impl PartialEq for Utf8Scalar<'_> {
34 fn eq(&self, other: &Self) -> bool {
35 self.dtype.eq_ignore_nullability(other.dtype) && self.value == other.value
36 }
37}
38
39impl PartialOrd for Utf8Scalar<'_> {
40 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
41 Some(self.cmp(other))
42 }
43}
44
45impl Ord for Utf8Scalar<'_> {
46 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
47 self.value.cmp(&other.value)
48 }
49}
50
51impl<'a> Utf8Scalar<'a> {
52 pub fn from_scalar_value(dtype: &'a DType, value: ScalarValue) -> VortexResult<Self> {
58 if !matches!(dtype, DType::Utf8(..)) {
59 vortex_bail!("Can only construct utf8 scalar from utf8 dtype, found {dtype}")
60 }
61 Ok(Self {
62 dtype,
63 value: value.as_buffer_string()?,
64 })
65 }
66
67 #[inline]
69 pub fn dtype(&self) -> &'a DType {
70 self.dtype
71 }
72
73 pub fn value(&self) -> Option<BufferString> {
75 self.value.as_ref().map(|v| v.as_ref().clone())
76 }
77
78 pub fn value_ref(&self) -> Option<&BufferString> {
81 self.value.as_ref().map(|v| v.as_ref())
82 }
83
84 pub fn upper_bound(self, max_length: usize) -> Option<Self> {
88 if let Some(value) = self.value {
89 if value.len() > max_length {
90 let utf8_split_pos = (max_length.saturating_sub(3)..=max_length)
91 .rfind(|p| value.is_char_boundary(*p))
92 .vortex_expect("Failed to find utf8 character boundary");
93
94 let utf8_mut = value
95 .get(..utf8_split_pos)
96 .vortex_expect("Slicing with existing index");
97
98 for (idx, original_char) in utf8_mut.char_indices().rev() {
99 let original_len = original_char.len_utf8();
100 if let Some(next_char) = char::from_u32(original_char as u32 + 1) {
101 if next_char.len_utf8() == original_len {
103 let sliced = value.inner().slice(0..idx + original_len);
104 drop(value);
105 let mut result = sliced.into_mut();
106 next_char.encode_utf8(&mut result[idx..]);
107 return Some(Self {
108 dtype: self.dtype,
109 value: Some(Arc::new(unsafe {
110 BufferString::new_unchecked(result.freeze())
111 })),
112 });
113 }
114 }
115 }
116 None
117 } else {
118 Some(Self {
119 dtype: self.dtype,
120 value: Some(value),
121 })
122 }
123 } else {
124 Some(self)
125 }
126 }
127
128 pub fn lower_bound(self, max_length: usize) -> Self {
130 if let Some(value) = self.value {
131 if value.len() > max_length {
132 let utf8_split_pos = (max_length.saturating_sub(3)..=max_length)
134 .rfind(|p| value.is_char_boundary(*p))
135 .vortex_expect("Failed to find utf8 character boundary");
136
137 Self {
138 dtype: self.dtype,
139 value: Some(Arc::new(unsafe {
140 BufferString::new_unchecked(value.inner().slice(0..utf8_split_pos))
141 })),
142 }
143 } else {
144 Self {
145 dtype: self.dtype,
146 value: Some(value),
147 }
148 }
149 } else {
150 self
151 }
152 }
153
154 pub(crate) fn cast(&self, dtype: &DType) -> VortexResult<Scalar> {
155 if !matches!(dtype, DType::Utf8(..)) {
156 vortex_bail!(
157 "Cannot cast utf8 to {dtype}: UTF-8 scalars can only be cast to UTF-8 types with different nullability"
158 )
159 }
160 Ok(Scalar::new(
161 dtype.clone(),
162 ScalarValue(InnerScalarValue::BufferString(
163 self.value
164 .as_ref()
165 .vortex_expect("nullness handled in Scalar::cast")
166 .clone(),
167 )),
168 ))
169 }
170
171 pub fn len(&self) -> Option<usize> {
173 self.value.as_ref().map(|v| v.len())
174 }
175
176 pub fn is_empty(&self) -> Option<bool> {
178 self.value.as_ref().map(|v| v.is_empty())
179 }
180}
181
182impl Scalar {
183 pub fn utf8<B>(str: B, nullability: Nullability) -> Self
189 where
190 B: Into<BufferString>,
191 {
192 Self::try_utf8(str, nullability).unwrap()
193 }
194
195 pub fn try_utf8<B>(
201 str: B,
202 nullability: Nullability,
203 ) -> Result<Self, <B as TryInto<BufferString>>::Error>
204 where
205 B: TryInto<BufferString>,
206 {
207 Ok(Self::new(
208 DType::Utf8(nullability),
209 ScalarValue(InnerScalarValue::BufferString(Arc::new(str.try_into()?))),
210 ))
211 }
212}
213
214impl<'a> TryFrom<&'a Scalar> for Utf8Scalar<'a> {
215 type Error = VortexError;
216
217 fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
218 if !matches!(value.dtype(), DType::Utf8(_)) {
219 vortex_bail!("Expected utf8 scalar, found {}", value.dtype())
220 }
221 Ok(Self {
222 dtype: value.dtype(),
223 value: value.value().as_buffer_string()?,
224 })
225 }
226}
227
228impl<'a> TryFrom<&'a Scalar> for String {
229 type Error = VortexError;
230
231 fn try_from(value: &'a Scalar) -> Result<Self, Self::Error> {
232 Ok(BufferString::try_from(value)?.to_string())
233 }
234}
235
236impl TryFrom<Scalar> for String {
237 type Error = VortexError;
238
239 fn try_from(value: Scalar) -> Result<Self, Self::Error> {
240 Ok(BufferString::try_from(value)?.to_string())
241 }
242}
243
244impl From<&str> for Scalar {
245 fn from(value: &str) -> Self {
246 Self::new(
247 DType::Utf8(NonNullable),
248 ScalarValue(InnerScalarValue::BufferString(Arc::new(
249 value.to_string().into(),
250 ))),
251 )
252 }
253}
254
255impl From<String> for Scalar {
256 fn from(value: String) -> Self {
257 Self::new(
258 DType::Utf8(NonNullable),
259 ScalarValue(InnerScalarValue::BufferString(Arc::new(value.into()))),
260 )
261 }
262}
263
264impl From<BufferString> for Scalar {
265 fn from(value: BufferString) -> Self {
266 Self::new(
267 DType::Utf8(NonNullable),
268 ScalarValue(InnerScalarValue::BufferString(Arc::new(value))),
269 )
270 }
271}
272
273impl From<Arc<BufferString>> for Scalar {
274 fn from(value: Arc<BufferString>) -> Self {
275 Self::new(
276 DType::Utf8(NonNullable),
277 ScalarValue(InnerScalarValue::BufferString(value)),
278 )
279 }
280}
281
282impl<'a> TryFrom<&'a Scalar> for BufferString {
283 type Error = VortexError;
284
285 fn try_from(scalar: &'a Scalar) -> VortexResult<Self> {
286 <Option<BufferString>>::try_from(scalar)?
287 .ok_or_else(|| vortex_err!("Can't extract present value from null scalar"))
288 }
289}
290
291impl TryFrom<Scalar> for BufferString {
292 type Error = VortexError;
293
294 fn try_from(scalar: Scalar) -> Result<Self, Self::Error> {
295 Self::try_from(&scalar)
296 }
297}
298
299impl<'a> TryFrom<&'a Scalar> for Option<BufferString> {
300 type Error = VortexError;
301
302 fn try_from(scalar: &'a Scalar) -> Result<Self, Self::Error> {
303 Ok(Utf8Scalar::try_from(scalar)?.value())
304 }
305}
306
307impl TryFrom<Scalar> for Option<BufferString> {
308 type Error = VortexError;
309
310 fn try_from(scalar: Scalar) -> Result<Self, Self::Error> {
311 Self::try_from(&scalar)
312 }
313}
314
315impl From<&str> for ScalarValue {
316 fn from(value: &str) -> Self {
317 ScalarValue(InnerScalarValue::BufferString(Arc::new(
318 value.to_string().into(),
319 )))
320 }
321}
322
323impl From<String> for ScalarValue {
324 fn from(value: String) -> Self {
325 ScalarValue(InnerScalarValue::BufferString(Arc::new(value.into())))
326 }
327}
328
329impl From<BufferString> for ScalarValue {
330 fn from(value: BufferString) -> Self {
331 ScalarValue(InnerScalarValue::BufferString(Arc::new(value)))
332 }
333}
334
#[cfg(test)]
mod tests {
    use std::cmp::Ordering;
    use std::sync::Arc;

    use rstest::rstest;
    use vortex_buffer::BufferString;
    use vortex_dtype::{DType, Nullability, PType};
    use vortex_error::{VortexExpect, VortexUnwrap};

    use crate::{InnerScalarValue, PValue, Scalar, ScalarValue, Utf8Scalar};

    /// Shorthand for a non-nullable UTF-8 scalar holding `text`.
    fn utf8_of(text: &str) -> Scalar {
        Scalar::utf8(text, Nullability::NonNullable)
    }

    /// Shorthand for a null scalar of nullable UTF-8 type.
    fn null_utf8() -> Scalar {
        Scalar::null(DType::Utf8(Nullability::Nullable))
    }

    /// Views `scalar` as UTF-8, panicking if the conversion fails.
    fn view(scalar: &Scalar) -> Utf8Scalar<'_> {
        Utf8Scalar::try_from(scalar).unwrap()
    }

    /// The dtype every `From` conversion into `Scalar` is expected to produce.
    fn non_nullable_utf8() -> DType {
        DType::Utf8(Nullability::NonNullable)
    }

    // Byte 9 falls inside the snowman emoji, so truncation backs up to the ASCII prefix.
    #[test]
    fn lower_bound() {
        let input = utf8_of("snowman⛄️snowman");
        let want = utf8_of("snowman");

        let got = Utf8Scalar::try_from(&input).vortex_unwrap().lower_bound(9);
        assert_eq!(got, Utf8Scalar::try_from(&want).vortex_unwrap());
    }

    // The emoji is cut off and the last remaining character is bumped: 'r' -> 's'.
    #[test]
    fn upper_bound() {
        let input = utf8_of("char🪩");
        let want = utf8_of("chas");

        let got = Utf8Scalar::try_from(&input)
            .vortex_unwrap()
            .upper_bound(5)
            .vortex_expect("must have upper bound");
        assert_eq!(got, Utf8Scalar::try_from(&want).vortex_unwrap());
    }

    // Two bytes cannot hold even the first character, so nothing is left to increment.
    #[test]
    fn upper_bound_overflow() {
        let input = utf8_of("🂑🂒🂓");

        let bound = Utf8Scalar::try_from(&input).vortex_unwrap().upper_bound(2);
        assert!(bound.is_none());
    }

    #[rstest]
    #[case("hello", "hello", true)]
    #[case("hello", "world", false)]
    #[case("", "", true)]
    #[case("abc", "ABC", false)]
    fn test_utf8_scalar_equality(#[case] str1: &str, #[case] str2: &str, #[case] expected: bool) {
        let (left, right) = (utf8_of(str1), utf8_of(str2));

        assert_eq!(view(&left) == view(&right), expected);
    }

    #[rstest]
    #[case("apple", "banana", Ordering::Less)]
    #[case("banana", "apple", Ordering::Greater)]
    #[case("apple", "apple", Ordering::Equal)]
    #[case("", "a", Ordering::Less)]
    #[case("z", "aa", Ordering::Greater)]
    fn test_utf8_scalar_ordering(
        #[case] str1: &str,
        #[case] str2: &str,
        #[case] expected: Ordering,
    ) {
        let (left, right) = (utf8_of(str1), utf8_of(str2));

        assert_eq!(view(&left).partial_cmp(&view(&right)), Some(expected));
    }

    // Every accessor reports `None` for a null scalar.
    #[test]
    fn test_utf8_null_value() {
        let null = null_utf8();
        let scalar = view(&null);

        assert!(scalar.value().is_none());
        assert!(scalar.value_ref().is_none());
        assert!(scalar.len().is_none());
        assert!(scalar.is_empty().is_none());
    }

    #[test]
    fn test_utf8_len_and_empty() {
        let blank = utf8_of("");
        let blank_view = view(&blank);
        assert_eq!(blank_view.len(), Some(0));
        assert_eq!(blank_view.is_empty(), Some(true));

        let hello = utf8_of("hello");
        let hello_view = view(&hello);
        assert_eq!(hello_view.len(), Some(5));
        assert_eq!(hello_view.is_empty(), Some(false));
    }

    // The borrowed and the owned accessor must expose the same contents.
    #[test]
    fn test_utf8_value_ref() {
        let data = "test string";
        let source = utf8_of(data);
        let scalar = view(&source);

        let borrowed = scalar.value_ref().unwrap();
        assert_eq!(borrowed.as_str(), data);

        let owned = scalar.value().unwrap();
        assert_eq!(owned.as_str(), data);
    }

    #[test]
    fn test_utf8_cast_to_utf8() {
        let source = utf8_of("test");
        let target = DType::Utf8(Nullability::Nullable);

        let result = view(&source).cast(&target).unwrap();
        assert_eq!(result.dtype(), &target);
        assert_eq!(view(&result).value().unwrap().as_str(), "test");
    }

    #[test]
    fn test_utf8_cast_to_non_utf8_fails() {
        let source = utf8_of("test");
        let target = DType::Primitive(PType::I32, Nullability::NonNullable);

        assert!(view(&source).cast(&target).is_err());
    }

    #[test]
    fn test_from_scalar_value_non_utf8_dtype() {
        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let value = ScalarValue(InnerScalarValue::Primitive(PValue::I32(42)));

        assert!(Utf8Scalar::from_scalar_value(&dtype, value).is_err());
    }

    #[test]
    fn test_try_from_non_utf8_scalar() {
        let number = Scalar::primitive(42i32, Nullability::NonNullable);

        assert!(Utf8Scalar::try_from(&number).is_err());
    }

    // A null scalar passes through `upper_bound` untouched.
    #[test]
    fn test_upper_bound_null() {
        let null = null_utf8();

        let bound = view(&null).upper_bound(10);
        assert!(bound.is_some());
        assert!(bound.unwrap().value().is_none());
    }

    // A null scalar passes through `lower_bound` untouched.
    #[test]
    fn test_lower_bound_null() {
        let null = null_utf8();

        let bound = view(&null).lower_bound(10);
        assert!(bound.value().is_none());
    }

    // A string that exactly fits the limit is returned as is.
    #[test]
    fn test_upper_bound_exact_length() {
        let source = utf8_of("abc");

        let bound = view(&source).upper_bound(3);
        assert!(bound.is_some());
        assert_eq!(bound.unwrap().value().unwrap().as_str(), "abc");
    }

    // A string that exactly fits the limit is returned as is.
    #[test]
    fn test_lower_bound_exact_length() {
        let source = utf8_of("abc");

        let bound = view(&source).lower_bound(3);
        assert_eq!(bound.value().unwrap().as_str(), "abc");
    }

    #[test]
    fn test_from_str() {
        let data = "hello world";
        let scalar = Scalar::from(data);

        assert_eq!(scalar.dtype(), &non_nullable_utf8());
        assert_eq!(view(&scalar).value().unwrap().as_str(), data);
    }

    #[test]
    fn test_from_string() {
        let scalar = Scalar::from(String::from("hello world"));

        assert_eq!(scalar.dtype(), &non_nullable_utf8());
        assert_eq!(view(&scalar).value().unwrap().as_str(), "hello world");
    }

    #[test]
    fn test_from_buffer_string() {
        let scalar = Scalar::from(BufferString::from("test"));

        assert_eq!(scalar.dtype(), &non_nullable_utf8());
        assert_eq!(view(&scalar).value().unwrap().as_str(), "test");
    }

    #[test]
    fn test_from_arc_buffer_string() {
        let shared = Arc::new(BufferString::from("test"));
        let scalar = Scalar::from(shared);

        assert_eq!(scalar.dtype(), &non_nullable_utf8());
        assert_eq!(view(&scalar).value().unwrap().as_str(), "test");
    }

    #[test]
    fn test_try_from_scalar_to_string() {
        let data = "test string";
        let scalar = utf8_of(data);

        let extracted = String::try_from(&scalar).unwrap();
        assert_eq!(extracted, data);
    }

    // Covers both the by-reference and the by-value conversion.
    #[test]
    fn test_try_from_scalar_to_buffer_string() {
        let data = "test data";

        let by_ref = utf8_of(data);
        let from_ref = BufferString::try_from(&by_ref).unwrap();
        assert_eq!(from_ref.as_str(), data);

        let by_value = utf8_of(data);
        let from_value = BufferString::try_from(by_value).unwrap();
        assert_eq!(from_value.as_str(), data);
    }

    // A present value maps to `Some`, a null scalar to `None`.
    #[test]
    fn test_try_from_scalar_to_option_buffer_string() {
        let data = "test";

        let present = Scalar::utf8(data, Nullability::Nullable);
        let some_buffer = <Option<BufferString>>::try_from(&present).unwrap();
        assert_eq!(some_buffer.unwrap().as_str(), data);

        let absent = null_utf8();
        let no_buffer = <Option<BufferString>>::try_from(&absent).unwrap();
        assert!(no_buffer.is_none());
    }

    #[test]
    fn test_try_from_non_utf8_to_buffer_string() {
        let number = Scalar::primitive(42i32, Nullability::NonNullable);

        assert!(BufferString::try_from(&number).is_err());
        assert!(<Option<BufferString>>::try_from(&number).is_err());
    }

    #[test]
    fn test_scalar_value_from_str() {
        let data = "test";
        let scalar = Scalar::new(non_nullable_utf8(), ScalarValue::from(data));

        assert_eq!(view(&scalar).value().unwrap().as_str(), data);
    }

    #[test]
    fn test_scalar_value_from_string() {
        let data = String::from("test");
        let scalar = Scalar::new(non_nullable_utf8(), ScalarValue::from(data.clone()));

        assert_eq!(view(&scalar).value().unwrap().as_str(), data.as_str());
    }

    #[test]
    fn test_scalar_value_from_buffer_string() {
        let value = ScalarValue::from(BufferString::from("test"));
        let scalar = Scalar::new(non_nullable_utf8(), value);

        assert_eq!(view(&scalar).value().unwrap().as_str(), "test");
    }

    // `len` exceeds the character count once multi-byte characters are involved.
    #[test]
    fn test_utf8_with_emoji() {
        let emoji_str = "Hello 👋 World 🌍!";
        let scalar = utf8_of(emoji_str);
        let utf8_scalar = view(&scalar);

        assert_eq!(utf8_scalar.value().unwrap().as_str(), emoji_str);
        assert!(utf8_scalar.len().unwrap() > emoji_str.chars().count());
    }

    // Null sorts strictly before any present value, in both comparison directions.
    #[test]
    fn test_partial_ord_null() {
        let null_scalar = null_utf8();
        let present_scalar = Scalar::utf8("test", Nullability::Nullable);

        let null = view(&null_scalar);
        let present = view(&present_scalar);

        assert!(null < present);
        assert!(present > null);
    }
}