1use std::sync::Arc;
19
20use arrow::array::{
21 Array, ArrayRef, AsArray, ByteView, GenericStringArray, OffsetSizeTrait,
22 PrimitiveArray, StringArrayType, StringViewArray, make_view, new_null_array,
23};
24use arrow::buffer::ScalarBuffer;
25use arrow::datatypes::{DataType, Int64Type};
26use arrow_buffer::NullBuffer;
27
28use crate::strings::GenericStringArrayBuilder;
29use crate::utils::make_scalar_function;
30use datafusion_common::{
31 Result, ScalarValue, exec_datafusion_err, exec_err, utils::take_function_args,
32};
33use datafusion_expr::TypeSignature::Exact;
34use datafusion_expr::{
35 ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
36 Volatility,
37};
38use datafusion_macros::user_doc;
39use memchr::{memchr_iter, memmem, memrchr_iter};
40
41#[user_doc(
42 doc_section(label = "String Functions"),
43 description = r#"Returns the substring from str before count occurrences of the delimiter delim.
44If count is positive, everything to the left of the final delimiter (counting from the left) is returned.
45If count is negative, everything to the right of the final delimiter (counting from the right) is returned."#,
46 syntax_example = "substr_index(str, delim, count)",
47 sql_example = r#"```sql
48> select substr_index('www.apache.org', '.', 1);
49+---------------------------------------------------------+
50| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) |
51+---------------------------------------------------------+
52| www |
53+---------------------------------------------------------+
54> select substr_index('www.apache.org', '.', -1);
55+----------------------------------------------------------+
56| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) |
57+----------------------------------------------------------+
58| org |
59+----------------------------------------------------------+
60```"#,
61 standard_argument(name = "str", prefix = "String"),
62 argument(
63 name = "delim",
64 description = "The string to find in str to split str."
65 ),
66 argument(
67 name = "count",
68 description = "The number of times to search for the delimiter. Can be either a positive or negative number."
69 )
70)]
71#[derive(Debug, PartialEq, Eq, Hash)]
72pub struct SubstrIndexFunc {
73 signature: Signature,
74 aliases: Vec<String>,
75}
76
77impl Default for SubstrIndexFunc {
78 fn default() -> Self {
79 Self::new()
80 }
81}
82
83impl SubstrIndexFunc {
84 pub fn new() -> Self {
85 use DataType::*;
86 Self {
87 signature: Signature::one_of(
88 vec![
89 Exact(vec![Utf8View, Utf8View, Int64]),
90 Exact(vec![Utf8, Utf8, Int64]),
91 Exact(vec![LargeUtf8, LargeUtf8, Int64]),
92 ],
93 Volatility::Immutable,
94 ),
95 aliases: vec![String::from("substring_index")],
96 }
97 }
98}
99
100impl ScalarUDFImpl for SubstrIndexFunc {
101 fn name(&self) -> &str {
102 "substr_index"
103 }
104
105 fn signature(&self) -> &Signature {
106 &self.signature
107 }
108
109 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
110 Ok(arg_types[0].clone())
111 }
112
113 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
114 let ScalarFunctionArgs { args, .. } = args;
115
116 if let (
117 ColumnarValue::Array(string_array),
118 ColumnarValue::Scalar(delim_scalar),
119 ColumnarValue::Scalar(count_scalar),
120 ) = (&args[0], &args[1], &args[2])
121 {
122 return substr_index_scalar(string_array, delim_scalar, count_scalar);
123 }
124
125 make_scalar_function(substr_index, vec![])(&args)
126 }
127
128 fn aliases(&self) -> &[String] {
129 &self.aliases
130 }
131
132 fn documentation(&self) -> Option<&Documentation> {
133 self.doc()
134 }
135}
136
137fn substr_index(args: &[ArrayRef]) -> Result<ArrayRef> {
143 let [str, delim, count] = take_function_args("substr_index", args)?;
144
145 match str.data_type() {
146 DataType::Utf8 => {
147 let string_array = str.as_string::<i32>();
148 let delimiter_array = delim.as_string::<i32>();
149 let count_array: &PrimitiveArray<Int64Type> = count.as_primitive();
150 substr_index_general(
151 string_array,
152 delimiter_array,
153 count_array,
154 GenericStringArrayBuilder::<i32>::with_capacity(
155 string_array.len(),
156 visible_string_bytes(string_array),
157 ),
158 )
159 }
160 DataType::LargeUtf8 => {
161 let string_array = str.as_string::<i64>();
162 let delimiter_array = delim.as_string::<i64>();
163 let count_array: &PrimitiveArray<Int64Type> = count.as_primitive();
164 substr_index_general(
165 string_array,
166 delimiter_array,
167 count_array,
168 GenericStringArrayBuilder::<i64>::with_capacity(
169 string_array.len(),
170 visible_string_bytes(string_array),
171 ),
172 )
173 }
174 DataType::Utf8View => {
175 let string_array = str.as_string_view();
176 let delimiter_array = delim.as_string_view();
177 let count_array: &PrimitiveArray<Int64Type> = count.as_primitive();
178 substr_index_view(string_array, delimiter_array, count_array)
179 }
180 other => {
181 exec_err!("Unsupported data type {other:?} for function substr_index")
182 }
183 }
184}
185
186fn substr_index_scalar(
187 string_array: &ArrayRef,
188 delim_scalar: &ScalarValue,
189 count_scalar: &ScalarValue,
190) -> Result<ColumnarValue> {
191 if string_array.is_empty() {
192 return Ok(ColumnarValue::Array(new_null_array(
193 string_array.data_type(),
194 0,
195 )));
196 }
197
198 let delimiter = delim_scalar.try_as_str().ok_or_else(|| {
199 exec_datafusion_err!(
200 "Unsupported delimiter type {:?} for substr_index",
201 delim_scalar.data_type()
202 )
203 })?;
204
205 let count = match count_scalar {
206 ScalarValue::Int64(v) => *v,
207 other => {
208 return exec_err!(
209 "Unsupported count type {:?} for substr_index",
210 other.data_type()
211 );
212 }
213 };
214
215 let (Some(delimiter), Some(count)) = (delimiter, count) else {
216 return Ok(ColumnarValue::Array(new_null_array(
217 string_array.data_type(),
218 string_array.len(),
219 )));
220 };
221
222 let result = match string_array.data_type() {
223 DataType::Utf8View => {
224 substr_index_scalar_view(string_array.as_string_view(), delimiter, count)
225 }
226 DataType::Utf8 => {
227 let arr = string_array.as_string::<i32>();
228 substr_index_scalar_impl(
229 arr,
230 delimiter,
231 count,
232 GenericStringArrayBuilder::<i32>::with_capacity(
233 arr.len(),
234 visible_string_bytes(arr),
235 ),
236 )
237 }
238 DataType::LargeUtf8 => {
239 let arr = string_array.as_string::<i64>();
240 substr_index_scalar_impl(
241 arr,
242 delimiter,
243 count,
244 GenericStringArrayBuilder::<i64>::with_capacity(
245 arr.len(),
246 visible_string_bytes(arr),
247 ),
248 )
249 }
250 other => exec_err!("Unsupported string type {other:?} for substr_index"),
251 }?;
252
253 Ok(ColumnarValue::Array(result))
254}
255
256#[inline]
257fn visible_string_bytes<T: OffsetSizeTrait>(
258 string_array: &GenericStringArray<T>,
259) -> usize {
260 let offsets = string_array.value_offsets();
261 offsets[offsets.len() - 1].as_usize() - offsets[0].as_usize()
262}
263
264fn substr_index_general<'a, S, O>(
265 string_array: S,
266 delimiter_array: S,
267 count_array: &PrimitiveArray<Int64Type>,
268 mut builder: GenericStringArrayBuilder<O>,
269) -> Result<ArrayRef>
270where
271 S: StringArrayType<'a> + Copy,
272 O: OffsetSizeTrait,
273{
274 let num_rows = string_array.len();
275 let nulls = NullBuffer::union_many([
277 string_array.nulls(),
278 delimiter_array.nulls(),
279 count_array.nulls(),
280 ]);
281
282 for i in 0..num_rows {
283 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
284 builder.append_placeholder();
285 continue;
286 }
287 let string = unsafe { string_array.value_unchecked(i) };
290 let delimiter = unsafe { delimiter_array.value_unchecked(i) };
291 let n = unsafe { count_array.value_unchecked(i) };
292 builder.append_value(substr_index_slice(string, delimiter, n));
293 }
294
295 Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)
296}
297
298fn substr_index_view(
299 string_array: &StringViewArray,
300 delimiter_array: &StringViewArray,
301 count_array: &PrimitiveArray<Int64Type>,
302) -> Result<ArrayRef> {
303 let nulls = NullBuffer::union_many([
304 string_array.nulls(),
305 delimiter_array.nulls(),
306 count_array.nulls(),
307 ]);
308 let views = string_array.views();
309 let mut views_buf = Vec::with_capacity(string_array.len());
310 let mut has_out_of_line = false;
311
312 for (i, raw_view) in views.iter().enumerate() {
313 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
314 views_buf.push(0);
315 continue;
316 }
317
318 let string = string_array.value(i);
319 let delimiter = delimiter_array.value(i);
320 let count = count_array.value(i);
321 let substr = substr_index_slice(string, delimiter, count);
322 has_out_of_line |= append_substr_view(&mut views_buf, raw_view, string, substr);
323 }
324
325 let data_buffers = if has_out_of_line {
326 string_array.data_buffers().to_vec()
327 } else {
328 vec![]
329 };
330
331 unsafe {
336 Ok(Arc::new(StringViewArray::new_unchecked(
337 ScalarBuffer::from(views_buf),
338 data_buffers,
339 nulls,
340 )) as ArrayRef)
341 }
342}
343
344fn substr_index_scalar_impl<'a, S, O>(
345 string_array: S,
346 delimiter: &str,
347 count: i64,
348 builder: GenericStringArrayBuilder<O>,
349) -> Result<ArrayRef>
350where
351 S: StringArrayType<'a> + Copy,
352 O: OffsetSizeTrait,
353{
354 if count == 0 || delimiter.is_empty() {
355 return map_strings(string_array, builder, |string| &string[..0]);
356 }
357
358 if delimiter.len() == 1 {
359 let delimiter_byte = delimiter.as_bytes()[0];
360 return map_strings(string_array, builder, |string| {
361 substr_index_single_byte(string, delimiter_byte, count)
362 });
363 }
364
365 let occurrence_idx = usize::try_from(count.unsigned_abs()).unwrap_or(usize::MAX) - 1;
366 if count > 0 {
367 let finder = memmem::Finder::new(delimiter.as_bytes());
368 map_strings(string_array, builder, |string| {
369 substr_index_slice_finder(string, &finder, delimiter.len(), occurrence_idx)
370 })
371 } else {
372 let finder_rev = memmem::FinderRev::new(delimiter.as_bytes());
373 map_strings(string_array, builder, |string| {
374 substr_index_rslice_finder(
375 string,
376 &finder_rev,
377 delimiter.len(),
378 occurrence_idx,
379 )
380 })
381 }
382}
383
384fn substr_index_scalar_view(
385 string_array: &StringViewArray,
386 delimiter: &str,
387 count: i64,
388) -> Result<ArrayRef> {
389 let views = string_array.views();
390 let mut views_buf = Vec::with_capacity(string_array.len());
391 let mut has_out_of_line = false;
392
393 if count == 0 || delimiter.is_empty() {
394 let empty_view = make_view(b"", 0, 0);
395 for i in 0..string_array.len() {
396 if string_array.is_null(i) {
397 views_buf.push(0);
398 } else {
399 views_buf.push(empty_view);
400 }
401 }
402 } else if delimiter.len() == 1 {
403 let delimiter_byte = delimiter.as_bytes()[0];
404 for (i, raw_view) in views.iter().enumerate() {
405 if string_array.is_null(i) {
406 views_buf.push(0);
407 continue;
408 }
409
410 let string = string_array.value(i);
411 let substr = substr_index_single_byte(string, delimiter_byte, count);
412 has_out_of_line |=
413 append_substr_view(&mut views_buf, raw_view, string, substr);
414 }
415 } else {
416 let occurrence_idx =
417 usize::try_from(count.unsigned_abs()).unwrap_or(usize::MAX) - 1;
418 if count > 0 {
419 let finder = memmem::Finder::new(delimiter.as_bytes());
420 for (i, raw_view) in views.iter().enumerate() {
421 if string_array.is_null(i) {
422 views_buf.push(0);
423 continue;
424 }
425
426 let string = string_array.value(i);
427 let substr = substr_index_slice_finder(
428 string,
429 &finder,
430 delimiter.len(),
431 occurrence_idx,
432 );
433 has_out_of_line |=
434 append_substr_view(&mut views_buf, raw_view, string, substr);
435 }
436 } else {
437 let finder_rev = memmem::FinderRev::new(delimiter.as_bytes());
438 for (i, raw_view) in views.iter().enumerate() {
439 if string_array.is_null(i) {
440 views_buf.push(0);
441 continue;
442 }
443
444 let string = string_array.value(i);
445 let substr = substr_index_rslice_finder(
446 string,
447 &finder_rev,
448 delimiter.len(),
449 occurrence_idx,
450 );
451 has_out_of_line |=
452 append_substr_view(&mut views_buf, raw_view, string, substr);
453 }
454 }
455 }
456
457 let data_buffers = if has_out_of_line {
458 string_array.data_buffers().to_vec()
459 } else {
460 vec![]
461 };
462
463 unsafe {
469 Ok(Arc::new(StringViewArray::new_unchecked(
470 ScalarBuffer::from(views_buf),
471 data_buffers,
472 string_array.nulls().cloned(),
473 )) as ArrayRef)
474 }
475}
476
477fn map_strings<'a, S, O, F>(
478 string_array: S,
479 mut builder: GenericStringArrayBuilder<O>,
480 f: F,
481) -> Result<ArrayRef>
482where
483 S: StringArrayType<'a> + Copy,
484 O: OffsetSizeTrait,
485 F: Fn(&'a str) -> &'a str,
486{
487 let nulls = string_array.nulls().cloned();
488 for i in 0..string_array.len() {
489 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
490 builder.append_placeholder();
491 continue;
492 }
493 let s = unsafe { string_array.value_unchecked(i) };
496 builder.append_value(f(s));
497 }
498 Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)
499}
500
501#[inline]
502fn substr_index_slice<'a>(string: &'a str, delimiter: &str, count: i64) -> &'a str {
503 if count == 0 || string.is_empty() || delimiter.is_empty() {
504 return &string[..0];
505 }
506
507 if delimiter.len() == 1 {
508 return substr_index_single_byte(string, delimiter.as_bytes()[0], count);
509 }
510
511 let occurrences = usize::try_from(count.unsigned_abs()).unwrap_or(usize::MAX);
512 if count > 0 {
513 string
514 .match_indices(delimiter)
515 .nth(occurrences - 1)
516 .map(|(idx, _)| &string[..idx])
517 .unwrap_or(string)
518 } else {
519 string
520 .rmatch_indices(delimiter)
521 .nth(occurrences - 1)
522 .map(|(idx, _)| &string[idx + delimiter.len()..])
523 .unwrap_or(string)
524 }
525}
526
527#[inline]
528fn substr_index_single_byte(string: &str, delimiter: u8, count: i64) -> &str {
529 let occurrences = usize::try_from(count.unsigned_abs()).unwrap_or(usize::MAX);
530 let idx = if count > 0 {
531 memchr_iter(delimiter, string.as_bytes()).nth(occurrences - 1)
532 } else {
533 memrchr_iter(delimiter, string.as_bytes())
534 .nth(occurrences - 1)
535 .map(|idx| idx + 1)
536 };
537
538 match idx {
539 Some(idx) if count > 0 => &string[..idx],
540 Some(idx) => &string[idx..],
541 None => string,
542 }
543}
544
545#[inline]
546fn substr_index_slice_finder<'a>(
547 string: &'a str,
548 finder: &memmem::Finder,
549 delimiter_len: usize,
550 occurrence_idx: usize,
551) -> &'a str {
552 let bytes = string.as_bytes();
553 let mut start = 0;
554 for _ in 0..occurrence_idx {
555 match finder.find(&bytes[start..]) {
556 Some(pos) => start += pos + delimiter_len,
557 None => return string,
558 }
559 }
560
561 match finder.find(&bytes[start..]) {
562 Some(pos) => &string[..start + pos],
563 None => string,
564 }
565}
566
567#[inline]
568fn substr_index_rslice_finder<'a>(
569 string: &'a str,
570 finder: &memmem::FinderRev,
571 delimiter_len: usize,
572 occurrence_idx: usize,
573) -> &'a str {
574 let bytes = string.as_bytes();
575 let mut end = bytes.len();
576 for _ in 0..occurrence_idx {
577 match finder.rfind(&bytes[..end]) {
578 Some(pos) => end = pos,
579 None => return string,
580 }
581 }
582
583 match finder.rfind(&bytes[..end]) {
584 Some(pos) => &string[pos + delimiter_len..],
585 None => string,
586 }
587}
588
589#[inline]
590fn substr_view(original_view: &u128, substr: &str, start_offset: u32) -> u128 {
591 if substr.len() > 12 {
592 let view = ByteView::from(*original_view);
593 make_view(
594 substr.as_bytes(),
595 view.buffer_index,
596 view.offset + start_offset,
597 )
598 } else {
599 make_view(substr.as_bytes(), 0, 0)
600 }
601}
602
603#[inline]
604fn append_substr_view(
605 views_buf: &mut Vec<u128>,
606 raw_view: &u128,
607 string: &str,
608 substr: &str,
609) -> bool {
610 if substr.len() == string.len() {
611 views_buf.push(*raw_view);
612 return substr.len() > 12;
613 }
614
615 if substr.is_empty() {
616 views_buf.push(make_view(b"", 0, 0));
617 return false;
618 }
619
620 let start_offset = substr.as_ptr() as usize - string.as_ptr() as usize;
621 let start_offset =
622 u32::try_from(start_offset).expect("string view offsets fit in u32");
623 views_buf.push(substr_view(raw_view, substr, start_offset));
624 substr.len() > 12
625}
626
// Unit tests for `substr_index`: scalar invocations through the
// `test_function!` macro, plus direct tests of the `Utf8View` code paths.
#[cfg(test)]
mod tests {
    use arrow::array::{
        Array, ArrayRef, AsArray, Int64Array, StringArray, StringViewArray,
    };
    use arrow::datatypes::DataType::{Utf8, Utf8View};
    use arrow::datatypes::{DataType, Field};

    use datafusion_common::config::ConfigOptions;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
    use std::sync::Arc;

    use crate::unicode::substrindex::SubstrIndexFunc;
    use crate::utils::test::test_function;

    // All-scalar invocations covering positive, negative and zero counts,
    // empty string, empty delimiter, and `Utf8View` inputs.
    #[test]
    fn test_functions() -> Result<()> {
        // count = 1: text before the first '.'.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("www.apache.org")),
                ColumnarValue::Scalar(ScalarValue::from(".")),
                ColumnarValue::Scalar(ScalarValue::from(1i64)),
            ],
            Ok(Some("www")),
            &str,
            Utf8,
            StringArray
        );
        // count = 2: text before the second '.'.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("www.apache.org")),
                ColumnarValue::Scalar(ScalarValue::from(".")),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(Some("www.apache")),
            &str,
            Utf8,
            StringArray
        );
        // count = -2: text after the second '.' from the right.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("www.apache.org")),
                ColumnarValue::Scalar(ScalarValue::from(".")),
                ColumnarValue::Scalar(ScalarValue::from(-2i64)),
            ],
            Ok(Some("apache.org")),
            &str,
            Utf8,
            StringArray
        );
        // count = -1: text after the last '.'.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("www.apache.org")),
                ColumnarValue::Scalar(ScalarValue::from(".")),
                ColumnarValue::Scalar(ScalarValue::from(-1i64)),
            ],
            Ok(Some("org")),
            &str,
            Utf8,
            StringArray
        );
        // count = 0: empty result.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("www.apache.org")),
                ColumnarValue::Scalar(ScalarValue::from(".")),
                ColumnarValue::Scalar(ScalarValue::from(0i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // Empty input string: empty result.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("")),
                ColumnarValue::Scalar(ScalarValue::from(".")),
                ColumnarValue::Scalar(ScalarValue::from(1i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // Empty delimiter: empty result.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("www.apache.org")),
                ColumnarValue::Scalar(ScalarValue::from("")),
                ColumnarValue::Scalar(ScalarValue::from(1i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // `Utf8View` input whose expected result is longer than 12 bytes.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                    "verylongprefix.segment.tail".into(),
                ))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(".".into()))),
                ColumnarValue::Scalar(ScalarValue::from(1i64)),
            ],
            Ok(Some("verylongprefix")),
            &str,
            Utf8View,
            StringViewArray
        );
        // `Utf8View` input with a negative count.
        test_function!(
            SubstrIndexFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                    "www.apache.org".into(),
                ))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(".".into()))),
                ColumnarValue::Scalar(ScalarValue::from(-1i64)),
            ],
            Ok(Some("org")),
            &str,
            Utf8View,
            StringViewArray
        );
        Ok(())
    }

    // Array string input with scalar delimiter and count, invoked through
    // `invoke_with_args`; also checks that a null input row stays null.
    #[test]
    fn test_substr_index_utf8view_scalar_fast_path() -> Result<()> {
        let input = Arc::new(StringViewArray::from(vec![
            Some("alpha.beta.gamma"),
            Some("short.val"),
            None,
        ])) as ArrayRef;

        let arg_fields = vec![
            Field::new("a", Utf8View, true).into(),
            Field::new("b", Utf8View, true).into(),
            Field::new("c", DataType::Int64, true).into(),
        ];

        let args = ScalarFunctionArgs {
            number_rows: input.len(),
            args: vec![
                ColumnarValue::Array(Arc::clone(&input)),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(".".into()))),
                ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
            ],
            arg_fields,
            return_field: Field::new("f", Utf8View, true).into(),
            config_options: Arc::new(ConfigOptions::default()),
        };

        let result = match SubstrIndexFunc::new().invoke_with_args(args)? {
            ColumnarValue::Array(result) => result,
            other => panic!("expected array result, got {other:?}"),
        };
        let result = result.as_string_view();

        assert_eq!(result.len(), 3);
        assert_eq!(result.value(0), "alpha");
        assert_eq!(result.value(1), "short");
        assert!(result.is_null(2));

        Ok(())
    }

    // Calls `substr_index_view` directly on arrays sliced at offset 1, so the
    // first row of each source array must not influence the result.
    #[test]
    fn test_substr_index_utf8view_array_sliced() -> Result<()> {
        use super::substr_index_view;

        let strings: StringViewArray = vec![
            Some("skip_this.value"),
            Some("this_is_a_long_prefix.suffix"),
            Some("short.val"),
            Some("another_long_result.rest"),
            None,
        ]
        .into_iter()
        .collect();
        let delimiters: StringViewArray =
            vec![Some("."), Some("."), Some("."), Some("."), Some(".")]
                .into_iter()
                .collect();
        let counts = Int64Array::from(vec![1, 1, -1, 1, 1]);

        // Drop the first row of every input.
        let sliced_strings = strings.slice(1, 4);
        let sliced_delimiters = delimiters.slice(1, 4);
        let sliced_counts = counts.slice(1, 4);

        let result =
            substr_index_view(&sliced_strings, &sliced_delimiters, &sliced_counts)?;
        let result = result.as_string_view();

        assert_eq!(result.len(), 4);
        assert_eq!(result.value(0), "this_is_a_long_prefix");
        assert_eq!(result.value(1), "val");
        assert_eq!(result.value(2), "another_long_result");
        assert!(result.is_null(3));

        Ok(())
    }

    // When the delimiter does not occur, the output view must be identical
    // to the input view for both a long and a short value.
    #[test]
    fn test_substr_index_utf8view_scalar_reuses_original_view_when_unchanged()
    -> Result<()> {
        use super::substr_index_scalar_view;

        let strings: StringViewArray = vec![
            Some("very_long_value_without_separator"),
            Some("short"),
            None,
        ]
        .into_iter()
        .collect();

        let result = substr_index_scalar_view(&strings, ".", 1)?;
        let result = result.as_string_view();

        assert_eq!(result.len(), 3);
        assert_eq!(result.value(0), "very_long_value_without_separator");
        assert_eq!(result.value(1), "short");
        assert_eq!(result.views()[0], strings.views()[0]);
        assert_eq!(result.views()[1], strings.views()[1]);
        assert!(result.is_null(2));

        Ok(())
    }
}