datafusion_functions/unicode/
common.rs1use arrow::array::{
21 Array, ArrayAccessor, ArrayIter, ArrayRef, ByteView, GenericStringArray, Int64Array,
22 OffsetSizeTrait, StringViewArray, make_view,
23};
24use arrow::datatypes::DataType;
25use arrow_buffer::{NullBuffer, ScalarBuffer};
26use datafusion_common::cast::{
27 as_generic_string_array, as_int64_array, as_string_view_array,
28};
29use datafusion_common::exec_err;
30use std::cmp::Ordering;
31use std::ops::Range;
32use std::sync::Arc;
33
34pub(crate) trait LeftRightSlicer {
36 fn slice(string: &str, n: i64) -> Range<usize>;
37}
38
39pub(crate) struct LeftSlicer {}
40
41impl LeftRightSlicer for LeftSlicer {
42 fn slice(string: &str, n: i64) -> Range<usize> {
43 0..left_right_byte_length(string, n)
44 }
45}
46
47pub(crate) struct RightSlicer {}
48
49impl LeftRightSlicer for RightSlicer {
50 fn slice(string: &str, n: i64) -> Range<usize> {
51 if n == 0 {
52 0..0
54 } else if n == i64::MIN {
55 0..0
57 } else {
58 left_right_byte_length(string, -n)..string.len()
59 }
60 }
61}
62
63#[inline]
65fn left_right_byte_length(string: &str, n: i64) -> usize {
66 match n.cmp(&0) {
67 Ordering::Less => string
68 .char_indices()
69 .nth_back((n.unsigned_abs().min(usize::MAX as u64) - 1) as usize)
70 .map(|(index, _)| index)
71 .unwrap_or(0),
72 Ordering::Equal => 0,
73 Ordering::Greater => string
74 .char_indices()
75 .nth(n.unsigned_abs().min(usize::MAX as u64) as usize)
76 .map(|(index, _)| index)
77 .unwrap_or(string.len()),
78 }
79}
80
81pub(crate) fn general_left_right<F: LeftRightSlicer>(
83 args: &[ArrayRef],
84) -> datafusion_common::Result<ArrayRef> {
85 let n_array = as_int64_array(&args[1])?;
86
87 match args[0].data_type() {
88 DataType::Utf8 => {
89 let string_array = as_generic_string_array::<i32>(&args[0])?;
90 general_left_right_array::<i32, _, F>(string_array, n_array)
91 }
92 DataType::LargeUtf8 => {
93 let string_array = as_generic_string_array::<i64>(&args[0])?;
94 general_left_right_array::<i64, _, F>(string_array, n_array)
95 }
96 DataType::Utf8View => {
97 let string_view_array = as_string_view_array(&args[0])?;
98 general_left_right_view::<F>(string_view_array, n_array)
99 }
100 _ => exec_err!("Not supported"),
101 }
102}
103
104fn general_left_right_array<
106 'a,
107 T: OffsetSizeTrait,
108 V: ArrayAccessor<Item = &'a str>,
109 F: LeftRightSlicer,
110>(
111 string_array: V,
112 n_array: &Int64Array,
113) -> datafusion_common::Result<ArrayRef> {
114 let iter = ArrayIter::new(string_array);
115 let result = iter
116 .zip(n_array.iter())
117 .map(|(string, n)| match (string, n) {
118 (Some(string), Some(n)) => {
119 let range = F::slice(string, n);
120 Some(&string[range])
122 }
123 _ => None,
124 })
125 .collect::<GenericStringArray<T>>();
126
127 Ok(Arc::new(result) as ArrayRef)
128}
129
130fn general_left_right_view<F: LeftRightSlicer>(
132 string_view_array: &StringViewArray,
133 n_array: &Int64Array,
134) -> datafusion_common::Result<ArrayRef> {
135 let len = n_array.len();
136
137 let views = string_view_array.views();
138 debug_assert!(views.len() == string_view_array.len());
140
141 let string_nulls = string_view_array.nulls();
143 let n_nulls = n_array.nulls();
144 let new_nulls = NullBuffer::union(string_nulls, n_nulls);
145
146 let new_views = (0..len)
147 .map(|idx| {
148 let view = views[idx];
149
150 let is_valid = match &new_nulls {
151 Some(nulls_buf) => nulls_buf.is_valid(idx),
152 None => true,
153 };
154
155 if is_valid {
156 let string: &str = string_view_array.value(idx);
157 let n = n_array.value(idx);
158
159 let range = F::slice(string, n);
161 let result_bytes = &string.as_bytes()[range.clone()];
162
163 let byte_view = ByteView::from(view);
164 let new_offset = byte_view.offset + (range.start as u32);
167 make_view(result_bytes, byte_view.buffer_index, new_offset)
169 } else {
170 view
172 }
173 })
174 .collect::<Vec<u128>>();
175
176 let result = StringViewArray::try_new(
178 ScalarBuffer::from(new_views),
179 Vec::from(string_view_array.data_buffers()),
180 new_nulls,
181 )?;
182 Ok(Arc::new(result) as ArrayRef)
183}