1use std::sync::Arc;
20
21use arrow_array::builder::{
22 BinaryViewBuilder, BufferBuilder, FixedSizeBinaryBuilder, StringViewBuilder,
23};
24use arrow_array::types::ByteArrayType;
25use arrow_array::*;
26use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer};
27use arrow_data::ArrayDataBuilder;
28use arrow_schema::{ArrowError, DataType};
29
30pub fn concat_elements_bytes<T: ByteArrayType>(
32 left: &GenericByteArray<T>,
33 right: &GenericByteArray<T>,
34) -> Result<GenericByteArray<T>, ArrowError> {
35 if left.len() != right.len() {
36 return Err(ArrowError::ComputeError(format!(
37 "Arrays must have the same length: {} != {}",
38 left.len(),
39 right.len()
40 )));
41 }
42
43 let nulls = NullBuffer::union(left.nulls(), right.nulls());
44
45 let left_offsets = left.value_offsets();
46 let right_offsets = right.value_offsets();
47
48 let left_values = left.value_data();
49 let right_values = right.value_data();
50
51 let mut output_values = BufferBuilder::<u8>::new(
52 left_values.len() + right_values.len()
53 - left_offsets[0].as_usize()
54 - right_offsets[0].as_usize(),
55 );
56
57 let mut output_offsets = BufferBuilder::<T::Offset>::new(left_offsets.len());
58 output_offsets.append(T::Offset::usize_as(0));
59 for (left_idx, right_idx) in left_offsets.windows(2).zip(right_offsets.windows(2)) {
60 output_values.append_slice(&left_values[left_idx[0].as_usize()..left_idx[1].as_usize()]);
61 output_values.append_slice(&right_values[right_idx[0].as_usize()..right_idx[1].as_usize()]);
62 output_offsets.append(T::Offset::from_usize(output_values.len()).unwrap());
63 }
64
65 let builder = ArrayDataBuilder::new(T::DATA_TYPE)
66 .len(left.len())
67 .add_buffer(output_offsets.finish())
68 .add_buffer(output_values.finish())
69 .nulls(nulls);
70
71 Ok(unsafe { builder.build_unchecked() }.into())
73}
74
75pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
90 left: &GenericStringArray<Offset>,
91 right: &GenericStringArray<Offset>,
92) -> Result<GenericStringArray<Offset>, ArrowError> {
93 concat_elements_bytes(left, right)
94}
95
96pub fn concat_element_binary<Offset: OffsetSizeTrait>(
98 left: &GenericBinaryArray<Offset>,
99 right: &GenericBinaryArray<Offset>,
100) -> Result<GenericBinaryArray<Offset>, ArrowError> {
101 concat_elements_bytes(left, right)
102}
103
104pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
112 arrays: &[&GenericStringArray<Offset>],
113) -> Result<GenericStringArray<Offset>, ArrowError> {
114 if arrays.is_empty() {
115 return Err(ArrowError::ComputeError(
116 "concat requires input of at least one array".to_string(),
117 ));
118 }
119
120 let size = arrays[0].len();
121 if !arrays.iter().all(|array| array.len() == size) {
122 return Err(ArrowError::ComputeError(format!(
123 "Arrays must have the same length of {size}",
124 )));
125 }
126
127 let nulls = arrays
128 .iter()
129 .fold(None, |acc, a| NullBuffer::union(acc.as_ref(), a.nulls()));
130
131 let data_values = arrays
132 .iter()
133 .map(|array| array.value_data())
134 .collect::<Vec<_>>();
135
136 let mut offsets = arrays
137 .iter()
138 .map(|a| a.value_offsets().iter().peekable())
139 .collect::<Vec<_>>();
140
141 let mut output_values = BufferBuilder::<u8>::new(
142 data_values
143 .iter()
144 .zip(offsets.iter_mut())
145 .map(|(data, offset)| data.len() - offset.peek().unwrap().as_usize())
146 .sum(),
147 );
148
149 let mut output_offsets = BufferBuilder::<Offset>::new(size + 1);
150 output_offsets.append(Offset::zero());
151 for _ in 0..size {
152 data_values
153 .iter()
154 .zip(offsets.iter_mut())
155 .for_each(|(values, offset)| {
156 let index_start = offset.next().unwrap().as_usize();
157 let index_end = offset.peek().unwrap().as_usize();
158 output_values.append_slice(&values[index_start..index_end]);
159 });
160 output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
161 }
162
163 let builder = ArrayDataBuilder::new(GenericStringArray::<Offset>::DATA_TYPE)
164 .len(size)
165 .add_buffer(output_offsets.finish())
166 .add_buffer(output_values.finish())
167 .nulls(nulls);
168
169 Ok(unsafe { builder.build_unchecked() }.into())
171}
172
173pub fn concat_elements_fixed_size_binary(
180 left: &FixedSizeBinaryArray,
181 right: &FixedSizeBinaryArray,
182) -> Result<FixedSizeBinaryArray, ArrowError> {
183 if left.len() != right.len() {
184 return Err(ArrowError::ComputeError(format!(
185 "Arrays must have the same length: {} != {}",
186 left.len(),
187 right.len()
188 )));
189 }
190
191 let left_size = left.value_length() as usize;
192 let right_size = right.value_length() as usize;
193 let output_size = left_size + right_size;
194
195 let nulls = NullBuffer::union(left.nulls(), right.nulls());
197
198 let mut result = FixedSizeBinaryBuilder::with_capacity(left.len(), output_size as i32);
199 let mut buffer = MutableBuffer::with_capacity(output_size);
200 for i in 0..left.len() {
201 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
202 result.append_null();
203 } else {
204 buffer.clear();
205 buffer.extend_from_slice(left.value(i));
206 buffer.extend_from_slice(right.value(i));
207 result.append_value(&buffer)?;
208 }
209 }
210
211 Ok(result.finish())
212}
213
214pub fn concat_elements_binary_view_array(
221 left: &BinaryViewArray,
222 right: &BinaryViewArray,
223) -> Result<BinaryViewArray, ArrowError> {
224 if left.len() != right.len() {
225 return Err(ArrowError::ComputeError(format!(
226 "Arrays must have the same length: {} != {}",
227 left.len(),
228 right.len()
229 )));
230 }
231 let mut result = BinaryViewBuilder::with_capacity(left.len());
232
233 let mut buffer = MutableBuffer::new(0);
235
236 let nulls = NullBuffer::union(left.nulls(), right.nulls());
238
239 for i in 0..left.len() {
240 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
241 result.append_null();
242 } else {
243 buffer.clear();
244 buffer.extend_from_slice(left.value(i));
245 buffer.extend_from_slice(right.value(i));
246 result.try_append_value(&buffer)?;
247 }
248 }
249 Ok(result.finish())
250}
251
252pub fn concat_elements_string_view_array(
262 left: &StringViewArray,
263 right: &StringViewArray,
264) -> Result<StringViewArray, ArrowError> {
265 if left.len() != right.len() {
266 return Err(ArrowError::ComputeError(format!(
267 "Arrays must have the same length: {} != {}",
268 left.len(),
269 right.len()
270 )));
271 }
272
273 let mut result = StringViewBuilder::with_capacity(left.len());
274
275 let mut buffer: Vec<u8> = Vec::new();
277
278 let nulls = NullBuffer::union(left.nulls(), right.nulls());
279
280 for i in 0..left.len() {
281 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
282 result.append_null();
283 } else {
284 buffer.clear();
285 buffer.extend_from_slice(left.value(i).as_bytes());
286 buffer.extend_from_slice(right.value(i).as_bytes());
287 let s = std::str::from_utf8(&buffer).map_err(|_| {
288 ArrowError::ComputeError("Concatenated values are not valid UTF-8".into())
289 })?;
290 result.try_append_value(s)?;
291 }
292 }
293 Ok(result.finish())
294}
295
296pub fn concat_elements_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef, ArrowError> {
302 if left.data_type() != right.data_type() {
303 return Err(ArrowError::ComputeError(format!(
304 "Cannot concat arrays of different types: {} != {}",
305 left.data_type(),
306 right.data_type()
307 )));
308 }
309 match (left.data_type(), right.data_type()) {
310 (DataType::Utf8, DataType::Utf8) => {
311 let left = left.as_any().downcast_ref::<StringArray>().unwrap();
312 let right = right.as_any().downcast_ref::<StringArray>().unwrap();
313 Ok(Arc::new(concat_elements_utf8(left, right)?))
314 }
315 (DataType::LargeUtf8, DataType::LargeUtf8) => {
316 let left = left.as_any().downcast_ref::<LargeStringArray>().unwrap();
317 let right = right.as_any().downcast_ref::<LargeStringArray>().unwrap();
318 Ok(Arc::new(concat_elements_utf8(left, right)?))
319 }
320 (DataType::Binary, DataType::Binary) => {
321 let left = left.as_any().downcast_ref::<BinaryArray>().unwrap();
322 let right = right.as_any().downcast_ref::<BinaryArray>().unwrap();
323 Ok(Arc::new(concat_element_binary(left, right)?))
324 }
325 (DataType::LargeBinary, DataType::LargeBinary) => {
326 let left = left.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
327 let right = right.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
328 Ok(Arc::new(concat_element_binary(left, right)?))
329 }
330 (DataType::BinaryView, DataType::BinaryView) => {
331 let left = left.as_any().downcast_ref::<BinaryViewArray>().unwrap();
332 let right = right.as_any().downcast_ref::<BinaryViewArray>().unwrap();
333 Ok(Arc::new(concat_elements_binary_view_array(left, right)?))
334 }
335 (DataType::Utf8View, DataType::Utf8View) => {
336 let left = left.as_any().downcast_ref::<StringViewArray>().unwrap();
337 let right = right.as_any().downcast_ref::<StringViewArray>().unwrap();
338 Ok(Arc::new(concat_elements_string_view_array(left, right)?))
339 }
340 (DataType::FixedSizeBinary(_), DataType::FixedSizeBinary(_)) => {
341 let left = left
342 .as_any()
343 .downcast_ref::<FixedSizeBinaryArray>()
344 .unwrap();
345 let right = right
346 .as_any()
347 .downcast_ref::<FixedSizeBinaryArray>()
348 .unwrap();
349 Ok(Arc::new(concat_elements_fixed_size_binary(left, right)?))
350 }
351 _ => Err(ArrowError::NotYetImplemented(format!(
353 "concat not supported for {}",
354 left.data_type()
355 ))),
356 }
357}
358
// Unit tests for the element-wise concatenation kernels defined above.
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;

    // A row that is null on either side must be null in the output.
    #[test]
    fn test_string_concat() {
        let left = [Some("foo"), Some("bar"), None]
            .into_iter()
            .collect::<StringArray>();
        let right = [None, Some("yyy"), Some("zzz")]
            .into_iter()
            .collect::<StringArray>();

        let output = concat_elements_utf8(&left, &right).unwrap();

        let expected = [None, Some("baryyy"), None]
            .into_iter()
            .collect::<StringArray>();

        assert_eq!(output, expected);
    }

    // Empty strings are valid values and contribute no bytes to the result.
    #[test]
    fn test_string_concat_empty_string() {
        let left = [Some("foo"), Some(""), Some("bar")]
            .into_iter()
            .collect::<StringArray>();
        let right = [Some("baz"), Some(""), Some("")]
            .into_iter()
            .collect::<StringArray>();

        let output = concat_elements_utf8(&left, &right).unwrap();

        let expected = [Some("foobaz"), Some(""), Some("bar")]
            .into_iter()
            .collect::<StringArray>();

        assert_eq!(output, expected);
    }

    // Inputs built from non-optional values.
    #[test]
    fn test_string_concat_no_null() {
        let left = StringArray::from(vec!["foo", "bar"]);
        let right = StringArray::from(vec!["bar", "baz"]);

        let output = concat_elements_utf8(&left, &right).unwrap();

        let expected = StringArray::from(vec!["foobar", "barbaz"]);

        assert_eq!(output, expected);
    }

    // Inputs of different lengths are rejected with a compute error.
    #[test]
    fn test_string_concat_error() {
        let left = StringArray::from(vec!["foo", "bar"]);
        let right = StringArray::from(vec!["baz"]);

        let output = concat_elements_utf8(&left, &right);

        assert_eq!(
            output.unwrap_err().to_string(),
            "Compute error: Arrays must have the same length: 2 != 1".to_string()
        );
    }

    // Sliced inputs: the kernel must honour each slice's own starting offset.
    #[test]
    fn test_string_concat_slice() {
        let left = &StringArray::from(vec![None, Some("foo"), Some("bar"), Some("baz")]);
        let right = &StringArray::from(vec![Some("boo"), None, Some("far"), Some("faz")]);

        // Left starts at row 0, right starts at row 1.
        let left_slice = left.slice(0, 3);
        let right_slice = right.slice(1, 3);
        let output = concat_elements_utf8(
            left_slice
                .as_any()
                .downcast_ref::<GenericStringArray<i32>>()
                .unwrap(),
            right_slice
                .as_any()
                .downcast_ref::<GenericStringArray<i32>>()
                .unwrap(),
        )
        .unwrap();

        let expected = [None, Some("foofar"), Some("barfaz")]
            .into_iter()
            .collect::<StringArray>();

        assert_eq!(output, expected);

        // Both slices start at a non-zero row.
        let left_slice = left.slice(2, 2);
        let right_slice = right.slice(1, 2);

        let output = concat_elements_utf8(
            left_slice
                .as_any()
                .downcast_ref::<GenericStringArray<i32>>()
                .unwrap(),
            right_slice
                .as_any()
                .downcast_ref::<GenericStringArray<i32>>()
                .unwrap(),
        )
        .unwrap();

        let expected = [None, Some("bazfar")].into_iter().collect::<StringArray>();

        assert_eq!(output, expected);
    }

    // The many-array variant rejects an empty input list.
    #[test]
    fn test_string_concat_error_empty() {
        assert_eq!(
            concat_elements_utf8_many::<i32>(&[])
                .unwrap_err()
                .to_string(),
            "Compute error: concat requires input of at least one array".to_string()
        );
    }

    // A single input array is returned unchanged, nulls included.
    #[test]
    fn test_string_concat_one() {
        let expected = [None, Some("baryyy"), None]
            .into_iter()
            .collect::<StringArray>();

        let output = concat_elements_utf8_many(&[&expected]).unwrap();

        assert_eq!(output, expected);
    }

    // With several inputs a row is null as soon as any one input is null there.
    #[test]
    fn test_string_concat_many() {
        let foo = StringArray::from(vec![Some("f"), Some("o"), Some("o"), None]);
        let bar = StringArray::from(vec![None, Some("b"), Some("a"), Some("r")]);
        let baz = StringArray::from(vec![Some("b"), None, Some("a"), Some("z")]);

        let output = concat_elements_utf8_many(&[&foo, &bar, &baz]).unwrap();

        let expected = [None, None, Some("oaa"), None]
            .into_iter()
            .collect::<StringArray>();

        assert_eq!(output, expected);
    }

    // Two width-3 inputs produce width-6 values; nulls propagate from either side.
    #[test]
    fn test_fixed_size_binary_concat() {
        let left = FixedSizeBinaryArray::from(vec![Some(b"foo" as &[u8]), Some(b"bar"), None]);
        let right = FixedSizeBinaryArray::from(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")]);

        let output = concat_elements_fixed_size_binary(&left, &right).unwrap();

        let expected = FixedSizeBinaryArray::from(vec![None, Some(b"baryyy" as &[u8]), None]);
        assert_eq!(output, expected);
    }

    // Fixed-size inputs built from non-optional values.
    #[test]
    fn test_fixed_size_binary_concat_no_null() {
        let left = FixedSizeBinaryArray::from(vec![b"ab" as &[u8], b"cd"]);
        let right = FixedSizeBinaryArray::from(vec![b"12" as &[u8], b"34"]);

        let output = concat_elements_fixed_size_binary(&left, &right).unwrap();

        let expected = FixedSizeBinaryArray::from(vec![b"ab12" as &[u8], b"cd34"]);
        assert_eq!(output, expected);
    }

    // Fixed-size inputs of different lengths are rejected.
    #[test]
    fn test_fixed_size_binary_concat_error() {
        let left = FixedSizeBinaryArray::from(vec![b"ab" as &[u8], b"cd"]);
        let right = FixedSizeBinaryArray::from(vec![b"12" as &[u8]]);

        let output = concat_elements_fixed_size_binary(&left, &right);
        assert_eq!(
            output.unwrap_err().to_string(),
            "Compute error: Arrays must have the same length: 2 != 1".to_string()
        );
    }

    // Inputs constructed with a value width of 0 and an empty value buffer.
    #[test]
    fn test_fixed_size_binary_concat_empty() {
        let left = FixedSizeBinaryArray::new(0, Buffer::from(&[]), None);
        let right = FixedSizeBinaryArray::new(0, Buffer::from(&[]), None);

        let output = concat_elements_fixed_size_binary(&left, &right).unwrap();

        let expected = FixedSizeBinaryArray::new(0, Buffer::from(&[]), None);
        assert_eq!(output, expected);
    }

    // Binary view inputs: nulls propagate from either side.
    #[test]
    fn test_binary_view_concat() {
        let left = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), Some(b"bar"), None]);
        let right = BinaryViewArray::from_iter(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")]);

        let output = concat_elements_binary_view_array(&left, &right).unwrap();

        let expected = BinaryViewArray::from_iter(vec![None, Some(b"baryyy" as &[u8]), None]);
        assert_eq!(output, expected);
    }

    // String view inputs: nulls propagate from either side.
    #[test]
    fn test_string_view_concat() {
        let left = StringViewArray::from_iter(vec![Some("foo"), Some("bar"), None]);
        let right = StringViewArray::from_iter(vec![None, Some("yyy"), Some("zzz")]);

        let output = concat_elements_string_view_array(&left, &right).unwrap();

        let expected = StringViewArray::from_iter(vec![None, Some("baryyy"), None]);
        assert_eq!(output, expected);
    }

    // All rows valid, including empty values on one or both sides.
    #[test]
    fn test_binary_view_concat_no_null() {
        let left = BinaryViewArray::from_iter(vec![
            Some(b"foo" as &[u8]),
            Some(b"bar"),
            Some(b""),
            Some(b"baz"),
        ]);
        let right = BinaryViewArray::from_iter(vec![
            Some(b"bar" as &[u8]),
            Some(b"baz"),
            Some(b""),
            Some(b""),
        ]);

        let output = concat_elements_binary_view_array(&left, &right).unwrap();

        let expected = BinaryViewArray::from_iter(vec![
            Some(b"foobar" as &[u8]),
            Some(b"barbaz"),
            Some(b""),
            Some(b"baz"),
        ]);
        assert_eq!(output, expected);
    }

    // Binary view inputs of different lengths are rejected.
    #[test]
    fn test_binary_view_concat_error() {
        let left = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), Some(b"bar")]);
        let right = BinaryViewArray::from_iter(vec![Some(b"baz" as &[u8])]);

        let output = concat_elements_binary_view_array(&left, &right);
        assert_eq!(
            output.unwrap_err().to_string(),
            "Compute error: Arrays must have the same length: 2 != 1".to_string()
        );
    }

    // Two zero-length inputs yield a zero-length output.
    #[test]
    fn test_binary_view_concat_empty() {
        let left = BinaryViewArray::from_iter(vec![] as Vec<Option<&[u8]>>);
        let right = BinaryViewArray::from_iter(vec![] as Vec<Option<&[u8]>>);

        let output = concat_elements_binary_view_array(&left, &right).unwrap();
        let expected = BinaryViewArray::from_iter(vec![] as Vec<Option<&[u8]>>);
        assert_eq!(output, expected);
    }

    // Dynamic dispatch across the array types exercised below, each with the
    // same fixture.
    #[test]
    fn test_concat_dyn_same_type() {
        // Utf8
        let left = StringArray::from(vec![Some("foo"), Some("bar"), None]);
        let right = StringArray::from(vec![None, Some("yyy"), Some("zzz")]);

        let output: StringArray = concat_elements_dyn(&left, &right)
            .unwrap()
            .into_data()
            .into();
        let expected = StringArray::from(vec![None, Some("baryyy"), None]);
        assert_eq!(output, expected);

        // LargeUtf8
        let left = LargeStringArray::from(vec![Some("foo"), Some("bar"), None]);
        let right = LargeStringArray::from(vec![None, Some("yyy"), Some("zzz")]);

        let output: LargeStringArray = concat_elements_dyn(&left, &right)
            .unwrap()
            .into_data()
            .into();
        let expected = LargeStringArray::from(vec![None, Some("baryyy"), None]);
        assert_eq!(output, expected);

        // Binary
        let left = BinaryArray::from_opt_vec(vec![Some(b"foo"), Some(b"bar"), None]);
        let right = BinaryArray::from_opt_vec(vec![None, Some(b"yyy"), Some(b"zzz")]);
        let output: BinaryArray = concat_elements_dyn(&left, &right)
            .unwrap()
            .into_data()
            .into();
        let expected = BinaryArray::from_opt_vec(vec![None, Some(b"baryyy"), None]);
        assert_eq!(output, expected);

        // LargeBinary
        let left = LargeBinaryArray::from_opt_vec(vec![Some(b"foo"), Some(b"bar"), None]);
        let right = LargeBinaryArray::from_opt_vec(vec![None, Some(b"yyy"), Some(b"zzz")]);
        let output: LargeBinaryArray = concat_elements_dyn(&left, &right)
            .unwrap()
            .into_data()
            .into();
        let expected = LargeBinaryArray::from_opt_vec(vec![None, Some(b"baryyy"), None]);
        assert_eq!(output, expected);

        // BinaryView
        let left = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), Some(b"bar"), None]);
        let right = BinaryViewArray::from_iter(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")]);
        let output: BinaryViewArray = concat_elements_dyn(&left, &right)
            .unwrap()
            .into_data()
            .into();
        let expected = BinaryViewArray::from_iter(vec![None, Some(b"baryyy" as &[u8]), None]);
        assert_eq!(output, expected);

        // FixedSizeBinary
        let left = FixedSizeBinaryArray::from(vec![Some(b"foo" as &[u8]), Some(b"bar"), None]);
        let right = FixedSizeBinaryArray::from(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")]);
        let output: FixedSizeBinaryArray = concat_elements_dyn(&left, &right)
            .unwrap()
            .into_data()
            .into();
        let expected = FixedSizeBinaryArray::from(vec![None, Some(b"baryyy" as &[u8]), None]);
        assert_eq!(output, expected);
    }

    // Mismatched data types are rejected before any kernel is selected.
    #[test]
    fn test_concat_dyn_different_type() {
        let left = StringArray::from(vec![Some("foo"), Some("bar"), None]);
        let right = LargeStringArray::from(vec![None, Some("1"), Some("2")]);

        let output = concat_elements_dyn(&left, &right);
        assert_eq!(
            output.unwrap_err().to_string(),
            "Compute error: Cannot concat arrays of different types: Utf8 != LargeUtf8".to_string()
        );
    }
}