1use std::sync::Arc;
5
6use arrow_array::{make_array, Array, RecordBatch};
7use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
8use arrow_data::{transform::MutableArrayData, ArrayData, ArrayDataBuilder};
9
10pub fn deep_copy_buffer(buffer: &Buffer) -> Buffer {
11 Buffer::from(buffer.as_slice())
12}
13
14pub fn deep_copy_nulls(nulls: Option<&NullBuffer>) -> Option<NullBuffer> {
15 let nulls = nulls?;
16 let bit_buffer = deep_copy_buffer(nulls.inner().inner());
17 Some(unsafe {
18 NullBuffer::new_unchecked(
19 BooleanBuffer::new(bit_buffer, nulls.offset(), nulls.len()),
20 nulls.null_count(),
21 )
22 })
23}
24
25pub fn deep_copy_array_data(data: &ArrayData) -> ArrayData {
26 let data_type = data.data_type().clone();
27 let len = data.len();
28 let nulls = deep_copy_nulls(data.nulls());
29 let offset = data.offset();
30 let buffers = data
31 .buffers()
32 .iter()
33 .map(deep_copy_buffer)
34 .collect::<Vec<_>>();
35 let child_data = data
36 .child_data()
37 .iter()
38 .map(deep_copy_array_data)
39 .collect::<Vec<_>>();
40 unsafe {
41 ArrayDataBuilder::new(data_type)
42 .len(len)
43 .nulls(nulls)
44 .offset(offset)
45 .buffers(buffers)
46 .child_data(child_data)
47 .build_unchecked()
48 }
49}
50
51pub fn deep_copy_array(array: &dyn Array) -> Arc<dyn Array> {
52 let data = array.to_data();
53 let data = deep_copy_array_data(&data);
54 make_array(data)
55}
56
57pub fn deep_copy_batch(batch: &RecordBatch) -> crate::Result<RecordBatch> {
58 let arrays = batch
59 .columns()
60 .iter()
61 .map(|array| deep_copy_array(array))
62 .collect::<Vec<_>>();
63 RecordBatch::try_new(batch.schema(), arrays)
64}
65
66pub fn deep_copy_array_data_sliced(data: &ArrayData) -> ArrayData {
69 let mut mutable = MutableArrayData::new(vec![data], false, data.len());
71
72 mutable.extend(0, data.offset(), data.offset() + data.len());
74
75 mutable.freeze()
77}
78
79pub fn deep_copy_array_sliced(array: &dyn Array) -> Arc<dyn Array> {
81 let data = array.to_data();
82 let data = deep_copy_array_data_sliced(&data);
83 make_array(data)
84}
85
86pub fn deep_copy_batch_sliced(batch: &RecordBatch) -> crate::Result<RecordBatch> {
88 let arrays = batch
89 .columns()
90 .iter()
91 .map(|array| deep_copy_array_sliced(array))
92 .collect::<Vec<_>>();
93 RecordBatch::try_new(batch.schema(), arrays)
94}
95
#[cfg(test)]
pub mod tests {
    use std::sync::Arc;

    use arrow_array::{Array, Int32Array, RecordBatch, StringArray};
    use arrow_schema::{DataType, Field, Schema};

    /// Shared fixture: `[1, null, 3, null, 5]`.
    fn nullable_ints() -> Int32Array {
        Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)])
    }

    /// A full deep copy of a sliced array keeps its length and validity.
    #[test]
    fn test_deep_copy_sliced_array_with_nulls() {
        let window = nullable_ints().slice(1, 3);
        let duplicate = super::deep_copy_array(&window);

        assert_eq!(window.len(), duplicate.len());
        assert_eq!(window.nulls(), duplicate.nulls());
    }

    /// The sliced copy contains only the window and starts at offset zero.
    #[test]
    fn test_deep_copy_array_data_sliced() {
        let source = Int32Array::from((0..1000).collect::<Vec<i32>>());
        let window = source.slice(100, 10);

        let copied_data = super::deep_copy_array_data_sliced(&window.to_data());
        assert_eq!(copied_data.len(), 10);
        assert_eq!(copied_data.offset(), 0);

        let copied = Int32Array::from(copied_data);
        for (row, expected) in (100..110).enumerate() {
            assert_eq!(copied.value(row), expected);
        }
    }

    /// The array-level sliced copy yields the values of the window.
    #[test]
    fn test_deep_copy_array_sliced() {
        let window = Int32Array::from(vec![1, 2, 3, 4, 5]).slice(1, 3);

        let copied = super::deep_copy_array_sliced(&window);
        assert_eq!(copied.len(), 3);

        let ints = copied.as_any().downcast_ref::<Int32Array>().unwrap();
        for (row, expected) in [2, 3, 4].iter().enumerate() {
            assert_eq!(ints.value(row), *expected);
        }
    }

    /// The batch-level sliced copy keeps shape and values for all columns.
    #[test]
    fn test_deep_copy_batch_sliced() {
        let fields = vec![
            Field::new("id", DataType::Int32, false),
            Field::new("name", DataType::Utf8, false),
        ];
        let schema = Arc::new(Schema::new(fields));

        let ids: Arc<dyn Array> = Arc::new(Int32Array::from((0..100).collect::<Vec<i32>>()));
        let labels: Vec<String> = (0..100).map(|i| format!("name_{}", i)).collect();
        let names: Arc<dyn Array> = Arc::new(StringArray::from(labels));

        let batch = RecordBatch::try_new(schema, vec![ids, names]).unwrap();

        let window = batch.slice(10, 5);
        let copied = super::deep_copy_batch_sliced(&window).unwrap();

        assert_eq!(copied.num_rows(), 5);
        assert_eq!(copied.num_columns(), 2);

        let id_col = copied
            .column(0)
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap();
        let name_col = copied
            .column(1)
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap();

        for row in 0..5 {
            assert_eq!(id_col.value(row), 10 + row as i32);
            assert_eq!(name_col.value(row), format!("name_{}", 10 + row));
        }
    }

    /// The sliced copy preserves the validity of the window `[null, 3, null]`.
    #[test]
    fn test_deep_copy_array_sliced_with_nulls() {
        let window = nullable_ints().slice(1, 3);
        let copied = super::deep_copy_array_sliced(&window);

        assert_eq!(copied.len(), 3);
        assert_eq!(copied.null_count(), 2);

        let ints = copied.as_any().downcast_ref::<Int32Array>().unwrap();
        assert!(!ints.is_valid(0));
        assert!(ints.is_valid(1));
        assert!(!ints.is_valid(2));
        assert_eq!(ints.value(1), 3);
    }
}