apache_arrow_ffi/
apache_arrow_ffi.rs

1//! ---------------------------------------------------------
2//! Runs a roundtrip to and from Apache Arrow
3//!
4//! Run with:
5//!    cargo run --example apache_arrow_ffi --features cast_arrow
6//! ---------------------------------------------------------
7
8#[cfg(feature = "cast_arrow")]
9use crate::apache_arrow_test::run_example;
10
// examples/apache_arrow_ffi.rs
12#[cfg(feature = "cast_arrow")]
13mod apache_arrow_test {
14    use std::sync::Arc;
15
16    use arrow::array::ffi::{
17        FFI_ArrowArray, FFI_ArrowSchema, from_ffi as arrow_from_ffi, to_ffi as arrow_to_ffi,
18    };
19    use arrow::array::{ArrayRef, RecordBatch, make_array};
20    use minarrow::ffi::arrow_c_ffi::{export_to_c, import_from_c};
21    use minarrow::ffi::arrow_dtype::CategoricalIndexType;
22    use minarrow::ffi::schema::Schema;
23    use minarrow::{Array, ArrowType, Field, FieldArray, NumericArray, Table, TextArray};
24    #[cfg(feature = "datetime")]
25    use minarrow::{TemporalArray, TimeUnit};
26
27    pub(crate) fn run_example() {
28        // ---- 1. Build a Minarrow Table with all types ----
29
30        #[cfg(feature = "extended_numeric_types")]
31        let arr_int8 = Arc::new(minarrow::IntegerArray::<i8>::from_slice(&[1, 2, -1])) as Arc<_>;
32        #[cfg(feature = "extended_numeric_types")]
33        let arr_int16 =
34            Arc::new(minarrow::IntegerArray::<i16>::from_slice(&[10, 20, -10])) as Arc<_>;
35        let arr_int32 =
36            Arc::new(minarrow::IntegerArray::<i32>::from_slice(&[100, 200, -100])) as Arc<_>;
37        let arr_int64 = Arc::new(minarrow::IntegerArray::<i64>::from_slice(&[
38            1000, 2000, -1000,
39        ])) as Arc<_>;
40
41        #[cfg(feature = "extended_numeric_types")]
42        let arr_uint8 = Arc::new(minarrow::IntegerArray::<u8>::from_slice(&[1, 2, 255]))
43            as Arc<minarrow::IntegerArray<u8>>;
44        #[cfg(feature = "extended_numeric_types")]
45        let arr_uint16 = Arc::new(minarrow::IntegerArray::<u16>::from_slice(&[1, 2, 65535]))
46            as Arc<minarrow::IntegerArray<u16>>;
47        let arr_uint32 = Arc::new(minarrow::IntegerArray::<u32>::from_slice(&[
48            1, 2, 4294967295,
49        ])) as Arc<minarrow::IntegerArray<u32>>;
50        let arr_uint64 = Arc::new(minarrow::IntegerArray::<u64>::from_slice(&[
51            1,
52            2,
53            18446744073709551615,
54        ])) as Arc<minarrow::IntegerArray<u64>>;
55
56        let arr_float32 = Arc::new(minarrow::FloatArray::<f32>::from_slice(&[1.5, -0.5, 0.0]))
57            as Arc<minarrow::FloatArray<f32>>;
58        let arr_float64 = Arc::new(minarrow::FloatArray::<f64>::from_slice(&[1.0, -2.0, 0.0]))
59            as Arc<minarrow::FloatArray<f64>>;
60
61        let arr_bool = Arc::new(minarrow::BooleanArray::<()>::from_slice(&[
62            true, false, true,
63        ])) as Arc<minarrow::BooleanArray<()>>;
64
65        let arr_string32 = Arc::new(minarrow::StringArray::<u32>::from_slice(&[
66            "abc", "def", "",
67        ])) as Arc<minarrow::StringArray<u32>>;
68        let arr_categorical32 = Arc::new(minarrow::CategoricalArray::<u32>::from_slices(
69            &[0, 1, 2],
70            &["A".to_string(), "B".to_string(), "C".to_string()],
71        )) as Arc<minarrow::CategoricalArray<u32>>;
72
73        #[cfg(feature = "datetime")]
74        let arr_datetime32 = Arc::new(minarrow::DatetimeArray::<i32> {
75            data: minarrow::Buffer::<i32>::from_slice(&[
76                1_600_000_000 / 86_400,
77                1_600_000_001 / 86_400,
78                1_600_000_002 / 86_400,
79            ]),
80            null_mask: None,
81            time_unit: TimeUnit::Days,
82        });
83        #[cfg(feature = "datetime")]
84        let arr_datetime64 = Arc::new(minarrow::DatetimeArray::<i64> {
85            data: minarrow::Buffer::<i64>::from_slice(&[
86                1_600_000_000_000,
87                1_600_000_000_001,
88                1_600_000_000_002,
89            ]),
90            null_mask: None,
91            time_unit: TimeUnit::Milliseconds,
92        }) as Arc<_>;
93
94        // ---- 2. Wrap into Array enums ----
95        #[cfg(feature = "extended_numeric_types")]
96        let minarr_int8 = Array::NumericArray(NumericArray::Int8(arr_int8));
97        #[cfg(feature = "extended_numeric_types")]
98        let minarr_int16 = Array::NumericArray(NumericArray::Int16(arr_int16));
99        let minarr_int32 = Array::NumericArray(NumericArray::Int32(arr_int32));
100        let minarr_int64 = Array::NumericArray(NumericArray::Int64(arr_int64));
101        #[cfg(feature = "extended_numeric_types")]
102        let minarr_uint8 = Array::NumericArray(NumericArray::UInt8(arr_uint8));
103        #[cfg(feature = "extended_numeric_types")]
104        let minarr_uint16 = Array::NumericArray(NumericArray::UInt16(arr_uint16));
105        let minarr_uint32 = Array::NumericArray(NumericArray::UInt32(arr_uint32));
106        let minarr_uint64 = Array::NumericArray(NumericArray::UInt64(arr_uint64));
107        let minarr_float32 = Array::NumericArray(NumericArray::Float32(arr_float32));
108        let minarr_float64 = Array::NumericArray(NumericArray::Float64(arr_float64));
109        let minarr_bool = Array::BooleanArray(arr_bool);
110        let minarr_string32 = Array::TextArray(TextArray::String32(arr_string32));
111        let minarr_categorical32 = Array::TextArray(TextArray::Categorical32(arr_categorical32));
112        #[cfg(feature = "datetime")]
113        let minarr_datetime32 = Array::TemporalArray(TemporalArray::Datetime32(arr_datetime32));
114        #[cfg(feature = "datetime")]
115        let minarr_datetime64 = Array::TemporalArray(TemporalArray::Datetime64(arr_datetime64));
116
117        // ---- 3. Build Fields with correct logical types ----
118        #[cfg(feature = "extended_numeric_types")]
119        let field_int8 = Field::new("int8", ArrowType::Int8, false, None);
120        #[cfg(feature = "extended_numeric_types")]
121        let field_int16 = Field::new("int16", ArrowType::Int16, false, None);
122        let field_int32 = Field::new("int32", ArrowType::Int32, false, None);
123        let field_int64 = Field::new("int64", ArrowType::Int64, false, None);
124        #[cfg(feature = "extended_numeric_types")]
125        let field_uint8 = Field::new("uint8", ArrowType::UInt8, false, None);
126        #[cfg(feature = "extended_numeric_types")]
127        let field_uint16 = Field::new("uint16", ArrowType::UInt16, false, None);
128        let field_uint32 = Field::new("uint32", ArrowType::UInt32, false, None);
129        let field_uint64 = Field::new("uint64", ArrowType::UInt64, false, None);
130        let field_float32 = Field::new("float32", ArrowType::Float32, false, None);
131        let field_float64 = Field::new("float64", ArrowType::Float64, false, None);
132        let field_bool = Field::new("bool", ArrowType::Boolean, false, None);
133        let field_string32 = Field::new("string32", ArrowType::String, false, None);
134        let field_categorical32 = Field::new(
135            "categorical32",
136            ArrowType::Dictionary(CategoricalIndexType::UInt32),
137            false,
138            None,
139        );
140
141        #[cfg(feature = "datetime")]
142        let field_datetime32 = Field::new("dt32", ArrowType::Date32, false, None);
143        #[cfg(feature = "datetime")]
144        let field_datetime64 = Field::new("dt64", ArrowType::Date64, false, None);
145
146        // ---- 4. Build FieldArrays ----
147        #[cfg(feature = "extended_numeric_types")]
148        let fa_int8 = FieldArray::new(field_int8, minarr_int8);
149        #[cfg(feature = "extended_numeric_types")]
150        let fa_int16 = FieldArray::new(field_int16, minarr_int16);
151        let fa_int32 = FieldArray::new(field_int32, minarr_int32);
152        let fa_int64 = FieldArray::new(field_int64, minarr_int64);
153        #[cfg(feature = "extended_numeric_types")]
154        let fa_uint8 = FieldArray::new(field_uint8, minarr_uint8);
155        #[cfg(feature = "extended_numeric_types")]
156        let fa_uint16 = FieldArray::new(field_uint16, minarr_uint16);
157        let fa_uint32 = FieldArray::new(field_uint32, minarr_uint32);
158        let fa_uint64 = FieldArray::new(field_uint64, minarr_uint64);
159        let fa_float32 = FieldArray::new(field_float32, minarr_float32);
160        let fa_float64 = FieldArray::new(field_float64, minarr_float64);
161        let fa_bool = FieldArray::new(field_bool, minarr_bool);
162        let fa_string32 = FieldArray::new(field_string32, minarr_string32);
163        let fa_categorical32 = FieldArray::new(field_categorical32, minarr_categorical32);
164        #[cfg(feature = "datetime")]
165        let fa_datetime32 = FieldArray::new(field_datetime32, minarr_datetime32);
166        #[cfg(feature = "datetime")]
167        let fa_datetime64 = FieldArray::new(field_datetime64, minarr_datetime64);
168
169        // ---- 5. Build Table ----
170        let mut cols = Vec::new();
171        #[cfg(feature = "extended_numeric_types")]
172        {
173            cols.push(fa_int8);
174            cols.push(fa_int16);
175        }
176        cols.push(fa_int32);
177        cols.push(fa_int64);
178        #[cfg(feature = "extended_numeric_types")]
179        {
180            cols.push(fa_uint8);
181            cols.push(fa_uint16);
182        }
183        cols.push(fa_uint32);
184        cols.push(fa_uint64);
185        cols.push(fa_float32);
186        cols.push(fa_float64);
187        cols.push(fa_bool);
188        cols.push(fa_string32);
189        cols.push(fa_categorical32);
190        #[cfg(feature = "datetime")]
191        {
192            cols.push(fa_datetime32);
193            cols.push(fa_datetime64);
194        }
195        let minarrow_table = Table::new("ffi_test".to_string(), Some(cols));
196
197        // ---- 6. Export each column over FFI, import into Arrow-RS, and roundtrip back to Minarrow ----
198        for (_, col) in minarrow_table.cols.iter().enumerate() {
199            let array_arc = Arc::new(col.array.clone());
200            let schema = Schema::from(vec![(*col.field).clone()]);
201
202            // println!("Minarrow Pre-roundtrip for '{:?}':\n{:#?}", *col.field, array_arc);
203
204            let (c_arr, c_schema) = export_to_c(array_arc.clone(), schema);
205
206            // SAFETY: Arrow-RS expects raw pointers to FFI_ArrowArray/Schema
207            let arr_ptr = c_arr as *mut FFI_ArrowArray;
208            let schema_ptr = c_schema as *mut FFI_ArrowSchema;
209            let arrow_array = unsafe { arr_ptr.read() };
210            let arrow_schema = unsafe { schema_ptr.read() };
211            let array_data = unsafe { arrow_from_ffi(arrow_array, &arrow_schema) }
212                .expect("Arrow FFI import failed");
213            let field_name = &col.field.name;
214            println!(
215                "Imported field '{}' as Arrow type {:?}",
216                field_name,
217                array_data.data_type()
218            );
219            println!("Arrow-RS values for '{}':", field_name);
220            println!("  {:?}", array_data);
221
222            // Convert ArrayData to ArrayRef
223            let array_ref: ArrayRef = make_array(array_data.clone());
224
225            // Pretty print as a table
226            let arrow_schema = Arc::new(arrow::datatypes::Schema::new(vec![
227                arrow::datatypes::Field::new(field_name, array_ref.data_type().clone(), false),
228            ]));
229            let batch = RecordBatch::try_new(arrow_schema, vec![array_ref.clone()]).unwrap();
230            println!("Arrow-RS pretty-print for '{}':", field_name);
231            arrow::util::pretty::print_batches(&[batch]).unwrap();
232
233            // ---- 7. Export Arrow-RS back to Minarrow FFI, roundtrip ----
234            let (ffi_out_arr, ffi_out_schema) =
235                arrow_to_ffi(&array_data).expect("Arrow to FFI failed");
236
237            // Correctly allocate Arrow-RS FFI structs on the heap and cast as raw pointers to your C ABI structs
238            let ffi_out_arr_box = Box::new(ffi_out_arr);
239            let ffi_out_schema_box = Box::new(ffi_out_schema);
240
241            let arr_ptr =
242                Box::into_raw(ffi_out_arr_box) as *const minarrow::ffi::arrow_c_ffi::ArrowArray;
243            let schema_ptr =
244                Box::into_raw(ffi_out_schema_box) as *const minarrow::ffi::arrow_c_ffi::ArrowSchema;
245
246            // Now import back into minarrow using your real FFI import
247            let minarr_back_array: Arc<Array> = unsafe { import_from_c(arr_ptr, schema_ptr) };
248
249            println!(
250                "Minarrow array (roundtrip) for '{}':\n{:#?}",
251                field_name, minarr_back_array
252            );
253
254            // ---- 8. Validate roundtrip equality ----
255            assert_eq!(
256                &col.array,
257                minarr_back_array.as_ref(),
258                "Roundtrip array does not match for field {}",
259                field_name
260            );
261        }
262
263        println!("FFI roundtrip test completed for all supported types.");
264    }
265}
266
/// Entry point of the example.
///
/// With the `cast_arrow` feature enabled this runs the FFI round trip;
/// without it, a one-line hint naming the required feature is printed instead.
fn main() {
    // Exactly one of the two statements below survives conditional compilation.
    #[cfg(feature = "cast_arrow")]
    run_example();

    #[cfg(not(feature = "cast_arrow"))]
    println!("The apache-FFI example requires enabling the `cast_arrow` feature.");
}