apache_arrow_ffi/
apache_arrow_ffi.rs

1//! ---------------------------------------------------------
2//! Runs a roundtrip to and from Apache Arrow
3//!
4//! Run with:
5//!    cargo run --example apache_arrow_ffi --features cast_arrow
6//! ---------------------------------------------------------
7
8
9#[cfg(feature = "cast_arrow")]
10use crate::apache_arrow_test::run_example;
11
// examples/apache_arrow_ffi.rs
13#[cfg(feature = "cast_arrow")]
14mod apache_arrow_test {
15    use std::sync::Arc;
16
17    use arrow::array::ffi::{
18        FFI_ArrowArray, FFI_ArrowSchema, from_ffi as arrow_from_ffi, to_ffi as arrow_to_ffi
19    };
20    use arrow::array::{ArrayRef, RecordBatch, make_array};
21    use minarrow::ffi::arrow_c_ffi::{export_to_c, import_from_c};
22    use minarrow::ffi::arrow_dtype::CategoricalIndexType;
23    use minarrow::ffi::schema::Schema;
24    use minarrow::{Array, ArrowType, Field, FieldArray, NumericArray, Table, TextArray};
25    #[cfg(feature = "datetime")]
26    use minarrow::{TemporalArray, TimeUnit};
27
28    pub (crate) fn run_example() {
29        // ---- 1. Build a Minarrow Table with all types ----
30
31        #[cfg(feature = "extended_numeric_types")]
32        let arr_int8 = Arc::new(minarrow::IntegerArray::<i8>::from_slice(&[1, 2, -1])) as Arc<_>;
33        #[cfg(feature = "extended_numeric_types")]
34        let arr_int16 =
35            Arc::new(minarrow::IntegerArray::<i16>::from_slice(&[10, 20, -10])) as Arc<_>;
36        let arr_int32 =
37            Arc::new(minarrow::IntegerArray::<i32>::from_slice(&[100, 200, -100])) as Arc<_>;
38        let arr_int64 =
39            Arc::new(minarrow::IntegerArray::<i64>::from_slice(&[1000, 2000, -1000])) as Arc<_>;
40
41        #[cfg(feature = "extended_numeric_types")]
42        let arr_uint8 = Arc::new(minarrow::IntegerArray::<u8>::from_slice(&[1, 2, 255]))
43            as Arc<minarrow::IntegerArray<u8>>;
44        #[cfg(feature = "extended_numeric_types")]
45        let arr_uint16 = Arc::new(minarrow::IntegerArray::<u16>::from_slice(&[1, 2, 65535]))
46            as Arc<minarrow::IntegerArray<u16>>;
47        let arr_uint32 = Arc::new(minarrow::IntegerArray::<u32>::from_slice(&[1, 2, 4294967295]))
48            as Arc<minarrow::IntegerArray<u32>>;
49        let arr_uint64 =
50            Arc::new(minarrow::IntegerArray::<u64>::from_slice(&[1, 2, 18446744073709551615]))
51                as Arc<minarrow::IntegerArray<u64>>;
52
53        let arr_float32 = Arc::new(minarrow::FloatArray::<f32>::from_slice(&[1.5, -0.5, 0.0]))
54            as Arc<minarrow::FloatArray<f32>>;
55        let arr_float64 = Arc::new(minarrow::FloatArray::<f64>::from_slice(&[1.0, -2.0, 0.0]))
56            as Arc<minarrow::FloatArray<f64>>;
57
58        let arr_bool = Arc::new(minarrow::BooleanArray::<()>::from_slice(&[true, false, true]))
59            as Arc<minarrow::BooleanArray<()>>;
60
61        let arr_string32 = Arc::new(minarrow::StringArray::<u32>::from_slice(&["abc", "def", ""]))
62            as Arc<minarrow::StringArray<u32>>;
63        let arr_categorical32 = Arc::new(minarrow::CategoricalArray::<u32>::from_slices(
64            &[0, 1, 2],
65            &["A".to_string(), "B".to_string(), "C".to_string()]
66        )) as Arc<minarrow::CategoricalArray<u32>>;
67
68        #[cfg(feature = "datetime")]
69        let arr_datetime32 = Arc::new(minarrow::DatetimeArray::<i32> {
70            data: minarrow::Buffer::<i32>::from_slice(&[
71                1_600_000_000 / 86_400,
72                1_600_000_001 / 86_400,
73                1_600_000_002 / 86_400,
74            ]),
75            null_mask: None,
76            time_unit: TimeUnit::Days,
77        });
78        #[cfg(feature = "datetime")]
79        let arr_datetime64 = Arc::new(minarrow::DatetimeArray::<i64> {
80            data: minarrow::Buffer::<i64>::from_slice(&[
81                1_600_000_000_000,
82                1_600_000_000_001,
83                1_600_000_000_002
84            ]),
85            null_mask: None,
86            time_unit: TimeUnit::Milliseconds
87        }) as Arc<_>;
88
89        // ---- 2. Wrap into Array enums ----
90        #[cfg(feature = "extended_numeric_types")]
91        let minarr_int8 = Array::NumericArray(NumericArray::Int8(arr_int8));
92        #[cfg(feature = "extended_numeric_types")]
93        let minarr_int16 = Array::NumericArray(NumericArray::Int16(arr_int16));
94        let minarr_int32 = Array::NumericArray(NumericArray::Int32(arr_int32));
95        let minarr_int64 = Array::NumericArray(NumericArray::Int64(arr_int64));
96        #[cfg(feature = "extended_numeric_types")]
97        let minarr_uint8 = Array::NumericArray(NumericArray::UInt8(arr_uint8));
98        #[cfg(feature = "extended_numeric_types")]
99        let minarr_uint16 = Array::NumericArray(NumericArray::UInt16(arr_uint16));
100        let minarr_uint32 = Array::NumericArray(NumericArray::UInt32(arr_uint32));
101        let minarr_uint64 = Array::NumericArray(NumericArray::UInt64(arr_uint64));
102        let minarr_float32 = Array::NumericArray(NumericArray::Float32(arr_float32));
103        let minarr_float64 = Array::NumericArray(NumericArray::Float64(arr_float64));
104        let minarr_bool = Array::BooleanArray(arr_bool);
105        let minarr_string32 = Array::TextArray(TextArray::String32(arr_string32));
106        let minarr_categorical32 = Array::TextArray(TextArray::Categorical32(arr_categorical32));
107        #[cfg(feature = "datetime")]
108        let minarr_datetime32 = Array::TemporalArray(TemporalArray::Datetime32(arr_datetime32));
109        #[cfg(feature = "datetime")]
110        let minarr_datetime64 = Array::TemporalArray(TemporalArray::Datetime64(arr_datetime64));
111
112        // ---- 3. Build Fields with correct logical types ----
113        #[cfg(feature = "extended_numeric_types")]
114        let field_int8 = Field::new("int8", ArrowType::Int8, false, None);
115        #[cfg(feature = "extended_numeric_types")]
116        let field_int16 = Field::new("int16", ArrowType::Int16, false, None);
117        let field_int32 = Field::new("int32", ArrowType::Int32, false, None);
118        let field_int64 = Field::new("int64", ArrowType::Int64, false, None);
119        #[cfg(feature = "extended_numeric_types")]
120        let field_uint8 = Field::new("uint8", ArrowType::UInt8, false, None);
121        #[cfg(feature = "extended_numeric_types")]
122        let field_uint16 = Field::new("uint16", ArrowType::UInt16, false, None);
123        let field_uint32 = Field::new("uint32", ArrowType::UInt32, false, None);
124        let field_uint64 = Field::new("uint64", ArrowType::UInt64, false, None);
125        let field_float32 = Field::new("float32", ArrowType::Float32, false, None);
126        let field_float64 = Field::new("float64", ArrowType::Float64, false, None);
127        let field_bool = Field::new("bool", ArrowType::Boolean, false, None);
128        let field_string32 = Field::new("string32", ArrowType::String, false, None);
129        let field_categorical32 = Field::new(
130            "categorical32",
131            ArrowType::Dictionary(CategoricalIndexType::UInt32),
132            false,
133            None
134        );
135
136        #[cfg(feature = "datetime")]
137        let field_datetime32 = Field::new("dt32", ArrowType::Date32, false, None);
138        #[cfg(feature = "datetime")]
139        let field_datetime64 = Field::new("dt64", ArrowType::Date64, false, None);
140
141        // ---- 4. Build FieldArrays ----
142        #[cfg(feature = "extended_numeric_types")]
143        let fa_int8 = FieldArray::new(field_int8, minarr_int8);
144        #[cfg(feature = "extended_numeric_types")]
145        let fa_int16 = FieldArray::new(field_int16, minarr_int16);
146        let fa_int32 = FieldArray::new(field_int32, minarr_int32);
147        let fa_int64 = FieldArray::new(field_int64, minarr_int64);
148        #[cfg(feature = "extended_numeric_types")]
149        let fa_uint8 = FieldArray::new(field_uint8, minarr_uint8);
150        #[cfg(feature = "extended_numeric_types")]
151        let fa_uint16 = FieldArray::new(field_uint16, minarr_uint16);
152        let fa_uint32 = FieldArray::new(field_uint32, minarr_uint32);
153        let fa_uint64 = FieldArray::new(field_uint64, minarr_uint64);
154        let fa_float32 = FieldArray::new(field_float32, minarr_float32);
155        let fa_float64 = FieldArray::new(field_float64, minarr_float64);
156        let fa_bool = FieldArray::new(field_bool, minarr_bool);
157        let fa_string32 = FieldArray::new(field_string32, minarr_string32);
158        let fa_categorical32 = FieldArray::new(field_categorical32, minarr_categorical32);
159        #[cfg(feature = "datetime")]
160        let fa_datetime32 = FieldArray::new(field_datetime32, minarr_datetime32);
161        #[cfg(feature = "datetime")]
162        let fa_datetime64 = FieldArray::new(field_datetime64, minarr_datetime64);
163
164        // ---- 5. Build Table ----
165        let mut cols = Vec::new();
166        #[cfg(feature = "extended_numeric_types")]
167        {
168            cols.push(fa_int8);
169            cols.push(fa_int16);
170        }
171        cols.push(fa_int32);
172        cols.push(fa_int64);
173        #[cfg(feature = "extended_numeric_types")]
174        {
175            cols.push(fa_uint8);
176            cols.push(fa_uint16);
177        }
178        cols.push(fa_uint32);
179        cols.push(fa_uint64);
180        cols.push(fa_float32);
181        cols.push(fa_float64);
182        cols.push(fa_bool);
183        cols.push(fa_string32);
184        cols.push(fa_categorical32);
185        #[cfg(feature = "datetime")]
186        {
187            cols.push(fa_datetime32);
188            cols.push(fa_datetime64);
189        }
190        let minarrow_table = Table::new("ffi_test".to_string(), Some(cols));
191
192        // ---- 6. Export each column over FFI, import into Arrow-RS, and roundtrip back to Minarrow ----
193        for (_, col) in minarrow_table.cols.iter().enumerate() {
194            let array_arc = Arc::new(col.array.clone());
195            let schema = Schema::from(vec![(*col.field).clone()]);
196
197            // println!("Minarrow Pre-roundtrip for '{:?}':\n{:#?}", *col.field, array_arc);
198
199            let (c_arr, c_schema) = export_to_c(array_arc.clone(), schema);
200
201            // SAFETY: Arrow-RS expects raw pointers to FFI_ArrowArray/Schema
202            let arr_ptr = c_arr as *mut FFI_ArrowArray;
203            let schema_ptr = c_schema as *mut FFI_ArrowSchema;
204            let arrow_array = unsafe { arr_ptr.read() };
205            let arrow_schema = unsafe { schema_ptr.read() };
206            let array_data = unsafe { arrow_from_ffi(arrow_array, &arrow_schema) }
207                .expect("Arrow FFI import failed");
208            let field_name = &col.field.name;
209            println!("Imported field '{}' as Arrow type {:?}", field_name, array_data.data_type());
210            println!("Arrow-RS values for '{}':", field_name);
211            println!("  {:?}", array_data);
212
213            // Convert ArrayData to ArrayRef
214            let array_ref: ArrayRef = make_array(array_data.clone());
215
216            // Pretty print as a table
217            let arrow_schema =
218                Arc::new(arrow::datatypes::Schema::new(vec![arrow::datatypes::Field::new(
219                    field_name,
220                    array_ref.data_type().clone(),
221                    false
222                )]));
223            let batch = RecordBatch::try_new(arrow_schema, vec![array_ref.clone()]).unwrap();
224            println!("Arrow-RS pretty-print for '{}':", field_name);
225            arrow::util::pretty::print_batches(&[batch]).unwrap();
226
227            // ---- 7. Export Arrow-RS back to Minarrow FFI, roundtrip ----
228            let (ffi_out_arr, ffi_out_schema) =
229                arrow_to_ffi(&array_data).expect("Arrow to FFI failed");
230
231            // Correctly allocate Arrow-RS FFI structs on the heap and cast as raw pointers to your C ABI structs
232            let ffi_out_arr_box = Box::new(ffi_out_arr);
233            let ffi_out_schema_box = Box::new(ffi_out_schema);
234
235            let arr_ptr =
236                Box::into_raw(ffi_out_arr_box) as *const minarrow::ffi::arrow_c_ffi::ArrowArray;
237            let schema_ptr =
238                Box::into_raw(ffi_out_schema_box) as *const minarrow::ffi::arrow_c_ffi::ArrowSchema;
239
240            // Now import back into minarrow using your real FFI import
241            let minarr_back_array: Arc<Array> = unsafe { import_from_c(arr_ptr, schema_ptr) };
242
243            println!("Minarrow array (roundtrip) for '{}':\n{:#?}", field_name, minarr_back_array);
244
245            // ---- 8. Validate roundtrip equality ----
246            assert_eq!(
247                &col.array,
248                minarr_back_array.as_ref(),
249                "Roundtrip array does not match for field {}",
250                field_name
251            );
252        }
253
254        println!("FFI roundtrip test completed for all supported types.");
255    }
256}
257
/// Entry point: runs the roundtrip when built with the `cast_arrow` feature,
/// otherwise prints a hint naming the feature to enable.
fn main() {
    // Exactly one of the two statements below survives conditional compilation.
    #[cfg(feature = "cast_arrow")]
    run_example();

    #[cfg(not(feature = "cast_arrow"))]
    println!("The apache-FFI example requires enabling the `cast_arrow` feature.");
}