df_interchange/lib.rs
1#![allow(clippy::useless_transmute)]
2#![allow(dead_code)]
3#![allow(unused_macros)]
4
5//! # DataFrame Interchange
6//!
7//! This crate allows for seamless interoperability between any version of [Polars (>=0.40)](https://docs.rs/polars/latest/polars/) and any version of [Arrow (>=54)](https://docs.rs/arrow/latest/arrow/), including between versions of the same crate (e.g. `Polars 0.40` to `Polars 0.46`), using the [Arrow C Data Interchange](https://arrow.apache.org/docs/format/CDataInterface.html) format.
8//!
9//! Supported versions:
10//! * Arrow: "54", "55", "56", "57", "58"
11//! * Polars: "0.40", "0.41", "0.42", "0.43", "0.44", "0.45", "0.46", "0.47", "0.48", "0.49", "0.50", "0.51", "0.52", "0.53"
12//!
13//! ## Polars and Arrow Rust ecosystem
14//!
//! Since both `Polars` (pre-1.0) and `Arrow` have SemVer breaking API updates in Rust every few months, the crates in the Rust ecosystem that depend on them update at different rates and are consistently incompatible with each other (e.g. one crate outputs `Polars 0.45` and another crate takes `Polars 0.43` as input). For crates that take these as input or provide these as output, updating should be considered an API break and requires a major version bump. This has a cascading effect over the whole ecosystem.
16//!
//! For example, attempting to pass `Polars 0.45` to a crate that uses `Polars 0.43`, or vice versa, will give an [error\[E0308\]: mismatched types](https://doc.rust-lang.org/error_codes/E0308.html) error with the note "'DataFrame' and 'DataFrame' have similar names, but are actually distinct types".
18//!
19//! This crate is meant to solve the interoperability issue and prevent the need for the entirety of the ecosystem to update at the same speed.
20//!
21//! ## Usage
22//!
//! Enable the features for the versions you need (e.g. `Polars 0.43`, `Polars 0.46` and `Arrow 54`):
24//!
25//! ```toml
26//! [dependencies]
27//! polars = "0.43"
28//! arrow = "54"
29//! df-interchange = { version = "0.2", features = ["polars_0_43", "polars_0_46", "arrow_54"] }
30//! ```
//! Then use the `from_polars_0_43`, `from_arrow_54` and `to_polars_0_46` methods of `Interchange` to convert between types:
32//!
33//! ```no_run
34//! use std::sync::Arc;
35//! use arrow::{array::{ArrayRef, Int32Array, Int64Array}, record_batch::RecordBatch}; // Arrow 54
36//! use polars::prelude::*; // Polars 0.43
37//! use df_interchange::Interchange;
38//!
39//! // Create Polars 0.43 data
40//! let polars_0_43 = DataFrame::new(vec![
41//! Series::new("test_i32".into(), [-1i32, 0, 1]),
42//! Series::new("test_i64".into(), [-1i64, 0, 1]),
43//! ])
44//! .unwrap();
45//!
46//! // Create arrow 54 data
47//! let arrow_54: Vec<_> = vec![RecordBatch::try_from_iter(vec![
48//! ("test_i32", Arc::new(Int32Array::from(vec![-1i32, 0, 1])) as ArrayRef),
49//! ("test_i64", Arc::new(Int64Array::from(vec![-1i64, 0, 1])) as ArrayRef),
50//! ])
51//! .unwrap()];
52//!
53//! // Convert Polars 0.43 to Polars 0.46
54//! let df_polars = Interchange::from_polars_0_43(polars_0_43)?.to_polars_0_46()?;
55//!
56//! // Convert Arrow 54 to Polars 0.46
57//! let df_arrow = Interchange::from_arrow_54(arrow_54)?.to_polars_0_46()?;
58//!
59//! // Compare the two DataFrames (not possible prior to conversion to Polars 0.46)
60//! assert!(df_polars.equals_missing(&df_arrow));
61//!
62//! ```
63//! ## Technical info
64//!
65//! ### Features
66//!
67//! Since Rust features are [additive](https://doc.rust-lang.org/cargo/reference/features.html#feature-unification), you can enable features on Arrow or Polars crates by adding them to your own `Cargo.toml`.
68//!
69//! For example, you can enable the `lazy` feature on the Polars version you receive from `df-interchange`.
70//!
71//! ```toml
72//! [dependencies]
73//! polars = { version = "0.46", features = ["lazy"] }
74//! polars_old = { package = "polars", version = "0.45", features = ["lazy"] }
//! df-interchange = { version = "0.2", features = ["polars_0_45", "polars_0_46"] }
76//! ```
77//!
//! Because `.lazy()` is provided by the `IntoLazy` trait for `DataFrame`, you have to [disambiguate the trait](https://doc.rust-lang.org/rust-by-example/trait/disambiguating.html) when calling it on the older version, with `<polars_old::prelude::DataFrame as polars_old::prelude::IntoLazy>::lazy(df)`:
79//!
80//! ```no_run
81//! use df_interchange::Interchange;
82//! use polars::prelude::*;
83//!
84//! let df_0_46 = DataFrame::new(vec![
85//! Column::new("test_i32".into(), [1i32, 2, 3, 4]),
86//! Column::new("test_i64".into(), [1i64, 2, 3, 4]),
87//! ])
88//! .unwrap()
89//! .lazy();
90//!
91//! let df_0_45 = Interchange::from_polars_0_46(df_0_46.collect().unwrap())?.to_polars_0_45()?;
92//!
93//! let lf = <polars_old::prelude::DataFrame as polars_old::prelude::IntoLazy>::lazy(df_0_45);
94//! ```
95//!
//! During conversion, you may encounter errors caused by data types that are only available behind features. For example, if you convert a column of `i8` from a `Polars 0.46` that enables the `dtype-i8` feature to a `Polars 0.43` that does not enable `dtype-i8`, you will get an `Error(ComputeError(ErrString("cannot create series from Int8")))`. You can enable this feature on both versions of the crate to solve the issue.
97//!
98//! ```toml
99//! [dependencies]
100//! polars = { version = "0.46", features = ["dtype-i8"] }
101//! polars_0_43 = { package = "polars", version = "0.43", features = ["dtype-i8"] }
//! df-interchange = { version = "0.2", features = ["polars_0_43", "polars_0_46"] }
103//! ```
104//!
105//! ```no_run
106//! use polars::prelude::*; // Polars 0.46
107//! use df_interchange::Interchange;
108//!
109//! let df_0_46 = DataFrame::new(vec![
110//! Column::new("test_i8".into(), [1i8, 2, 3, 4]),
111//! Column::new("test_i64".into(), [1i64, 2, 3, 4]),
112//! ])
113//! .unwrap();
114//!
115//! let df_0_43 = Interchange::from_polars_0_46(df_0_46)?.to_polars_0_43()?;
116//! ```
117//!
118mod error;
119pub use error::InterchangeError;
120
121#[cfg(any(
122 feature = "arrow_54",
123 feature = "arrow_55",
124 feature = "arrow_56",
125 feature = "arrow_57",
126 feature = "arrow_58"
127))]
128mod from_arrow;
129
130#[cfg(any(
131 feature = "polars_0_40",
132 feature = "polars_0_41",
133 feature = "polars_0_42",
134 feature = "polars_0_43",
135 feature = "polars_0_44",
136 feature = "polars_0_45",
137 feature = "polars_0_46",
138 feature = "polars_0_47",
139 feature = "polars_0_48",
140 feature = "polars_0_49",
141 feature = "polars_0_50",
142 feature = "polars_0_51",
143 feature = "polars_0_52",
144 feature = "polars_0_53"
145))]
146mod from_polars;
147
148#[cfg(any(
149 feature = "arrow_54",
150 feature = "arrow_55",
151 feature = "arrow_56",
152 feature = "arrow_57",
153 feature = "arrow_58"
154))]
155mod to_arrow;
156
157#[cfg(any(
158 feature = "polars_0_40",
159 feature = "polars_0_41",
160 feature = "polars_0_42",
161 feature = "polars_0_43",
162 feature = "polars_0_44",
163 feature = "polars_0_45",
164 feature = "polars_0_46",
165 feature = "polars_0_47",
166 feature = "polars_0_48",
167 feature = "polars_0_49",
168 feature = "polars_0_50",
169 feature = "polars_0_51",
170 feature = "polars_0_52",
171 feature = "polars_0_53"
172))]
173mod to_polars;
174
/// Crate-local mirror of the `ArrowArray` struct from the Arrow C Data
/// Interface.
///
/// The struct is `#[repr(C)]`, so the order and types of the fields define its
/// binary layout. Do not reorder, add, or remove fields.
#[repr(C)]
struct ArrowArray {
    /// Number of logical elements in the array.
    length: i64,
    /// Number of null elements.
    null_count: i64,
    /// Logical offset into the buffers.
    offset: i64,
    /// Number of entries reachable through `buffers`.
    n_buffers: i64,
    /// Number of entries reachable through `children`.
    n_children: i64,
    /// Pointer to an array of untyped buffer pointers.
    buffers: *mut *const std::ffi::c_void,
    /// Pointer to an array of child array pointers.
    children: *mut *mut ArrowArray,
    /// Dictionary array, if any.
    dictionary: *mut ArrowArray,
    /// Optional C callback taking a pointer to this struct.
    release: Option<unsafe extern "C" fn(*mut ArrowArray)>,
    /// Opaque pointer reserved for whoever produced the struct.
    private_data: *mut std::ffi::c_void,
}
188
/// Crate-local mirror of the `ArrowSchema` struct from the Arrow C Data
/// Interface.
///
/// The struct is `#[repr(C)]`, so the order and types of the fields define its
/// binary layout. Do not reorder, add, or remove fields.
#[repr(C)]
struct ArrowSchema {
    /// C string describing the data type.
    format: *const std::ffi::c_char,
    /// C string holding the field name.
    name: *const std::ffi::c_char,
    /// Pointer to the encoded metadata bytes.
    metadata: *const std::ffi::c_char,
    /// Bit flags for the field.
    flags: i64,
    /// Number of entries reachable through `children`.
    n_children: i64,
    /// Pointer to an array of child schema pointers.
    children: *mut *mut ArrowSchema,
    /// Dictionary schema, if any.
    dictionary: *mut ArrowSchema,
    /// Optional C callback taking a pointer to this struct.
    release: Option<unsafe extern "C" fn(*mut ArrowSchema)>,
    /// Opaque pointer reserved for whoever produced the struct.
    private_data: *mut std::ffi::c_void,
}
201
202pub struct Interchange {
203 chunks_aligned: bool,
204 ffi: Vec<(String, Vec<(ArrowArray, ArrowSchema)>)>,
205}