df_interchange/
lib.rs

1#![allow(clippy::useless_transmute)]
2#![allow(dead_code)]
3#![allow(unused_macros)]
4
5//! # DataFrame Interchange
6//!
7//! This crate allows for seamless interoperability between any version of [Polars (>=0.40)](https://docs.rs/polars/latest/polars/) and any version of [Arrow (>=54)](https://docs.rs/arrow/latest/arrow/), including between versions of the same crate (e.g. `Polars 0.40` to `Polars 0.46`), using the [Arrow C Data Interchange](https://arrow.apache.org/docs/format/CDataInterface.html) format.
8//!
9//! Supported versions:
10//! * Arrow: "54", "55"
11//! * Polars: "0.40", "0.41", "0.42", "0.43", "0.44", "0.45", "0.46", "0.47", "0.48", "0.49"
12//!
13//! ## Polars and Arrow Rust ecosystem
14//!
15//! Since both `Polars` (pre-1.0) and `Arrow` ship SemVer-breaking API updates in Rust every few months, the crates in the Rust ecosystem that depend on them update at different rates and are frequently incompatible with each other (e.g. one crate outputs `Polars 0.45` and another crate takes `Polars 0.43` as input). For crates that take these types as input or provide them as output, updating should be considered an API break and requires a major version bump. This has a cascading effect over the whole ecosystem.
16//!
17//! For example, attempting to pass `Polars 0.45` to a crate that uses `Polars 0.43`, or vice versa, will give an [error\[E0308\]: mismatched types](https://doc.rust-lang.org/error_codes/E0308.html) error with the note "'DataFrame' and 'DataFrame' have similar names, but are actually distinct types".
18//!
19//! This crate is meant to solve the interoperability issue and prevent the need for the entirety of the ecosystem to update at the same speed.
20//!
21//! ## Usage
22//!
23//! Enable the features for the versions you need (e.g. `Polars 0.43`, `Polars 0.46` and `Arrow 54`):
24//!
25//! ```toml
26//! [dependencies]
27//! polars = "0.43"
28//! arrow = "54"
29//! df-interchange = { version = "0.2", features = ["polars_0_43", "polars_0_46", "arrow_54"] }
30//! ```
31//! Then use the `from_polars_0_43`, `from_arrow_54` and `to_polars_0_46` functions on `Interchange` to convert between types:
32//!
33//! ```no_run
34//! use std::sync::Arc;
35//! use arrow::{array::{ArrayRef, Int32Array, Int64Array}, record_batch::RecordBatch}; // Arrow 54
36//! use polars::prelude::*; // Polars 0.43
37//! use df_interchange::Interchange;
38//!
39//! // Create Polars 0.43 data
40//! let polars_0_43 = DataFrame::new(vec![
41//!     Series::new("test_i32".into(), [-1i32, 0, 1]),
42//!     Series::new("test_i64".into(), [-1i64, 0, 1]),
43//! ])
44//! .unwrap();
45//!
46//! // Create arrow 54 data
47//! let arrow_54: Vec<_> = vec![RecordBatch::try_from_iter(vec![
48//!     ("test_i32", Arc::new(Int32Array::from(vec![-1i32, 0, 1])) as ArrayRef),
49//!     ("test_i64", Arc::new(Int64Array::from(vec![-1i64, 0, 1])) as ArrayRef),
50//! ])
51//! .unwrap()];
52//!
53//! // Convert Polars 0.43 to Polars 0.46
54//! let df_polars = Interchange::from_polars_0_43(polars_0_43)?.to_polars_0_46()?;
55//!
56//! // Convert Arrow 54 to Polars 0.46
57//! let df_arrow = Interchange::from_arrow_54(arrow_54)?.to_polars_0_46()?;
58//!
59//! // Compare the two DataFrames (not possible prior to conversion to Polars 0.46)
60//! assert!(df_polars.equals_missing(&df_arrow));
61//!
62//! ```
63//! ## Technical info
64//!
65//! ### Features
66//!
67//! Since Rust features are [additive](https://doc.rust-lang.org/cargo/reference/features.html#feature-unification), you can enable features on Arrow or Polars crates by adding them to your own `Cargo.toml`.
68//!
69//! For example, you can enable the `lazy` feature on the Polars version you receive from `df-interchange`.
70//!
71//! ```toml
72//! [dependencies]
73//! polars = { version = "0.46", features = ["lazy"] }
74//! polars_old = { package = "polars", version = "0.45", features = ["lazy"] }
75//! df-interchange = { version = "0.1.0", features = ["polars_0_45", "polars_0_46"] }
76//! ```
77//!
78//! To call `.lazy()` on the converted `DataFrame`, which is provided by the `IntoLazy` trait, you have to [disambiguate the trait](https://doc.rust-lang.org/rust-by-example/trait/disambiguating.html) with `<polars_old::prelude::DataFrame as polars_old::prelude::IntoLazy>::lazy(df)`:
79//!
80//! ```no_run
81//! use df_interchange::Interchange;
82//! use polars::prelude::*;
83//!
84//! let df_0_46 = DataFrame::new(vec![
85//!     Column::new("test_i32".into(), [1i32, 2, 3, 4]),
86//!     Column::new("test_i64".into(), [1i64, 2, 3, 4]),
87//! ])
88//! .unwrap()
89//! .lazy();
90//!
91//! let df_0_45 = Interchange::from_polars_0_46(df_0_46.collect().unwrap())?.to_polars_0_45()?;
92//!
93//! let lf = <polars_old::prelude::DataFrame as polars_old::prelude::IntoLazy>::lazy(df_0_45);
94//! ```
95//!
96//! During conversion, you may encounter errors caused by data types that are gated behind features. For example, if you convert a column of `i8` from a `Polars 0.46` build that enables the `dtype-i8` feature to a `Polars 0.43` build that does not enable `dtype-i8`, you will get an `Error(ComputeError(ErrString("cannot create series from Int8")))`. You can enable this feature on both versions of the crate to solve the issue.
97//!
98//! ```toml
99//! [dependencies]
100//! polars = { version = "0.46", features = ["dtype-i8"] }
101//! polars_0_43 = { package = "polars", version = "0.43", features = ["dtype-i8"] }
102//! df-interchange = { version = "0.1.0", features = ["polars_0_43", "polars_0_46"] }
103//! ```
104//!
105//! ```no_run
106//! use polars::prelude::*; // Polars 0.46
107//! use df_interchange::Interchange;
108//!
109//! let df_0_46 = DataFrame::new(vec![
110//!     Column::new("test_i8".into(), [1i8, 2, 3, 4]),
111//!     Column::new("test_i64".into(), [1i64, 2, 3, 4]),
112//! ])
113//! .unwrap();
114//!
115//! let df_0_43 = Interchange::from_polars_0_46(df_0_46)?.to_polars_0_43()?;
116//! ```
117//!
// Crate error module; `InterchangeError` is re-exported at the crate root.
118mod error;
119pub use error::InterchangeError;
120
// Compiled only when at least one `arrow_*` feature is enabled.
// NOTE(review): presumably holds the Arrow -> `Interchange` conversions
// (module body is not visible from this file) — confirm.
121#[cfg(any(feature = "arrow_54", feature = "arrow_55"))]
122mod from_arrow;
123
// Compiled only when at least one `polars_0_*` feature is enabled.
// The feature list below must be kept in sync with the one gating `to_polars`.
124#[cfg(any(
125    feature = "polars_0_40",
126    feature = "polars_0_41",
127    feature = "polars_0_42",
128    feature = "polars_0_43",
129    feature = "polars_0_44",
130    feature = "polars_0_45",
131    feature = "polars_0_46",
132    feature = "polars_0_47",
133    feature = "polars_0_48",
134    feature = "polars_0_49"
135))]
136mod from_polars;
137
// Same feature gate as `from_arrow`.
138#[cfg(any(feature = "arrow_54", feature = "arrow_55"))]
139mod to_arrow;
140
// Same feature gate as `from_polars`.
141#[cfg(any(
142    feature = "polars_0_40",
143    feature = "polars_0_41",
144    feature = "polars_0_42",
145    feature = "polars_0_43",
146    feature = "polars_0_44",
147    feature = "polars_0_45",
148    feature = "polars_0_46",
149    feature = "polars_0_47",
150    feature = "polars_0_48",
151    feature = "polars_0_49"
152))]
153mod to_polars;
154
/// Rust mirror of `struct ArrowArray` from the Arrow C Data Interface.
///
/// `#[repr(C)]` gives the struct C layout with fields in declaration order,
/// so the field order and types below must stay in sync with the C
/// definition. The per-field notes summarise the Arrow C Data Interface
/// specification; they are not enforced by anything in this file.
155#[repr(C)]
156struct ArrowArray {
    // Spec: logical length of the array, in items.
157    length: i64,
    // Spec: number of null items; may be -1 when not yet computed.
158    null_count: i64,
    // Spec: logical offset into the array, in items.
159    offset: i64,
    // Spec: number of physical buffers pointed to by `buffers`.
160    n_buffers: i64,
    // Spec: number of child arrays pointed to by `children`.
161    n_children: i64,
    // Spec: C array of `n_buffers` pointers, one per physical buffer.
162    buffers: *mut *const ::std::os::raw::c_void,
    // Spec: C array of `n_children` pointers to child `ArrowArray`s.
163    children: *mut *mut ArrowArray,
    // Spec: dictionary values for dictionary-encoded arrays, null otherwise.
164    dictionary: *mut ArrowArray,
    // Spec: producer-supplied release callback. `None` is the C NULL
    // function pointer, which the spec uses to mark a released structure.
165    release: ::std::option::Option<unsafe extern "C" fn(arg1: *mut ArrowArray)>,
    // Spec: opaque pointer owned by the producer.
166    private_data: *mut ::std::os::raw::c_void,
167}
168
/// Rust mirror of `struct ArrowSchema` from the Arrow C Data Interface.
///
/// `#[repr(C)]` gives the struct C layout with fields in declaration order,
/// so the field order and types below must stay in sync with the C
/// definition. The per-field notes summarise the Arrow C Data Interface
/// specification; they are not enforced by anything in this file.
169#[repr(C)]
170struct ArrowSchema {
    // Spec: null-terminated string describing the data type.
171    format: *const ::std::os::raw::c_char,
    // Spec: optional null-terminated field name.
172    name: *const ::std::os::raw::c_char,
    // Spec: optional binary-encoded key/value metadata (not null-terminated).
173    metadata: *const ::std::os::raw::c_char,
    // Spec: bitfield of flags (e.g. nullable, dictionary-ordered).
174    flags: i64,
    // Spec: number of child schemas pointed to by `children`.
175    n_children: i64,
    // Spec: C array of `n_children` pointers to child `ArrowSchema`s.
176    children: *mut *mut ArrowSchema,
    // Spec: schema of the dictionary values for dictionary-encoded types, null otherwise.
177    dictionary: *mut ArrowSchema,
    // Spec: producer-supplied release callback. `None` is the C NULL
    // function pointer, which the spec uses to mark a released structure.
178    release: ::std::option::Option<unsafe extern "C" fn(arg1: *mut ArrowSchema)>,
    // Spec: opaque pointer owned by the producer.
179    private_data: *mut ::std::os::raw::c_void,
180}
181
/// Version-independent holder for DataFrame data while it is being converted.
///
/// As shown in the crate-level examples, values are created with the
/// `from_*` functions (e.g. `Interchange::from_polars_0_43`) and turned into
/// the target type with the `to_*` methods (e.g. `to_polars_0_46`). The data
/// is stored as Arrow C Data Interface structures.
182pub struct Interchange {
    // NOTE(review): presumably records whether all columns share the same
    // chunk boundaries — confirm against the `from_*` implementations.
183    chunks_aligned: bool,
    // Named groups of (array, schema) FFI pairs.
    // NOTE(review): presumably one entry per column, keyed by column name,
    // with one pair per chunk — confirm against the `from_*` implementations.
184    ffi: Vec<(String, Vec<(ArrowArray, ArrowSchema)>)>,
185}