// shape_ext_python/arrow_bridge.rs
//! DataTable <-> DataFrame Arrow IPC bridge.
//!
//! Shape's DataTable uses Arrow columnar format internally. Python's
//! pandas/polars/pyarrow ecosystem also speaks Arrow IPC. This module
//! provides zero-copy (or near-zero-copy) transfer between the two:
//!
//! 1. **Shape -> Python**: Serialize a DataTable as Arrow IPC bytes,
//!    pass through the ABI, reconstruct as `pyarrow.RecordBatch` on
//!    the Python side.
//!
//! 2. **Python -> Shape**: The Python function returns a RecordBatch
//!    serialized as Arrow IPC, which we deserialize back into a DataTable.
//!
//! This avoids the overhead of element-wise msgpack serialization for
//! large tabular data.

/// Convert Shape DataTable (Arrow IPC bytes) to a format suitable for
/// Python consumption.
///
/// Stub -- the actual implementation will use pyo3 + pyarrow to create
/// a `pyarrow.RecordBatch` from the IPC buffer.
///
/// # Errors
///
/// Currently always returns `Err` with a "not yet implemented" message.
pub fn datatable_to_python_ipc(_ipc_bytes: &[u8]) -> Result<Vec<u8>, String> {
    // The eventual implementation is essentially a pass-through, since
    // both sides speak Arrow IPC: the buffer can be handed directly to
    // `pyarrow.ipc.open_stream()`. Until then, report a clear error.
    Err(String::from(
        "arrow_bridge: DataTable -> Python not yet implemented",
    ))
}
/// Convert Python DataFrame (Arrow IPC bytes) back to Shape DataTable format.
///
/// Stub -- the actual implementation will serialize the pyarrow RecordBatch
/// to IPC bytes which Shape can ingest directly.
///
/// # Errors
///
/// Currently always returns `Err` with a "not yet implemented" message.
pub fn python_ipc_to_datatable(_ipc_bytes: &[u8]) -> Result<Vec<u8>, String> {
    let msg = "arrow_bridge: Python -> DataTable not yet implemented";
    Err(msg.to_string())
}
35}