from __future__ import annotations
import pandas as pd
import pyarrow as pa
import pyarrow.ipc as ipc
def pandas_to_arrow_bytes(df: pd.DataFrame | pd.Series) -> bytes:
    """Serialize a pandas DataFrame or Series to Arrow IPC file-format bytes.

    A Series is first promoted to a one-column DataFrame (an unnamed Series
    gets whatever column label ``Series.to_frame()`` assigns). Columns
    matched by ``select_dtypes(include=["datetime64"])`` are cast to
    ``datetime64[ms]`` on a copy, so the caller's object is not modified.

    Args:
        df: The pandas DataFrame or Series to serialize.

    Returns:
        The bytes of an Arrow IPC file containing the converted table.
    """
    if isinstance(df, pd.Series):
        # Series has no ``select_dtypes`` and ``pa.Table.from_pandas`` is
        # documented to take a DataFrame, so convert before going further.
        df = df.to_frame()

    dt_cols = df.select_dtypes(include=["datetime64"]).columns
    # ``len(...)`` rather than truthiness: a pandas Index raises on ``bool()``.
    if len(dt_cols) > 0:
        # Work on a copy so the cast never leaks back to the caller's frame.
        df = df.copy()
        df[dt_cols] = df[dt_cols].astype("datetime64[ms]")

    table = pa.Table.from_pandas(df)
    sink = pa.BufferOutputStream()
    with ipc.new_file(sink, table.schema) as writer:
        writer.write_table(table)
    return sink.getvalue().to_pybytes()
def polars_to_arrow_bytes(df) -> bytes:
    """Serialize a polars frame to Arrow IPC file-format bytes.

    A ``pl.LazyFrame`` is collected first; any other input is used as-is and
    must provide ``to_arrow()``.

    Args:
        df: A polars ``LazyFrame`` or an object exposing ``to_arrow()``.

    Returns:
        The bytes of an Arrow IPC file containing the frame's Arrow table.
    """
    # Imported lazily so polars is only needed when this function is called.
    import polars as pl

    eager_frame = df.collect() if isinstance(df, pl.LazyFrame) else df
    arrow_table = eager_frame.to_arrow()

    out_stream = pa.BufferOutputStream()
    with ipc.new_file(out_stream, arrow_table.schema) as file_writer:
        file_writer.write_table(arrow_table)

    serialized = out_stream.getvalue()
    return serialized.to_pybytes()
def arrow_bytes_to_pd_df(arrow_bytes: bytes) -> pd.DataFrame:
    """Deserialize Arrow IPC file-format bytes into a pandas DataFrame.

    Args:
        arrow_bytes: Bytes holding an Arrow IPC file.

    Returns:
        The result of reading the whole file into one Arrow table and
        calling ``to_pandas()`` on it.
    """
    with ipc.open_file(pa.BufferReader(arrow_bytes)) as file_reader:
        arrow_table = file_reader.read_all()
    # Convert only after the reader has been closed, as the original did.
    return arrow_table.to_pandas()