Module parquet::arrow::async_writer

source ·
Expand description

Contains an async writer that writes Arrow data into Parquet data.

Provides an async API for writing RecordBatches as Parquet files. The API is similar to the sync API, so please read the sync API's documentation before using this one.

Here is an example of using AsyncArrowWriter:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch, RecordBatchReader};
use bytes::Bytes;
use parquet::arrow::{AsyncArrowWriter, arrow_reader::ParquetRecordBatchReaderBuilder};

// Build a single-column record batch holding the values 1, 2, 3.
let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let to_write = RecordBatch::try_from_iter([("col", col)]).unwrap();

// Write the batch into an in-memory `Vec` through the async writer.
// `close` is awaited before the buffer is read back below.
let mut buffer = Vec::new();
let mut writer =
    AsyncArrowWriter::try_new(&mut buffer, to_write.schema(), 0, None).unwrap();
writer.write(&to_write).await.unwrap();
writer.close().await.unwrap();

// Read the written bytes back with the record batch reader. `buffer` is not
// used again after this point, so it is moved into the builder, not cloned.
let buffer = Bytes::from(buffer);
let mut reader = ParquetRecordBatchReaderBuilder::try_new(buffer)
    .unwrap()
    .build()
    .unwrap();
let read = reader.next().unwrap().unwrap();

// The batch read back must equal the batch that was written.
assert_eq!(to_write, read);

Structs