use std::io::Write;
use parquet::{
arrow::arrow_writer::ArrowWriter,
basic::{Compression, ZstdLevel},
file::properties::WriterProperties,
};
use crate::domain::Datafile;
use crate::error::BiopacError;
use crate::export::arrow::build_record_batch;
/// Tunable settings for Parquet export.
///
/// Values are built with a chained, by-value builder style:
/// start from [`ParquetOptions::new`] (or [`Default::default`]) and override
/// individual settings with the setter methods.
#[derive(Debug, Clone, Copy)]
pub struct ParquetOptions {
    /// Requested Zstandard compression level. Stored unvalidated; it is only
    /// checked when it is converted into a parquet `ZstdLevel` at write time.
    zstd_level: i32,
}

impl Default for ParquetOptions {
    /// Returns the same options as [`ParquetOptions::new`].
    fn default() -> Self {
        // Delegate so the default level is defined in exactly one place.
        Self::new()
    }
}

impl ParquetOptions {
    /// Compression level used when the caller does not choose one.
    const DEFAULT_ZSTD_LEVEL: i32 = 3;

    /// Creates options with the default Zstandard compression level (3).
    #[must_use]
    pub const fn new() -> Self {
        Self {
            zstd_level: Self::DEFAULT_ZSTD_LEVEL,
        }
    }

    /// Returns a copy of these options with the Zstandard level set to `level`.
    ///
    /// No range check happens here: the value is validated by
    /// `ZstdLevel::try_new` when the options are used in `to_parquet`, which
    /// reports an error for levels the parquet crate does not accept.
    #[must_use]
    pub const fn zstd_level(mut self, level: i32) -> Self {
        self.zstd_level = level;
        self
    }
}
/// Serializes `datafile` as a ZSTD-compressed Parquet file into `writer`.
///
/// The datafile is first converted into a single Arrow record batch via
/// `build_record_batch`; that batch is then written through a parquet
/// `ArrowWriter` configured with the compression level from `options`, and
/// the writer is closed before returning.
///
/// # Errors
///
/// Returns a [`BiopacError`] if the record batch cannot be built, if the
/// configured Zstandard level is rejected by `ZstdLevel::try_new`, or if
/// creating, writing to, or closing the Parquet writer fails.
pub fn to_parquet<W: Write + Send>(
    datafile: &Datafile,
    writer: W,
    options: &ParquetOptions,
) -> Result<(), BiopacError> {
    // Build the batch first so a conversion failure is reported before any
    // option validation, matching the established error precedence.
    let record_batch = build_record_batch(datafile)?;

    let level = ZstdLevel::try_new(options.zstd_level)?;
    let compression = Compression::ZSTD(level);
    let writer_props = WriterProperties::builder()
        .set_compression(compression)
        .build();

    let schema = record_batch.schema();
    let mut sink = ArrowWriter::try_new(writer, schema, Some(writer_props))?;
    sink.write(&record_batch)?;

    // The value returned by `close` is not needed; only its success matters.
    let _closed = sink.close()?;
    Ok(())
}