Skip to main content

rivet_cli/format/
mod.rs

1pub mod csv;
2pub mod parquet;
3
4use arrow::datatypes::SchemaRef;
5use arrow::record_batch::RecordBatch;
6
7use crate::config::{CompressionType, FormatType};
8use crate::error::Result;
9
10/// Streaming writer: receives one RecordBatch at a time.
11pub trait FormatWriter {
12    fn write_batch(&mut self, batch: &RecordBatch) -> Result<()>;
13    fn finish(self: Box<Self>) -> Result<()>;
14    /// Approximate bytes written so far (for file-size splitting).
15    fn bytes_written(&self) -> u64;
16}
17
18pub trait Format {
19    fn create_writer(
20        &self,
21        schema: &SchemaRef,
22        writer: Box<dyn std::io::Write + Send>,
23    ) -> Result<Box<dyn FormatWriter>>;
24
25    fn file_extension(&self) -> &str;
26}
27
28pub fn create_format(
29    format_type: FormatType,
30    compression: CompressionType,
31    compression_level: Option<u32>,
32) -> Box<dyn Format> {
33    match format_type {
34        FormatType::Csv => Box::new(csv::CsvFormat),
35        FormatType::Parquet => {
36            Box::new(parquet::ParquetFormat::new(compression, compression_level))
37        }
38    }
39}