Skip to main content

transferred_files/formats/
mod.rs

//! File formats. A format converts between a file's byte stream and Arrow batches.
//! `Files` owns opening the file and hands the open byte handle over to the format.
//! One module per codec (`parquet`, `csv`, `avro`, etc.).
4
5use async_trait::async_trait;
6use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite};
7use transferred_core::{BatchStream, TransferredError};
8
9pub mod parquet;
10
11pub use parquet::Parquet;
12
13/// A readable file handle trait marker for random-access bytes.
14pub trait FileReader: AsyncRead + AsyncSeek + Send + Unpin {}
15impl<T: AsyncRead + AsyncSeek + Send + Unpin> FileReader for T {}
16
17/// A writable file handle trait.
18pub trait FileWriter: AsyncWrite + Send + Unpin {}
19impl<T: AsyncWrite + Send + Unpin> FileWriter for T {}
20
21/// Decodes a file's bytes into Arrow batches.
22#[async_trait]
23pub trait FormatRead: Send + Sync {
24    /// Read one open file handle into a stream of Arrow batches.
25    async fn read(&self, reader: Box<dyn FileReader>) -> Result<BatchStream, TransferredError>;
26}
27
28/// Encodes Arrow batches into a file's bytes.
29#[async_trait]
30pub trait FormatWrite: Send + Sync {
31    /// File extension for written parts, no dot (e.g. `"parquet"`).
32    fn file_extension(&self) -> &'static str;
33
34    /// Write all batches into one open sink. Returns the row count written.
35    async fn write(
36        &self,
37        writer: Box<dyn FileWriter>,
38        batches: BatchStream,
39    ) -> Result<u64, TransferredError>;
40}