typed_arrow_unified/
lib.rs

1#![deny(missing_docs)]
2//! A small facade that unifies compile-time typed schemas
3//! (from the `typed-arrow` crate) and runtime/dynamic schemas
4//! (from the `typed-arrow-dyn` crate) behind a single, lean API.
5//!
6//! The goal is zero-cost construction on the typed path (via
7//! generics and monomorphization) and a minimal-dispatch dynamic
8//! path for cases where the schema is only known at runtime.
9//!
10//! Most users will interact with:
11//! - `Typed<R>` when `R` is a struct deriving the `typed-arrow` traits.
12//! - `DynSchema` (or `Arc<Schema>`) for runtime-driven schemas.
13//! - `SchemaLike::build_batch` to assemble a `RecordBatch` from rows.
14
15use std::{marker::PhantomData, sync::Arc};
16
17use typed_arrow::{
18    arrow_array::RecordBatch,
19    arrow_schema::Schema,
20    schema::{BuildRows, IntoRecordBatch, RowBuilder, SchemaMeta},
21};
22use typed_arrow_dyn::{DynBuilders, DynError, DynRow, DynSchema};
23
/// Zero-sized marker selecting the compile-time typed schema described by `R`.
///
/// No value of `R` is ever stored; the parameter is carried only through
/// [`PhantomData`]. Every trait implementation on this type is written by
/// hand rather than derived so that none of them places a bound on `R`: the
/// marker can be created, copied and debug-printed regardless of which
/// traits `R` itself implements.
pub struct Typed<R> {
    _phantom: PhantomData<R>,
}

impl<R> Default for Typed<R> {
    /// Create the marker. No `R: Default` bound is required because no `R`
    /// value is constructed.
    fn default() -> Self {
        Self {
            _phantom: PhantomData,
        }
    }
}

impl<R> Clone for Typed<R> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<R> Copy for Typed<R> {}

impl<R> std::fmt::Debug for Typed<R> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Deliberately does not print `R`: that would require `R: Debug`.
        f.write_str("Typed")
    }
}
36
37/// Unified interface for building batches across typed and dynamic schemas.
38pub trait BuildersLike {
39    /// The row representation accepted by these builders.
40    type Row;
41
42    /// The error type returned by these builders.
43    type Error: std::error::Error;
44
45    /// Append a non-null row to all columns.
46    ///
47    /// # Errors
48    /// Returns an error if the dynamic path detects an append/type/builder issue.
49    fn append_row(&mut self, row: Self::Row) -> Result<(), Self::Error>;
50
51    /// Append an optional row; `None` appends a null to all columns.
52    ///
53    /// # Errors
54    /// Returns an error if the dynamic path detects an append/type/builder issue.
55    fn append_option_row(&mut self, row: Option<Self::Row>) -> Result<(), Self::Error>;
56
57    /// Finish building and convert accumulated arrays into a `RecordBatch`.
58    fn finish_into_batch(self) -> RecordBatch;
59
60    /// Try to finish building a `RecordBatch`, returning an error with
61    /// richer diagnostics when available (e.g., dynamic nullability).
62    ///
63    /// # Errors
64    /// Returns an error when batch assembly fails (e.g., dynamic nullability).
65    fn try_finish_into_batch(self) -> Result<RecordBatch, Self::Error>
66    where
67        Self: Sized,
68    {
69        Ok(self.finish_into_batch())
70    }
71}
72
73/// Unified schema abstraction: exposes Arrow schema and row/builder types.
74pub trait SchemaLike {
75    /// The row type produced/consumed for this schema.
76    type Row;
77
78    /// Concrete builders used to accumulate rows into columns.
79    type Builders: BuildersLike<Row = Self::Row>;
80
81    /// Return a shared reference to the underlying Arrow `Schema`.
82    fn schema_ref(&self) -> Arc<Schema>;
83
84    /// Create new column builders with a given capacity hint.
85    fn new_builders(&self, capacity: usize) -> Self::Builders;
86
87    /// Build a `RecordBatch` from an iterator of rows.
88    ///
89    /// Capacity is inferred from the iterator's size hint (upper bound if
90    /// present, otherwise the lower bound). For `ExactSizeIterator`s like
91    /// `Vec` and slices this yields exact preallocation.
92    /// # Errors
93    /// Returns an error if row appends or batch finishing fails on the dynamic path.
94    fn build_batch<I>(
95        &self,
96        rows: I,
97    ) -> Result<RecordBatch, <Self::Builders as BuildersLike>::Error>
98    where
99        I: IntoIterator<Item = Self::Row>,
100    {
101        let iter = rows.into_iter();
102        let (lb, ub) = iter.size_hint();
103        let capacity = ub.unwrap_or(lb);
104        let mut b = self.new_builders(capacity);
105        for r in iter {
106            b.append_row(r)?;
107        }
108        b.try_finish_into_batch()
109    }
110}
111
112/// Adapter over typed builders that implements `BuildersLike`.
113pub struct TypedBuilders<R: BuildRows> {
114    inner: R::Builders,
115}
116
117impl<R: BuildRows> TypedBuilders<R> {
118    fn new(inner: R::Builders) -> Self {
119        Self { inner }
120    }
121}
122
/// Error type for `TypedBuilders`.
///
/// The typed append methods in this file always return `Ok(())`, so this
/// unit struct mainly exists to fill the `BuildersLike::Error` slot.
#[derive(Debug)]
pub struct NoError;

impl std::error::Error for NoError {}

impl std::fmt::Display for NoError {
    /// Writes the fixed text `NoError`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("NoError")
    }
}
134
135impl<R> BuildersLike for TypedBuilders<R>
136where
137    R: BuildRows,
138{
139    type Row = R;
140
141    type Error = NoError;
142
143    fn append_row(&mut self, row: Self::Row) -> Result<(), NoError> {
144        <R::Builders as RowBuilder<R>>::append_row(&mut self.inner, row);
145        Ok(())
146    }
147
148    fn append_option_row(&mut self, row: Option<Self::Row>) -> Result<(), NoError> {
149        <R::Builders as RowBuilder<R>>::append_option_row(&mut self.inner, row);
150        Ok(())
151    }
152
153    fn finish_into_batch(self) -> RecordBatch {
154        <R::Builders as RowBuilder<R>>::finish(self.inner).into_record_batch()
155    }
156}
157
158/// Typed schema: compile-time path.
159impl<R> SchemaLike for Typed<R>
160where
161    R: SchemaMeta + BuildRows,
162{
163    type Row = R;
164
165    type Builders = TypedBuilders<R>;
166
167    fn schema_ref(&self) -> Arc<Schema> {
168        R::schema()
169    }
170
171    fn new_builders(&self, capacity: usize) -> Self::Builders {
172        TypedBuilders::new(R::new_builders(capacity))
173    }
174}
175
176/// Dynamic schema: runtime path.
177impl SchemaLike for DynSchema {
178    type Row = DynRow;
179
180    type Builders = DynBuilders;
181
182    fn schema_ref(&self) -> Arc<Schema> {
183        self.schema.clone()
184    }
185
186    fn new_builders(&self, capacity: usize) -> Self::Builders {
187        DynBuilders::new(self.schema.clone(), capacity)
188    }
189}
190
191/// Convenience: treat an `Arc<Schema>` (aka `SchemaRef`) as a dynamic schema.
192impl SchemaLike for Arc<Schema> {
193    type Row = DynRow;
194
195    type Builders = DynBuilders;
196
197    fn schema_ref(&self) -> Arc<Schema> {
198        self.clone()
199    }
200
201    fn new_builders(&self, capacity: usize) -> Self::Builders {
202        DynBuilders::new(self.clone(), capacity)
203    }
204}
205
206/// Implement unified builders for dynamic builders.
207impl BuildersLike for DynBuilders {
208    type Row = DynRow;
209
210    type Error = DynError;
211
212    fn append_row(&mut self, row: Self::Row) -> Result<(), DynError> {
213        typed_arrow_dyn::DynBuilders::append_option_row(self, Some(row))
214    }
215
216    fn append_option_row(&mut self, row: Option<Self::Row>) -> Result<(), DynError> {
217        typed_arrow_dyn::DynBuilders::append_option_row(self, row)
218    }
219
220    fn finish_into_batch(self) -> RecordBatch {
221        typed_arrow_dyn::DynBuilders::finish_into_batch(self)
222    }
223
224    fn try_finish_into_batch(self) -> Result<RecordBatch, DynError> {
225        typed_arrow_dyn::DynBuilders::try_finish_into_batch(self)
226    }
227}