typed_arrow/schema.rs
1//! Core schema traits for compile-time Arrow typing.
2
3use std::{
4 collections::HashMap, iter::IntoIterator, marker::PhantomData, option::Option, sync::Arc,
5};
6
7use arrow_array::{
8 builder::{ArrayBuilder, StructBuilder},
9 Array, RecordBatch,
10};
11use arrow_schema::{DataType, Field, Schema};
12
13pub use crate::error::SchemaError;
14#[cfg(feature = "views")]
15pub use crate::error::ViewAccessError;
16
#[cfg(feature = "views")]
impl From<ViewAccessError> for SchemaError {
    /// Turn a view access failure into a schema error.
    ///
    /// `ViewAccessError::TypeMismatch` becomes `SchemaError::TypeMismatch`, keeping the
    /// `expected` and `actual` values and discarding any other fields of the variant.
    /// Every other variant is formatted with `to_string()` and passed to
    /// `SchemaError::invalid`.
    fn from(err: ViewAccessError) -> Self {
        match err {
            ViewAccessError::TypeMismatch {
                expected, actual, ..
            } => Self::TypeMismatch { expected, actual },
            // Anything that is not a type mismatch keeps only its display text.
            other => Self::invalid(other.to_string()),
        }
    }
}
28
/// A row type whose number of columns is fixed at compile time.
pub trait Record {
    /// How many columns this record has.
    const LEN: usize;
}
34
35/// Per-column metadata for a record at index `I`.
36pub trait ColAt<const I: usize>: Record {
37 /// The Native value type of this column (without nullability).
38 type Native;
39
40 /// The typed Arrow array for this column.
41 type ColumnArray: Array;
42
43 /// The typed Arrow builder for this column.
44 type ColumnBuilder: ArrayBuilder;
45
46 /// Whether this column is nullable.
47 const NULLABLE: bool;
48
49 /// Column name.
50 const NAME: &'static str;
51
52 /// Arrow-rs `DataType` for this column.
53 fn data_type() -> DataType;
54}
55
/// Lightweight compile-time column metadata handed to visitors.
///
/// `R` is only a type-level tag held through `PhantomData`; no value of type `R` is stored.
pub struct FieldMeta<R> {
    /// Name of the column.
    pub name: &'static str,

    /// `true` when the column is nullable.
    pub nullable: bool,

    // Zero-sized marker that ties this metadata to `R`.
    _phantom: PhantomData<R>,
}

impl<R> FieldMeta<R> {
    /// Build a `FieldMeta` from a column name and its nullability flag.
    ///
    /// Usable in `const` contexts.
    #[must_use]
    pub const fn new(name: &'static str, nullable: bool) -> Self {
        FieldMeta {
            _phantom: PhantomData,
            nullable,
            name,
        }
    }
}
78
79/// A visitor invoked at compile time for each column of a `Record`.
80pub trait ColumnVisitor {
81 /// Process a column at index `I` with Rust type `R`.
82 fn visit<const I: usize, R>(_m: FieldMeta<R>);
83}
84
85/// Trait emitted by derive/macro to enable `for_each_col` expansion.
86pub trait ForEachCol: Record {
87 /// Invoke `V::visit` for each column at compile time.
88 fn for_each_col<V: ColumnVisitor>();
89}
90
// No Arrow markers: ColAt exposes data_type()/ColumnBuilder/ColumnArray
92
93/// Metadata and builder utilities for nested Struct fields.
94pub trait StructMeta: Record {
95 /// Child fields (names, data types, nullability) for this struct.
96 fn child_fields() -> Vec<Field>;
97
98 /// Construct a `StructBuilder` with appropriate child builders for this struct.
99 fn new_struct_builder(capacity: usize) -> StructBuilder;
100}
101
102/// Arrow runtime schema metadata for a top-level Record.
103pub trait SchemaMeta: Record {
104 /// Top-level fields: (name, `data_type`, nullable) represented as `Field`s.
105 fn fields() -> Vec<Field>;
106
107 /// Optional top-level schema key/value metadata.
108 #[must_use]
109 fn metadata() -> HashMap<String, String> {
110 HashMap::default()
111 }
112
113 /// Construct an `Arc<arrow_schema::Schema>` from `fields()`.
114 fn schema() -> Arc<Schema> {
115 let fields: Vec<Arc<Field>> = Self::fields().into_iter().map(Arc::new).collect();
116 Arc::new(Schema::new_with_metadata(fields, Self::metadata()))
117 }
118}
119
120/// Row-based building interface: construct typed column builders, append owned rows,
121/// and finish into typed arrays.
122pub trait BuildRows: Record + Sized {
123 /// Generated builders struct for this record.
124 type Builders: RowBuilder<Self>;
125
126 /// Generated arrays struct for this record.
127 type Arrays: IntoRecordBatch;
128
129 /// Create builders with a capacity hint.
130 fn new_builders(capacity: usize) -> Self::Builders;
131}
132
133/// Trait implemented by derive-generated builders to append rows of `Row`
134/// and finish into a typed arrays struct.
135pub trait RowBuilder<Row> {
136 /// The arrays struct produced by `finish`.
137 type Arrays: IntoRecordBatch;
138
139 /// Append a non-null row.
140 fn append_row(&mut self, row: Row);
141 /// Append a null row.
142 fn append_null_row(&mut self);
143 /// Append an optional row.
144 fn append_option_row(&mut self, row: Option<Row>);
145 /// Append an iterator of non-null rows.
146 fn append_rows<I: IntoIterator<Item = Row>>(&mut self, rows: I);
147 /// Append an iterator of optional rows.
148 fn append_option_rows<I: IntoIterator<Item = Option<Row>>>(&mut self, rows: I);
149 /// Finish and produce arrays.
150 fn finish(self) -> Self::Arrays;
151}
152
153/// Trait implemented by derive-generated arrays to assemble a `RecordBatch`.
154pub trait IntoRecordBatch {
155 /// Assemble and return an `arrow_array::RecordBatch`.
156 fn into_record_batch(self) -> RecordBatch;
157}
158
159// Identity conversion for dynamic path output (RecordBatch already assembled).
160impl IntoRecordBatch for RecordBatch {
161 fn into_record_batch(self) -> RecordBatch {
162 self
163 }
164}
165
166/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
167/// `StructBuilder`. Used by row-based APIs to handle nested struct fields.
168pub trait AppendStruct {
169 /// Append this struct's child values into the provided `StructBuilder`.
170 /// Caller is responsible for setting the parent validity via `append(true)`.
171 fn append_owned_into(self, b: &mut StructBuilder);
172
173 /// Append nulls for each child into the provided `StructBuilder` to align lengths.
174 /// Caller is responsible for `append(false)` for the parent validity.
175 fn append_null_into(b: &mut StructBuilder);
176}
177
178/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
179/// `StructBuilder` from a borrowed reference. This enables container builders (e.g.,
180/// lists of structs) to append child values without taking ownership of the struct.
181pub trait AppendStructRef {
182 /// Append this struct's child values into the provided `StructBuilder` using borrows.
183 /// Caller is responsible for setting the parent validity via `append(true)`.
184 fn append_borrowed_into(&self, b: &mut StructBuilder);
185}
186
/// Zero-copy, row-wise views over a `RecordBatch`.
///
/// `#[derive(Record)]` implements this automatically, generating a view struct
/// (`{Name}View<'a>`) and an iterator (`{Name}Views<'a>`) that give borrowed access to
/// the rows of a `RecordBatch` without copying data.
#[cfg(feature = "views")]
pub trait FromRecordBatch: Record + Sized {
    /// A single row, expressed as borrowed references.
    type View<'a>;

    /// Iterator over all rows, yielding `Result<View, ViewAccessError>`.
    ///
    /// Items are wrapped in `Result` because view access can fail per row
    /// (e.g., type mismatches, unexpected nulls, out of bounds).
    type Views<'a>: Iterator<Item = Result<Self::View<'a>, ViewAccessError>>;

    /// Build an iterator of views over the rows of `batch`.
    ///
    /// # Errors
    /// Returns `SchemaError` when the schema of `batch` does not match this record's
    /// schema, which includes mismatched column names, types, or field counts.
    fn from_record_batch(batch: &RecordBatch) -> Result<Self::Views<'_>, SchemaError>;
}
210
/// Convenience methods for iterators whose items are `Result<T, ViewAccessError>`.
///
/// Every iterator yielding `Result<T, ViewAccessError>` gets this trait automatically,
/// including the iterators returned by [`FromRecordBatch::from_record_batch`].
#[cfg(feature = "views")]
pub trait ViewResultIteratorExt: Iterator + Sized {
    /// The `Ok` type of the iterator's `Result` items.
    ///
    /// This shares its name with `Iterator::Item`, so it is always written in the
    /// qualified form `<Self as ViewResultIteratorExt>::Item`.
    type Item;

    /// Collect every view into a `Vec`, stopping at the first error.
    ///
    /// The iterator is consumed and the result is either:
    /// - `Ok(Vec<T>)` holding all successfully accessed views
    /// - `Err(ViewAccessError)` holding the first error encountered
    ///
    /// # Errors
    /// Returns the first `ViewAccessError` encountered while iterating.
    ///
    /// # Example
    /// ```
    /// use typed_arrow::prelude::*;
    ///
    /// #[derive(typed_arrow::Record)]
    /// struct Row {
    ///     id: i32,
    ///     name: String,
    /// }
    ///
    /// # let rows = vec![Row { id: 1, name: "Alice".into() }];
    /// # let mut b = <Row as BuildRows>::new_builders(1);
    /// # b.append_rows(rows);
    /// # let batch = b.finish().into_record_batch();
    /// // Returns all views or first error
    /// let views = batch.iter_views::<Row>()?.try_flatten()?;
    /// for row in views {
    ///     println!("{}: {}", row.id, row.name);
    /// }
    /// # Ok::<_, typed_arrow::schema::SchemaError>(())
    /// ```
    fn try_flatten(self) -> Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError>
    where
        Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError>:
            std::iter::FromIterator<<Self as Iterator>::Item>,
    {
        // The target collection is fixed by the return type.
        Iterator::collect(self)
    }
}
258
// Blanket implementation: any iterator over `Result<V, ViewAccessError>` gets
// `ViewResultIteratorExt`, with `V` as the extension trait's `Item`.
#[cfg(feature = "views")]
impl<V, It: Iterator<Item = Result<V, ViewAccessError>>> ViewResultIteratorExt for It {
    type Item = V;
}
266
/// Row-level view access into a `StructArray`.
///
/// `#[derive(Record)]` implements this automatically; it is used internally to support
/// nested struct views.
#[cfg(feature = "views")]
pub trait StructView: Record + Sized {
    /// View of this struct, expressed as borrowed references.
    type View<'a>;

    /// Extract the view stored at `index` in `array`.
    ///
    /// # Errors
    /// Returns `ViewAccessError` when `index` is out of bounds, when a value is null
    /// where a non-null value is expected, or when a field's type does not match
    /// during extraction.
    fn view_at(
        array: &arrow_array::StructArray,
        index: usize,
    ) -> Result<Self::View<'_>, ViewAccessError>;

    /// Report whether the struct value at `index` in `array` is null.
    fn is_null_at(array: &arrow_array::StructArray, index: usize) -> bool;
}