typed_arrow/
schema.rs

1//! Core schema traits for compile-time Arrow typing.
2
3use std::{
4    collections::HashMap, iter::IntoIterator, marker::PhantomData, option::Option, sync::Arc,
5};
6
7use arrow_array::{
8    Array, RecordBatch,
9    builder::{ArrayBuilder, StructBuilder},
10};
11use arrow_schema::{DataType, Field, Schema};
12
13pub use crate::error::SchemaError;
14#[cfg(feature = "views")]
15pub use crate::error::ViewAccessError;
16
/// Lifts a [`ViewAccessError`] into a [`SchemaError`].
///
/// A `TypeMismatch` keeps its `expected` and `actual` values (any other fields of that
/// variant are discarded); every other variant is handed to `SchemaError::invalid` as its
/// `to_string()` text.
#[cfg(feature = "views")]
impl From<ViewAccessError> for SchemaError {
    fn from(err: ViewAccessError) -> Self {
        match err {
            ViewAccessError::TypeMismatch {
                expected, actual, ..
            } => Self::TypeMismatch { expected, actual },
            // Anything that is not a type mismatch is carried over as a message only.
            other => Self::invalid(other.to_string()),
        }
    }
}
28
/// A row type whose column count is fixed and known at compile time.
pub trait Record {
    /// How many columns this record has.
    const LEN: usize;
}
34
35/// Per-column metadata for a record at index `I`.
36pub trait ColAt<const I: usize>: Record {
37    /// The Native value type of this column (without nullability).
38    type Native;
39
40    /// The typed Arrow array for this column.
41    type ColumnArray: Array;
42
43    /// The typed Arrow builder for this column.
44    type ColumnBuilder: ArrayBuilder;
45
46    /// Whether this column is nullable.
47    const NULLABLE: bool;
48
49    /// Column name.
50    const NAME: &'static str;
51
52    /// Arrow-rs `DataType` for this column.
53    fn data_type() -> DataType;
54}
55
/// Simple compile-time column metadata passed to visitors.
///
/// `R` is the column's Rust type. It is carried only as a type-level tag through
/// [`PhantomData`]; no value of `R` is stored.
///
/// `FieldMeta<R>` is `Copy`, `Clone` and `Debug` for every `R`.
pub struct FieldMeta<R> {
    /// Column name.
    pub name: &'static str,

    /// Whether this column is nullable.
    pub nullable: bool,

    /// Zero-sized marker tying the metadata to the column type `R`.
    _phantom: PhantomData<R>,
}

impl<R> FieldMeta<R> {
    /// Construct a new `FieldMeta` from a column `name` and its `nullable` flag.
    ///
    /// This is a `const fn`, so it can be used to build constants.
    #[must_use]
    pub const fn new(name: &'static str, nullable: bool) -> Self {
        Self {
            name,
            nullable,
            _phantom: PhantomData,
        }
    }
}

// `Clone`, `Copy` and `Debug` are written by hand instead of derived: a derive would add
// `R: Clone` / `R: Copy` / `R: Debug` bounds even though no `R` value is ever stored.

impl<R> Clone for FieldMeta<R> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<R> Copy for FieldMeta<R> {}

impl<R> std::fmt::Debug for FieldMeta<R> {
    /// Prints the `name` and `nullable` fields; the marker field is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FieldMeta")
            .field("name", &self.name)
            .field("nullable", &self.nullable)
            .finish()
    }
}
78
79/// A visitor invoked at compile time for each column of a `Record`.
80pub trait ColumnVisitor {
81    /// Process a column at index `I` with Rust type `R`.
82    fn visit<const I: usize, R>(_m: FieldMeta<R>);
83}
84
85/// Trait emitted by derive/macro to enable `for_each_col` expansion.
86pub trait ForEachCol: Record {
87    /// Invoke `V::visit` for each column at compile time.
88    fn for_each_col<V: ColumnVisitor>();
89}
90
// No Arrow markers: `ColAt` exposes `data_type()`/`ColumnBuilder`/`ColumnArray` directly.
92
93/// Metadata and builder utilities for nested Struct fields.
94pub trait StructMeta: Record {
95    /// Child fields (names, data types, nullability) for this struct.
96    fn child_fields() -> Vec<Field>;
97
98    /// Construct a `StructBuilder` with appropriate child builders for this struct.
99    fn new_struct_builder(capacity: usize) -> StructBuilder;
100}
101
102/// Arrow runtime schema metadata for a top-level Record.
103pub trait SchemaMeta: Record {
104    /// Top-level fields: (name, `data_type`, nullable) represented as `Field`s.
105    fn fields() -> Vec<Field>;
106
107    /// Optional top-level schema key/value metadata.
108    #[must_use]
109    fn metadata() -> HashMap<String, String> {
110        HashMap::default()
111    }
112
113    /// Construct an `Arc<arrow_schema::Schema>` from `fields()`.
114    fn schema() -> Arc<Schema> {
115        let fields: Vec<Arc<Field>> = Self::fields().into_iter().map(Arc::new).collect();
116        Arc::new(Schema::new_with_metadata(fields, Self::metadata()))
117    }
118}
119
120/// Row-based building interface: construct typed column builders, append owned rows,
121/// and finish into typed arrays.
122pub trait BuildRows: Record + Sized {
123    /// Generated builders struct for this record.
124    type Builders: RowBuilder<Self>;
125
126    /// Generated arrays struct for this record.
127    type Arrays: IntoRecordBatch;
128
129    /// Create builders with a capacity hint.
130    fn new_builders(capacity: usize) -> Self::Builders;
131}
132
133/// Trait implemented by derive-generated builders to append rows of `Row`
134/// and finish into a typed arrays struct.
135pub trait RowBuilder<Row> {
136    /// The arrays struct produced by `finish`.
137    type Arrays: IntoRecordBatch;
138
139    /// Append a non-null row.
140    fn append_row(&mut self, row: Row);
141    /// Append a null row.
142    fn append_null_row(&mut self);
143    /// Append an optional row.
144    fn append_option_row(&mut self, row: Option<Row>);
145    /// Append an iterator of non-null rows.
146    fn append_rows<I: IntoIterator<Item = Row>>(&mut self, rows: I);
147    /// Append an iterator of optional rows.
148    fn append_option_rows<I: IntoIterator<Item = Option<Row>>>(&mut self, rows: I);
149    /// Finish and produce arrays.
150    fn finish(self) -> Self::Arrays;
151}
152
153/// Trait implemented by derive-generated arrays to assemble a `RecordBatch`.
154pub trait IntoRecordBatch {
155    /// Assemble and return an `arrow_array::RecordBatch`.
156    fn into_record_batch(self) -> RecordBatch;
157}
158
159// Identity conversion for dynamic path output (RecordBatch already assembled).
160impl IntoRecordBatch for RecordBatch {
161    fn into_record_batch(self) -> RecordBatch {
162        self
163    }
164}
165
166/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
167/// `StructBuilder`. Used by row-based APIs to handle nested struct fields.
168pub trait AppendStruct {
169    /// Append this struct's child values into the provided `StructBuilder`.
170    /// Caller is responsible for setting the parent validity via `append(true)`.
171    fn append_owned_into(self, b: &mut StructBuilder);
172
173    /// Append nulls for each child into the provided `StructBuilder` to align lengths.
174    /// Caller is responsible for `append(false)` for the parent validity.
175    fn append_null_into(b: &mut StructBuilder);
176}
177
178/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
179/// `StructBuilder` from a borrowed reference. This enables container builders (e.g.,
180/// lists of structs) to append child values without taking ownership of the struct.
181pub trait AppendStructRef {
182    /// Append this struct's child values into the provided `StructBuilder` using borrows.
183    /// Caller is responsible for setting the parent validity via `append(true)`.
184    fn append_borrowed_into(&self, b: &mut StructBuilder);
185}
186
/// Zero-copy, row-wise views over a `RecordBatch`.
///
/// `#[derive(Record)]` implements this automatically, generating a view struct
/// (`{Name}View<'a>`) and an iterator (`{Name}Views<'a>`) that give borrowed access to the
/// rows of a `RecordBatch` without copying data.
#[cfg(feature = "views")]
pub trait FromRecordBatch: Record + Sized {
    /// View of a single row, holding borrowed references.
    type View<'a>;

    /// Iterator over all rows, yielding each view wrapped in a `Result`.
    ///
    /// Items are `Result<View, ViewAccessError>` so that failures during view access
    /// (e.g., type mismatches, unexpected nulls, out of bounds) can be reported per row.
    type Views<'a>: Iterator<Item = Result<Self::View<'a>, ViewAccessError>>;

    /// Build the iterator of views over the rows of `batch`.
    ///
    /// # Errors
    /// Returns `SchemaError` when the schema of `batch` does not match this Record's
    /// schema, which includes mismatched column names, types, or field counts.
    fn from_record_batch(batch: &RecordBatch) -> Result<Self::Views<'_>, SchemaError>;
}
210
/// Convenience methods for iterators whose items are `Result<T, ViewAccessError>`.
///
/// Every iterator yielding `Result<T, ViewAccessError>` gets this trait automatically,
/// for example the iterators returned by [`FromRecordBatch::from_record_batch`].
#[cfg(feature = "views")]
pub trait ViewResultIteratorExt: Iterator + Sized {
    /// The `Ok` type of the iterator's `Result` items.
    ///
    /// This shares its name with `Iterator::Item`, so it is always written as
    /// `<Self as ViewResultIteratorExt>::Item` in this trait.
    type Item;

    /// Collect the whole iterator into a vector of views, stopping at the first error.
    ///
    /// The iterator is consumed and the result is either:
    /// - `Ok(Vec<T>)` holding every successfully accessed view, or
    /// - `Err(ViewAccessError)` holding the first error encountered.
    ///
    /// # Errors
    /// Returns the first `ViewAccessError` encountered while iterating.
    ///
    /// # Examples
    /// ```
    /// use typed_arrow::prelude::*;
    ///
    /// #[derive(typed_arrow::Record)]
    /// struct Row {
    ///     id: i32,
    ///     name: String,
    /// }
    ///
    /// # let rows = vec![Row { id: 1, name: "Alice".into() }];
    /// # let mut b = <Row as BuildRows>::new_builders(1);
    /// # b.append_rows(rows);
    /// # let batch = b.finish().into_record_batch();
    /// // Returns all views or first error
    /// let views = batch.iter_views::<Row>()?.try_flatten()?;
    /// for row in views {
    ///     println!("{}: {}", row.id, row.name);
    /// }
    /// # Ok::<_, typed_arrow::schema::SchemaError>(())
    /// ```
    fn try_flatten(self) -> Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError>
    where
        Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError>:
            std::iter::FromIterator<<Self as Iterator>::Item>,
    {
        // The element and error types are fixed by the return type above.
        self.collect::<Result<Vec<_>, _>>()
    }
}
258
/// Blanket implementation: any iterator over `Result<T, ViewAccessError>` is a
/// `ViewResultIteratorExt` whose success type is `T`.
#[cfg(feature = "views")]
impl<T, I: Iterator<Item = Result<T, ViewAccessError>>> ViewResultIteratorExt for I {
    type Item = T;
}
266
/// Creates a view of one element of a `StructArray`.
///
/// `#[derive(Record)]` implements this automatically; it is used internally to support
/// views of nested structs.
#[cfg(feature = "views")]
pub trait StructView: Record + Sized {
    /// View of this struct, holding borrowed references.
    type View<'a>;

    /// Extract the view at position `index` of `array`.
    ///
    /// # Errors
    /// Returns `ViewAccessError` when `index` is out of bounds, when the value is null
    /// although it is expected to be non-null, or when a type mismatch occurs while
    /// extracting a field.
    fn view_at(
        array: &arrow_array::StructArray,
        index: usize,
    ) -> Result<Self::View<'_>, ViewAccessError>;

    /// Report whether the struct value at position `index` of `array` is null.
    fn is_null_at(array: &arrow_array::StructArray, index: usize) -> bool;
}