typed_arrow/
schema.rs

1//! Core schema traits for compile-time Arrow typing.
2//!
//! This module defines the foundational traits that `#[derive(Record)]` implements:
4//!
5//! # Schema Definition
6//!
7//! | Trait | Description |
8//! |-------|-------------|
9//! | [`Record`] | Marker trait with `const LEN: usize` for column count |
10//! | [`ColAt<I>`] | Per-column metadata: type, name, nullability, builder, array |
11//! | [`ForEachCol`] | Compile-time iteration via [`ColumnVisitor`] |
12//! | [`SchemaMeta`] | Runtime schema: `fields()`, `schema()`, `metadata()` |
13//! | [`StructMeta`] | Nested struct support: `child_fields()`, `new_struct_builder()` |
14//!
15//! # Row Building
16//!
17//! | Trait | Description |
18//! |-------|-------------|
19//! | [`BuildRows`] | Entry point: `new_builders(capacity)` |
20//! | [`RowBuilder`] | Append methods: `append_row()`, `append_rows()`, `finish()` |
21//! | [`IntoRecordBatch`] | Convert arrays to [`RecordBatch`] |
22//! | [`AppendStruct`] | Append struct fields into a `StructBuilder` |
23//!
24//! # Zero-Copy Views (requires `views` feature)
25//!
26//! Read [`RecordBatch`] data without allocation using generated view types.
27//!
28//! ## Generated Types
29//!
30//! For each `#[derive(Record)]` struct `Foo`, the macro generates:
31//!
32//! - **`FooView<'a>`** — Borrowed row view with fields as references
33//! - **`FooViews<'a>`** — Iterator yielding `Result<FooView<'a>, ViewAccessError>`
34//! - **`impl TryFrom<FooView<'_>> for Foo`** — Convert view to owned record
35//!
36//! ## Reading Views
37//!
38//! ```
39//! use typed_arrow::prelude::*;
40//!
41//! #[derive(Record)]
42//! struct Row {
43//!     id: i32,
44//!     name: String,
45//! }
46//!
47//! # fn example(batch: arrow_array::RecordBatch) -> Result<(), SchemaError> {
48//! // Get iterator of views
49//! for view in batch.iter_views::<Row>()?.try_flatten()? {
50//!     // view.id is i32 (copied), view.name is &str (zero-copy)
51//!     println!("{}: {}", view.id, view.name);
52//! }
53//! # Ok(())
54//! # }
55//! ```
56//!
57//! ## Converting Views to Owned
58//!
59//! Use [`.try_into()`](TryInto::try_into) when data must outlive the batch:
60//!
61//! ```
62//! use typed_arrow::prelude::*;
63//!
64//! #[derive(Record)]
65//! struct Row {
66//!     id: i32,
67//!     name: String,
68//! }
69//!
70//! # fn example(batch: arrow_array::RecordBatch) -> Result<(), SchemaError> {
71//! let mut owned_rows = Vec::new();
72//! for view in batch.iter_views::<Row>()?.try_flatten()? {
73//!     let owned: Row = view.try_into()?; // Clone strings, copy primitives
74//!     owned_rows.push(owned);
75//! }
76//! // owned_rows can now outlive the batch
77//! # Ok(())
78//! # }
79//! ```
80//!
81//! The conversion uses [`TryFrom`] with [`ViewAccessError`] to handle nested
82//! structures that may fail during conversion.
83//!
84//! ## Key Traits
85//!
86//! | Trait | Description |
87//! |-------|-------------|
88//! | [`FromRecordBatch`] | Create views from a batch via `from_record_batch()` |
89//! | [`ViewResultIteratorExt`] | Helper `.try_flatten()` for view iterators |
90//! | [`StructView`] | Internal: extract views from nested `StructArray` |
91
92use std::{
93    collections::HashMap, iter::IntoIterator, marker::PhantomData, option::Option, sync::Arc,
94};
95
96use arrow_array::{
97    Array, RecordBatch,
98    builder::{ArrayBuilder, StructBuilder},
99};
100use arrow_schema::{DataType, Field, Schema};
101
102pub use crate::error::SchemaError;
103#[cfg(feature = "views")]
104pub use crate::error::ViewAccessError;
105
#[cfg(feature = "views")]
impl From<ViewAccessError> for SchemaError {
    /// Lift a view-access failure into a schema-level error.
    ///
    /// A `TypeMismatch` keeps its `expected`/`actual` payload (remaining fields are
    /// discarded); any other variant is reduced to its display text and wrapped with
    /// `SchemaError::invalid`.
    fn from(err: ViewAccessError) -> Self {
        match err {
            ViewAccessError::TypeMismatch {
                expected, actual, ..
            } => Self::TypeMismatch { expected, actual },
            // Everything else only survives as a message.
            other => Self::invalid(other.to_string()),
        }
    }
}
117
/// A row type whose column count is fixed at compile time.
pub trait Record {
    /// How many columns this record has.
    const LEN: usize;
}
123
124/// Per-column metadata for a record at index `I`.
125pub trait ColAt<const I: usize>: Record {
126    /// The Native value type of this column (without nullability).
127    type Native;
128
129    /// The typed Arrow array for this column.
130    type ColumnArray: Array;
131
132    /// The typed Arrow builder for this column.
133    type ColumnBuilder: ArrayBuilder;
134
135    /// Whether this column is nullable.
136    const NULLABLE: bool;
137
138    /// Column name.
139    const NAME: &'static str;
140
141    /// Arrow-rs `DataType` for this column.
142    fn data_type() -> DataType;
143}
144
/// Simple compile-time column metadata passed to visitors.
///
/// `R` is the Rust type associated with the column. It is tracked only through
/// `PhantomData`, so no value of `R` is ever stored.
///
/// `Debug`, `Clone` and `Copy` are implemented for every `R` (see the manual impls
/// below), so the metadata can be logged and passed around freely.
pub struct FieldMeta<R> {
    /// Column name.
    pub name: &'static str,

    /// Whether this column is nullable.
    pub nullable: bool,

    _phantom: PhantomData<R>,
}

impl<R> FieldMeta<R> {
    /// Construct a new `FieldMeta` from a column name and its nullability.
    #[must_use]
    pub const fn new(name: &'static str, nullable: bool) -> Self {
        Self {
            name,
            nullable,
            _phantom: PhantomData,
        }
    }
}

// The impls below are written by hand on purpose: `#[derive(..)]` would add an
// `R: Trait` bound even though `R` only appears inside `PhantomData`.

impl<R> Clone for FieldMeta<R> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<R> Copy for FieldMeta<R> {}

impl<R> std::fmt::Debug for FieldMeta<R> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FieldMeta")
            .field("name", &self.name)
            .field("nullable", &self.nullable)
            .finish()
    }
}
167
168/// A visitor invoked at compile time for each column of a `Record`.
169pub trait ColumnVisitor {
170    /// Process a column at index `I` with Rust type `R`.
171    fn visit<const I: usize, R>(_m: FieldMeta<R>);
172}
173
174/// Trait emitted by derive/macro to enable `for_each_col` expansion.
175pub trait ForEachCol: Record {
176    /// Invoke `V::visit` for each column at compile time.
177    fn for_each_col<V: ColumnVisitor>();
178}
179
// No Arrow markers: `ColAt` exposes `data_type()`/`ColumnBuilder`/`ColumnArray` directly.
181
182/// Metadata and builder utilities for nested Struct fields.
183pub trait StructMeta: Record {
184    /// Child fields (names, data types, nullability) for this struct.
185    fn child_fields() -> Vec<Field>;
186
187    /// Construct a `StructBuilder` with appropriate child builders for this struct.
188    fn new_struct_builder(capacity: usize) -> StructBuilder;
189}
190
191/// Arrow runtime schema metadata for a top-level Record.
192pub trait SchemaMeta: Record {
193    /// Top-level fields: (name, `data_type`, nullable) represented as `Field`s.
194    fn fields() -> Vec<Field>;
195
196    /// Optional top-level schema key/value metadata.
197    #[must_use]
198    fn metadata() -> HashMap<String, String> {
199        HashMap::default()
200    }
201
202    /// Construct an `Arc<arrow_schema::Schema>` from `fields()`.
203    fn schema() -> Arc<Schema> {
204        let fields: Vec<Arc<Field>> = Self::fields().into_iter().map(Arc::new).collect();
205        Arc::new(Schema::new_with_metadata(fields, Self::metadata()))
206    }
207}
208
209/// Row-based building interface: construct typed column builders, append owned rows,
210/// and finish into typed arrays.
211pub trait BuildRows: Record + Sized {
212    /// Generated builders struct for this record.
213    type Builders: RowBuilder<Self>;
214
215    /// Generated arrays struct for this record.
216    type Arrays: IntoRecordBatch;
217
218    /// Create builders with a capacity hint.
219    fn new_builders(capacity: usize) -> Self::Builders;
220}
221
222/// Trait implemented by derive-generated builders to append rows of `Row`
223/// and finish into a typed arrays struct.
224pub trait RowBuilder<Row> {
225    /// The arrays struct produced by `finish`.
226    type Arrays: IntoRecordBatch;
227
228    /// Append a non-null row.
229    fn append_row(&mut self, row: Row);
230    /// Append a null row.
231    fn append_null_row(&mut self);
232    /// Append an optional row.
233    fn append_option_row(&mut self, row: Option<Row>);
234    /// Append an iterator of non-null rows.
235    fn append_rows<I: IntoIterator<Item = Row>>(&mut self, rows: I);
236    /// Append an iterator of optional rows.
237    fn append_option_rows<I: IntoIterator<Item = Option<Row>>>(&mut self, rows: I);
238    /// Finish and produce arrays.
239    fn finish(self) -> Self::Arrays;
240}
241
242/// Trait implemented by derive-generated arrays to assemble a `RecordBatch`.
243pub trait IntoRecordBatch {
244    /// Assemble and return an `arrow_array::RecordBatch`.
245    fn into_record_batch(self) -> RecordBatch;
246}
247
248// Identity conversion for dynamic path output (RecordBatch already assembled).
249impl IntoRecordBatch for RecordBatch {
250    fn into_record_batch(self) -> RecordBatch {
251        self
252    }
253}
254
255/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
256/// `StructBuilder`. Used by row-based APIs to handle nested struct fields.
257pub trait AppendStruct {
258    /// Append this struct's child values into the provided `StructBuilder`.
259    /// Caller is responsible for setting the parent validity via `append(true)`.
260    fn append_owned_into(self, b: &mut StructBuilder);
261
262    /// Append nulls for each child into the provided `StructBuilder` to align lengths.
263    /// Caller is responsible for `append(false)` for the parent validity.
264    fn append_null_into(b: &mut StructBuilder);
265}
266
267/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
268/// `StructBuilder` from a borrowed reference. This enables container builders (e.g.,
269/// lists of structs) to append child values without taking ownership of the struct.
270pub trait AppendStructRef {
271    /// Append this struct's child values into the provided `StructBuilder` using borrows.
272    /// Caller is responsible for setting the parent validity via `append(true)`.
273    fn append_borrowed_into(&self, b: &mut StructBuilder);
274}
275
/// Creates zero-copy views over the rows of a `RecordBatch`.
///
/// `#[derive(Record)]` implements this automatically, generating a view struct
/// (`{Name}View<'a>`) and an iterator (`{Name}Views<'a>`) that give borrowed access
/// to the batch's rows without copying data.
#[cfg(feature = "views")]
pub trait FromRecordBatch: Record + Sized {
    /// A single row, exposed through borrowed references.
    type View<'a>;

    /// Iterator over all rows, yielding each view wrapped in a `Result`.
    ///
    /// Items are `Result<View, ViewAccessError>` so that problems hit while accessing
    /// a view (e.g., type mismatches, unexpected nulls, out of bounds) can be reported.
    type Views<'a>: Iterator<Item = Result<Self::View<'a>, ViewAccessError>>;

    /// Build the view iterator for the rows of `batch`.
    ///
    /// # Errors
    /// Returns `SchemaError` when the schema of `batch` does not match this Record's
    /// schema, which covers mismatched column names, types, or field counts.
    fn from_record_batch(batch: &RecordBatch) -> Result<Self::Views<'_>, SchemaError>;
}
299
/// Convenience methods for iterators whose items are `Result<T, ViewAccessError>`.
///
/// Every iterator yielding `Result<T, ViewAccessError>` gets this trait automatically,
/// including the iterators returned by [`FromRecordBatch::from_record_batch`].
#[cfg(feature = "views")]
pub trait ViewResultIteratorExt: Iterator + Sized {
    /// The `Ok` payload of the iterator's `Result` items.
    type Item;

    /// Collect the whole iterator, stopping at the first error.
    ///
    /// The iterator is consumed and the outcome is either:
    /// - `Ok(Vec<T>)` holding every successfully accessed view, or
    /// - `Err(ViewAccessError)` holding the first error encountered.
    ///
    /// # Errors
    /// Returns the first `ViewAccessError` produced by the iterator.
    ///
    /// # Example
    /// ```
    /// use typed_arrow::prelude::*;
    ///
    /// #[derive(Record)]
    /// struct Row {
    ///     id: i32,
    ///     name: String,
    /// }
    ///
    /// # let rows = vec![Row { id: 1, name: "Alice".into() }];
    /// # let mut b = <Row as BuildRows>::new_builders(1);
    /// # b.append_rows(rows);
    /// # let batch = b.finish().into_record_batch();
    /// // Returns all views or first error
    /// let views = batch.iter_views::<Row>()?.try_flatten()?;
    /// for row in views {
    ///     println!("{}: {}", row.id, row.name);
    /// }
    /// # Ok::<_, typed_arrow::schema::SchemaError>(())
    /// ```
    fn try_flatten(self) -> Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError>
    where
        Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError>:
            std::iter::FromIterator<<Self as Iterator>::Item>,
    {
        // Collecting into a `Result` short-circuits on the first `Err`.
        let flattened: Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError> =
            self.collect();
        flattened
    }
}
347
// Blanket impl: any iterator of `Result<T, ViewAccessError>` is a view-result
// iterator whose success type is `T`.
#[cfg(feature = "views")]
impl<T, I: Iterator<Item = Result<T, ViewAccessError>>> ViewResultIteratorExt for I {
    type Item = T;
}
355
/// Builds a view of one row of a `StructArray`.
///
/// `#[derive(Record)]` implements this automatically; it is used internally to
/// support views of nested structs.
#[cfg(feature = "views")]
pub trait StructView: Record + Sized {
    /// View of this struct, holding borrowed references.
    type View<'a>;

    /// Produce the view for row `index` of `array`.
    ///
    /// # Errors
    /// Returns `ViewAccessError` when `index` is out of bounds, when a value expected
    /// to be non-null is null, or when a field's type does not match during extraction.
    fn view_at(
        array: &arrow_array::StructArray,
        index: usize,
    ) -> Result<Self::View<'_>, ViewAccessError>;

    /// Report whether the struct value at row `index` of `array` is null.
    fn is_null_at(array: &arrow_array::StructArray, index: usize) -> bool;
}