typed_arrow/schema.rs
1//! Core schema traits for compile-time Arrow typing.
2//!
//! This module provides the foundational traits that `#[derive(Record)]` implements:
4//!
5//! # Schema Definition
6//!
7//! | Trait | Description |
8//! |-------|-------------|
9//! | [`Record`] | Marker trait with `const LEN: usize` for column count |
10//! | [`ColAt<I>`] | Per-column metadata: type, name, nullability, builder, array |
11//! | [`ForEachCol`] | Compile-time iteration via [`ColumnVisitor`] |
12//! | [`SchemaMeta`] | Runtime schema: `fields()`, `schema()`, `metadata()` |
13//! | [`StructMeta`] | Nested struct support: `child_fields()`, `new_struct_builder()` |
14//!
15//! # Row Building
16//!
17//! | Trait | Description |
18//! |-------|-------------|
19//! | [`BuildRows`] | Entry point: `new_builders(capacity)` |
20//! | [`RowBuilder`] | Append methods: `append_row()`, `append_rows()`, `finish()` |
21//! | [`IntoRecordBatch`] | Convert arrays to [`RecordBatch`] |
22//! | [`AppendStruct`] | Append struct fields into a `StructBuilder` |
23//!
24//! # Zero-Copy Views (requires `views` feature)
25//!
26//! Read [`RecordBatch`] data without allocation using generated view types.
27//!
28//! ## Generated Types
29//!
30//! For each `#[derive(Record)]` struct `Foo`, the macro generates:
31//!
//! - **`FooView<'a>`** — Borrowed row view; primitive fields are copied, while fields such as strings are borrowed from the batch
33//! - **`FooViews<'a>`** — Iterator yielding `Result<FooView<'a>, ViewAccessError>`
34//! - **`impl TryFrom<FooView<'_>> for Foo`** — Convert view to owned record
35//!
36//! ## Reading Views
37//!
38//! ```
39//! use typed_arrow::prelude::*;
40//!
41//! #[derive(Record)]
42//! struct Row {
43//! id: i32,
44//! name: String,
45//! }
46//!
47//! # fn example(batch: arrow_array::RecordBatch) -> Result<(), SchemaError> {
48//! // Get iterator of views
49//! for view in batch.iter_views::<Row>()?.try_flatten()? {
50//! // view.id is i32 (copied), view.name is &str (zero-copy)
51//! println!("{}: {}", view.id, view.name);
52//! }
53//! # Ok(())
54//! # }
55//! ```
56//!
57//! ## Converting Views to Owned
58//!
59//! Use [`.try_into()`](TryInto::try_into) when data must outlive the batch:
60//!
61//! ```
62//! use typed_arrow::prelude::*;
63//!
64//! #[derive(Record)]
65//! struct Row {
66//! id: i32,
67//! name: String,
68//! }
69//!
70//! # fn example(batch: arrow_array::RecordBatch) -> Result<(), SchemaError> {
71//! let mut owned_rows = Vec::new();
72//! for view in batch.iter_views::<Row>()?.try_flatten()? {
73//! let owned: Row = view.try_into()?; // Clone strings, copy primitives
74//! owned_rows.push(owned);
75//! }
76//! // owned_rows can now outlive the batch
77//! # Ok(())
78//! # }
79//! ```
80//!
//! The conversion is fallible: it goes through [`TryFrom`] with [`ViewAccessError`]
//! as the error type, because converting nested structures may fail.
83//!
84//! ## Key Traits
85//!
86//! | Trait | Description |
87//! |-------|-------------|
88//! | [`FromRecordBatch`] | Create views from a batch via `from_record_batch()` |
89//! | [`ViewResultIteratorExt`] | Helper `.try_flatten()` for view iterators |
90//! | [`StructView`] | Internal: extract views from nested `StructArray` |
91
92use std::{
93 collections::HashMap, iter::IntoIterator, marker::PhantomData, option::Option, sync::Arc,
94};
95
96use arrow_array::{
97 Array, RecordBatch,
98 builder::{ArrayBuilder, StructBuilder},
99};
100use arrow_schema::{DataType, Field, Schema};
101
102pub use crate::error::SchemaError;
103#[cfg(feature = "views")]
104pub use crate::error::ViewAccessError;
105
#[cfg(feature = "views")]
impl From<ViewAccessError> for SchemaError {
    /// Convert a view-access failure into a schema error.
    ///
    /// A `TypeMismatch` keeps its `expected` and `actual` values (any other fields of
    /// that variant are dropped). Every other variant is rendered with `to_string()`
    /// and handed to `SchemaError::invalid`.
    fn from(err: ViewAccessError) -> Self {
        match err {
            ViewAccessError::TypeMismatch {
                expected, actual, ..
            } => Self::TypeMismatch { expected, actual },
            // Anything that is not a type mismatch is carried over as text only.
            other => Self::invalid(other.to_string()),
        }
    }
}
117
/// A record (row) with a fixed, compile-time number of columns.
///
/// This is the base trait of the module: the other schema traits declared in this file
/// (`ColAt`, `ForEachCol`, `StructMeta`, `SchemaMeta`, `BuildRows`, ...) all list it as a
/// supertrait. It carries no methods, only the column count.
pub trait Record {
    /// Number of columns in this record.
    const LEN: usize;
}
123
/// Per-column metadata for a record at index `I`.
///
/// `I` is a const generic parameter, so a record type implements this trait once per
/// column index. Presumably valid indices are `0..Self::LEN` — confirm against the derive.
pub trait ColAt<const I: usize>: Record {
    /// The Native value type of this column (without nullability).
    type Native;

    /// The typed Arrow array for this column.
    type ColumnArray: Array;

    /// The typed Arrow builder for this column.
    type ColumnBuilder: ArrayBuilder;

    /// Whether this column is nullable.
    const NULLABLE: bool;

    /// Column name.
    const NAME: &'static str;

    /// Arrow-rs `DataType` for this column.
    ///
    /// This is a function rather than an associated constant; it returns an owned
    /// `DataType` on every call.
    fn data_type() -> DataType;
}
144
/// Simple compile-time column metadata passed to visitors.
///
/// `R` is the Rust type associated with the column. It is only tracked at the type level
/// through `PhantomData`; no value of type `R` is stored.
///
/// The type is `Copy`, `Clone` and `Debug` for every `R`. These impls are written by hand
/// because `#[derive]` would add `R: Copy` / `R: Clone` / `R: Debug` bounds even though no
/// `R` value is ever held.
pub struct FieldMeta<R> {
    /// Column name.
    pub name: &'static str,

    /// Whether this column is nullable.
    pub nullable: bool,

    /// Ties the metadata to the column's Rust type without storing a value.
    _phantom: PhantomData<R>,
}

impl<R> FieldMeta<R> {
    /// Construct a new `FieldMeta`.
    ///
    /// Usable in `const` contexts.
    #[must_use]
    pub const fn new(name: &'static str, nullable: bool) -> Self {
        Self {
            name,
            nullable,
            _phantom: PhantomData,
        }
    }
}

impl<R> Clone for FieldMeta<R> {
    fn clone(&self) -> Self {
        *self
    }
}

// All stored fields (`&'static str`, `bool`, `PhantomData`) are `Copy` regardless of `R`.
impl<R> Copy for FieldMeta<R> {}

impl<R> std::fmt::Debug for FieldMeta<R> {
    /// Formats the public fields only; the phantom marker is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FieldMeta")
            .field("name", &self.name)
            .field("nullable", &self.nullable)
            .finish()
    }
}
167
/// A visitor invoked at compile time for each column of a `Record`.
///
/// `visit` is an associated function with no `self` receiver, so the visitor is selected
/// purely by type (see `ForEachCol::for_each_col::<V>()`) and no visitor instance is
/// passed through this interface.
pub trait ColumnVisitor {
    /// Process a column at index `I` with Rust type `R`.
    ///
    /// `_m` carries the column's name and nullability.
    fn visit<const I: usize, R>(_m: FieldMeta<R>);
}
173
/// Trait emitted by derive/macro to enable `for_each_col` expansion.
///
/// The visitor is supplied as the type parameter `V`; nothing is returned, so any result
/// has to be produced by `V::visit` itself.
pub trait ForEachCol: Record {
    /// Invoke `V::visit` for each column at compile time.
    // NOTE(review): presumably columns are visited in index order 0..LEN — confirm
    // against the derive output.
    fn for_each_col<V: ColumnVisitor>();
}
179
// No Arrow markers: `ColAt` exposes `data_type()`, `ColumnBuilder`, and `ColumnArray` directly.
181
/// Metadata and builder utilities for nested Struct fields.
///
/// Both items are associated functions (no `self`): they describe the record type,
/// not a particular value of it.
pub trait StructMeta: Record {
    /// Child fields (names, data types, nullability) for this struct.
    fn child_fields() -> Vec<Field>;

    /// Construct a `StructBuilder` with appropriate child builders for this struct.
    ///
    /// `capacity` is presumably a pre-allocation hint for the child builders, as with
    /// `BuildRows::new_builders` — confirm against the derive output.
    fn new_struct_builder(capacity: usize) -> StructBuilder;
}
190
191/// Arrow runtime schema metadata for a top-level Record.
192pub trait SchemaMeta: Record {
193 /// Top-level fields: (name, `data_type`, nullable) represented as `Field`s.
194 fn fields() -> Vec<Field>;
195
196 /// Optional top-level schema key/value metadata.
197 #[must_use]
198 fn metadata() -> HashMap<String, String> {
199 HashMap::default()
200 }
201
202 /// Construct an `Arc<arrow_schema::Schema>` from `fields()`.
203 fn schema() -> Arc<Schema> {
204 let fields: Vec<Arc<Field>> = Self::fields().into_iter().map(Arc::new).collect();
205 Arc::new(Schema::new_with_metadata(fields, Self::metadata()))
206 }
207}
208
/// Row-based building interface: construct typed column builders, append owned rows,
/// and finish into typed arrays.
///
/// This trait is the entry point; the appending and finishing methods live on the
/// associated `Builders` type (see `RowBuilder`).
pub trait BuildRows: Record + Sized {
    /// Generated builders struct for this record.
    type Builders: RowBuilder<Self>;

    /// Generated arrays struct for this record.
    // NOTE(review): nothing in this declaration ties `Self::Arrays` to
    // `<Self::Builders as RowBuilder<Self>>::Arrays`; presumably the derive makes them
    // the same type — confirm.
    type Arrays: IntoRecordBatch;

    /// Create builders with a capacity hint.
    fn new_builders(capacity: usize) -> Self::Builders;
}
221
/// Trait implemented by derive-generated builders to append rows of `Row`
/// and finish into a typed arrays struct.
///
/// Appending borrows the builder mutably; `finish` consumes it.
pub trait RowBuilder<Row> {
    /// The arrays struct produced by `finish`.
    type Arrays: IntoRecordBatch;

    /// Append a non-null row. Takes the row by value.
    fn append_row(&mut self, row: Row);
    /// Append a null row.
    fn append_null_row(&mut self);
    /// Append an optional row.
    // NOTE(review): presumably `Some(row)` behaves like `append_row` and `None` like
    // `append_null_row` — confirm against the derive output.
    fn append_option_row(&mut self, row: Option<Row>);
    /// Append an iterator of non-null rows.
    fn append_rows<I: IntoIterator<Item = Row>>(&mut self, rows: I);
    /// Append an iterator of optional rows.
    fn append_option_rows<I: IntoIterator<Item = Option<Row>>>(&mut self, rows: I);
    /// Finish and produce arrays. Consumes the builder.
    fn finish(self) -> Self::Arrays;
}
241
/// Trait implemented by derive-generated arrays to assemble a `RecordBatch`.
///
/// The conversion consumes `self` and its signature is infallible: it returns a
/// `RecordBatch` directly rather than a `Result`.
pub trait IntoRecordBatch {
    /// Assemble and return an `arrow_array::RecordBatch`.
    fn into_record_batch(self) -> RecordBatch;
}
247
// Identity conversion for dynamic path output (RecordBatch already assembled).
// Lets an already-built `RecordBatch` be used wherever `IntoRecordBatch` is required.
impl IntoRecordBatch for RecordBatch {
    /// Return the batch unchanged.
    fn into_record_batch(self) -> RecordBatch {
        self
    }
}
254
/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
/// `StructBuilder`. Used by row-based APIs to handle nested struct fields.
///
/// Neither method touches the parent validity; in both cases the caller must record
/// it on the `StructBuilder` separately.
pub trait AppendStruct {
    /// Append this struct's child values into the provided `StructBuilder`.
    /// Caller is responsible for setting the parent validity via `append(true)`.
    ///
    /// Consumes `self`; see `AppendStructRef` for the borrowing variant.
    fn append_owned_into(self, b: &mut StructBuilder);

    /// Append nulls for each child into the provided `StructBuilder` to align lengths.
    /// Caller is responsible for `append(false)` for the parent validity.
    ///
    /// Associated function: no struct value is needed to append a null entry.
    fn append_null_into(b: &mut StructBuilder);
}
266
/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
/// `StructBuilder` from a borrowed reference. This enables container builders (e.g.,
/// lists of structs) to append child values without taking ownership of the struct.
///
/// Borrowing counterpart of `AppendStruct::append_owned_into`.
pub trait AppendStructRef {
    /// Append this struct's child values into the provided `StructBuilder` using borrows.
    /// Caller is responsible for setting the parent validity via `append(true)`.
    fn append_borrowed_into(&self, b: &mut StructBuilder);
}
275
/// Trait for creating zero-copy views over a `RecordBatch`.
///
/// Implemented automatically by `#[derive(Record)]` to generate a view struct
/// (`{Name}View<'a>`) and an iterator (`{Name}Views<'a>`) that provide borrowed
/// access to `RecordBatch` rows without copying data.
///
/// Only compiled when the `views` feature is enabled.
#[cfg(feature = "views")]
pub trait FromRecordBatch: Record + Sized {
    /// The view type representing a single row with borrowed references.
    type View<'a>;

    /// The iterator type yielding Result-wrapped views over all rows.
    ///
    /// Each item is a `Result<View, ViewAccessError>` to handle potential errors
    /// during view access (e.g., type mismatches, unexpected nulls, out of bounds).
    type Views<'a>: Iterator<Item = Result<Self::View<'a>, ViewAccessError>>;

    /// Create an iterator of views over the `RecordBatch` rows.
    ///
    /// The returned iterator borrows from `batch` (the elided lifetime in
    /// `Self::Views<'_>`), so the batch must outlive it.
    ///
    /// # Errors
    /// Returns `SchemaError` if the `RecordBatch` schema doesn't match this Record's schema.
    /// This includes mismatched column names, types, or field counts.
    fn from_record_batch(batch: &RecordBatch) -> Result<Self::Views<'_>, SchemaError>;
}
299
/// Convenience methods for iterators whose items are `Result<T, ViewAccessError>`.
///
/// Every such iterator gets this trait automatically, including the iterators returned by
/// [`FromRecordBatch::from_record_batch`].
#[cfg(feature = "views")]
pub trait ViewResultIteratorExt: Iterator + Sized {
    /// The success type `T` of the iterator's `Result<T, ViewAccessError>` items.
    ///
    /// It shares its name with [`Iterator::Item`], which is why the signature below spells
    /// out `<Self as ViewResultIteratorExt>::Item` and `<Self as Iterator>::Item` in full.
    type Item;

    /// Collect every view into a `Vec`, stopping at the first error.
    ///
    /// The iterator is consumed. The outcome is one of:
    /// - `Ok(Vec<T>)` holding all successfully accessed views
    /// - `Err(ViewAccessError)` holding the first error encountered
    ///
    /// # Errors
    /// Returns the first `ViewAccessError` encountered while iterating.
    ///
    /// # Examples
    /// ```
    /// use typed_arrow::prelude::*;
    ///
    /// #[derive(Record)]
    /// struct Row {
    ///     id: i32,
    ///     name: String,
    /// }
    ///
    /// # let rows = vec![Row { id: 1, name: "Alice".into() }];
    /// # let mut b = <Row as BuildRows>::new_builders(1);
    /// # b.append_rows(rows);
    /// # let batch = b.finish().into_record_batch();
    /// // Returns all views or first error
    /// let views = batch.iter_views::<Row>()?.try_flatten()?;
    /// for row in views {
    ///     println!("{}: {}", row.id, row.name);
    /// }
    /// # Ok::<_, typed_arrow::schema::SchemaError>(())
    /// ```
    fn try_flatten(self) -> Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError>
    where
        Result<Vec<<Self as ViewResultIteratorExt>::Item>, ViewAccessError>:
            std::iter::FromIterator<<Self as Iterator>::Item>,
    {
        // The target collection is fixed by the return type and the bound above.
        Iterator::collect(self)
    }
}
347
/// Blanket implementation: any iterator whose items are `Result<T, ViewAccessError>`
/// gets `ViewResultIteratorExt`, with `T` as the success type.
#[cfg(feature = "views")]
impl<I, T> ViewResultIteratorExt for I
where
    I: Iterator<Item = Result<T, ViewAccessError>>,
{
    /// The `Ok` payload of the iterator's items.
    type Item = T;
}
355
/// Trait for creating a view from a `StructArray` at a specific index.
///
/// This is automatically implemented by `#[derive(Record)]` and used internally
/// to support nested struct views.
///
/// Only compiled when the `views` feature is enabled.
#[cfg(feature = "views")]
pub trait StructView: Record + Sized {
    /// The view type for this struct with borrowed references.
    type View<'a>;

    /// Extract a view at the given index from a `StructArray`.
    ///
    /// The returned view borrows from `array` (the elided lifetime in `Self::View<'_>`).
    ///
    /// # Errors
    /// Returns `ViewAccessError` if the index is out of bounds, the value is null when expected to
    /// be non-null, or if there's a type mismatch during field extraction.
    fn view_at(
        array: &arrow_array::StructArray,
        index: usize,
    ) -> Result<Self::View<'_>, ViewAccessError>;

    /// Check if the struct value at the given index is null.
    // NOTE(review): behaviour for an out-of-bounds `index` is not specified here (the
    // return type is a plain `bool`) — confirm whether implementations panic.
    fn is_null_at(array: &arrow_array::StructArray, index: usize) -> bool;
}