typed_arrow/lib.rs
1#![deny(missing_docs)]
2//! Compile-time Arrow schema definition using Rust types.
3//!
4//! `typed-arrow` maps Rust structs directly to Arrow schemas, builders, and arrays
5//! without runtime `DataType` switching. This enables zero-cost, monomorphized
6//! column construction with compile-time type safety.
7//!
8//! # Quick Start
9//!
10//! ```
11//! use typed_arrow::prelude::*;
12//!
13//! #[derive(Record)]
14//! struct Person {
15//! id: i64,
16//! name: String,
17//! score: Option<f64>,
18//! }
19//!
20//! // Build arrays from rows
21//! let rows = vec![
22//! Person {
23//! id: 1,
24//! name: "Alice".into(),
25//! score: Some(95.5),
26//! },
27//! Person {
28//! id: 2,
29//! name: "Bob".into(),
30//! score: None,
31//! },
32//! ];
33//!
34//! let mut builders = <Person as BuildRows>::new_builders(rows.len());
35//! builders.append_rows(rows);
36//! let batch = builders.finish().into_record_batch();
37//!
38//! assert_eq!(batch.num_rows(), 2);
39//! assert_eq!(batch.num_columns(), 3);
40//! ```
41//!
42//! # Cargo Features
43//!
44//! | Feature | Default | Description |
45//! |---------|---------|-------------|
46//! | `derive` | ✓ | Enables [`#[derive(Record)]`](Record) and [`#[derive(Union)]`](Union) macros |
47//! | `views` | ✓ | Zero-copy views for reading [`RecordBatch`](arrow_array::RecordBatch) data |
48//! | `ext-hooks` | | Extensibility hooks for custom derive behavior |
49//!
50//! # Derive Macros
51//!
52//! ## `#[derive(Record)]`
53//!
54//! Generates Arrow schema traits for structs. See [`schema::Record`] for the marker trait.
55//!
56//! ```
57//! use typed_arrow::prelude::*;
58//!
59//! #[derive(Record)]
60//! struct Event {
61//! id: i64, // Non-null Int64
62//! name: Option<String>, // Nullable Utf8
63//! #[record(name = "eventType")] // Override Arrow field name
64//! event_type: String,
65//! }
66//! ```
67//!
68//! **Field attributes:**
69//! - `#[record(name = "...")]` — Override the Arrow field name
70//! - `#[arrow(nullable)]` — Force nullability even without `Option<T>`
71//! - `#[metadata(k = "key", v = "value")]` — Add field-level metadata
72//! - `#[schema_metadata(k = "key", v = "value")]` — Add schema-level metadata (on struct)
73//!
74//! ## `#[derive(Union)]`
75//!
76//! Generates Arrow Union type bindings for enums. Implements
77//! [`ArrowBinding`](bridge::ArrowBinding).
78//!
79//! ```
80//! use typed_arrow::prelude::*;
81//!
82//! #[derive(Union)]
83//! #[union(mode = "dense")] // or "sparse"
84//! enum Value {
85//! #[union(tag = 0)]
86//! Int(i32),
87//! #[union(tag = 1, field = "text")]
88//! Str(String),
89//! }
90//! ```
91//!
92//! **Container attributes:**
93//! - `#[union(mode = "dense"|"sparse")]` — Union mode (default: dense)
94//! - `#[union(null_variant = "None")]` — Designate a null-carrier variant
95//! - `#[union(tags(A = 0, B = 1))]` — Set all variant tags at once
96//!
97//! **Variant attributes:**
98//! - `#[union(tag = N)]` — Set type ID for this variant
99//! - `#[union(field = "name")]` — Override Arrow field name
100//! - `#[union(null)]` — Mark as the null-carrier variant
101//!
102//! # Core Traits
103//!
104//! ## Schema Traits (in [`schema`] module)
105//!
106//! | Trait | Description |
107//! |-------|-------------|
108//! | [`Record`](schema::Record) | Marker for structs with `const LEN: usize` columns |
109//! | [`ColAt<I>`](schema::ColAt) | Per-column metadata: `Native`, `ColumnArray`, `ColumnBuilder`, `NULLABLE`, `NAME`, `data_type()` |
110//! | [`ForEachCol`](schema::ForEachCol) | Compile-time column iteration via [`ColumnVisitor`](schema::ColumnVisitor) |
111//! | [`SchemaMeta`](schema::SchemaMeta) | Runtime schema access: `fields()`, `schema()`, `metadata()` |
112//! | [`StructMeta`](schema::StructMeta) | Nested struct support: `child_fields()`, `new_struct_builder()` |
113//!
114//! ## Row Building Traits (in [`schema`] module)
115//!
116//! | Trait | Description |
117//! |-------|-------------|
118//! | [`BuildRows`](schema::BuildRows) | Entry point: `new_builders(capacity)` → `Builders` |
119//! | [`RowBuilder<T>`](schema::RowBuilder) | `append_row()`, `append_rows()`, `append_option_row()`, `finish()` |
120//! | [`IntoRecordBatch`](schema::IntoRecordBatch) | Convert finished arrays to [`RecordBatch`](arrow_array::RecordBatch) |
121//! | [`AppendStruct`](schema::AppendStruct) | Append struct fields into a `StructBuilder` |
122//!
123//! ## Type Binding Trait (in [`bridge`] module)
124//!
125//! | Trait | Description |
126//! |-------|-------------|
127//! | [`ArrowBinding`](bridge::ArrowBinding) | Maps Rust types to Arrow: `Builder`, `Array`, `data_type()`, `append_value()`, `finish()` |
128//!
129//! # Supported Types
130//!
131//! ## Primitives
132//!
133//! | Rust Type | Arrow Type |
134//! |-----------|------------|
135//! | `i8`, `i16`, `i32`, `i64` | `Int8`, `Int16`, `Int32`, `Int64` |
136//! | `u8`, `u16`, `u32`, `u64` | `UInt8`, `UInt16`, `UInt32`, `UInt64` |
137//! | `f32`, `f64` | `Float32`, `Float64` |
138//! | [`half::f16`] | `Float16` |
139//! | `bool` | `Boolean` |
140//!
141//! ## Strings & Binary
142//!
143//! | Rust Type | Arrow Type |
144//! |-----------|------------|
145//! | `String` | `Utf8` |
146//! | [`LargeUtf8`] | `LargeUtf8` (64-bit offsets) |
147//! | `Vec<u8>` | `Binary` |
148//! | [`LargeBinary`] | `LargeBinary` (64-bit offsets) |
149//! | `[u8; N]` | `FixedSizeBinary(N)` |
150//!
151//! ## Nullability
152//!
153//! | Rust Type | Arrow Nullability |
154//! |-----------|-------------------|
155//! | `T` | Non-nullable column |
156//! | `Option<T>` | Nullable column |
157//! | [`Null`] | `Null` type (always null) |
158//!
159//! ## Temporal Types
160//!
161//! | Rust Type | Arrow Type |
162//! |-----------|------------|
163//! | [`Date32`] | `Date32` (days since epoch) |
164//! | [`Date64`] | `Date64` (milliseconds since epoch) |
165//! | [`Time32<U>`](Time32) | `Time32` with unit `U` ([`Second`], [`Millisecond`]) |
166//! | [`Time64<U>`](Time64) | `Time64` with unit `U` ([`Microsecond`], [`Nanosecond`]) |
167//! | [`Timestamp<U>`] | `Timestamp` without timezone |
168//! | [`TimestampTz<U, Z>`] | `Timestamp` with timezone `Z` (e.g., [`Utc`]) |
169//! | [`Duration<U>`](Duration) | `Duration` with unit `U` |
170//!
171//! ## Intervals
172//!
173//! | Rust Type | Arrow Type |
174//! |-----------|------------|
175//! | [`IntervalYearMonth`] | `Interval(YearMonth)` |
176//! | [`IntervalDayTime`] | `Interval(DayTime)` |
177//! | [`IntervalMonthDayNano`] | `Interval(MonthDayNano)` |
178//!
179//! ## Decimal
180//!
181//! | Rust Type | Arrow Type |
182//! |-----------|------------|
183//! | [`Decimal128<P, S>`](Decimal128) | `Decimal128(P, S)` |
184//! | [`Decimal256<P, S>`](Decimal256) | `Decimal256(P, S)` |
185//!
186//! ## Nested Types
187//!
188//! | Rust Type | Arrow Type |
189//! |-----------|------------|
190//! | `#[derive(Record)]` struct | `Struct` |
191//! | [`List<T>`] | `List` (non-null items) |
192//! | [`List<Option<T>>`](List) | `List` (nullable items) |
193//! | [`LargeList<T>`](LargeList) | `LargeList` (64-bit offsets) |
194//! | [`FixedSizeList<T, N>`](FixedSizeList) | `FixedSizeList(N)` (non-null items) |
195//! | [`FixedSizeListNullable<T, N>`](FixedSizeListNullable) | `FixedSizeList(N)` (nullable items) |
196//! | [`Map<K, V>`] | `Map` (non-null values) |
197//! | [`Map<K, Option<V>>`](Map) | `Map` (nullable values) |
198//! | [`OrderedMap<K, V>`] | `Map` with `keys_sorted = true` |
199//! | [`Dictionary<K, V>`] | `Dictionary` (K: integral, V: string/binary/primitive) |
200//! | `#[derive(Union)]` enum | `Union` (Dense or Sparse) |
201//!
202//! # Zero-Copy Views (requires `views` feature)
203//!
204//! Read [`RecordBatch`](arrow_array::RecordBatch) data without allocation.
205//! Use [`AsViewsIterator::iter_views`] to iterate over borrowed row views,
206//! and [`.try_into()`](TryInto::try_into) to convert views to owned records.
207//!
208//! See the [`schema`] module for detailed documentation and examples.
209//!
210//! # Extensibility (requires `ext-hooks` feature)
211//!
212//! Customize derive behavior with hooks:
213//!
214//! ```ignore
215//! #[derive(Record)]
216//! #[record(visit(MyVisitor))] // Inject compile-time visitor
217//! #[record(field_macro = my_ext::per_field)] // Call macro per field
218//! #[record(record_macro = my_ext::per_record)] // Call macro per record
219//! struct MyRecord {
220//! #[record(ext(custom_tag))] // Tag fields with markers
221//! field: i32,
222//! }
223//! ```
224//!
225//! See `examples/12_ext_hooks.rs` for usage.
226
227pub mod bridge;
228pub mod error;
229pub mod schema;
230
231/// Prelude exporting the most common traits and markers.
232pub mod prelude {
233 // Re-export derive macros when enabled
234 #[cfg(feature = "derive")]
235 pub use typed_arrow_derive::{Record, Union};
236
237 #[cfg(feature = "views")]
238 pub use crate::AsViewsIterator;
239 #[cfg(feature = "views")]
240 pub use crate::error::ViewAccessError;
241 #[cfg(feature = "views")]
242 pub use crate::schema::{FromRecordBatch, ViewResultIteratorExt};
243 pub use crate::{
244 error::SchemaError,
245 schema::{BuildRows, ColAt, ColumnVisitor, FieldMeta, ForEachCol, Record},
246 };
247}
248
249// Re-export the derive macro when enabled
250// Re-export Arrow crates so derives can reference a stable path
251// and downstream users don't need to depend on Arrow directly.
252pub use arrow_array;
253pub use arrow_buffer;
254pub use arrow_schema;
255#[cfg(feature = "derive")]
256pub use typed_arrow_derive::{Record, Union};
257
258// Public re-exports for convenience
259pub use crate::bridge::{
260 Date32, Date64, Decimal128, Decimal256, Dictionary, Duration, FixedSizeList,
261 FixedSizeListNullable, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, LargeBinary,
262 LargeList, LargeUtf8, List, Map, Microsecond, Millisecond, Nanosecond, Null, OrderedMap,
263 Second, Time32, Time64, TimeZoneSpec, Timestamp, TimestampTz, Utc,
264};
265
/// Adds typed, row-oriented view iteration to `RecordBatch`.
#[cfg(feature = "views")]
pub trait AsViewsIterator {
    /// Borrow this RecordBatch as an iterator of typed row views for the record type `T`.
    ///
    /// Rows are exposed as borrowed references into the batch (zero-copy access).
    ///
    /// # Errors
    /// Fails with `SchemaError` when the schema of the RecordBatch does not match
    /// the one expected by the Record type `T`.
    ///
    /// # Example
    /// ```
    /// use typed_arrow::prelude::*;
    ///
    /// #[derive(Record)]
    /// struct Row {
    ///     id: i32,
    ///     name: String,
    /// }
    ///
    /// // Build a RecordBatch
    /// let rows = vec![
    ///     Row {
    ///         id: 1,
    ///         name: "Alice".to_string(),
    ///     },
    ///     Row {
    ///         id: 2,
    ///         name: "Bob".to_string(),
    ///     },
    /// ];
    /// let mut b = <Row as BuildRows>::new_builders(rows.len());
    /// b.append_rows(rows);
    /// let arrays = b.finish();
    /// let batch = arrays.into_record_batch();
    ///
    /// // Iterate with zero-copy views (`try_flatten` is a convenience method for handling errors)
    /// let views = batch.iter_views::<Row>()?.try_flatten()?;
    /// for row in views {
    ///     println!("{}: {}", row.id, row.name);
    /// }
    /// # Ok::<_, typed_arrow::error::SchemaError>(())
    /// ```
    fn iter_views<T>(&self) -> Result<T::Views<'_>, error::SchemaError>
    where
        T: schema::FromRecordBatch;
}
311
// `RecordBatch` gets `iter_views` by delegating to the record type's own
// `FromRecordBatch::from_record_batch` implementation.
#[cfg(feature = "views")]
impl AsViewsIterator for arrow_array::RecordBatch {
    fn iter_views<T>(&self) -> Result<T::Views<'_>, error::SchemaError>
    where
        T: schema::FromRecordBatch,
    {
        <T as schema::FromRecordBatch>::from_record_batch(self)
    }
}