typed_arrow/
lib.rs

1#![deny(missing_docs)]
2//! Compile-time Arrow schema definition using Rust types.
3//!
4//! `typed-arrow` maps Rust structs directly to Arrow schemas, builders, and arrays
5//! without runtime `DataType` switching. This enables zero-cost, monomorphized
6//! column construction with compile-time type safety.
7//!
8//! # Quick Start
9//!
10//! ```
11//! use typed_arrow::prelude::*;
12//!
13//! #[derive(Record)]
14//! struct Person {
15//!     id: i64,
16//!     name: String,
17//!     score: Option<f64>,
18//! }
19//!
20//! // Build arrays from rows
21//! let rows = vec![
22//!     Person {
23//!         id: 1,
24//!         name: "Alice".into(),
25//!         score: Some(95.5),
26//!     },
27//!     Person {
28//!         id: 2,
29//!         name: "Bob".into(),
30//!         score: None,
31//!     },
32//! ];
33//!
34//! let mut builders = <Person as BuildRows>::new_builders(rows.len());
35//! builders.append_rows(rows);
36//! let batch = builders.finish().into_record_batch();
37//!
38//! assert_eq!(batch.num_rows(), 2);
39//! assert_eq!(batch.num_columns(), 3);
40//! ```
41//!
42//! # Cargo Features
43//!
44//! | Feature | Default | Description |
45//! |---------|---------|-------------|
46//! | `derive` | ✓ | Enables [`#[derive(Record)]`](Record) and [`#[derive(Union)]`](Union) macros |
47//! | `views` | ✓ | Zero-copy views for reading [`RecordBatch`](arrow_array::RecordBatch) data |
48//! | `ext-hooks` | | Extensibility hooks for custom derive behavior |
49//! | `arrow-55` | | Use Arrow 55.x crates |
50//! | `arrow-56` | | Use Arrow 56.x crates |
51//! | `arrow-57` | ✓ | Use Arrow 57.x crates |
52//!
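//! Exactly one Arrow feature must be enabled. Because `arrow-57` is on by default,
//! selecting `arrow-55` or `arrow-56` also requires disabling default features;
//! otherwise two Arrow features are active at once and the build fails.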
54//!
55//! # Derive Macros
56//!
57//! ## `#[derive(Record)]`
58//!
59//! Generates Arrow schema traits for structs. See [`schema::Record`] for the marker trait.
60//!
61//! ```
62//! use typed_arrow::prelude::*;
63//!
64//! #[derive(Record)]
65//! struct Event {
66//!     id: i64,              // Non-null Int64
67//!     name: Option<String>, // Nullable Utf8
68//!     #[record(name = "eventType")] // Override Arrow field name
69//!     event_type: String,
70//! }
71//! ```
72//!
73//! **Field attributes:**
74//! - `#[record(name = "...")]` — Override the Arrow field name
75//! - `#[arrow(nullable)]` — Force nullability even without `Option<T>`
76//! - `#[metadata(k = "key", v = "value")]` — Add field-level metadata
77//! - `#[schema_metadata(k = "key", v = "value")]` — Add schema-level metadata (on struct)
78//!
79//! ## `#[derive(Union)]`
80//!
81//! Generates Arrow Union type bindings for enums. Implements
82//! [`ArrowBinding`](bridge::ArrowBinding).
83//!
84//! ```
85//! use typed_arrow::prelude::*;
86//!
87//! #[derive(Union)]
88//! #[union(mode = "dense")] // or "sparse"
89//! enum Value {
90//!     #[union(tag = 0)]
91//!     Int(i32),
92//!     #[union(tag = 1, field = "text")]
93//!     Str(String),
94//! }
95//! ```
96//!
97//! **Container attributes:**
98//! - `#[union(mode = "dense"|"sparse")]` — Union mode (default: dense)
99//! - `#[union(null_variant = "None")]` — Designate a null-carrier variant
100//! - `#[union(tags(A = 0, B = 1))]` — Set all variant tags at once
101//!
102//! **Variant attributes:**
103//! - `#[union(tag = N)]` — Set type ID for this variant
104//! - `#[union(field = "name")]` — Override Arrow field name
105//! - `#[union(null)]` — Mark as the null-carrier variant
106//!
107//! # Core Traits
108//!
109//! ## Schema Traits (in [`schema`] module)
110//!
111//! | Trait | Description |
112//! |-------|-------------|
113//! | [`Record`](schema::Record) | Marker for structs with `const LEN: usize` columns |
114//! | [`ColAt<I>`](schema::ColAt) | Per-column metadata: `Native`, `ColumnArray`, `ColumnBuilder`, `NULLABLE`, `NAME`, `data_type()` |
115//! | [`ForEachCol`](schema::ForEachCol) | Compile-time column iteration via [`ColumnVisitor`](schema::ColumnVisitor) |
116//! | [`SchemaMeta`](schema::SchemaMeta) | Runtime schema access: `fields()`, `schema()`, `metadata()` |
117//! | [`StructMeta`](schema::StructMeta) | Nested struct support: `child_fields()`, `new_struct_builder()` |
118//!
119//! ## Row Building Traits (in [`schema`] module)
120//!
121//! | Trait | Description |
122//! |-------|-------------|
123//! | [`BuildRows`](schema::BuildRows) | Entry point: `new_builders(capacity)` → `Builders` |
124//! | [`RowBuilder<T>`](schema::RowBuilder) | `append_row()`, `append_rows()`, `append_option_row()`, `finish()` |
125//! | [`IntoRecordBatch`](schema::IntoRecordBatch) | Convert finished arrays to [`RecordBatch`](arrow_array::RecordBatch) |
126//! | [`AppendStruct`](schema::AppendStruct) | Append struct fields into a `StructBuilder` |
127//!
128//! ## Type Binding Trait (in [`bridge`] module)
129//!
130//! | Trait | Description |
131//! |-------|-------------|
132//! | [`ArrowBinding`](bridge::ArrowBinding) | Maps Rust types to Arrow: `Builder`, `Array`, `data_type()`, `append_value()`, `finish()` |
133//!
134//! # Supported Types
135//!
136//! ## Primitives
137//!
138//! | Rust Type | Arrow Type |
139//! |-----------|------------|
140//! | `i8`, `i16`, `i32`, `i64` | `Int8`, `Int16`, `Int32`, `Int64` |
141//! | `u8`, `u16`, `u32`, `u64` | `UInt8`, `UInt16`, `UInt32`, `UInt64` |
142//! | `f32`, `f64` | `Float32`, `Float64` |
143//! | [`half::f16`] | `Float16` |
144//! | `bool` | `Boolean` |
145//!
146//! ## Strings & Binary
147//!
148//! | Rust Type | Arrow Type |
149//! |-----------|------------|
150//! | `String` | `Utf8` |
151//! | [`LargeUtf8`] | `LargeUtf8` (64-bit offsets) |
152//! | `Vec<u8>` | `Binary` |
153//! | [`LargeBinary`] | `LargeBinary` (64-bit offsets) |
154//! | `[u8; N]` | `FixedSizeBinary(N)` |
155//!
156//! ## Nullability
157//!
158//! | Rust Type | Arrow Nullability |
159//! |-----------|-------------------|
160//! | `T` | Non-nullable column |
161//! | `Option<T>` | Nullable column |
162//! | [`Null`] | `Null` type (always null) |
163//!
164//! ## Temporal Types
165//!
166//! | Rust Type | Arrow Type |
167//! |-----------|------------|
168//! | [`Date32`] | `Date32` (days since epoch) |
169//! | [`Date64`] | `Date64` (milliseconds since epoch) |
170//! | [`Time32<U>`](Time32) | `Time32` with unit `U` ([`Second`], [`Millisecond`]) |
171//! | [`Time64<U>`](Time64) | `Time64` with unit `U` ([`Microsecond`], [`Nanosecond`]) |
172//! | [`Timestamp<U>`] | `Timestamp` without timezone |
173//! | [`TimestampTz<U, Z>`] | `Timestamp` with timezone `Z` (e.g., [`Utc`]) |
174//! | [`Duration<U>`](Duration) | `Duration` with unit `U` |
175//!
176//! ## Intervals
177//!
178//! | Rust Type | Arrow Type |
179//! |-----------|------------|
180//! | [`IntervalYearMonth`] | `Interval(YearMonth)` |
181//! | [`IntervalDayTime`] | `Interval(DayTime)` |
182//! | [`IntervalMonthDayNano`] | `Interval(MonthDayNano)` |
183//!
184//! ## Decimal
185//!
186//! | Rust Type | Arrow Type |
187//! |-----------|------------|
188//! | [`Decimal128<P, S>`](Decimal128) | `Decimal128(P, S)` |
189//! | [`Decimal256<P, S>`](Decimal256) | `Decimal256(P, S)` |
190//!
191//! ## Nested Types
192//!
193//! | Rust Type | Arrow Type |
194//! |-----------|------------|
195//! | `#[derive(Record)]` struct | `Struct` |
196//! | [`List<T>`] | `List` (non-null items) |
197//! | [`List<Option<T>>`](List) | `List` (nullable items) |
198//! | [`LargeList<T>`](LargeList) | `LargeList` (64-bit offsets) |
199//! | [`FixedSizeList<T, N>`](FixedSizeList) | `FixedSizeList(N)` (non-null items) |
200//! | [`FixedSizeListNullable<T, N>`](FixedSizeListNullable) | `FixedSizeList(N)` (nullable items) |
201//! | [`Map<K, V>`] | `Map` (non-null values) |
202//! | [`Map<K, Option<V>>`](Map) | `Map` (nullable values) |
203//! | [`OrderedMap<K, V>`] | `Map` with `keys_sorted = true` |
204//! | [`Dictionary<K, V>`] | `Dictionary` (K: integral, V: string/binary/primitive) |
205//! | `#[derive(Union)]` enum | `Union` (Dense or Sparse) |
206//!
207//! # Zero-Copy Views (requires `views` feature)
208//!
209//! Read [`RecordBatch`](arrow_array::RecordBatch) data without allocation.
210//! Use [`AsViewsIterator::iter_views`] to iterate over borrowed row views,
211//! and [`.try_into()`](TryInto::try_into) to convert views to owned records.
212//!
213//! See the [`schema`] module for detailed documentation and examples.
214//!
215//! # Extensibility (requires `ext-hooks` feature)
216//!
217//! Customize derive behavior with hooks:
218//!
219//! ```ignore
220//! #[derive(Record)]
221//! #[record(visit(MyVisitor))]                    // Inject compile-time visitor
222//! #[record(field_macro = my_ext::per_field)]     // Call macro per field
223//! #[record(record_macro = my_ext::per_record)]   // Call macro per record
224//! struct MyRecord {
225//!     #[record(ext(custom_tag))]                 // Tag fields with markers
226//!     field: i32,
227//! }
228//! ```
229//!
230//! See `examples/12_ext_hooks.rs` for usage.
231
232#[cfg(all(feature = "arrow-55", any(feature = "arrow-56", feature = "arrow-57")))]
233compile_error!("Select exactly one Arrow feature: arrow-55, arrow-56, or arrow-57.");
234#[cfg(all(feature = "arrow-56", feature = "arrow-57"))]
235compile_error!("Select exactly one Arrow feature: arrow-55, arrow-56, or arrow-57.");
236#[cfg(not(any(feature = "arrow-55", feature = "arrow-56", feature = "arrow-57")))]
237compile_error!("Enable one Arrow feature: arrow-55, arrow-56, or arrow-57.");
238
239#[cfg(feature = "arrow-55")]
240pub extern crate arrow_array_55 as arrow_array;
241#[cfg(feature = "arrow-56")]
242pub extern crate arrow_array_56 as arrow_array;
243#[cfg(feature = "arrow-57")]
244pub extern crate arrow_array_57 as arrow_array;
245
246#[cfg(feature = "arrow-55")]
247pub extern crate arrow_buffer_55 as arrow_buffer;
248#[cfg(feature = "arrow-56")]
249pub extern crate arrow_buffer_56 as arrow_buffer;
250#[cfg(feature = "arrow-57")]
251pub extern crate arrow_buffer_57 as arrow_buffer;
252
253#[cfg(feature = "arrow-55")]
254pub extern crate arrow_data_55 as arrow_data;
255#[cfg(feature = "arrow-56")]
256pub extern crate arrow_data_56 as arrow_data;
257#[cfg(feature = "arrow-57")]
258pub extern crate arrow_data_57 as arrow_data;
259
260#[cfg(feature = "arrow-55")]
261pub extern crate arrow_schema_55 as arrow_schema;
262#[cfg(feature = "arrow-56")]
263pub extern crate arrow_schema_56 as arrow_schema;
264#[cfg(feature = "arrow-57")]
265pub extern crate arrow_schema_57 as arrow_schema;
266
267pub mod bridge;
268pub mod error;
269pub mod schema;
270
271/// Prelude exporting the most common traits and markers.
272pub mod prelude {
273    // Re-export derive macros when enabled
274    #[cfg(feature = "derive")]
275    pub use typed_arrow_derive::{Record, Union};
276
277    #[cfg(feature = "views")]
278    pub use crate::AsViewsIterator;
279    #[cfg(feature = "views")]
280    pub use crate::error::ViewAccessError;
281    #[cfg(feature = "views")]
282    pub use crate::schema::{FromRecordBatch, ViewResultIteratorExt};
283    #[cfg(any(feature = "arrow-55", feature = "arrow-56", feature = "arrow-57"))]
284    pub use crate::{arrow_array, arrow_buffer, arrow_data, arrow_schema};
285    pub use crate::{
286        error::SchemaError,
287        schema::{BuildRows, ColAt, ColumnVisitor, FieldMeta, ForEachCol, Record},
288    };
289}
290
// Re-export the derive macros at the crate root when the `derive` feature is enabled.
// (The Arrow crates are re-exported above via `pub extern crate`, so derives can
// reference a stable path and downstream users don't need to depend on Arrow directly.)
294#[cfg(feature = "derive")]
295pub use typed_arrow_derive::{Record, Union};
296
297// Public re-exports for convenience
298pub use crate::bridge::{
299    Date32, Date64, Decimal128, Decimal256, Dictionary, Duration, FixedSizeList,
300    FixedSizeListNullable, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, LargeBinary,
301    LargeList, LargeUtf8, List, Map, Microsecond, Millisecond, Nanosecond, Null, OrderedMap,
302    Second, Time32, Time64, TimeZoneSpec, Timestamp, TimestampTz, Utc,
303};
304
/// Extension trait that adds typed, row-oriented view iteration to a `RecordBatch`.
///
/// Only compiled when the `views` feature is enabled.
#[cfg(feature = "views")]
pub trait AsViewsIterator {
    /// Build the typed view iterator `T::Views` over the rows of this RecordBatch.
    ///
    /// Rows are exposed as borrowed references into the batch (zero-copy) rather
    /// than as owned copies.
    ///
    /// # Errors
    /// Returns `SchemaError` when the RecordBatch schema does not match the Record
    /// type `T`.
    ///
    /// # Example
    /// ```
    /// use typed_arrow::prelude::*;
    ///
    /// #[derive(Record)]
    /// struct Row {
    ///     id: i32,
    ///     name: String,
    /// }
    ///
    /// // Build a RecordBatch
    /// let rows = vec![
    ///     Row {
    ///         id: 1,
    ///         name: "Alice".to_string(),
    ///     },
    ///     Row {
    ///         id: 2,
    ///         name: "Bob".to_string(),
    ///     },
    /// ];
    /// let mut b = <Row as BuildRows>::new_builders(rows.len());
    /// b.append_rows(rows);
    /// let arrays = b.finish();
    /// let batch = arrays.into_record_batch();
    ///
    /// // Iterate with zero-copy views (using convenience method to handle errors)
    /// let views = batch.iter_views::<Row>()?.try_flatten()?;
    /// for row in views {
    ///     println!("{}: {}", row.id, row.name);
    /// }
    /// # Ok::<_, typed_arrow::error::SchemaError>(())
    /// ```
    fn iter_views<T>(&self) -> Result<T::Views<'_>, error::SchemaError>
    where
        T: schema::FromRecordBatch;
}
350
/// `RecordBatch` implementation: view construction is handed to the target
/// type's `FromRecordBatch` implementation.
#[cfg(feature = "views")]
impl AsViewsIterator for arrow_array::RecordBatch {
    fn iter_views<T>(&self) -> Result<T::Views<'_>, error::SchemaError>
    where
        T: schema::FromRecordBatch,
    {
        // Pure delegation: `T` decides how (and whether) views can be built
        // from this batch.
        T::from_record_batch(self)
    }
}
356}