Skip to main content

typed_arrow/
lib.rs

1#![deny(missing_docs)]
2//! Compile-time Arrow schema definition using Rust types.
3//!
4//! `typed-arrow` maps Rust structs directly to Arrow schemas, builders, and arrays
5//! without runtime `DataType` switching. This enables zero-cost, monomorphized
6//! column construction with compile-time type safety.
7//!
8//! # Quick Start
9//!
10//! ```
11//! use typed_arrow::prelude::*;
12//!
13//! #[derive(Record)]
14//! struct Person {
15//!     id: i64,
16//!     name: String,
17//!     score: Option<f64>,
18//! }
19//!
20//! // Build arrays from rows
21//! let rows = vec![
22//!     Person {
23//!         id: 1,
24//!         name: "Alice".into(),
25//!         score: Some(95.5),
26//!     },
27//!     Person {
28//!         id: 2,
29//!         name: "Bob".into(),
30//!         score: None,
31//!     },
32//! ];
33//!
34//! let mut builders = <Person as BuildRows>::new_builders(rows.len());
35//! builders.append_rows(rows);
36//! let batch = builders.finish().into_record_batch();
37//!
38//! assert_eq!(batch.num_rows(), 2);
39//! assert_eq!(batch.num_columns(), 3);
40//! ```
41//!
42//! # Cargo Features
43//!
44//! | Feature | Default | Description |
45//! |---------|---------|-------------|
46//! | `derive` | ✓ | Enables [`#[derive(Record)]`](Record) and [`#[derive(Union)]`](Union) macros |
47//! | `views` | ✓ | Zero-copy views for reading [`RecordBatch`](arrow_array::RecordBatch) data |
48//! | `ext-hooks` | | Extensibility hooks for custom derive behavior |
49//! | `arrow-55` | | Use Arrow 55.x crates |
50//! | `arrow-56` | | Use Arrow 56.x crates |
51//! | `arrow-57` | | Use Arrow 57.x crates |
52//! | `arrow-58` | ✓ | Use Arrow 58.x crates |
53//!
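//! Exactly one Arrow feature must be enabled. Because `arrow-58` is a default feature,
//! selecting a different Arrow version also requires disabling default features
//! (`default-features = false`) and re-enabling `derive` / `views` explicitly if needed.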
55//!
56//! # Derive Macros
57//!
58//! ## `#[derive(Record)]`
59//!
60//! Generates Arrow schema traits for structs. See [`schema::Record`] for the marker trait.
61//!
62//! ```
63//! use typed_arrow::prelude::*;
64//!
65//! #[derive(Record)]
66//! struct Event {
67//!     id: i64,              // Non-null Int64
68//!     name: Option<String>, // Nullable Utf8
69//!     #[record(name = "eventType")] // Override Arrow field name
70//!     event_type: String,
71//! }
72//! ```
73//!
74//! **Field attributes:**
75//! - `#[record(name = "...")]` — Override the Arrow field name
76//! - `#[arrow(nullable)]` — Force nullability even without `Option<T>`
77//! - `#[metadata(k = "key", v = "value")]` — Add field-level metadata
78//! - `#[schema_metadata(k = "key", v = "value")]` — Add schema-level metadata (on struct)
79//!
80//! ## `#[derive(Union)]`
81//!
82//! Generates Arrow Union type bindings for enums. Implements
83//! [`ArrowBinding`](bridge::ArrowBinding).
84//!
85//! ```
86//! use typed_arrow::prelude::*;
87//!
88//! #[derive(Union)]
89//! #[union(mode = "dense")] // or "sparse"
90//! enum Value {
91//!     #[union(tag = 0)]
92//!     Int(i32),
93//!     #[union(tag = 1, field = "text")]
94//!     Str(String),
95//! }
96//! ```
97//!
98//! **Container attributes:**
99//! - `#[union(mode = "dense"|"sparse")]` — Union mode (default: dense)
100//! - `#[union(null_variant = "None")]` — Designate a null-carrier variant
101//! - `#[union(tags(A = 0, B = 1))]` — Set all variant tags at once
102//!
103//! **Variant attributes:**
104//! - `#[union(tag = N)]` — Set type ID for this variant
105//! - `#[union(field = "name")]` — Override Arrow field name
106//! - `#[union(null)]` — Mark as the null-carrier variant
107//!
108//! # Core Traits
109//!
110//! ## Schema Traits (in [`schema`] module)
111//!
112//! | Trait | Description |
113//! |-------|-------------|
114//! | [`Record`](schema::Record) | Marker for structs with `const LEN: usize` columns |
115//! | [`ColAt<I>`](schema::ColAt) | Per-column metadata: `Native`, `ColumnArray`, `ColumnBuilder`, `NULLABLE`, `NAME`, `data_type()` |
116//! | [`ForEachCol`](schema::ForEachCol) | Compile-time column iteration via [`ColumnVisitor`](schema::ColumnVisitor) |
117//! | [`SchemaMeta`](schema::SchemaMeta) | Runtime schema access: `fields()`, `schema()`, `metadata()` |
118//! | [`StructMeta`](schema::StructMeta) | Nested struct support: `child_fields()`, `new_struct_builder()` |
119//!
120//! ## Row Building Traits (in [`schema`] module)
121//!
122//! | Trait | Description |
123//! |-------|-------------|
124//! | [`BuildRows`](schema::BuildRows) | Entry point: `new_builders(capacity)` → `Builders` |
125//! | [`RowBuilder<T>`](schema::RowBuilder) | `append_row()`, `append_rows()`, `append_option_row()`, `finish()` |
126//! | [`IntoRecordBatch`](schema::IntoRecordBatch) | Convert finished arrays to [`RecordBatch`](arrow_array::RecordBatch) |
127//! | [`AppendStruct`](schema::AppendStruct) | Append struct fields into a `StructBuilder` |
128//!
129//! ## Type Binding Trait (in [`bridge`] module)
130//!
131//! | Trait | Description |
132//! |-------|-------------|
133//! | [`ArrowBinding`](bridge::ArrowBinding) | Maps Rust types to Arrow: `Builder`, `Array`, `data_type()`, `append_value()`, `finish()` |
134//!
135//! # Supported Types
136//!
137//! ## Primitives
138//!
139//! | Rust Type | Arrow Type |
140//! |-----------|------------|
141//! | `i8`, `i16`, `i32`, `i64` | `Int8`, `Int16`, `Int32`, `Int64` |
142//! | `u8`, `u16`, `u32`, `u64` | `UInt8`, `UInt16`, `UInt32`, `UInt64` |
143//! | `f32`, `f64` | `Float32`, `Float64` |
144//! | [`half::f16`] | `Float16` |
145//! | `bool` | `Boolean` |
146//!
147//! ## Strings & Binary
148//!
149//! | Rust Type | Arrow Type |
150//! |-----------|------------|
151//! | `String` | `Utf8` |
152//! | [`LargeUtf8`] | `LargeUtf8` (64-bit offsets) |
153//! | `Vec<u8>` | `Binary` |
154//! | [`LargeBinary`] | `LargeBinary` (64-bit offsets) |
155//! | `[u8; N]` | `FixedSizeBinary(N)` |
156//!
157//! ## Nullability
158//!
159//! | Rust Type | Arrow Nullability |
160//! |-----------|-------------------|
161//! | `T` | Non-nullable column |
162//! | `Option<T>` | Nullable column |
163//! | [`Null`] | `Null` type (always null) |
164//!
165//! ## Temporal Types
166//!
167//! | Rust Type | Arrow Type |
168//! |-----------|------------|
169//! | [`Date32`] | `Date32` (days since epoch) |
170//! | [`Date64`] | `Date64` (milliseconds since epoch) |
171//! | [`Time32<U>`](Time32) | `Time32` with unit `U` ([`Second`], [`Millisecond`]) |
172//! | [`Time64<U>`](Time64) | `Time64` with unit `U` ([`Microsecond`], [`Nanosecond`]) |
173//! | [`Timestamp<U>`] | `Timestamp` without timezone |
174//! | [`TimestampTz<U, Z>`] | `Timestamp` with timezone `Z` (e.g., [`Utc`]) |
175//! | [`Duration<U>`](Duration) | `Duration` with unit `U` |
176//!
177//! ## Intervals
178//!
179//! | Rust Type | Arrow Type |
180//! |-----------|------------|
181//! | [`IntervalYearMonth`] | `Interval(YearMonth)` |
182//! | [`IntervalDayTime`] | `Interval(DayTime)` |
183//! | [`IntervalMonthDayNano`] | `Interval(MonthDayNano)` |
184//!
185//! ## Decimal
186//!
187//! | Rust Type | Arrow Type |
188//! |-----------|------------|
189//! | [`Decimal128<P, S>`](Decimal128) | `Decimal128(P, S)` |
190//! | [`Decimal256<P, S>`](Decimal256) | `Decimal256(P, S)` |
191//!
192//! ## Nested Types
193//!
194//! | Rust Type | Arrow Type |
195//! |-----------|------------|
196//! | `#[derive(Record)]` struct | `Struct` |
197//! | [`List<T>`] | `List` (non-null items) |
198//! | [`List<Option<T>>`](List) | `List` (nullable items) |
199//! | [`LargeList<T>`](LargeList) | `LargeList` (64-bit offsets) |
200//! | [`FixedSizeList<T, N>`](FixedSizeList) | `FixedSizeList(N)` (non-null items) |
201//! | [`FixedSizeListNullable<T, N>`](FixedSizeListNullable) | `FixedSizeList(N)` (nullable items) |
202//! | [`Map<K, V>`] | `Map` (non-null values) |
203//! | [`Map<K, Option<V>>`](Map) | `Map` (nullable values) |
204//! | [`OrderedMap<K, V>`] | `Map` with `keys_sorted = true` |
205//! | [`Dictionary<K, V>`] | `Dictionary` (K: integral, V: string/binary/primitive) |
206//! | `#[derive(Union)]` enum | `Union` (Dense or Sparse) |
207//!
208//! # Zero-Copy Views (requires `views` feature)
209//!
210//! Read [`RecordBatch`](arrow_array::RecordBatch) data without allocation.
211//! Use [`AsViewsIterator::iter_views`] to iterate over borrowed row views,
212//! and [`.try_into()`](TryInto::try_into) to convert views to owned records.
213//!
214//! See the [`schema`] module for detailed documentation and examples.
215//!
216//! # Extensibility (requires `ext-hooks` feature)
217//!
218//! Customize derive behavior with hooks:
219//!
220//! ```ignore
221//! #[derive(Record)]
222//! #[record(visit(MyVisitor))]                    // Inject compile-time visitor
223//! #[record(field_macro = my_ext::per_field)]     // Call macro per field
224//! #[record(record_macro = my_ext::per_record)]   // Call macro per record
225//! struct MyRecord {
226//!     #[record(ext(custom_tag))]                 // Tag fields with markers
227//!     field: i32,
228//! }
229//! ```
230//!
231//! See `examples/12_ext_hooks.rs` for usage.
232
233#[cfg(all(
234    feature = "arrow-55",
235    any(feature = "arrow-56", feature = "arrow-57", feature = "arrow-58")
236))]
237compile_error!("Select exactly one Arrow feature: arrow-55, arrow-56, arrow-57, or arrow-58.");
238#[cfg(all(feature = "arrow-56", any(feature = "arrow-57", feature = "arrow-58")))]
239compile_error!("Select exactly one Arrow feature: arrow-55, arrow-56, arrow-57, or arrow-58.");
240#[cfg(all(feature = "arrow-57", feature = "arrow-58"))]
241compile_error!("Select exactly one Arrow feature: arrow-55, arrow-56, arrow-57, or arrow-58.");
242#[cfg(not(any(
243    feature = "arrow-55",
244    feature = "arrow-56",
245    feature = "arrow-57",
246    feature = "arrow-58"
247)))]
248compile_error!("Enable one Arrow feature: arrow-55, arrow-56, arrow-57, or arrow-58.");
249
250#[cfg(feature = "arrow-55")]
251pub extern crate arrow_array_55 as arrow_array;
252#[cfg(feature = "arrow-56")]
253pub extern crate arrow_array_56 as arrow_array;
254#[cfg(feature = "arrow-57")]
255pub extern crate arrow_array_57 as arrow_array;
256#[cfg(feature = "arrow-58")]
257pub extern crate arrow_array_58 as arrow_array;
258
259#[cfg(feature = "arrow-55")]
260pub extern crate arrow_buffer_55 as arrow_buffer;
261#[cfg(feature = "arrow-56")]
262pub extern crate arrow_buffer_56 as arrow_buffer;
263#[cfg(feature = "arrow-57")]
264pub extern crate arrow_buffer_57 as arrow_buffer;
265#[cfg(feature = "arrow-58")]
266pub extern crate arrow_buffer_58 as arrow_buffer;
267
268#[cfg(feature = "arrow-55")]
269pub extern crate arrow_data_55 as arrow_data;
270#[cfg(feature = "arrow-56")]
271pub extern crate arrow_data_56 as arrow_data;
272#[cfg(feature = "arrow-57")]
273pub extern crate arrow_data_57 as arrow_data;
274#[cfg(feature = "arrow-58")]
275pub extern crate arrow_data_58 as arrow_data;
276
277#[cfg(feature = "arrow-55")]
278pub extern crate arrow_schema_55 as arrow_schema;
279#[cfg(feature = "arrow-56")]
280pub extern crate arrow_schema_56 as arrow_schema;
281#[cfg(feature = "arrow-57")]
282pub extern crate arrow_schema_57 as arrow_schema;
283#[cfg(feature = "arrow-58")]
284pub extern crate arrow_schema_58 as arrow_schema;
285
286pub mod bridge;
287pub mod error;
288pub mod schema;
289
290/// Prelude exporting the most common traits and markers.
291pub mod prelude {
292    // Re-export derive macros when enabled
293    #[cfg(feature = "derive")]
294    pub use typed_arrow_derive::{Record, Union};
295
296    #[cfg(feature = "views")]
297    pub use crate::AsViewsIterator;
298    #[cfg(feature = "views")]
299    pub use crate::error::ViewAccessError;
300    #[cfg(feature = "views")]
301    pub use crate::schema::{FromRecordBatch, ViewResultIteratorExt};
302    #[cfg(any(
303        feature = "arrow-55",
304        feature = "arrow-56",
305        feature = "arrow-57",
306        feature = "arrow-58"
307    ))]
308    pub use crate::{arrow_array, arrow_buffer, arrow_data, arrow_schema};
309    pub use crate::{
310        error::SchemaError,
311        schema::{BuildRows, ColAt, ColumnVisitor, FieldMeta, ForEachCol, Record},
312    };
313}
314
// Re-export the derive macros at the crate root when the `derive` feature is enabled.
// (The Arrow crates are re-exported above so that derive output can reference a stable
// path and downstream users don't need to depend on Arrow directly.)
318#[cfg(feature = "derive")]
319pub use typed_arrow_derive::{Record, Union};
320
321// Public re-exports for convenience
322pub use crate::bridge::{
323    Date32, Date64, Decimal128, Decimal256, Dictionary, Duration, FixedSizeList,
324    FixedSizeListNullable, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, LargeBinary,
325    LargeList, LargeUtf8, List, Map, Microsecond, Millisecond, Nanosecond, Null, OrderedMap,
326    Second, Time32, Time64, TimeZoneSpec, Timestamp, TimestampTz, Utc,
327};
328
/// Adds typed, row-oriented view iteration to `RecordBatch`.
#[cfg(feature = "views")]
pub trait AsViewsIterator {
    /// Create the typed row views (`T::Views`) over this `RecordBatch`.
    ///
    /// The views borrow from the batch, giving zero-copy access to its data as
    /// borrowed references.
    ///
    /// # Errors
    ///
    /// Returns `SchemaError` when the schema of the `RecordBatch` does not match the
    /// one expected by the record type `T`.
    ///
    /// # Example
    ///
    /// ```
    /// use typed_arrow::prelude::*;
    ///
    /// #[derive(Record)]
    /// struct Row {
    ///     id: i32,
    ///     name: String,
    /// }
    ///
    /// // Build a RecordBatch
    /// let rows = vec![
    ///     Row {
    ///         id: 1,
    ///         name: "Alice".to_string(),
    ///     },
    ///     Row {
    ///         id: 2,
    ///         name: "Bob".to_string(),
    ///     },
    /// ];
    /// let mut b = <Row as BuildRows>::new_builders(rows.len());
    /// b.append_rows(rows);
    /// let arrays = b.finish();
    /// let batch = arrays.into_record_batch();
    ///
    /// // Iterate with zero-copy views (using convenience method to handle errors)
    /// let views = batch.iter_views::<Row>()?.try_flatten()?;
    /// for row in views {
    ///     println!("{}: {}", row.id, row.name);
    /// }
    /// # Ok::<_, typed_arrow::error::SchemaError>(())
    /// ```
    fn iter_views<T>(&self) -> Result<T::Views<'_>, error::SchemaError>
    where
        T: schema::FromRecordBatch;
}
374
// `RecordBatch` implements view iteration by delegating to the record type's own
// `from_record_batch` constructor.
#[cfg(feature = "views")]
impl AsViewsIterator for arrow_array::RecordBatch {
    fn iter_views<T>(&self) -> Result<T::Views<'_>, error::SchemaError>
    where
        T: schema::FromRecordBatch,
    {
        T::from_record_batch(self)
    }
}