typed_arrow/bridge/mod.rs
1//! Bridge from Rust types to Arrow typed arrays and `DataType`.
2//!
3//! This module provides a compile-time mapping from Rust value types to
4//! arrow-rs typed builders/arrays and their corresponding `arrow_schema::DataType`,
5//! avoiding any runtime `DataType` matching.
6//!
7//! - Core trait: [`ArrowBinding`] (Rust type → typed builder/array + `DataType`).
8//! - Primitives: `i{8,16,32,64}`, `u{8,16,32,64}`, `f{32,64}`, `bool`.
9//! - Utf8/Binary: `String` → `Utf8`, `Vec<u8>` → `Binary`.
10//! - Nested containers:
11//! - [`List<T>`] with non-null items, and [`List<Option<T>>`] for nullable items.
12//! - [`Dictionary<K, String>`] → dictionary-encoded Utf8 values.
13//! - [`Timestamp<U>`] with unit markers ([`Second`], [`Millisecond`], [`Microsecond`],
14//! [`Nanosecond`]) and [`TimestampTz<U, Z>`] for timezone-aware timestamps.
15//! - Any `T: Record + StructMeta` binds to an Arrow `StructArray`.
16//!
17//! See tests for end-to-end examples and usage patterns.
18
19use arrow_array::Array;
20use arrow_schema::DataType;
21
22/// Binding from a Rust type to Arrow typed builders/arrays and `DataType`.
23///
24/// Implementations of this trait provide a zero-cost, monomorphized mapping
25/// between a Rust value type and its Arrow representation.
26///
27/// All methods are marked `#[inline]` to enable cross-crate inlining for
28/// optimal performance in hot loops.
29pub trait ArrowBinding {
30 /// Concrete Arrow builder type used for this Rust type.
31 type Builder;
32
33 /// Concrete Arrow array type produced by `finish`.
34 type Array: Array;
35
36 /// Whether this type is inherently nullable (e.g. `Null`).
37 const NULLABLE: bool = false;
38
39 /// The Arrow `DataType` corresponding to this Rust type.
40 fn data_type() -> DataType;
41
42 /// Create a new builder with an optional capacity hint.
43 fn new_builder(capacity: usize) -> Self::Builder;
44
45 /// Estimated bytes per value for variable-length types (String, Binary, etc.).
46 /// Returns 0 for fixed-size types. Used to pre-allocate buffer space.
47 #[inline]
48 fn estimated_bytes_per_value() -> usize {
49 0
50 }
51
52 /// Append a non-null value to the builder.
53 fn append_value(b: &mut Self::Builder, v: &Self);
54
55 /// Append a null to the builder.
56 fn append_null(b: &mut Self::Builder);
57
58 /// Finish the builder and produce a typed Arrow array.
59 fn finish(b: Self::Builder) -> Self::Array;
60}
61
/// Read-side binding: exposes the elements of an Arrow array as borrowed views.
///
/// A view borrows from the array it was read out of, so its lifetime is tied
/// to that array and element access is intended to be zero-copy.
///
/// This is deliberately a separate trait from `ArrowBinding`, which makes view
/// support opt-in per type; complex types such as `List` or `Map` may not
/// provide an implementation.
///
/// Only compiled when the `views` feature is enabled.
#[cfg(feature = "views")]
pub trait ArrowBindingView {
    /// Arrow array type that views are read from.
    type Array: Array;

    /// Borrowed type handed back for a single element — for example
    /// `&'a str` when reading a `StringArray`, or `i64` for an `Int64Array`.
    type View<'a>
    where
        Self: 'a;

    /// Reads the element at position `index` of `array` as a view.
    ///
    /// # Errors
    /// - `ViewAccessError::OutOfBounds` when `index >= array.len()`.
    /// - `ViewAccessError::UnexpectedNull` when the slot holds a null.
    /// - `ViewAccessError::TypeMismatch` when an array downcast fails.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError>;
}
91
92mod binary;
93mod column;
94mod decimals;
95mod dictionary;
96mod intervals;
97mod lists;
98mod map;
99mod null_type;
100mod option;
101mod primitives;
102mod record_struct;
103mod strings;
104mod temporal;
105
106// Public re-exports for convenience
107pub use binary::LargeBinary;
108pub use column::{ColumnBuilder, data_type_of};
109pub use decimals::{Decimal128, Decimal256};
110pub use dictionary::{DictKey, Dictionary};
111pub use intervals::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
112pub use lists::{FixedSizeList, FixedSizeListNullable, LargeList, List};
113pub use map::{Map, OrderedMap};
114pub use null_type::Null;
115pub use strings::LargeUtf8;
116pub use temporal::{
117 Date32, Date64, Duration, Microsecond, Millisecond, Nanosecond, Second, Time32, Time64,
118 TimeZoneSpec, Timestamp, TimestampTz, Utc,
119};