typed_arrow/bridge/
mod.rs

1//! Bridge from Rust types to Arrow typed arrays and `DataType`.
2//!
3//! This module provides a compile-time mapping from Rust value types to
4//! arrow-rs typed builders/arrays and their corresponding `arrow_schema::DataType`,
5//! avoiding any runtime `DataType` matching.
6//!
7//! - Core trait: [`ArrowBinding`] (Rust type → typed builder/array + `DataType`).
8//! - Primitives: `i{8,16,32,64}`, `u{8,16,32,64}`, `f{32,64}`, `bool`.
9//! - Utf8/Binary: `String` → `Utf8`, `Vec<u8>` → `Binary`.
10//! - Nested containers and wrapper types:
11//!   - [`List<T>`] with non-null items, and [`List<Option<T>>`] for nullable items.
12//!   - [`Dictionary<K, String>`] → dictionary-encoded Utf8 values.
13//!   - [`Timestamp<U>`] with unit markers ([`Second`], [`Millisecond`], [`Microsecond`],
14//!     [`Nanosecond`]) and [`TimestampTz<U, Z>`] for timezone-aware timestamps.
15//!   - Any `T: Record + StructMeta` binds to an Arrow `StructArray`.
16//!
17//! See tests for end-to-end examples and usage patterns.
18
19use arrow_array::Array;
20use arrow_schema::DataType;
21
22/// Compile-time binding from a Rust value type to its Arrow builder, array, and `DataType`.
23///
24/// Implementations provide a zero-cost, monomorphized mapping between a Rust
25/// value type and its Arrow representation; no method takes a `self` receiver.
26///
27/// Methods are meant to be `#[inline]` so they can inline across crates in hot loops;
28/// here only the default method has the attribute — impls presumably add it (confirm).
29pub trait ArrowBinding {
30    /// Concrete Arrow builder type that values of this Rust type are appended to.
31    type Builder;
32
33    /// Concrete Arrow array type returned by `finish`.
34    type Array: Array;
35
36    /// Returns the Arrow `DataType` corresponding to this Rust type.
37    fn data_type() -> DataType;
38
39    /// Creates a new builder, passing `capacity` along as a capacity hint.
40    fn new_builder(capacity: usize) -> Self::Builder;
41
42    /// Estimated bytes per value for variable-length types (String, Binary, etc.), used to
43    /// pre-allocate buffer space. The default returns 0, the value for fixed-size types.
44    #[inline]
45    fn estimated_bytes_per_value() -> usize {
46        0
47    }
48
49    /// Appends the non-null value `v` to the builder `b`.
50    fn append_value(b: &mut Self::Builder, v: &Self);
51
52    /// Appends a null entry to the builder `b`.
53    fn append_null(b: &mut Self::Builder);
54
55    /// Consumes the builder `b` and produces the typed Arrow array.
56    fn finish(b: Self::Builder) -> Self::Array;
57}
58
59/// View binding from an Arrow array to Rust view types (borrowed, or plain values).
60///
61/// Implementations of this trait provide zero-copy access to Arrow array values by
62/// returning views whose lifetime is tied to the array. Requires the `views` feature.
63///
64/// Note: This trait is separate from `ArrowBinding` so that types can opt in to view
65/// support individually. Complex types like `List`, `Map`, etc. may not implement this.
66#[cfg(feature = "views")]
67pub trait ArrowBindingView {
68    /// The Arrow array type this view reads from.
69    type Array: Array;
70
71    /// The view type returned when accessing array elements; `'a` is the array borrow.
72    /// For example: `&'a str` for `StringArray`, `i64` for `Int64Array`.
73    type View<'a>
74    where
75        Self: 'a;
76
77    /// Extracts a view of the element at `index` from `array`.
78    ///
79    /// # Errors
80    /// - Returns `ViewAccessError::OutOfBounds` if `index >= array.len()`
81    /// - Returns `ViewAccessError::UnexpectedNull` if the value is null
82    /// - Returns `ViewAccessError::TypeMismatch` if an array downcast fails (TODO confirm where)
83    fn get_view(
84        array: &Self::Array,
85        index: usize,
86    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError>;
87}
88
89mod binary;
90mod column;
91mod decimals;
92mod dictionary;
93mod intervals;
94mod lists;
95mod map;
96mod null_type;
97mod option;
98mod primitives;
99mod record_struct;
100mod strings;
101mod temporal;
102
103// Public re-exports for convenience
104pub use binary::LargeBinary;
105pub use column::{ColumnBuilder, data_type_of};
106pub use decimals::{Decimal128, Decimal256};
107pub use dictionary::{DictKey, Dictionary};
108pub use intervals::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
109pub use lists::{FixedSizeList, FixedSizeListNullable, LargeList, List};
110pub use map::{Map, OrderedMap};
111pub use null_type::Null;
112pub use strings::LargeUtf8;
113pub use temporal::{
114    Date32, Date64, Duration, Microsecond, Millisecond, Nanosecond, Second, Time32, Time64,
115    TimeZoneSpec, Timestamp, TimestampTz, Utc,
116};