// typed_arrow/bridge/mod.rs

//! Bridge from Rust types to Arrow typed arrays and `DataType`.
//!
//! This module provides a compile-time mapping from Rust value types to
//! arrow-rs typed builders/arrays and their corresponding `arrow_schema::DataType`,
//! avoiding any runtime `DataType` matching.
//!
//! - Core trait: [`ArrowBinding`] (Rust type → typed builder/array + `DataType`).
//! - Primitives: `i{8,16,32,64}`, `u{8,16,32,64}`, `f{32,64}`, `bool`.
//! - Utf8/Binary: `String` → `Utf8`, `Vec<u8>` → `Binary`.
//! - Nested containers:
//!   - [`List<T>`] with non-null items, and [`List<Option<T>>`] for nullable items.
//!   - [`Dictionary<K, String>`] → dictionary-encoded Utf8 values.
//!   - [`Timestamp<U>`] with unit markers ([`Second`], [`Millisecond`], [`Microsecond`],
//!     [`Nanosecond`]) and [`TimestampTz<U, Z>`] for timezone-aware timestamps.
//!   - Any `T: Record + StructMeta` binds to an Arrow `StructArray`.
//!
//! See tests for end-to-end examples and usage patterns.
18
19use arrow_array::Array;
20use arrow_schema::DataType;
21
22/// Binding from a Rust type to Arrow typed builders/arrays and `DataType`.
23///
24/// Implementations of this trait provide a zero-cost, monomorphized mapping
25/// between a Rust value type and its Arrow representation.
26///
27/// All methods are marked `#[inline]` to enable cross-crate inlining for
28/// optimal performance in hot loops.
29pub trait ArrowBinding {
30    /// Concrete Arrow builder type used for this Rust type.
31    type Builder;
32
33    /// Concrete Arrow array type produced by `finish`.
34    type Array: Array;
35
36    /// Whether this type is inherently nullable (e.g. `Null`).
37    const NULLABLE: bool = false;
38
39    /// The Arrow `DataType` corresponding to this Rust type.
40    fn data_type() -> DataType;
41
42    /// Create a new builder with an optional capacity hint.
43    fn new_builder(capacity: usize) -> Self::Builder;
44
45    /// Estimated bytes per value for variable-length types (String, Binary, etc.).
46    /// Returns 0 for fixed-size types. Used to pre-allocate buffer space.
47    #[inline]
48    fn estimated_bytes_per_value() -> usize {
49        0
50    }
51
52    /// Append a non-null value to the builder.
53    fn append_value(b: &mut Self::Builder, v: &Self);
54
55    /// Append a null to the builder.
56    fn append_null(b: &mut Self::Builder);
57
58    /// Finish the builder and produce a typed Arrow array.
59    fn finish(b: Self::Builder) -> Self::Array;
60}
61
/// View binding from an Arrow array to borrowed Rust reference types.
///
/// Implementations of this trait provide zero-copy access to Arrow array values
/// by returning borrowed references with a lifetime tied to the array.
///
/// Note: This trait is separate from `ArrowBinding` so that types can opt in
/// to view support. Complex types like `List`, `Map`, etc. may not implement it.
///
/// Only compiled when the `views` feature is enabled.
#[cfg(feature = "views")]
pub trait ArrowBindingView {
    /// The Arrow array type this view reads from.
    type Array: Array;

    /// The borrowed view type returned when accessing array elements.
    /// For example: `&'a str` for `StringArray`, `i64` for `Int64Array`.
    type View<'a>
    where
        Self: 'a;

    /// Extract a view at the given index from the array.
    ///
    /// The returned view borrows from `array`.
    ///
    /// # Errors
    /// - Returns `ViewAccessError::OutOfBounds` if `index >= array.len()`
    /// - Returns `ViewAccessError::UnexpectedNull` if the value is null
    /// - Returns `ViewAccessError::TypeMismatch` if array downcast fails
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError>;
}
91
92mod binary;
93mod column;
94mod decimals;
95mod dictionary;
96mod intervals;
97mod lists;
98mod map;
99mod null_type;
100mod option;
101mod primitives;
102mod record_struct;
103mod strings;
104mod temporal;
105
106// Public re-exports for convenience
107pub use binary::LargeBinary;
108pub use column::{ColumnBuilder, data_type_of};
109pub use decimals::{Decimal128, Decimal256};
110pub use dictionary::{DictKey, Dictionary};
111pub use intervals::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
112pub use lists::{FixedSizeList, FixedSizeListNullable, LargeList, List};
113pub use map::{Map, OrderedMap};
114pub use null_type::Null;
115pub use strings::LargeUtf8;
116pub use temporal::{
117    Date32, Date64, Duration, Microsecond, Millisecond, Nanosecond, Second, Time32, Time64,
118    TimeZoneSpec, Timestamp, TimestampTz, Utc,
119};