// typed_arrow/schema.rs
//! Core schema traits for compile-time Arrow typing.
2
3use std::{
4 collections::HashMap, iter::IntoIterator, marker::PhantomData, option::Option, sync::Arc,
5};
6
7use arrow_array::{
8 builder::{ArrayBuilder, StructBuilder},
9 Array, RecordBatch,
10};
11use arrow_schema::{DataType, Field, Schema};
12
/// A record (row) whose column count is fixed at compile time.
///
/// This is the base trait of the module: `ColAt`, `ForEachCol`, `StructMeta`,
/// `SchemaMeta` and `BuildRows` all require it as a supertrait.
pub trait Record {
    /// Number of columns in this record.
    const LEN: usize;
}
18
19/// Per-column metadata for a record at index `I`.
20pub trait ColAt<const I: usize>: Record {
21 /// The Native value type of this column (without nullability).
22 type Native;
23
24 /// The typed Arrow array for this column.
25 type ColumnArray: Array;
26
27 /// The typed Arrow builder for this column.
28 type ColumnBuilder: ArrayBuilder;
29
30 /// Whether this column is nullable.
31 const NULLABLE: bool;
32
33 /// Column name.
34 const NAME: &'static str;
35
36 /// Arrow-rs `DataType` for this column.
37 fn data_type() -> DataType;
38}
39
/// Simple compile-time column metadata passed to visitors.
///
/// `R` is the Rust type associated with the column. It is carried only at the
/// type level through `PhantomData`; no value of `R` is ever stored.
pub struct FieldMeta<R> {
    /// Column name.
    pub name: &'static str,

    /// Whether this column is nullable.
    pub nullable: bool,

    /// Zero-sized marker tying this metadata to the column type `R`.
    _phantom: PhantomData<R>,
}

impl<R> FieldMeta<R> {
    /// Construct a new `FieldMeta` from a column name and its nullability.
    ///
    /// This is a `const fn`, so it can be used to build constants.
    #[must_use]
    pub const fn new(name: &'static str, nullable: bool) -> Self {
        Self {
            name,
            nullable,
            _phantom: PhantomData,
        }
    }
}

// `Clone`, `Copy` and `Debug` are written by hand instead of derived: a derive
// would add `R: Clone` / `R: Copy` / `R: Debug` bounds even though no `R` value
// is stored, making the impls unavailable for most column types.

impl<R> Clone for FieldMeta<R> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<R> Copy for FieldMeta<R> {}

impl<R> std::fmt::Debug for FieldMeta<R> {
    /// Formats the public fields only; the phantom marker carries no data.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FieldMeta")
            .field("name", &self.name)
            .field("nullable", &self.nullable)
            .finish()
    }
}
62
63/// A visitor invoked at compile time for each column of a `Record`.
64pub trait ColumnVisitor {
65 /// Process a column at index `I` with Rust type `R`.
66 fn visit<const I: usize, R>(_m: FieldMeta<R>);
67}
68
69/// Trait emitted by derive/macro to enable `for_each_col` expansion.
70pub trait ForEachCol: Record {
71 /// Invoke `V::visit` for each column at compile time.
72 fn for_each_col<V: ColumnVisitor>();
73}
74
// No Arrow markers: `ColAt` exposes `data_type()`, `ColumnBuilder` and `ColumnArray` directly.
76
77/// Metadata and builder utilities for nested Struct fields.
78pub trait StructMeta: Record {
79 /// Child fields (names, data types, nullability) for this struct.
80 fn child_fields() -> Vec<Field>;
81
82 /// Construct a `StructBuilder` with appropriate child builders for this struct.
83 fn new_struct_builder(capacity: usize) -> StructBuilder;
84}
85
86/// Arrow runtime schema metadata for a top-level Record.
87pub trait SchemaMeta: Record {
88 /// Top-level fields: (name, `data_type`, nullable) represented as `Field`s.
89 fn fields() -> Vec<Field>;
90
91 /// Optional top-level schema key/value metadata.
92 #[must_use]
93 fn metadata() -> HashMap<String, String> {
94 HashMap::default()
95 }
96
97 /// Construct an `Arc<arrow_schema::Schema>` from `fields()`.
98 fn schema() -> Arc<Schema> {
99 let fields: Vec<Arc<Field>> = Self::fields().into_iter().map(Arc::new).collect();
100 Arc::new(Schema::new_with_metadata(fields, Self::metadata()))
101 }
102}
103
104/// Row-based building interface: construct typed column builders, append owned rows,
105/// and finish into typed arrays.
106pub trait BuildRows: Record + Sized {
107 /// Generated builders struct for this record.
108 type Builders: RowBuilder<Self>;
109
110 /// Generated arrays struct for this record.
111 type Arrays: IntoRecordBatch;
112
113 /// Create builders with a capacity hint.
114 fn new_builders(capacity: usize) -> Self::Builders;
115}
116
117/// Trait implemented by derive-generated builders to append rows of `Row`
118/// and finish into a typed arrays struct.
119pub trait RowBuilder<Row> {
120 /// The arrays struct produced by `finish`.
121 type Arrays: IntoRecordBatch;
122
123 /// Append a non-null row.
124 fn append_row(&mut self, row: Row);
125 /// Append a null row.
126 fn append_null_row(&mut self);
127 /// Append an optional row.
128 fn append_option_row(&mut self, row: Option<Row>);
129 /// Append an iterator of non-null rows.
130 fn append_rows<I: IntoIterator<Item = Row>>(&mut self, rows: I);
131 /// Append an iterator of optional rows.
132 fn append_option_rows<I: IntoIterator<Item = Option<Row>>>(&mut self, rows: I);
133 /// Finish and produce arrays.
134 fn finish(self) -> Self::Arrays;
135}
136
137/// Trait implemented by derive-generated arrays to assemble a `RecordBatch`.
138pub trait IntoRecordBatch {
139 /// Assemble and return an `arrow_array::RecordBatch`.
140 fn into_record_batch(self) -> RecordBatch;
141}
142
143// Identity conversion for dynamic path output (RecordBatch already assembled).
144impl IntoRecordBatch for RecordBatch {
145 fn into_record_batch(self) -> RecordBatch {
146 self
147 }
148}
149
150/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
151/// `StructBuilder`. Used by row-based APIs to handle nested struct fields.
152pub trait AppendStruct {
153 /// Append this struct's child values into the provided `StructBuilder`.
154 /// Caller is responsible for setting the parent validity via `append(true)`.
155 fn append_owned_into(self, b: &mut StructBuilder);
156
157 /// Append nulls for each child into the provided `StructBuilder` to align lengths.
158 /// Caller is responsible for `append(false)` for the parent validity.
159 fn append_null_into(b: &mut StructBuilder);
160}
161
162/// Trait implemented by `#[derive(Record)]` structs to append their fields into a
163/// `StructBuilder` from a borrowed reference. This enables container builders (e.g.,
164/// lists of structs) to append child values without taking ownership of the struct.
165pub trait AppendStructRef {
166 /// Append this struct's child values into the provided `StructBuilder` using borrows.
167 /// Caller is responsible for setting the parent validity via `append(true)`.
168 fn append_borrowed_into(&self, b: &mut StructBuilder);
169}