bodkin/lib.rs
1//! Bodkin is a library that provides a derive macro to generate Arrow integration code for Rust structs.
2//!
3//! For example, given the following struct:
4//!
5//! ```
6//! pub struct Example {
7//! pub id: u32,
8//! }
9//! ```
10//!
11//! The derive macro will generate code similar to the following:
12//!
13//! ```rust
14//!
15//! // The user writes this code.
16//! pub struct Example {
17//! pub id: u32,
18//! }
19//!
20//! // The derive macro generates the `ExampleArrow` struct and associated methods.
21//!
22//! // Arrow works with arrays, so this generated code associates an array with each field of the original struct.
23//! pub struct ExampleArrow {
24//! pub ids: arrow_array::UInt32Array,
25//! }
26//!
27//! impl ExampleArrow {
28//! /// Convert an arrow `RecordBatch` to an ExampleArrow struct, in a fallible way.
29//! pub fn try_from_record_batch(
30//! batch: &arrow_array::RecordBatch,
31//! ) -> bodkin::Result<Self> {
32//! let ids = batch
33//! .column_by_name("id")
34//! .ok_or_else(|| bodkin::BodkinError::new("missing column 'id'".into()))?
35//! .as_any()
36//! .downcast_ref::<arrow_array::UInt32Array>()
37//! .ok_or_else(|| bodkin::BodkinError::new("invalid column 'id'".into()))?;
38//! bodkin::Result::Ok(ExampleArrow { ids: ids.clone() })
39//! }
40//! }
41//! impl ExampleArrow {
//! /// Generate an Arrow schema; this is useful for creating Parquet or LanceDB tables.
43//! pub fn arrow_schema() -> arrow::datatypes::Schema {
44//! let fields = vec![
45//! arrow::datatypes::Field::new(
46//! "id",
47//! arrow::datatypes::DataType::UInt32,
48//! false,
49//! ),
50//! ];
51//! arrow::datatypes::Schema::new(fields)
52//! }
53//! }
54//!
55//! impl ExampleArrow {
//! /// Convert a slice of `Example` values to an Arrow `RecordBatch`, in a fallible way.
57//! pub fn to_record_batch(
58//! items: &[Example],
59//! ) -> bodkin::Result<arrow_array::RecordBatch> {
60//! use arrow_array::Array;
61//! let schema = Self::arrow_schema();
62//! let ids = arrow_array::UInt32Array::from(
63//! items.iter().map(|item| item.id.clone()).collect::<Vec<_>>(),
64//! );
65//! let out = arrow_array::RecordBatch::try_new(
66//! std::sync::Arc::new(schema),
67//! vec![std::sync::Arc::new(ids)],
68//! )?;
69//! bodkin::Result::Ok(out)
70//! }
71//! }
72//! ```
73//!
74//! The following user code uses the generated code:
75//!
76//! ```
77//! use bodkin::ArrowIntegration;
78//! use std::slice;
79//!
80//! #[derive(ArrowIntegration)]
81//! pub struct Example {
82//! pub id: u32,
83//! }
84//!
85//! fn main() {
86//! println!("Generated schema: {:#?}", ExampleArrow::arrow_schema());
87//! let data = Example { id: 1 };
88//! let record_batch = ExampleArrow::to_record_batch(slice::from_ref(&data))
89//! .expect("Failed to convert to record batch");
90//! println!("Generated record batch: {:#?}", record_batch);
91//! let round_trip_data =
92//! ExampleArrow::try_from_record_batch(&record_batch).expect("Failed to read from record batch");
93//! assert_eq!(data.id, round_trip_data.ids.value(0));
94//! }
95//! ```
96
97
98use std::{error::Error, fmt};
99
100#[allow(unused_imports)]
101#[macro_use]
102extern crate bodkin_derive;
103
104#[doc(hidden)]
105pub use bodkin_derive::*;
106
107
108
109/// Error used internally by the Bodkin library.
110#[derive(Debug)]
111pub enum BodkinError {
112 ArrowError(arrow::error::ArrowError),
113 MacroError(String),
114}
115
116impl BodkinError {
117 pub fn new(message: String) -> Self {
118 BodkinError::MacroError(message)
119 }
120}
121
122impl Error for BodkinError {}
123
124impl fmt::Display for BodkinError {
125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126 match self {
127 BodkinError::ArrowError(err) => write!(f, "ArrowError: {}", err),
128 BodkinError::MacroError(message) => write!(f, "MacroError: {}", message),
129 }
130 }
131}
132
133impl From<arrow::error::ArrowError> for BodkinError {
134 fn from(err: arrow::error::ArrowError) -> Self {
135 BodkinError::ArrowError(err)
136 }
137}
138
139/// A specialized `Result` type to be used by the code generated by the `ArrowIntegration` derive macro.
140pub type Result<T, E = BodkinError> = core::result::Result<T, E>;