bodkin/
lib.rs

1//! Bodkin is a library that provides a derive macro to generate Arrow integration code for Rust structs.
2//! 
3//! For example, given the following struct:
4//! 
5//!  ```
6//! pub struct Example {
7//!   pub id: u32,
8//! }
9//! ```
10//! 
11//! The derive macro will generate code similar to the following:
12//! 
13//! ```rust
14//! 
15//! // The user writes this code.
16//! pub struct Example {
17//!   pub id: u32,
18//! }
19//! 
20//! // The derive macro generates the `ExampleArrow` struct and associated methods.
21//! 
22//! // Arrow works with arrays, so this generated code associates an array with each field of the original struct.
23//! pub struct ExampleArrow {
24//!     pub ids: arrow_array::UInt32Array,
25//! }
26//! 
27//! impl ExampleArrow {
28//!     /// Convert an arrow `RecordBatch` to an ExampleArrow struct, in a fallible way.
29//!     pub fn try_from_record_batch(
30//!         batch: &arrow_array::RecordBatch,
31//!     ) -> bodkin::Result<Self> {
32//!         let ids = batch
33//!             .column_by_name("id")
34//!             .ok_or_else(|| bodkin::BodkinError::new("missing column 'id'".into()))?
35//!             .as_any()
36//!             .downcast_ref::<arrow_array::UInt32Array>()
37//!             .ok_or_else(|| bodkin::BodkinError::new("invalid column 'id'".into()))?;
38//!         bodkin::Result::Ok(ExampleArrow { ids: ids.clone() })
39//!     }
40//! }
41//! impl ExampleArrow {
//!     /// Generate an Arrow schema; this is useful for creating Parquet or LanceDB tables.
43//!     pub fn arrow_schema() -> arrow::datatypes::Schema {
44//!         let fields = vec![
45//!                 arrow::datatypes::Field::new(
46//!                     "id",
47//!                     arrow::datatypes::DataType::UInt32,
48//!                     false,
49//!                 ),
50//!             ];
51//!         arrow::datatypes::Schema::new(fields)
52//!     }
53//! }
54//! 
55//! impl ExampleArrow {
//!     /// Convert a slice of `Example` values to an Arrow `RecordBatch`, in a fallible way.
57//!     pub fn to_record_batch(
58//!         items: &[Example],
59//!     ) -> bodkin::Result<arrow_array::RecordBatch> {
60//!         use arrow_array::Array;
61//!         let schema = Self::arrow_schema();
62//!         let ids = arrow_array::UInt32Array::from(
63//!             items.iter().map(|item| item.id.clone()).collect::<Vec<_>>(),
64//!         );
65//!         let out = arrow_array::RecordBatch::try_new(
66//!             std::sync::Arc::new(schema),
67//!             vec![std::sync::Arc::new(ids)],
68//!         )?;
69//!         bodkin::Result::Ok(out)
70//!     }
71//! }
72//! ```
73//! 
74//! The following user code uses the generated code:
75//! 
76//! ```
77//! use bodkin::ArrowIntegration;
78//! use std::slice;
79//! 
80//! #[derive(ArrowIntegration)]
81//! pub struct Example {
82//!   pub id: u32,
83//! }
84//! 
85//! fn main() {
86//!     println!("Generated schema: {:#?}", ExampleArrow::arrow_schema());
87//!     let data = Example { id: 1 };
88//!     let record_batch = ExampleArrow::to_record_batch(slice::from_ref(&data))
89//!         .expect("Failed to convert to record batch");
90//!     println!("Generated record batch: {:#?}", record_batch);
91//!     let round_trip_data =
92//!         ExampleArrow::try_from_record_batch(&record_batch).expect("Failed to read from record batch");
93//!     assert_eq!(data.id, round_trip_data.ids.value(0));
94//! }
95//! ```
96
97
98use std::{error::Error, fmt};
99
100#[allow(unused_imports)]
101#[macro_use]
102extern crate bodkin_derive;
103
104#[doc(hidden)]
105pub use bodkin_derive::*;
106
107
108
/// Error used internally by the Bodkin library.
///
/// It is also the default error type of this crate's [`Result`] alias, which the code
/// generated by the `ArrowIntegration` derive macro returns.
#[derive(Debug)]
pub enum BodkinError {
    /// An error produced by the Arrow library, wrapped as-is.
    ArrowError(arrow::error::ArrowError),
    /// An error described only by a message; this is the variant built by [`BodkinError::new`].
    MacroError(String),
}
115
116impl BodkinError {
117    pub fn new(message: String) -> Self {
118        BodkinError::MacroError(message)
119    }
120}
121
122impl Error for BodkinError {}
123
124impl fmt::Display for BodkinError {
125    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126        match self {
127            BodkinError::ArrowError(err) => write!(f, "ArrowError: {}", err),
128            BodkinError::MacroError(message) => write!(f, "MacroError: {}", message),
129        }
130    }
131}
132
133impl From<arrow::error::ArrowError> for BodkinError {
134    fn from(err: arrow::error::ArrowError) -> Self {
135        BodkinError::ArrowError(err)
136    }
137}
138
139/// A specialized `Result` type to be used by the code generated by the `ArrowIntegration` derive macro.
140pub type Result<T, E = BodkinError> = core::result::Result<T, E>;