// bodkin/lib.rs
//! Bodkin is a library that provides a derive macro to generate Arrow integration code for Rust structs.
//! 
//! For example, given the following struct:
//! 
//! ```
//! pub struct Example {
//!   pub id: u32,
//! }
//! ```
//! 
//! The derive macro will generate code similar to the following:
//! 
//! ```rust
//! 
//! // The user writes this code.
//! pub struct Example {
//!   pub id: u32,
//! }
//! 
//! // The derive macro generates the `ExampleArrow` struct and associated methods.
//! 
//! // Arrow works with arrays, so this generated code associates an array with each field of the original struct.
//! pub struct ExampleArrow {
//!     pub ids: arrow_array::UInt32Array,
//! }
//! 
//! impl ExampleArrow {
//!     /// Fallibly convert an Arrow `RecordBatch` into an `ExampleArrow` struct.
//!     pub fn try_from_record_batch(
//!         batch: &arrow_array::RecordBatch,
//!     ) -> bodkin::Result<Self> {
//!         let ids = batch
//!             .column_by_name("id")
//!             .ok_or_else(|| bodkin::BodkinError::new("missing column 'id'".into()))?
//!             .as_any()
//!             .downcast_ref::<arrow_array::UInt32Array>()
//!             .ok_or_else(|| bodkin::BodkinError::new("invalid column 'id'".into()))?;
//!         bodkin::Result::Ok(ExampleArrow { ids: ids.clone() })
//!     }
//! }
//! impl ExampleArrow {
//!     /// Generate an Arrow schema; this is useful for creating Parquet or LanceDB tables.
//!     pub fn arrow_schema() -> arrow::datatypes::Schema {
//!         let fields = vec![
//!                 arrow::datatypes::Field::new(
//!                     "id",
//!                     arrow::datatypes::DataType::UInt32,
//!                     false,
//!                 ),
//!             ];
//!         arrow::datatypes::Schema::new(fields)
//!     }
//! }
//! 
//! impl ExampleArrow {
//!     /// Fallibly convert a slice of `Example` values into an Arrow `RecordBatch`.
//!     pub fn to_record_batch(
//!         items: &[Example],
//!     ) -> bodkin::Result<arrow_array::RecordBatch> {
//!         use arrow_array::Array;
//!         let schema = Self::arrow_schema();
//!         let ids = arrow_array::UInt32Array::from(
//!             items.iter().map(|item| item.id.clone()).collect::<Vec<_>>(),
//!         );
//!         let out = arrow_array::RecordBatch::try_new(
//!             std::sync::Arc::new(schema),
//!             vec![std::sync::Arc::new(ids)],
//!         )?;
//!         bodkin::Result::Ok(out)
//!     }
//! }
//! ```
//! 
//! The following user code uses the generated code:
//! 
//! ```
//! use bodkin::ArrowIntegration;
//! use std::slice;
//! 
//! #[derive(ArrowIntegration)]
//! pub struct Example {
//!   pub id: u32,
//! }
//! 
//! fn main() {
//!     println!("Generated schema: {:#?}", ExampleArrow::arrow_schema());
//!     let data = Example { id: 1 };
//!     let record_batch = ExampleArrow::to_record_batch(slice::from_ref(&data))
//!         .expect("Failed to convert to record batch");
//!     println!("Generated record batch: {:#?}", record_batch);
//!     let round_trip_data =
//!         ExampleArrow::try_from_record_batch(&record_batch).expect("Failed to read from record batch");
//!     assert_eq!(data.id, round_trip_data.ids.value(0));
//! }
//! ```


use std::{error::Error, fmt};

#[allow(unused_imports)]
#[macro_use]
extern crate bodkin_derive;

#[doc(hidden)]
pub use bodkin_derive::*;



/// Error used internally by the Bodkin library.
///
/// It is also the default error type of this crate's [`Result`] alias.
#[derive(Debug)]
pub enum BodkinError {
    /// An error reported by Arrow, wrapped unchanged.
    ///
    /// Built through the `From<arrow::error::ArrowError>` impl, so `?` can
    /// lift an Arrow error into a `BodkinError`.
    ArrowError(arrow::error::ArrowError),
    /// A free-form message, as produced by [`BodkinError::new`].
    MacroError(String),
}

impl BodkinError {
    pub fn new(message: String) -> Self {
        BodkinError::MacroError(message)
    }
}

impl Error for BodkinError {}

impl fmt::Display for BodkinError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            BodkinError::ArrowError(err) => write!(f, "ArrowError: {}", err),
            BodkinError::MacroError(message) => write!(f, "MacroError: {}", message),
        }
    }
}

impl From<arrow::error::ArrowError> for BodkinError {
    fn from(err: arrow::error::ArrowError) -> Self {
        BodkinError::ArrowError(err)
    }
}

/// `Result` alias whose error type defaults to [`BodkinError`]; intended for the
/// code generated by the `ArrowIntegration` derive macro.
pub type Result<T, E = BodkinError> = std::result::Result<T, E>;