bodkin/lib.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
//! Bodkin is a library that provides a derive macro to generate Arrow integration code for Rust structs.
//!
//! For example, given the following struct:
//!
//! ```
//! pub struct Example {
//! pub id: u32,
//! }
//! ```
//!
//! The derive macro will generate code similar to the following:
//!
//! ```rust
//!
//! // The user writes this code.
//! pub struct Example {
//! pub id: u32,
//! }
//!
//! // The derive macro generates the `ExampleArrow` struct and associated methods.
//!
//! // Arrow works with arrays, so this generated code associates an array with each field of the original struct.
//! pub struct ExampleArrow {
//! pub ids: arrow_array::UInt32Array,
//! }
//!
//! impl ExampleArrow {
//! /// Convert an Arrow `RecordBatch` into an `ExampleArrow` struct, in a fallible way.
//! pub fn try_from_record_batch(
//! batch: &arrow_array::RecordBatch,
//! ) -> bodkin::Result<Self> {
//! let ids = batch
//! .column_by_name("id")
//! .ok_or_else(|| bodkin::BodkinError::new("missing column 'id'".into()))?
//! .as_any()
//! .downcast_ref::<arrow_array::UInt32Array>()
//! .ok_or_else(|| bodkin::BodkinError::new("invalid column 'id'".into()))?;
//! bodkin::Result::Ok(ExampleArrow { ids: ids.clone() })
//! }
//! }
//! impl ExampleArrow {
//! /// Generate an Arrow schema; this is useful for creating Parquet or LanceDB tables.
//! pub fn arrow_schema() -> arrow::datatypes::Schema {
//! let fields = vec![
//! arrow::datatypes::Field::new(
//! "id",
//! arrow::datatypes::DataType::UInt32,
//! false,
//! ),
//! ];
//! arrow::datatypes::Schema::new(fields)
//! }
//! }
//!
//! impl ExampleArrow {
//! /// Convert a slice of `Example`s to an Arrow `RecordBatch`, in a fallible way.
//! pub fn to_record_batch(
//! items: &[Example],
//! ) -> bodkin::Result<arrow_array::RecordBatch> {
//! use arrow_array::Array;
//! let schema = Self::arrow_schema();
//! let ids = arrow_array::UInt32Array::from(
//! items.iter().map(|item| item.id.clone()).collect::<Vec<_>>(),
//! );
//! let out = arrow_array::RecordBatch::try_new(
//! std::sync::Arc::new(schema),
//! vec![std::sync::Arc::new(ids)],
//! )?;
//! bodkin::Result::Ok(out)
//! }
//! }
//! ```
//!
//! The following user code uses the generated code:
//!
//! ```
//! use bodkin::ArrowIntegration;
//! use std::slice;
//!
//! #[derive(ArrowIntegration)]
//! pub struct Example {
//! pub id: u32,
//! }
//!
//! fn main() {
//! println!("Generated schema: {:#?}", ExampleArrow::arrow_schema());
//! let data = Example { id: 1 };
//! let record_batch = ExampleArrow::to_record_batch(slice::from_ref(&data))
//! .expect("Failed to convert to record batch");
//! println!("Generated record batch: {:#?}", record_batch);
//! let round_trip_data =
//! ExampleArrow::try_from_record_batch(&record_batch).expect("Failed to read from record batch");
//! assert_eq!(data.id, round_trip_data.ids.value(0));
//! }
//! ```
use std::{error::Error, fmt};
#[allow(unused_imports)]
#[macro_use]
extern crate bodkin_derive;
#[doc(hidden)]
pub use bodkin_derive::*;
/// Error type returned by Bodkin and by the code generated by the `ArrowIntegration` derive macro.
///
/// The variants distinguish failures reported by Arrow from failures described by a plain message.
#[derive(Debug)]
pub enum BodkinError {
/// An error produced by the `arrow` crate, wrapped unchanged.
ArrowError(arrow::error::ArrowError),
/// A free-form message; this is the variant built by [`BodkinError::new`].
MacroError(String),
}
impl BodkinError {
pub fn new(message: String) -> Self {
BodkinError::MacroError(message)
}
}
// Marks `BodkinError` as a standard error type. `source()` is left at its default
// (returns `None`); the wrapped Arrow error is already rendered inline by the
// `Display` impl, so it is not additionally exposed as a source here.
impl Error for BodkinError {}
impl fmt::Display for BodkinError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
BodkinError::ArrowError(err) => write!(f, "ArrowError: {}", err),
BodkinError::MacroError(message) => write!(f, "MacroError: {}", message),
}
}
}
impl From<arrow::error::ArrowError> for BodkinError {
fn from(err: arrow::error::ArrowError) -> Self {
BodkinError::ArrowError(err)
}
}
/// A specialized `Result` type to be used by the code generated by the `ArrowIntegration` derive macro.
///
/// The error parameter defaults to [`BodkinError`], so `bodkin::Result<T>` is shorthand for
/// `core::result::Result<T, BodkinError>`; a different error type can still be supplied explicitly.
pub type Result<T, E = BodkinError> = core::result::Result<T, E>;