Expand description
Bodkin is a library that provides a derive macro to generate Arrow integration code for Rust structs.
For example, given the following struct:
pub struct Example {
pub id: u32,
}
The derive macro will generate code similar to the following:
// The user writes this code: a plain struct with a single `u32` field named `id`.
pub struct Example {
// The generated Arrow code below uses this field's name, `id`, as the column name.
pub id: u32,
}
// The derive macro generates the `ExampleArrow` struct and associated methods.
// Arrow works with arrays, so this generated code associates an array with each field of the original struct.
pub struct ExampleArrow {
// Array counterpart of `Example::id`; note the pluralized field name (`id` -> `ids`).
pub ids: arrow_array::UInt32Array,
}
impl ExampleArrow {
/// Convert an arrow `RecordBatch` to an ExampleArrow struct, in a fallible way.
///
/// # Errors
///
/// Fails with an error built by `bodkin::BodkinError::new` carrying the message
/// `missing column 'id'` when the batch has no column named `id`, or
/// `invalid column 'id'` when that column cannot be downcast to
/// `arrow_array::UInt32Array`.
pub fn try_from_record_batch(
batch: &arrow_array::RecordBatch,
) -> bodkin::Result<Self> {
let ids = batch
// Columns are looked up by name, using the field name of the original struct.
.column_by_name("id")
.ok_or_else(|| bodkin::BodkinError::new("missing column 'id'".into()))?
// Downcast the column to the concrete array type used for the `ids` field.
.as_any()
.downcast_ref::<arrow_array::UInt32Array>()
.ok_or_else(|| bodkin::BodkinError::new("invalid column 'id'".into()))?;
// `downcast_ref` only yields a reference, so the array is cloned to store an owned value in the struct.
bodkin::Result::Ok(ExampleArrow { ids: ids.clone() })
}
}
impl ExampleArrow {
/// Generate an Arrow schema; this is useful for generating parquet or lancedb tables.
///
/// The schema has a single field named `id` of type `UInt32`, mirroring the
/// `id: u32` field of `Example`.
pub fn arrow_schema() -> arrow::datatypes::Schema {
let fields = vec![
arrow::datatypes::Field::new(
"id",
arrow::datatypes::DataType::UInt32,
// NOTE(review): third argument of `Field::new`; presumably the `nullable` flag, i.e. the column is declared non-nullable. Confirm against the arrow docs.
false,
),
];
arrow::datatypes::Schema::new(fields)
}
}
impl ExampleArrow {
/// Convert a slice of Example-s to an arrow `RecordBatch`, in a fallible way.
///
/// The batch uses the schema returned by `Self::arrow_schema()` and contains one
/// `UInt32Array` column built from the `id` of every item, in slice order.
///
/// # Errors
///
/// Propagates, via `?`, the error returned by `arrow_array::RecordBatch::try_new`.
pub fn to_record_batch(
items: &[Example],
) -> bodkin::Result<arrow_array::RecordBatch> {
// NOTE(review): nothing in this body visibly needs the `Array` trait; presumably the macro emits it for field types whose conversion calls trait methods. Confirm.
use arrow_array::Array;
let schema = Self::arrow_schema();
// Gather the `id` of every item into a Vec, then turn it into an Arrow array.
let ids = arrow_array::UInt32Array::from(
items.iter().map(|item| item.id.clone()).collect::<Vec<_>>(),
);
let out = arrow_array::RecordBatch::try_new(
std::sync::Arc::new(schema),
// The single column, matching the single `id` field of the schema.
vec![std::sync::Arc::new(ids)],
)?;
bodkin::Result::Ok(out)
}
}
The following user code uses the generated code:
use bodkin::ArrowIntegration;
use std::slice;
// Deriving `ArrowIntegration` is what provides the `ExampleArrow` type used in `main` below.
#[derive(ArrowIntegration)]
pub struct Example {
pub id: u32,
}
fn main() {
// `arrow_schema` takes no arguments, so the schema can be printed before any data exists.
println!("Generated schema: {:#?}", ExampleArrow::arrow_schema());
let data = Example { id: 1 };
// `to_record_batch` takes a slice, so the single value is wrapped with `slice::from_ref`.
let record_batch = ExampleArrow::to_record_batch(slice::from_ref(&data))
.expect("Failed to convert to record batch");
println!("Generated record batch: {:#?}", record_batch);
// Read the batch back and check that the `id` survived the round trip.
let round_trip_data =
ExampleArrow::try_from_record_batch(&record_batch).expect("Failed to read from record batch");
assert_eq!(data.id, round_trip_data.ids.value(0));
}

Enums§
- BodkinError - Error used internally by the Bodkin library.
Type Aliases§
- Result - A specialized `Result` type to be used by the code generated by the `ArrowIntegration` derive macro.