use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
use arrow::array::{Int64Array, RecordBatch, StringViewArray};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use std::sync::{Arc, LazyLock};
use tpchgen::generators::{RegionGenerator, RegionGeneratorIterator};
/// Iterator adapter that groups the rows produced by a [`RegionGenerator`]
/// into Arrow [`RecordBatch`]es.
///
/// Each call to `next` pulls at most `batch_size` rows from the wrapped
/// generator iterator and converts them into a single batch.
pub struct RegionArrow {
    /// Row-level iterator obtained from the generator; advanced batch by batch.
    inner: RegionGeneratorIterator<'static>,
    /// Upper bound on the number of rows placed in one `RecordBatch`.
    batch_size: usize,
}
impl RegionArrow {
    /// Wraps `generator` in an Arrow batch iterator.
    ///
    /// The batch size starts out as [`DEFAULT_BATCH_SIZE`]; use
    /// [`RegionArrow::with_batch_size`] to override it.
    pub fn new(generator: RegionGenerator<'static>) -> Self {
        let inner = generator.iter();
        Self {
            inner,
            batch_size: DEFAULT_BATCH_SIZE,
        }
    }

    /// Returns this iterator with the maximum rows-per-batch set to `batch_size`.
    ///
    /// Note that a `batch_size` of `0` makes the iterator take no rows per
    /// call, so it yields no batches at all.
    pub fn with_batch_size(self, batch_size: usize) -> Self {
        Self { batch_size, ..self }
    }
}
impl RecordBatchIterator for RegionArrow {
fn schema(&self) -> &SchemaRef {
®ION_SCHEMA
}
}
impl Iterator for RegionArrow {
    type Item = RecordBatch;

    /// Produces the next batch of up to `batch_size` rows, or `None` once the
    /// underlying generator iterator yields no more rows.
    ///
    /// # Panics
    ///
    /// Panics if `RecordBatch::try_new` rejects the constructed columns.
    fn next(&mut self) -> Option<Self::Item> {
        // Borrow the inner iterator mutably so `take` consumes from it without
        // moving it out of `self`.
        let limit = self.batch_size;
        let regions: Vec<_> = (&mut self.inner).take(limit).collect();
        if regions.is_empty() {
            return None;
        }

        // One pass over the buffered rows per output column.
        let keys = regions.iter().map(|region| region.r_regionkey);
        let names = regions.iter().map(|region| region.r_name);
        let comments = regions.iter().map(|region| region.r_comment);

        let r_regionkey = Int64Array::from_iter_values(keys);
        let r_name = StringViewArray::from_iter_values(names);
        let r_comment = StringViewArray::from_iter_values(comments);

        // Column order must match the field order of the schema.
        let schema = Arc::clone(self.schema());
        let batch = RecordBatch::try_new(
            schema,
            vec![Arc::new(r_regionkey), Arc::new(r_name), Arc::new(r_comment)],
        )
        .unwrap();

        Some(batch)
    }
}
/// Schema of the batches emitted by `RegionArrow`, built once on first access.
static REGION_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_region_schema);

/// Builds the region schema: three non-nullable columns, `r_regionkey`
/// (`Int64`), `r_name` (`Utf8View`) and `r_comment` (`Utf8View`), in that order.
fn make_region_schema() -> SchemaRef {
    let fields = vec![
        Field::new("r_regionkey", DataType::Int64, false),
        Field::new("r_name", DataType::Utf8View, false),
        Field::new("r_comment", DataType::Utf8View, false),
    ];
    Arc::new(Schema::new(fields))
}