tpchgen_arrow/region.rs
1use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
2use arrow::array::{Int64Array, RecordBatch, StringViewArray};
3use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
4use std::sync::{Arc, LazyLock};
5use tpchgen::generators::{RegionGenerator, RegionGeneratorIterator};
6
7/// Generate [`Region`]s in [`RecordBatch`] format
8///
9/// [`Region`]: tpchgen::generators::Region
10///
11/// # Example
12/// ```
13/// # use tpchgen::generators::{RegionGenerator};
14/// # use tpchgen_arrow::RegionArrow;
15///
16/// // Create a SF=1.0 generator and wrap it in an Arrow generator
17/// let generator = RegionGenerator::new(1.0, 1, 1);
18/// let mut arrow_generator = RegionArrow::new(generator)
19/// .with_batch_size(10);
/// // Read the first batch (at most 10 rows)
21/// let batch = arrow_generator.next().unwrap();
22/// // compare the output by pretty printing it
23/// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch])
24/// .unwrap()
25/// .to_string();
26/// let lines = formatted_batches.lines().collect::<Vec<_>>();
27/// assert_eq!(lines, vec![
28/// "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+",
29/// "| r_regionkey | r_name | r_comment |",
30/// "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+",
31/// "| 0 | AFRICA | lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to |",
32/// "| 1 | AMERICA | hs use ironic, even requests. s |",
33/// "| 2 | ASIA | ges. thinly even pinto beans ca |",
34/// "| 3 | EUROPE | ly final courts cajole furiously final excuse |",
35/// "| 4 | MIDDLE EAST | uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl |",
36/// "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+"
37/// ]);
38/// ```
pub struct RegionArrow {
    /// Row-level iterator whose items are converted into Arrow columns
    inner: RegionGeneratorIterator<'static>,
    /// Upper bound on the number of rows pulled from `inner` per batch
    batch_size: usize,
}
43
44impl RegionArrow {
45 pub fn new(generator: RegionGenerator<'static>) -> Self {
46 Self {
47 inner: generator.iter(),
48 batch_size: DEFAULT_BATCH_SIZE,
49 }
50 }
51
52 /// Set the batch size
53 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
54 self.batch_size = batch_size;
55 self
56 }
57}
58
59impl RecordBatchIterator for RegionArrow {
60 fn schema(&self) -> &SchemaRef {
61 ®ION_SCHEMA
62 }
63}
64
65impl Iterator for RegionArrow {
66 type Item = RecordBatch;
67
68 fn next(&mut self) -> Option<Self::Item> {
69 // Get next rows to convert
70 let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect();
71 if rows.is_empty() {
72 return None;
73 }
74
75 let r_regionkey = Int64Array::from_iter_values(rows.iter().map(|r| r.r_regionkey));
76 let r_name = StringViewArray::from_iter_values(rows.iter().map(|r| r.r_name));
77 let r_comment = StringViewArray::from_iter_values(rows.iter().map(|r| r.r_comment));
78
79 let batch = RecordBatch::try_new(
80 Arc::clone(self.schema()),
81 vec![Arc::new(r_regionkey), Arc::new(r_name), Arc::new(r_comment)],
82 )
83 .unwrap();
84 Some(batch)
85 }
86}
87
88/// Schema for the Region
89static REGION_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_region_schema);
90fn make_region_schema() -> SchemaRef {
91 Arc::new(Schema::new(vec![
92 Field::new("r_regionkey", DataType::Int64, false),
93 Field::new("r_name", DataType::Utf8View, false),
94 Field::new("r_comment", DataType::Utf8View, false),
95 ]))
96}