tpchgen_arrow/
region.rs

1use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
2use arrow::array::{Int64Array, RecordBatch, StringViewArray};
3use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
4use std::sync::{Arc, LazyLock};
5use tpchgen::generators::{RegionGenerator, RegionGeneratorIterator};
6
/// Generate [`Region`]s in [`RecordBatch`] format
8///
9/// [`Region`]: tpchgen::generators::Region
10///
11/// # Example
12/// ```
13/// # use tpchgen::generators::{RegionGenerator};
14/// # use tpchgen_arrow::RegionArrow;
15///
16/// // Create a SF=1.0 generator and wrap it in an Arrow generator
17/// let generator = RegionGenerator::new(1.0, 1, 1);
18/// let mut arrow_generator = RegionArrow::new(generator)
19///   .with_batch_size(10);
/// // Read the first batch (at most 10 rows)
21/// let batch = arrow_generator.next().unwrap();
22/// // compare the output by pretty printing it
23/// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch])
24///   .unwrap()
25///   .to_string();
26/// let lines = formatted_batches.lines().collect::<Vec<_>>();
27/// assert_eq!(lines, vec![
28///   "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+",
29///   "| r_regionkey | r_name      | r_comment                                                                                                           |",
30///   "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+",
31///   "| 0           | AFRICA      | lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to  |",
32///   "| 1           | AMERICA     | hs use ironic, even requests. s                                                                                     |",
33///   "| 2           | ASIA        | ges. thinly even pinto beans ca                                                                                     |",
34///   "| 3           | EUROPE      | ly final courts cajole furiously final excuse                                                                       |",
35///   "| 4           | MIDDLE EAST | uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl        |",
36///   "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+"
37/// ]);
38/// ```
pub struct RegionArrow {
    /// Row iterator obtained from the wrapped generator via `iter()`.
    inner: RegionGeneratorIterator<'static>,
    /// Maximum number of rows pulled from `inner` for each produced batch.
    batch_size: usize,
}
43
44impl RegionArrow {
45    pub fn new(generator: RegionGenerator<'static>) -> Self {
46        Self {
47            inner: generator.iter(),
48            batch_size: DEFAULT_BATCH_SIZE,
49        }
50    }
51
52    /// Set the batch size
53    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
54        self.batch_size = batch_size;
55        self
56    }
57}
58
59impl RecordBatchIterator for RegionArrow {
60    fn schema(&self) -> &SchemaRef {
61        &REGION_SCHEMA
62    }
63}
64
65impl Iterator for RegionArrow {
66    type Item = RecordBatch;
67
68    fn next(&mut self) -> Option<Self::Item> {
69        // Get next rows to convert
70        let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect();
71        if rows.is_empty() {
72            return None;
73        }
74
75        let r_regionkey = Int64Array::from_iter_values(rows.iter().map(|r| r.r_regionkey));
76        let r_name = StringViewArray::from_iter_values(rows.iter().map(|r| r.r_name));
77        let r_comment = StringViewArray::from_iter_values(rows.iter().map(|r| r.r_comment));
78
79        let batch = RecordBatch::try_new(
80            Arc::clone(self.schema()),
81            vec![Arc::new(r_regionkey), Arc::new(r_name), Arc::new(r_comment)],
82        )
83        .unwrap();
84        Some(batch)
85    }
86}
87
88/// Schema for the Region
89static REGION_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_region_schema);
90fn make_region_schema() -> SchemaRef {
91    Arc::new(Schema::new(vec![
92        Field::new("r_regionkey", DataType::Int64, false),
93        Field::new("r_name", DataType::Utf8View, false),
94        Field::new("r_comment", DataType::Utf8View, false),
95    ]))
96}