tpchgen_arrow/
nation.rs

1use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
2use arrow::array::{Int64Array, RecordBatch, StringViewArray};
3use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
4use std::sync::{Arc, LazyLock};
5use tpchgen::generators::{NationGenerator, NationGeneratorIterator};
6
/// Generate [`Nation`]s in [`RecordBatch`] format
///
/// [`Nation`]: tpchgen::generators::Nation
///
/// # Example
/// ```
/// # use tpchgen::generators::{NationGenerator};
/// # use tpchgen_arrow::NationArrow;
///
/// // Create a SF=1.0 generator and wrap it in an Arrow generator
/// let generator = NationGenerator::new(1.0, 1, 1);
/// let mut arrow_generator = NationArrow::new(generator)
///   .with_batch_size(10);
/// // Read the first batch, which holds 10 rows because of the batch size set above
/// let batch = arrow_generator.next().unwrap();
/// // compare the output by pretty printing it
/// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch])
///   .unwrap()
///   .to_string();
/// let lines = formatted_batches.lines().collect::<Vec<_>>();
/// assert_eq!(lines, vec![
///   "+-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+",
///   "| n_nationkey | n_name    | n_regionkey | n_comment                                                                                                          |",
///   "+-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+",
///   "| 0           | ALGERIA   | 0           |  haggle. carefully final deposits detect slyly agai                                                                |",
///   "| 1           | ARGENTINA | 1           | al foxes promise slyly according to the regular accounts. bold requests alon                                       |",
///   "| 2           | BRAZIL    | 1           | y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special         |",
///   "| 3           | CANADA    | 1           | eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold              |",
///   "| 4           | EGYPT     | 4           | y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d                |",
///   "| 5           | ETHIOPIA  | 0           | ven packages wake quickly. regu                                                                                    |",
///   "| 6           | FRANCE    | 3           | refully final requests. regular, ironi                                                                             |",
///   "| 7           | GERMANY   | 3           | l platelets. regular accounts x-ray: unusual, regular acco                                                         |",
///   "| 8           | INDIA     | 2           | ss excuses cajole slyly across the packages. deposits print aroun                                                  |",
///   "| 9           | INDONESIA | 2           |  slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull |",
///   "+-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+"
/// ]);
/// ```
pub struct NationArrow {
    /// Row-level iterator that supplies the nations converted into batches
    inner: NationGeneratorIterator<'static>,
    /// Maximum number of rows placed into each emitted [`RecordBatch`]
    batch_size: usize,
}
48
49impl NationArrow {
50    pub fn new(generator: NationGenerator<'static>) -> Self {
51        Self {
52            inner: generator.iter(),
53            batch_size: DEFAULT_BATCH_SIZE,
54        }
55    }
56
57    /// Set the batch size
58    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
59        self.batch_size = batch_size;
60        self
61    }
62}
63
64impl RecordBatchIterator for NationArrow {
65    fn schema(&self) -> &SchemaRef {
66        &NATION_SCHEMA
67    }
68}
69
70impl Iterator for NationArrow {
71    type Item = RecordBatch;
72
73    fn next(&mut self) -> Option<Self::Item> {
74        // Get next rows to convert
75        let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect();
76        if rows.is_empty() {
77            return None;
78        }
79
80        let n_nationkey = Int64Array::from_iter_values(rows.iter().map(|r| r.n_nationkey));
81        let n_name = StringViewArray::from_iter_values(rows.iter().map(|r| r.n_name));
82        let n_regionkey = Int64Array::from_iter_values(rows.iter().map(|r| r.n_regionkey));
83        let n_comment = StringViewArray::from_iter_values(rows.iter().map(|r| r.n_comment));
84
85        let batch = RecordBatch::try_new(
86            Arc::clone(self.schema()),
87            vec![
88                Arc::new(n_nationkey),
89                Arc::new(n_name),
90                Arc::new(n_regionkey),
91                Arc::new(n_comment),
92            ],
93        )
94        .unwrap();
95        Some(batch)
96    }
97}
98
99/// Schema for the Nation
100static NATION_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_nation_schema);
101fn make_nation_schema() -> SchemaRef {
102    Arc::new(Schema::new(vec![
103        Field::new("n_nationkey", DataType::Int64, false),
104        Field::new("n_name", DataType::Utf8View, false),
105        Field::new("n_regionkey", DataType::Int64, false),
106        Field::new("n_comment", DataType::Utf8View, false),
107    ]))
108}