tpchgen_arrow/
supplier.rs

1use crate::conversions::{decimal128_array_from_iter, string_view_array_from_display_iter};
2use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
3use arrow::array::{Int64Array, RecordBatch};
4use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
5use std::sync::{Arc, LazyLock};
6use tpchgen::generators::{SupplierGenerator, SupplierGeneratorIterator};
7
8/// Generate [`Supplier`]s in [`RecordBatch`] format
9///
10/// [`Supplier`]: tpchgen::generators::Supplier
11///
12/// # Example:
13/// ```
14/// # use tpchgen::generators::{SupplierGenerator};
15/// # use tpchgen_arrow::SupplierArrow;
16///
17/// // Create a SF=1.0 generator and wrap it in an Arrow generator
18/// let generator = SupplierGenerator::new(1.0, 1, 1);
19/// let mut arrow_generator = SupplierArrow::new(generator)
20///   .with_batch_size(10);
/// // Read the first batch (10 rows)
22/// let batch = arrow_generator.next().unwrap();
23/// // compare the output by pretty printing it
24/// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch])
25///   .unwrap()
26///   .to_string();
27/// let lines = formatted_batches.lines().collect::<Vec<_>>();
28/// assert_eq!(lines, vec![
29///   "+-----------+--------------------+-------------------------------------+-------------+-----------------+-----------+-----------------------------------------------------------------------------------------------------+", "| s_suppkey | s_name             | s_address                           | s_nationkey | s_phone         | s_acctbal | s_comment                                                                                           |", "+-----------+--------------------+-------------------------------------+-------------+-----------------+-----------+-----------------------------------------------------------------------------------------------------+", "| 1         | Supplier#000000001 |  N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ | 17          | 27-918-335-1736 | 5755.94   | each slyly above the careful                                                                        |", "| 2         | Supplier#000000002 | 89eJ5ksX3ImxJQBvxObC,               | 5           | 15-679-861-2259 | 4032.68   |  slyly bold instructions. idle dependen                                                             |", "| 3         | Supplier#000000003 | q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3   | 1           | 11-383-516-1199 | 4192.40   | blithely silent requests after the express dependencies are sl                                      |", "| 4         | Supplier#000000004 | Bk7ah4CK8SYQTepEmvMkkgMwg           | 15          | 25-843-787-7479 | 4641.08   | riously even requests above the exp                                                                 |", "| 5         | Supplier#000000005 | Gcdm2rJRzl5qlTVzc                   | 11          | 21-151-690-3663 | -283.84   | . slyly regular pinto bea                                                                           |", "| 6         | Supplier#000000006 | tQxuVm7s7CnK                        | 14          | 24-696-997-4969 | 1365.79   | final accounts. regular dolphins use against the furiously ironic decoys.                         
  |", "| 7         | Supplier#000000007 | s,4TicNGB4uO6PaSqNBUq               | 23          | 33-990-965-2201 | 6820.35   | s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit |", "| 8         | Supplier#000000008 | 9Sq4bBH2FQEmaFOocY45sRTxo6yuoG      | 17          | 27-498-742-3860 | 7627.85   | al pinto beans. asymptotes haggl                                                                    |", "| 9         | Supplier#000000009 | 1KhUgZegwM3ua7dsYmekYBsK            | 10          | 20-403-398-8662 | 5302.37   | s. unusual, even requests along the furiously regular pac                                           |", "| 10        | Supplier#000000010 | Saygah3gYWMp72i PY                  | 24          | 34-852-489-8585 | 3891.91   | ing waters. regular requests ar                                                                     |", "+-----------+--------------------+-------------------------------------+-------------+-----------------+-----------+-----------------------------------------------------------------------------------------------------+"
30/// ]);
31/// ```
32pub struct SupplierArrow {
33    inner: SupplierGeneratorIterator<'static>,
34    batch_size: usize,
35}
36
37impl SupplierArrow {
38    pub fn new(generator: SupplierGenerator<'static>) -> Self {
39        Self {
40            inner: generator.iter(),
41            batch_size: DEFAULT_BATCH_SIZE,
42        }
43    }
44
45    /// Set the batch size
46    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
47        self.batch_size = batch_size;
48        self
49    }
50}
51
52impl RecordBatchIterator for SupplierArrow {
53    fn schema(&self) -> &SchemaRef {
54        &SUPPLIER_SCHEMA
55    }
56}
57
58impl Iterator for SupplierArrow {
59    type Item = RecordBatch;
60
61    fn next(&mut self) -> Option<Self::Item> {
62        // Get next rows to convert
63        let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect();
64        if rows.is_empty() {
65            return None;
66        }
67
68        let s_suppkey = Int64Array::from_iter_values(rows.iter().map(|r| r.s_suppkey));
69        let s_name = string_view_array_from_display_iter(rows.iter().map(|r| r.s_name));
70        let s_address = string_view_array_from_display_iter(rows.iter().map(|r| &r.s_address));
71        let s_nationkey = Int64Array::from_iter_values(rows.iter().map(|r| r.s_nationkey));
72        let s_phone = string_view_array_from_display_iter(rows.iter().map(|r| &r.s_phone));
73        let s_acctbal = decimal128_array_from_iter(rows.iter().map(|r| r.s_acctbal));
74        let s_comment = string_view_array_from_display_iter(rows.iter().map(|r| &r.s_comment));
75
76        let batch = RecordBatch::try_new(
77            Arc::clone(self.schema()),
78            vec![
79                Arc::new(s_suppkey),
80                Arc::new(s_name),
81                Arc::new(s_address),
82                Arc::new(s_nationkey),
83                Arc::new(s_phone),
84                Arc::new(s_acctbal),
85                Arc::new(s_comment),
86            ],
87        )
88        .unwrap();
89        Some(batch)
90    }
91}
92
93/// Schema for the PartSupp
94static SUPPLIER_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_supplier_schema);
95fn make_supplier_schema() -> SchemaRef {
96    Arc::new(Schema::new(vec![
97        Field::new("s_suppkey", DataType::Int64, false),
98        Field::new("s_name", DataType::Utf8View, false),
99        Field::new("s_address", DataType::Utf8View, false),
100        Field::new("s_nationkey", DataType::Int64, false),
101        Field::new("s_phone", DataType::Utf8View, false),
102        Field::new("s_acctbal", DataType::Decimal128(15, 2), false),
103        Field::new("s_comment", DataType::Utf8View, false),
104    ]))
105}