tpchgen_arrow/
customer.rs

1use crate::conversions::{decimal128_array_from_iter, string_view_array_from_display_iter};
2use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
3use arrow::array::{Int64Array, RecordBatch};
4use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
5use std::sync::{Arc, LazyLock};
6use tpchgen::generators::{CustomerGenerator, CustomerGeneratorIterator};
7
8/// Generate [`Customer`]s in [`RecordBatch`] format
9///
10/// [`Customer`]: tpchgen::generators::Customer
11///
12/// # Example
13/// ```
14/// # use tpchgen::generators::{CustomerGenerator};
15/// # use tpchgen_arrow::CustomerArrow;
16///
17/// // Create a SF=1.0 generator and wrap it in an Arrow generator
18/// let generator = CustomerGenerator::new(1.0, 1, 1);
19/// let mut arrow_generator = CustomerArrow::new(generator)
20///   .with_batch_size(10);
/// // Read the first batch (10 rows)
22/// let batch = arrow_generator.next().unwrap();
23/// // compare the output by pretty printing it
24/// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch])
25///   .unwrap()
26///   .to_string();
27/// let lines = formatted_batches.lines().collect::<Vec<_>>();
28/// assert_eq!(lines, vec![
29///   "+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+-------------------------------------------------------------------------------------------------------------------+",
30///   "| c_custkey | c_name             | c_address                             | c_nationkey | c_phone         | c_acctbal | c_mktsegment | c_comment                                                                                                         |",
31///   "+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+-------------------------------------------------------------------------------------------------------------------+",
32///   "| 1         | Customer#000000001 | IVhzIApeRb ot,c,E                     | 15          | 25-989-741-2988 | 711.56    | BUILDING     | to the even, regular platelets. regular, ironic epitaphs nag e                                                    |",
33///   "| 2         | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak        | 13          | 23-768-687-3665 | 121.65    | AUTOMOBILE   | l accounts. blithely ironic theodolites integrate boldly: caref                                                   |",
34///   "| 3         | Customer#000000003 | MG9kdTD2WBHm                          | 1           | 11-719-748-3364 | 7498.12   | AUTOMOBILE   |  deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov            |",
35///   "| 4         | Customer#000000004 | XxVSJsLAGtn                           | 4           | 14-128-190-5944 | 2866.83   | MACHINERY    |  requests. final, regular ideas sleep final accou                                                                 |",
36///   "| 5         | Customer#000000005 | KvpyuHCplrB84WgAiGV6sYpZq7Tj          | 3           | 13-750-942-6364 | 794.47    | HOUSEHOLD    | n accounts will have to unwind. foxes cajole accor                                                                |",
37///   "| 6         | Customer#000000006 | sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn  | 20          | 30-114-968-4951 | 7638.57   | AUTOMOBILE   | tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious          |",
38///   "| 7         | Customer#000000007 | TcGe5gaZNgVePxU5kRrvXBfkasDTea        | 18          | 28-190-982-9759 | 9561.95   | AUTOMOBILE   | ainst the ironic, express theodolites. express, even pinto beans among the exp                                    |",
39///   "| 8         | Customer#000000008 | I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5 | 17          | 27-147-574-9335 | 6819.74   | BUILDING     | among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide |",
40///   "| 9         | Customer#000000009 | xKiAFTjUsCuxfeleNqefumTrjS            | 8           | 18-338-906-3675 | 8324.07   | FURNITURE    | r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl                     |",
41///   "| 10        | Customer#000000010 | 6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2    | 5           | 15-741-346-9870 | 2753.54   | HOUSEHOLD    | es regular deposits haggle. fur                                                                                   |",
42///   "+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+-------------------------------------------------------------------------------------------------------------------+",
43///   ]);
44/// ```
45pub struct CustomerArrow {
46    inner: CustomerGeneratorIterator<'static>,
47    batch_size: usize,
48}
49
50impl CustomerArrow {
51    pub fn new(generator: CustomerGenerator<'static>) -> Self {
52        Self {
53            inner: generator.iter(),
54            batch_size: DEFAULT_BATCH_SIZE,
55        }
56    }
57
58    /// Set the batch size
59    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
60        self.batch_size = batch_size;
61        self
62    }
63}
64
65impl RecordBatchIterator for CustomerArrow {
66    fn schema(&self) -> &SchemaRef {
67        &CUSTOMER_SCHEMA
68    }
69}
70
71impl Iterator for CustomerArrow {
72    type Item = RecordBatch;
73
74    fn next(&mut self) -> Option<Self::Item> {
75        // Get next rows to convert
76        let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect();
77        if rows.is_empty() {
78            return None;
79        }
80
81        let c_custkey = Int64Array::from_iter_values(rows.iter().map(|r| r.c_custkey));
82        let c_name = string_view_array_from_display_iter(rows.iter().map(|r| r.c_name));
83        let c_address = string_view_array_from_display_iter(rows.iter().map(|r| &r.c_address));
84        let c_nationkey = Int64Array::from_iter_values(rows.iter().map(|r| r.c_nationkey));
85        let c_phone = string_view_array_from_display_iter(rows.iter().map(|r| &r.c_phone));
86        let c_acctbal = decimal128_array_from_iter(rows.iter().map(|r| r.c_acctbal));
87        let c_mktsegment = string_view_array_from_display_iter(rows.iter().map(|r| r.c_mktsegment));
88        let c_comment = string_view_array_from_display_iter(rows.iter().map(|r| r.c_comment));
89
90        let batch = RecordBatch::try_new(
91            Arc::clone(self.schema()),
92            vec![
93                Arc::new(c_custkey),
94                Arc::new(c_name),
95                Arc::new(c_address),
96                Arc::new(c_nationkey),
97                Arc::new(c_phone),
98                Arc::new(c_acctbal),
99                Arc::new(c_mktsegment),
100                Arc::new(c_comment),
101            ],
102        )
103        .unwrap();
104        Some(batch)
105    }
106}
107
108/// Schema for the Customer
109static CUSTOMER_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_customer_schema);
110fn make_customer_schema() -> SchemaRef {
111    Arc::new(Schema::new(vec![
112        Field::new("c_custkey", DataType::Int64, false),
113        Field::new("c_name", DataType::Utf8View, false),
114        Field::new("c_address", DataType::Utf8View, false),
115        Field::new("c_nationkey", DataType::Int64, false),
116        Field::new("c_phone", DataType::Utf8View, false),
117        Field::new("c_acctbal", DataType::Decimal128(15, 2), false),
118        Field::new("c_mktsegment", DataType::Utf8View, false),
119        Field::new("c_comment", DataType::Utf8View, false),
120    ]))
121}