tpchgen_arrow/customer.rs
1use crate::conversions::{decimal128_array_from_iter, string_view_array_from_display_iter};
2use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
3use arrow::array::{Int64Array, RecordBatch};
4use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
5use std::sync::{Arc, LazyLock};
6use tpchgen::generators::{CustomerGenerator, CustomerGeneratorIterator};
7
8/// Generate [`Customer`]s in [`RecordBatch`] format
9///
10/// [`Customer`]: tpchgen::generators::Customer
11///
12/// # Example
13/// ```
14/// # use tpchgen::generators::{CustomerGenerator};
15/// # use tpchgen_arrow::CustomerArrow;
16///
17/// // Create a SF=1.0 generator and wrap it in an Arrow generator
18/// let generator = CustomerGenerator::new(1.0, 1, 1);
19/// let mut arrow_generator = CustomerArrow::new(generator)
20/// .with_batch_size(10);
/// // Read the first batch (up to 10 rows)
22/// let batch = arrow_generator.next().unwrap();
23/// // compare the output by pretty printing it
24/// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch])
25/// .unwrap()
26/// .to_string();
27/// let lines = formatted_batches.lines().collect::<Vec<_>>();
28/// assert_eq!(lines, vec![
29/// "+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+-------------------------------------------------------------------------------------------------------------------+",
30/// "| c_custkey | c_name | c_address | c_nationkey | c_phone | c_acctbal | c_mktsegment | c_comment |",
31/// "+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+-------------------------------------------------------------------------------------------------------------------+",
32/// "| 1 | Customer#000000001 | IVhzIApeRb ot,c,E | 15 | 25-989-741-2988 | 711.56 | BUILDING | to the even, regular platelets. regular, ironic epitaphs nag e |",
33/// "| 2 | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak | 13 | 23-768-687-3665 | 121.65 | AUTOMOBILE | l accounts. blithely ironic theodolites integrate boldly: caref |",
34/// "| 3 | Customer#000000003 | MG9kdTD2WBHm | 1 | 11-719-748-3364 | 7498.12 | AUTOMOBILE | deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov |",
35/// "| 4 | Customer#000000004 | XxVSJsLAGtn | 4 | 14-128-190-5944 | 2866.83 | MACHINERY | requests. final, regular ideas sleep final accou |",
36/// "| 5 | Customer#000000005 | KvpyuHCplrB84WgAiGV6sYpZq7Tj | 3 | 13-750-942-6364 | 794.47 | HOUSEHOLD | n accounts will have to unwind. foxes cajole accor |",
37/// "| 6 | Customer#000000006 | sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn | 20 | 30-114-968-4951 | 7638.57 | AUTOMOBILE | tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious |",
38/// "| 7 | Customer#000000007 | TcGe5gaZNgVePxU5kRrvXBfkasDTea | 18 | 28-190-982-9759 | 9561.95 | AUTOMOBILE | ainst the ironic, express theodolites. express, even pinto beans among the exp |",
39/// "| 8 | Customer#000000008 | I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5 | 17 | 27-147-574-9335 | 6819.74 | BUILDING | among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide |",
40/// "| 9 | Customer#000000009 | xKiAFTjUsCuxfeleNqefumTrjS | 8 | 18-338-906-3675 | 8324.07 | FURNITURE | r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl |",
41/// "| 10 | Customer#000000010 | 6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2 | 5 | 15-741-346-9870 | 2753.54 | HOUSEHOLD | es regular deposits haggle. fur |",
42/// "+-----------+--------------------+---------------------------------------+-------------+-----------------+-----------+--------------+-------------------------------------------------------------------------------------------------------------------+",
43/// ]);
44/// ```
pub struct CustomerArrow {
    /// Row-level customer iterator that `next` drains to fill each batch.
    inner: CustomerGeneratorIterator<'static>,
    /// Maximum number of rows pulled from `inner` for a single [`RecordBatch`].
    batch_size: usize,
}
49
50impl CustomerArrow {
51 pub fn new(generator: CustomerGenerator<'static>) -> Self {
52 Self {
53 inner: generator.iter(),
54 batch_size: DEFAULT_BATCH_SIZE,
55 }
56 }
57
58 /// Set the batch size
59 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
60 self.batch_size = batch_size;
61 self
62 }
63}
64
65impl RecordBatchIterator for CustomerArrow {
66 fn schema(&self) -> &SchemaRef {
67 &CUSTOMER_SCHEMA
68 }
69}
70
71impl Iterator for CustomerArrow {
72 type Item = RecordBatch;
73
74 fn next(&mut self) -> Option<Self::Item> {
75 // Get next rows to convert
76 let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect();
77 if rows.is_empty() {
78 return None;
79 }
80
81 let c_custkey = Int64Array::from_iter_values(rows.iter().map(|r| r.c_custkey));
82 let c_name = string_view_array_from_display_iter(rows.iter().map(|r| r.c_name));
83 let c_address = string_view_array_from_display_iter(rows.iter().map(|r| &r.c_address));
84 let c_nationkey = Int64Array::from_iter_values(rows.iter().map(|r| r.c_nationkey));
85 let c_phone = string_view_array_from_display_iter(rows.iter().map(|r| &r.c_phone));
86 let c_acctbal = decimal128_array_from_iter(rows.iter().map(|r| r.c_acctbal));
87 let c_mktsegment = string_view_array_from_display_iter(rows.iter().map(|r| r.c_mktsegment));
88 let c_comment = string_view_array_from_display_iter(rows.iter().map(|r| r.c_comment));
89
90 let batch = RecordBatch::try_new(
91 Arc::clone(self.schema()),
92 vec![
93 Arc::new(c_custkey),
94 Arc::new(c_name),
95 Arc::new(c_address),
96 Arc::new(c_nationkey),
97 Arc::new(c_phone),
98 Arc::new(c_acctbal),
99 Arc::new(c_mktsegment),
100 Arc::new(c_comment),
101 ],
102 )
103 .unwrap();
104 Some(batch)
105 }
106}
107
108/// Schema for the Customer
109static CUSTOMER_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_customer_schema);
110fn make_customer_schema() -> SchemaRef {
111 Arc::new(Schema::new(vec![
112 Field::new("c_custkey", DataType::Int64, false),
113 Field::new("c_name", DataType::Utf8View, false),
114 Field::new("c_address", DataType::Utf8View, false),
115 Field::new("c_nationkey", DataType::Int64, false),
116 Field::new("c_phone", DataType::Utf8View, false),
117 Field::new("c_acctbal", DataType::Decimal128(15, 2), false),
118 Field::new("c_mktsegment", DataType::Utf8View, false),
119 Field::new("c_comment", DataType::Utf8View, false),
120 ]))
121}