Skip to main content

spatialbench_arrow/
customer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::conversions::string_view_array_from_display_iter;
19use crate::{RecordBatchIterator, DEFAULT_BATCH_SIZE};
20use arrow::array::{Int64Array, RecordBatch, StringViewArray};
21use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
22use spatialbench::generators::{CustomerGenerator, CustomerGeneratorIterator};
23use std::sync::{Arc, LazyLock};
24
25/// Generate [`Customer`]s in [`RecordBatch`] format
26///
27/// [`Customer`]: spatialbench::generators::Customer
28///
29/// # Example
30/// ```
31/// # use spatialbench::generators::{CustomerGenerator};
32/// # use spatialbench_arrow::CustomerArrow;
33///
34/// // Create a SF=1.0 generator and wrap it in an Arrow generator
35/// let generator = CustomerGenerator::new(1.0, 1, 1);
36/// let mut arrow_generator = CustomerArrow::new(generator)
37///   .with_batch_size(10);
38/// // Read the first 10 batches
39/// let batch = arrow_generator.next().unwrap();
40/// // compare the output by pretty printing it
41/// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch])
42///   .unwrap()
43///   .to_string();
44/// let lines = formatted_batches.lines().collect::<Vec<_>>();
45/// assert_eq!(lines, vec![
46///   "+-----------+--------------------+---------------------------------------+-------------+--------------+-----------------+",
47///   "| c_custkey | c_name             | c_address                             | c_region    | c_nation     | c_phone         |",
48///   "+-----------+--------------------+---------------------------------------+-------------+--------------+-----------------+",
49///   "| 1         | Customer#000000001 | IVhzIApeRb ot,c,E                     | AFRICA      | MOROCCO      | 25-989-741-2988 |",
50///   "| 2         | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak        | MIDDLE EAST | JORDAN       | 23-768-687-3665 |",
51///   "| 3         | Customer#000000003 | MG9kdTD2WBHm                          | AMERICA     | ARGENTINA    | 11-719-748-3364 |",
52///   "| 4         | Customer#000000004 | XxVSJsLAGtn                           | MIDDLE EAST | EGYPT        | 14-128-190-5944 |",
53///   "| 5         | Customer#000000005 | KvpyuHCplrB84WgAiGV6sYpZq7Tj          | AMERICA     | CANADA       | 13-750-942-6364 |",
54///   "| 6         | Customer#000000006 | sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn  | MIDDLE EAST | SAUDI ARABIA | 30-114-968-4951 |",
55///   "| 7         | Customer#000000007 | TcGe5gaZNgVePxU5kRrvXBfkasDTea        | ASIA        | CHINA        | 28-190-982-9759 |",
56///   "| 8         | Customer#000000008 | I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5 | AMERICA     | PERU         | 27-147-574-9335 |",
57///   "| 9         | Customer#000000009 | xKiAFTjUsCuxfeleNqefumTrjS            | ASIA        | INDIA        | 18-338-906-3675 |",
58///   "| 10        | Customer#000000010 | 6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2    | AFRICA      | ETHIOPIA     | 15-741-346-9870 |",
59///   "+-----------+--------------------+---------------------------------------+-------------+--------------+-----------------+"
60///   ]);
61/// ```
62pub struct CustomerArrow {
63    inner: CustomerGeneratorIterator<'static>,
64    batch_size: usize,
65}
66
67impl CustomerArrow {
68    pub fn new(generator: CustomerGenerator<'static>) -> Self {
69        Self {
70            inner: generator.iter(),
71            batch_size: DEFAULT_BATCH_SIZE,
72        }
73    }
74
75    /// Set the batch size
76    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
77        self.batch_size = batch_size;
78        self
79    }
80}
81
82impl RecordBatchIterator for CustomerArrow {
83    fn schema(&self) -> &SchemaRef {
84        &CUSTOMER_SCHEMA
85    }
86}
87
88impl Iterator for CustomerArrow {
89    type Item = RecordBatch;
90
91    fn next(&mut self) -> Option<Self::Item> {
92        // Get next rows to convert
93        let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect();
94        if rows.is_empty() {
95            return None;
96        }
97
98        let c_custkey = Int64Array::from_iter_values(rows.iter().map(|r| r.c_custkey));
99        let c_name = string_view_array_from_display_iter(rows.iter().map(|r| r.c_name));
100        let c_address = string_view_array_from_display_iter(rows.iter().map(|r| &r.c_address));
101        let c_region = StringViewArray::from_iter_values(rows.iter().map(|r| r.c_region));
102        let c_nation = StringViewArray::from_iter_values(rows.iter().map(|r| r.c_nation));
103        let c_phone = string_view_array_from_display_iter(rows.iter().map(|r| &r.c_phone));
104
105        let batch = RecordBatch::try_new(
106            Arc::clone(self.schema()),
107            vec![
108                Arc::new(c_custkey),
109                Arc::new(c_name),
110                Arc::new(c_address),
111                Arc::new(c_region),
112                Arc::new(c_nation),
113                Arc::new(c_phone),
114            ],
115        )
116        .unwrap();
117        Some(batch)
118    }
119}
120
121/// Schema for the Customer
122static CUSTOMER_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_customer_schema);
123fn make_customer_schema() -> SchemaRef {
124    Arc::new(Schema::new(vec![
125        Field::new("c_custkey", DataType::Int64, false),
126        Field::new("c_name", DataType::Utf8View, false),
127        Field::new("c_address", DataType::Utf8View, false),
128        Field::new("c_region", DataType::Utf8View, false),
129        Field::new("c_nation", DataType::Utf8View, false),
130        Field::new("c_phone", DataType::Utf8View, false),
131    ]))
132}