spatialbench_arrow/
lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Generate Spatial Bench data as Arrow RecordBatches
19//!
20//! This crate provides generators for Spatial Bench tables that directly produce
21//! Arrow [`RecordBatch`]es. This is significantly faster than generating TBL or CSV
22//! files and then parsing them into Arrow.
23//!
24//! # Example
25//! ```
26//! # use spatialbench::generators::TripGenerator;
27//! # use spatialbench_arrow::TripArrow;
28//! # use arrow::util::pretty::pretty_format_batches;
29//! // Create a SF=0.01 generator for the Trip table
30//! let generator = TripGenerator::new(0.01, 1, 1);
31//! let mut arrow_generator = TripArrow::new(generator)
32//!   .with_batch_size(10);
33//! // The generator is a Rust iterator, producing RecordBatch
34//! let batch = arrow_generator.next().unwrap();
35//! // compare the output by pretty printing it
36//! let formatted_batches = pretty_format_batches(&[batch]).unwrap().to_string();
37//! assert_eq!(formatted_batches.lines().collect::<Vec<_>>(), vec![
38//!   "+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+",
39//!   "| t_tripkey | t_custkey | t_driverkey | t_vehiclekey | t_pickuptime        | t_dropofftime       | t_fare  | t_tip   | t_totalamount | t_distance | t_pickuploc                                | t_dropoffloc                               |",
40//!   "+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+",
41//!   "| 1         | 215       | 1           | 1            | 1997-07-24T06:58:22 | 1997-07-24T13:59:54 | 0.00034 | 0.00002 | 0.00037       | 0.00014    | 01010000009f3c318dd43735405930592bc6062040 | 0101000000d408a2934a34354083fa96395d7e1f40 |",
42//!   "| 2         | 172       | 1           | 1            | 1997-12-24T08:47:14 | 1997-12-24T09:28:57 | 0.00003 | 0.00000 | 0.00004       | 0.00001    | 010100000066ea0ba7209b5740dc070cd122e33d40 | 01010000007f720caf019c57407cf24d26b0e33d40 |",
43//!   "| 3         | 46        | 1           | 1            | 1993-06-27T13:27:07 | 1993-06-27T13:34:51 | 0.00000 | 0.00000 | 0.00000       | 0.00000    | 010100000003cc2607066e5b40f26f32d2d4d0ff3f | 01010000009be61da7e86d5b407ac002b940c9ff3f |",
44//!   "| 4         | 40        | 1           | 1            | 1996-08-02T04:14:27 | 1996-08-02T05:29:32 | 0.00005 | 0.00000 | 0.00005       | 0.00002    | 0101000000897921e03a0e4cc08816c7ebfbc745c0 | 0101000000c4f5ffdc5d0f4cc0eb6b23bffaca45c0 |",
45//!   "| 5         | 232       | 1           | 1            | 1996-08-23T12:48:20 | 1996-08-23T13:36:15 | 0.00002 | 0.00000 | 0.00003       | 0.00001    | 0101000000ff3ea1a62fcb4dc014fc64fc630b2ac0 | 0101000000f3e32f2deacc4dc026e9714a06072ac0 |",
46//!   "| 6         | 46        | 1           | 1            | 1994-11-16T16:39:14 | 1994-11-16T17:26:07 | 0.00003 | 0.00000 | 0.00003       | 0.00001    | 0101000000855c114bb6562440b2810ccef493d83f | 0101000000ac47af40d3522440915fa2eec173d93f |",
47//!   "| 7         | 284       | 1           | 1            | 1996-01-20T06:18:56 | 1996-01-20T06:18:56 | 0.00000 | 0.00000 | 0.00000       | 0.00000    | 010100000088904b0024855a40ea87d1a6fc8b4340 | 01010000000bdc4f0024855a40f01edaa6fc8b4340 |",
48//!   "| 8         | 233       | 1           | 1            | 1995-01-09T23:26:54 | 1995-01-10T00:16:28 | 0.00003 | 0.00000 | 0.00003       | 0.00001    | 010100000002f5d829e5845640f2770bfe6053f8bf | 01010000003b74b489d78356402a9b07ea7359f8bf |",
49//!   "| 9         | 178       | 1           | 1            | 1993-10-13T11:07:04 | 1993-10-13T12:42:27 | 0.00005 | 0.00001 | 0.00007       | 0.00003    | 0101000000dd8b0712968e49c061d63d122c131640 | 0101000000ec67d222328e49c0d7fd9dccc3f21540 |",
50//!   "| 10        | 118       | 1           | 1            | 1994-11-08T21:05:58 | 1994-11-08T21:21:29 | 0.00001 | 0.00000 | 0.00001       | 0.00000    | 0101000000df66d30c07e75940ff4f81705eca3c40 | 01010000003469ae2e42e75940c49e2c6b51cb3c40 |",
51//!   "+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+"
52//! ]);
53//! ```
54
55mod building;
56pub mod conversions;
57mod customer;
58mod driver;
59mod trip;
60mod vehicle;
61
62use arrow::array::RecordBatch;
63use arrow::datatypes::SchemaRef;
64pub use building::BuildingArrow;
65pub use customer::CustomerArrow;
66pub use driver::DriverArrow;
67pub use trip::TripArrow;
68pub use vehicle::VehicleArrow;
69
70/// Iterator of Arrow [`RecordBatch`]es that also knows its schema.
///
/// Implementors must be an [`Iterator`] whose items are [`RecordBatch`] values
/// and must also be [`Send`], as required by the supertrait bounds below.
71pub trait RecordBatchIterator: Iterator<Item = RecordBatch> + Send {
    /// Returns a reference to the [`SchemaRef`] held by this iterator.
    ///
    /// NOTE(review): presumably this is the schema shared by every batch the
    /// iterator yields; confirm against the implementing generators.
72    fn schema(&self) -> &SchemaRef;
73}
74
75/// The default number of rows in each batch (`8 * 1000`, i.e. 8,000 rows).
///
/// NOTE(review): presumably the batch size the `*Arrow` generators use unless
/// overridden (e.g. via `with_batch_size`); confirm in the generator modules.
76pub const DEFAULT_BATCH_SIZE: usize = 8 * 1000;