tpchgen_arrow/lib.rs
//! Generate TPCH data as Arrow RecordBatches
//!
//! This crate provides generators for TPCH tables that directly produce
//! Arrow [`RecordBatch`]es. This is significantly faster than generating TBL or CSV
//! files and then parsing them into Arrow.
6//!
7//! # Example
8//! ```
9//! # use tpchgen::generators::LineItemGenerator;
10//! # use tpchgen_arrow::LineItemArrow;
11//! # use arrow::util::pretty::pretty_format_batches;
12//! // Create a SF=1 generator for the LineItem table
13//! let generator = LineItemGenerator::new(1.0, 1, 1);
14//! let mut arrow_generator = LineItemArrow::new(generator)
15//! .with_batch_size(10);
16//! // The generator is a Rust iterator, producing RecordBatch
17//! let batch = arrow_generator.next().unwrap();
18//! // compare the output by pretty printing it
19//! let formatted_batches = pretty_format_batches(&[batch]).unwrap().to_string();
20//! assert_eq!(formatted_batches.lines().collect::<Vec<_>>(), vec![
21//! "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+",
22//! "| l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment |",
23//! "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+",
24//! "| 1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 1996-03-13 | 1996-02-12 | 1996-03-22 | DELIVER IN PERSON | TRUCK | egular courts above the |",
25//! "| 1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 1996-04-12 | 1996-02-28 | 1996-04-20 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold |",
26//! "| 1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 1996-01-29 | 1996-03-05 | 1996-01-31 | TAKE BACK RETURN | REG AIR | riously. regular, express dep |",
27//! "| 1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 1996-04-21 | 1996-03-30 | 1996-05-16 | NONE | AIR | lites. fluffily even de |",
28//! "| 1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 1996-03-30 | 1996-03-14 | 1996-04-01 | NONE | FOB | pending foxes. slyly re |",
29//! "| 1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 1996-01-30 | 1996-02-07 | 1996-02-03 | DELIVER IN PERSON | MAIL | arefully slyly ex |",
30//! "| 2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 1997-01-28 | 1997-01-14 | 1997-02-02 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a |",
31//! "| 3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 1994-02-02 | 1994-01-04 | 1994-02-23 | NONE | AIR | ongside of the furiously brave acco |",
32//! "| 3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 1993-11-09 | 1993-12-20 | 1993-11-24 | TAKE BACK RETURN | RAIL | unusual accounts. eve |",
33//! "| 3 | 128449 | 3474 | 3 | 27.00 | 39890.88 | 0.06 | 0.07 | A | F | 1994-01-16 | 1993-11-22 | 1994-01-23 | DELIVER IN PERSON | SHIP | nal foxes wake. |",
34//! "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+"
35//! ]);
36//! ```
37pub mod conversions;
38mod customer;
39mod lineitem;
40mod nation;
41mod order;
42mod part;
43mod partsupp;
44mod region;
45mod supplier;
46
47use arrow::array::RecordBatch;
48use arrow::datatypes::SchemaRef;
49pub use customer::CustomerArrow;
50pub use lineitem::LineItemArrow;
51pub use nation::NationArrow;
52pub use order::OrderArrow;
53pub use part::PartArrow;
54pub use partsupp::PartSuppArrow;
55pub use region::RegionArrow;
56pub use supplier::SupplierArrow;
57
58/// Iterator of Arrow [`RecordBatch`] that also knows its schema
59pub trait RecordBatchIterator: Iterator<Item = RecordBatch> + Send {
60 fn schema(&self) -> &SchemaRef;
61}
62
/// The default number of rows in each batch (8,000).
pub const DEFAULT_BATCH_SIZE: usize = 8 * 1000;