tpchgen_arrow/
lib.rs

1//! Generate TPCH data as Arrow RecordBatches
2//!
//! This crate provides generators for TPCH tables that directly produce
//! Arrow [`RecordBatch`]es. This is significantly faster than generating TBL or CSV
//! files and then parsing them into Arrow.
6//!
7//! # Example
8//! ```
9//! # use tpchgen::generators::LineItemGenerator;
10//! # use tpchgen_arrow::LineItemArrow;
11//! # use arrow::util::pretty::pretty_format_batches;
12//! // Create a SF=1 generator for the LineItem table
13//! let generator = LineItemGenerator::new(1.0, 1, 1);
14//! let mut arrow_generator = LineItemArrow::new(generator)
15//!   .with_batch_size(10);
16//! // The generator is a Rust iterator, producing RecordBatch
17//! let batch = arrow_generator.next().unwrap();
18//! // compare the output by pretty printing it
19//! let formatted_batches = pretty_format_batches(&[batch]).unwrap().to_string();
20//! assert_eq!(formatted_batches.lines().collect::<Vec<_>>(), vec![
21//!   "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+",
22//!   "| l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct    | l_shipmode | l_comment                           |",
23//!   "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+",
24//!   "| 1          | 155190    | 7706      | 1            | 17.00      | 21168.23        | 0.04       | 0.02  | N            | O            | 1996-03-13 | 1996-02-12   | 1996-03-22    | DELIVER IN PERSON | TRUCK      | egular courts above the             |",
25//!   "| 1          | 67310     | 7311      | 2            | 36.00      | 45983.16        | 0.09       | 0.06  | N            | O            | 1996-04-12 | 1996-02-28   | 1996-04-20    | TAKE BACK RETURN  | MAIL       | ly final dependencies: slyly bold   |",
26//!   "| 1          | 63700     | 3701      | 3            | 8.00       | 13309.60        | 0.10       | 0.02  | N            | O            | 1996-01-29 | 1996-03-05   | 1996-01-31    | TAKE BACK RETURN  | REG AIR    | riously. regular, express dep       |",
27//!   "| 1          | 2132      | 4633      | 4            | 28.00      | 28955.64        | 0.09       | 0.06  | N            | O            | 1996-04-21 | 1996-03-30   | 1996-05-16    | NONE              | AIR        | lites. fluffily even de             |",
28//!   "| 1          | 24027     | 1534      | 5            | 24.00      | 22824.48        | 0.10       | 0.04  | N            | O            | 1996-03-30 | 1996-03-14   | 1996-04-01    | NONE              | FOB        |  pending foxes. slyly re            |",
29//!   "| 1          | 15635     | 638       | 6            | 32.00      | 49620.16        | 0.07       | 0.02  | N            | O            | 1996-01-30 | 1996-02-07   | 1996-02-03    | DELIVER IN PERSON | MAIL       | arefully slyly ex                   |",
30//!   "| 2          | 106170    | 1191      | 1            | 38.00      | 44694.46        | 0.00       | 0.05  | N            | O            | 1997-01-28 | 1997-01-14   | 1997-02-02    | TAKE BACK RETURN  | RAIL       | ven requests. deposits breach a     |",
31//!   "| 3          | 4297      | 1798      | 1            | 45.00      | 54058.05        | 0.06       | 0.00  | R            | F            | 1994-02-02 | 1994-01-04   | 1994-02-23    | NONE              | AIR        | ongside of the furiously brave acco |",
32//!   "| 3          | 19036     | 6540      | 2            | 49.00      | 46796.47        | 0.10       | 0.00  | R            | F            | 1993-11-09 | 1993-12-20   | 1993-11-24    | TAKE BACK RETURN  | RAIL       |  unusual accounts. eve              |",
33//!   "| 3          | 128449    | 3474      | 3            | 27.00      | 39890.88        | 0.06       | 0.07  | A            | F            | 1994-01-16 | 1993-11-22   | 1994-01-23    | DELIVER IN PERSON | SHIP       | nal foxes wake.                     |",
34//!   "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+"
35//! ]);
36//! ```
37pub mod conversions;
38mod customer;
39mod lineitem;
40mod nation;
41mod order;
42mod part;
43mod partsupp;
44mod region;
45mod supplier;
46
47use arrow::array::RecordBatch;
48use arrow::datatypes::SchemaRef;
49pub use customer::CustomerArrow;
50pub use lineitem::LineItemArrow;
51pub use nation::NationArrow;
52pub use order::OrderArrow;
53pub use part::PartArrow;
54pub use partsupp::PartSuppArrow;
55pub use region::RegionArrow;
56pub use supplier::SupplierArrow;
57
58/// Iterator of Arrow [`RecordBatch`] that also knows its schema
59pub trait RecordBatchIterator: Iterator<Item = RecordBatch> + Send {
60    fn schema(&self) -> &SchemaRef;
61}
62
/// Default row count for each batch (8,000 rows)
pub const DEFAULT_BATCH_SIZE: usize = 8_000;