tpchgen_arrow/
conversions.rs

1//! Routines to convert TPCH types to Arrow types
2
3use arrow::array::{StringViewArray, StringViewBuilder};
4use std::fmt::Write;
5use tpchgen::dates::TPCHDate;
6use tpchgen::decimal::TPCHDecimal;
7
8/// Convert a TPCHDecimal to an Arrow Decimal(15,2)
9#[inline(always)]
10pub fn to_arrow_decimal(value: TPCHDecimal) -> i128 {
11    // TPCH decimals are stored as i64 with 2 decimal places, so
12    // we can simply convert to i128 directly
13    value.into_inner() as i128
14}
15
16/// Convert a TPCH date to an Arrow Date32
17#[inline(always)]
18pub fn to_arrow_date32(value: TPCHDate) -> i32 {
19    value.into_inner() + TPCHDATE_TO_DATE32_OFFSET
20}
21
22/// Converts an iterator of TPCH decimals to an Arrow Decimal128Array
23pub fn decimal128_array_from_iter<I>(values: I) -> arrow::array::Decimal128Array
24where
25    I: Iterator<Item = TPCHDecimal>,
26{
27    let values = values.map(to_arrow_decimal);
28    arrow::array::Decimal128Array::from_iter_values(values)
29        .with_precision_and_scale(15, 2)
30        // safe to unwrap because 15,2 is within the valid range for Decimal128 (38)
31        .unwrap()
32}
33
34/// Coverts an iterator of displayable values to an Arrow StringViewArray
35///
36/// This results in an extra copy of the data, which could be avoided for some types
37pub fn string_view_array_from_display_iter<I>(values: I) -> StringViewArray
38where
39    I: Iterator<Item: std::fmt::Display>,
40{
41    let mut buffer = String::new();
42    let values = values.into_iter();
43    let size_hint = values.size_hint().0;
44    let mut builder = StringViewBuilder::with_capacity(size_hint);
45    for v in values {
46        buffer.clear();
47        write!(&mut buffer, "{v}").unwrap();
48        builder.append_value(&buffer);
49    }
50    builder.finish()
51}
52
/// Day offset that converts a TPCH date into an Arrow `Date32` value.
///
/// The two representations count days from different starting points:
///
/// * Arrow `Date32` counts days since the epoch (1970-01-01)
/// * [`TPCHDate`] counts days since MIN_GENERATE_DATE (1992-01-01)
///
/// Adding this constant to a TPCH date yields the matching `Date32`. The
/// value is `8035` because `1992-01-01` is `8035` days after `1970-01-01`:
///
/// ```
/// use chrono::NaiveDate;
/// use tpchgen_arrow::conversions::TPCHDATE_TO_DATE32_OFFSET;
/// let arrow_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
/// let tpch_epoch = NaiveDate::from_ymd_opt(1992, 1, 1).unwrap();
/// // the difference between the two epochs is 8035 days
/// let day_offset = (tpch_epoch - arrow_epoch).num_days();
/// let day_offset: i32 = day_offset.try_into().unwrap();
/// assert_eq!(day_offset, TPCHDATE_TO_DATE32_OFFSET);
/// ```
pub const TPCHDATE_TO_DATE32_OFFSET: i32 = 8035;
70
// Unit tests checking the scalar conversion helpers against known values.
#[cfg(test)]
mod tests {
    use super::*;
    use tpchgen::dates::MIN_GENERATE_DATE;

    #[test]
    fn test_to_arrow_decimal() {
        let converted = to_arrow_decimal(TPCHDecimal::new(123456789));
        assert_eq!(converted, 123456789);
    }

    #[test]
    fn test_to_arrow_date32() {
        // (days past MIN_GENERATE_DATE, expected Date32 value)
        let cases = [(0, 8035), (100, 8135), (1234, 9269)];
        for (days_past_min, expected) in cases {
            let date = TPCHDate::new(MIN_GENERATE_DATE + days_past_min);
            assert_eq!(to_arrow_date32(date), expected);
        }
    }
}