Skip to main content

spatialbench_arrow/
conversions.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Routines to convert TPCH types to Arrow types
19
20use arrow::array::{StringViewArray, StringViewBuilder};
21use spatialbench::dates::TPCHDate;
22use spatialbench::decimal::TPCHDecimal;
23use std::fmt::Write;
24
25/// Convert a TPCHDecimal to an Arrow Decimal(15,2)
26#[inline(always)]
27pub fn to_arrow_decimal(value: TPCHDecimal) -> i128 {
28    // TPCH decimals are stored as i64 with 2 decimal places, so
29    // we can simply convert to i128 directly
30    value.into_inner() as i128
31}
32
/// Convert a TPCH date to an Arrow Date32.
///
/// * Arrow `Date32` are days since the epoch (1970-01-01)
/// * [`TPCHDate`]s are days since MIN_GENERATE_DATE (1992-01-01)
///
/// The conversion itself is delegated to [`TPCHDate::to_unix_epoch`]; this
/// function performs no arithmetic of its own.
///
/// # Examples
///
/// ```
/// use chrono::NaiveDate;
/// use spatialbench::dates::TPCHDate;
/// let arrow_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
/// let tpch_epoch = NaiveDate::from_ymd_opt(1992, 1, 1).unwrap();
/// // the difference between the two epochs is 8035 days
/// let day_offset = (tpch_epoch - arrow_epoch).num_days();
/// let day_offset: i32 = day_offset.try_into().unwrap();
/// assert_eq!(day_offset, TPCHDate::UNIX_EPOCH_OFFSET);
/// ```
#[inline(always)]
pub fn to_arrow_date32(value: TPCHDate) -> i32 {
    // `to_unix_epoch` is expected to apply the epoch offset shown in the
    // example above -- NOTE(review): confirm against `TPCHDate`.
    value.to_unix_epoch()
}
52
53/// Convert a TPCH date to an Arrow Timestamp (milliseconds since Unix epoch)
54#[inline(always)]
55pub fn to_arrow_timestamp_millis(value: TPCHDate) -> i64 {
56    value.to_unix_epoch_seconds() * 1000
57}
58
59/// Converts an iterator of TPCH decimals to an Arrow Decimal128Array
60pub fn decimal128_array_from_iter<I>(values: I) -> arrow::array::Decimal128Array
61where
62    I: Iterator<Item = TPCHDecimal>,
63{
64    let values = values.map(to_arrow_decimal);
65    arrow::array::Decimal128Array::from_iter_values(values)
66        .with_precision_and_scale(15, 5)
67        // safe to unwrap because 15,2 is within the valid range for Decimal128 (38)
68        .unwrap()
69}
70
71/// Converts an iterator of displayable values to an Arrow StringViewArray
72///
73/// This results in an extra copy of the data, which could be avoided for some types
74pub fn string_view_array_from_display_iter<I>(values: I) -> StringViewArray
75where
76    I: Iterator<Item: std::fmt::Display>,
77{
78    let mut buffer = String::new();
79    let values = values.into_iter();
80    let size_hint = values.size_hint().0;
81    let mut builder = StringViewBuilder::with_capacity(size_hint);
82    for v in values {
83        buffer.clear();
84        write!(&mut buffer, "{v}").unwrap();
85        builder.append_value(&buffer);
86    }
87    builder.finish()
88}
89
// Sanity checks for the scalar conversion helpers
#[cfg(test)]
mod tests {
    use super::*;
    use spatialbench::dates::MIN_GENERATE_DATE;

    /// The raw decimal value must come out of the widening unchanged.
    #[test]
    fn test_to_arrow_decimal() {
        assert_eq!(to_arrow_decimal(TPCHDecimal::new(123456789)), 123456789);
    }

    /// Midnight on several generated dates must map to the expected
    /// millisecond timestamps.
    #[test]
    fn test_to_arrow_timestamp_millis() {
        // (days after MIN_GENERATE_DATE, expected milliseconds since the Unix epoch)
        let cases = [
            (0, 694_224_000_000_i64),
            (100, 702_864_000_000_i64),
            (1234, 800_841_600_000_i64),
        ];

        for (day_offset, expected_millis) in cases {
            let value = TPCHDate::new(MIN_GENERATE_DATE + day_offset, 0, 0, 0);
            assert_eq!(to_arrow_timestamp_millis(value), expected_millis);
        }
    }
}