spatialbench_arrow/conversions.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Routines to convert TPCH types to Arrow types
19
20use arrow::array::{StringViewArray, StringViewBuilder};
21use spatialbench::dates::TPCHDate;
22use spatialbench::decimal::TPCHDecimal;
23use std::fmt::Write;
24
25/// Convert a TPCHDecimal to an Arrow Decimal(15,2)
26#[inline(always)]
27pub fn to_arrow_decimal(value: TPCHDecimal) -> i128 {
28 // TPCH decimals are stored as i64 with 2 decimal places, so
29 // we can simply convert to i128 directly
30 value.into_inner() as i128
31}
32
33/// Convert a TPCH date to an Arrow Date32.
34///
35/// * Arrow `Date32` are days since the epoch (1970-01-01)
36/// * [`TPCHDate`]s are days since MIN_GENERATE_DATE (1992-01-01)
37///
38/// ```
39/// use chrono::NaiveDate;
40/// use spatialbench::dates::TPCHDate;
41/// let arrow_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
42/// let tpch_epoch = NaiveDate::from_ymd_opt(1992, 1, 1).unwrap();
43/// // the difference between the two epochs is 8035 days
44/// let day_offset = (tpch_epoch - arrow_epoch).num_days();
45/// let day_offset: i32 = day_offset.try_into().unwrap();
46/// assert_eq!(day_offset, TPCHDate::UNIX_EPOCH_OFFSET);
47/// ```
48#[inline(always)]
49pub fn to_arrow_date32(value: TPCHDate) -> i32 {
50 value.to_unix_epoch()
51}
52
53/// Convert a TPCH date to an Arrow Timestamp (milliseconds since Unix epoch)
54#[inline(always)]
55pub fn to_arrow_timestamp_millis(value: TPCHDate) -> i64 {
56 value.to_unix_epoch_seconds() * 1000
57}
58
59/// Converts an iterator of TPCH decimals to an Arrow Decimal128Array
60pub fn decimal128_array_from_iter<I>(values: I) -> arrow::array::Decimal128Array
61where
62 I: Iterator<Item = TPCHDecimal>,
63{
64 let values = values.map(to_arrow_decimal);
65 arrow::array::Decimal128Array::from_iter_values(values)
66 .with_precision_and_scale(15, 5)
67 // safe to unwrap because 15,2 is within the valid range for Decimal128 (38)
68 .unwrap()
69}
70
71/// Coverts an iterator of displayable values to an Arrow StringViewArray
72///
73/// This results in an extra copy of the data, which could be avoided for some types
74pub fn string_view_array_from_display_iter<I>(values: I) -> StringViewArray
75where
76 I: Iterator<Item: std::fmt::Display>,
77{
78 let mut buffer = String::new();
79 let values = values.into_iter();
80 let size_hint = values.size_hint().0;
81 let mut builder = StringViewBuilder::with_capacity(size_hint);
82 for v in values {
83 buffer.clear();
84 write!(&mut buffer, "{v}").unwrap();
85 builder.append_value(&buffer);
86 }
87 builder.finish()
88}
89
// Unit tests pinning the TPCH -> Arrow value conversions to known results
#[cfg(test)]
mod tests {
    use super::*;
    use spatialbench::dates::MIN_GENERATE_DATE;

    #[test]
    fn test_to_arrow_decimal() {
        let decimal = TPCHDecimal::new(123456789);
        let converted = to_arrow_decimal(decimal);
        assert_eq!(converted, 123456789);
    }

    #[test]
    fn test_to_arrow_timestamp_millis() {
        // (days after MIN_GENERATE_DATE, expected milliseconds since the Unix epoch)
        let cases = [
            (0, 694224000000_i64),
            (100, 702864000000),
            (1234, 800841600000),
        ];
        for (day_offset, expected_millis) in cases {
            let value = TPCHDate::new(MIN_GENERATE_DATE + day_offset, 0, 0, 0);
            assert_eq!(to_arrow_timestamp_millis(value), expected_millis);
        }
    }
}
113}