parquet2/
types.rs

1use std::convert::TryFrom;
2
3use crate::schema::types::PhysicalType;
4
5/// A physical native representation of a Parquet fixed-sized type.
6pub trait NativeType: std::fmt::Debug + Send + Sync + 'static + Copy + Clone {
7    type Bytes: AsRef<[u8]> + for<'a> TryFrom<&'a [u8], Error = std::array::TryFromSliceError>;
8
9    fn to_le_bytes(&self) -> Self::Bytes;
10
11    fn from_le_bytes(bytes: Self::Bytes) -> Self;
12
13    fn ord(&self, other: &Self) -> std::cmp::Ordering;
14
15    const TYPE: PhysicalType;
16}
17
18macro_rules! native {
19    ($type:ty, $physical_type:expr) => {
20        impl NativeType for $type {
21            type Bytes = [u8; std::mem::size_of::<Self>()];
22            #[inline]
23            fn to_le_bytes(&self) -> Self::Bytes {
24                Self::to_le_bytes(*self)
25            }
26
27            #[inline]
28            fn from_le_bytes(bytes: Self::Bytes) -> Self {
29                Self::from_le_bytes(bytes)
30            }
31
32            #[inline]
33            fn ord(&self, other: &Self) -> std::cmp::Ordering {
34                self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Equal)
35            }
36
37            const TYPE: PhysicalType = $physical_type;
38        }
39    };
40}
41
42native!(i32, PhysicalType::Int32);
43native!(i64, PhysicalType::Int64);
44native!(f32, PhysicalType::Float);
45native!(f64, PhysicalType::Double);
46
47impl NativeType for [u32; 3] {
48    const TYPE: PhysicalType = PhysicalType::Int96;
49
50    type Bytes = [u8; std::mem::size_of::<Self>()];
51    #[inline]
52    fn to_le_bytes(&self) -> Self::Bytes {
53        let mut bytes = [0; 12];
54        let first = self[0].to_le_bytes();
55        bytes[0] = first[0];
56        bytes[1] = first[1];
57        bytes[2] = first[2];
58        bytes[3] = first[3];
59        let second = self[1].to_le_bytes();
60        bytes[4] = second[0];
61        bytes[5] = second[1];
62        bytes[6] = second[2];
63        bytes[7] = second[3];
64        let third = self[2].to_le_bytes();
65        bytes[8] = third[0];
66        bytes[9] = third[1];
67        bytes[10] = third[2];
68        bytes[11] = third[3];
69        bytes
70    }
71
72    #[inline]
73    fn from_le_bytes(bytes: Self::Bytes) -> Self {
74        let mut first = [0; 4];
75        first[0] = bytes[0];
76        first[1] = bytes[1];
77        first[2] = bytes[2];
78        first[3] = bytes[3];
79        let mut second = [0; 4];
80        second[0] = bytes[4];
81        second[1] = bytes[5];
82        second[2] = bytes[6];
83        second[3] = bytes[7];
84        let mut third = [0; 4];
85        third[0] = bytes[8];
86        third[1] = bytes[9];
87        third[2] = bytes[10];
88        third[3] = bytes[11];
89        [
90            u32::from_le_bytes(first),
91            u32::from_le_bytes(second),
92            u32::from_le_bytes(third),
93        ]
94    }
95
96    #[inline]
97    fn ord(&self, other: &Self) -> std::cmp::Ordering {
98        int96_to_i64_ns(*self).ord(&int96_to_i64_ns(*other))
99    }
100}
101
/// Converts an `Int96` timestamp into nanoseconds relative to the Unix epoch.
///
/// `value[2]` is read as a Julian day number; `value[1]` and `value[0]` are the
/// high and low 32 bits of the nanoseconds within that day.
///
/// An `i64` nanosecond count only spans about 292 years on either side of the
/// epoch. For days outside that span the result wraps around (two's
/// complement) rather than panicking in builds with overflow checks enabled,
/// so every build profile returns the same value. In-range inputs are
/// unaffected.
#[inline]
pub fn int96_to_i64_ns(value: [u32; 3]) -> i64 {
    const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588;
    const SECONDS_PER_DAY: i64 = 86_400;
    const NANOS_PER_SECOND: i64 = 1_000_000_000;

    let day = i64::from(value[2]);
    let nanoseconds = (i64::from(value[1]) << 32) + i64::from(value[0]);
    // Cannot overflow: |day - epoch| < 2^32, and 2^32 * 86_400 is far below 2^63.
    let seconds = (day - JULIAN_DAY_OF_EPOCH) * SECONDS_PER_DAY;

    // This is the step that can exceed `i64`; wrap explicitly instead of trapping.
    seconds
        .wrapping_mul(NANOS_PER_SECOND)
        .wrapping_add(nanoseconds)
}
114
/// Returns the ordering of two binary values.
///
/// Values are compared lexicographically as unsigned bytes: the first
/// differing byte decides the result, and when one value is a prefix of the
/// other the shorter one orders first. Two empty values are equal.
pub fn ord_binary<'a>(a: &'a [u8], b: &'a [u8]) -> std::cmp::Ordering {
    // `Ord` for slices is exactly this lexicographic comparison, including the
    // length tie-break that zipping the two inputs byte by byte would miss.
    a.cmp(b)
}
133
134#[inline]
135pub fn decode<T: NativeType>(chunk: &[u8]) -> T {
136    let chunk: <T as NativeType>::Bytes = match chunk.try_into() {
137        Ok(v) => v,
138        Err(_) => panic!(),
139    };
140    T::from_le_bytes(chunk)
141}