polars_parquet/parquet/
types.rs1use arrow::types::{
2 AlignedBytes, AlignedBytesCast, Bytes12Alignment4, Bytes4Alignment4, Bytes8Alignment8,
3};
4
5use crate::parquet::schema::types::PhysicalType;
6
7pub trait NativeType:
9 std::fmt::Debug + Send + Sync + 'static + Copy + Clone + AlignedBytesCast<Self::AlignedBytes>
10{
11 type Bytes: AsRef<[u8]>
12 + bytemuck::Pod
13 + IntoIterator<Item = u8>
14 + for<'a> TryFrom<&'a [u8], Error = std::array::TryFromSliceError>
15 + std::fmt::Debug
16 + Clone
17 + Copy;
18 type AlignedBytes: AlignedBytes<Unaligned = Self::Bytes> + From<Self> + Into<Self>;
19
20 fn to_le_bytes(&self) -> Self::Bytes;
21
22 fn from_le_bytes(bytes: Self::Bytes) -> Self;
23
24 fn ord(&self, other: &Self) -> std::cmp::Ordering;
25
26 const TYPE: PhysicalType;
27}
28
29macro_rules! native {
30 ($type:ty, $unaligned:ty, $physical_type:expr) => {
31 impl NativeType for $type {
32 type Bytes = [u8; size_of::<Self>()];
33 type AlignedBytes = $unaligned;
34
35 #[inline]
36 fn to_le_bytes(&self) -> Self::Bytes {
37 Self::to_le_bytes(*self)
38 }
39
40 #[inline]
41 fn from_le_bytes(bytes: Self::Bytes) -> Self {
42 Self::from_le_bytes(bytes)
43 }
44
45 #[inline]
46 fn ord(&self, other: &Self) -> std::cmp::Ordering {
47 self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Equal)
48 }
49
50 const TYPE: PhysicalType = $physical_type;
51 }
52 };
53}
54
55native!(i32, Bytes4Alignment4, PhysicalType::Int32);
56native!(i64, Bytes8Alignment8, PhysicalType::Int64);
57native!(f32, Bytes4Alignment4, PhysicalType::Float);
58native!(f64, Bytes8Alignment8, PhysicalType::Double);
59
60impl NativeType for [u32; 3] {
61 const TYPE: PhysicalType = PhysicalType::Int96;
62
63 type Bytes = [u8; size_of::<Self>()];
64 type AlignedBytes = Bytes12Alignment4;
65
66 #[inline]
67 fn to_le_bytes(&self) -> Self::Bytes {
68 let mut bytes = [0; 12];
69 let first = self[0].to_le_bytes();
70 bytes[0] = first[0];
71 bytes[1] = first[1];
72 bytes[2] = first[2];
73 bytes[3] = first[3];
74 let second = self[1].to_le_bytes();
75 bytes[4] = second[0];
76 bytes[5] = second[1];
77 bytes[6] = second[2];
78 bytes[7] = second[3];
79 let third = self[2].to_le_bytes();
80 bytes[8] = third[0];
81 bytes[9] = third[1];
82 bytes[10] = third[2];
83 bytes[11] = third[3];
84 bytes
85 }
86
87 #[inline]
88 fn from_le_bytes(bytes: Self::Bytes) -> Self {
89 let mut first = [0; 4];
90 first[0] = bytes[0];
91 first[1] = bytes[1];
92 first[2] = bytes[2];
93 first[3] = bytes[3];
94 let mut second = [0; 4];
95 second[0] = bytes[4];
96 second[1] = bytes[5];
97 second[2] = bytes[6];
98 second[3] = bytes[7];
99 let mut third = [0; 4];
100 third[0] = bytes[8];
101 third[1] = bytes[9];
102 third[2] = bytes[10];
103 third[3] = bytes[11];
104 [
105 u32::from_le_bytes(first),
106 u32::from_le_bytes(second),
107 u32::from_le_bytes(third),
108 ]
109 }
110
111 #[inline]
112 fn ord(&self, other: &Self) -> std::cmp::Ordering {
113 int96_to_i64_ns(*self).ord(&int96_to_i64_ns(*other))
114 }
115}
116
/// Converts an Int96 value into a count of nanoseconds relative to the Unix epoch.
///
/// The input is read as `[nanos_low, nanos_high, julian_day]`: the first two
/// words form a 64-bit nanosecond count and the third is a Julian day number,
/// which is rebased onto Julian day `2_440_588`.
///
/// Most of the Int96 range does not fit into an `i64` nanosecond count. For
/// such inputs the result wraps around (two's complement) instead of
/// panicking, so the behaviour is the same in every build profile.
#[inline]
pub fn int96_to_i64_ns(value: [u32; 3]) -> i64 {
    const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588;
    const SECONDS_PER_DAY: i64 = 86_400;
    const NANOS_PER_SECOND: i64 = 1_000_000_000;

    let [nanos_low, nanos_high, julian_day] = value;

    // The shifted high word has its low 32 bits clear, so this addition cannot overflow.
    let nanoseconds = (i64::from(nanos_high) << 32) + i64::from(nanos_low);
    // A `u32` day count times 86_400 always fits in an `i64`.
    let seconds = (i64::from(julian_day) - JULIAN_DAY_OF_EPOCH) * SECONDS_PER_DAY;

    // Scaling to nanoseconds can exceed the `i64` range; wrap explicitly.
    seconds
        .wrapping_mul(NANOS_PER_SECOND)
        .wrapping_add(nanoseconds)
}
129
/// Compares two byte strings lexicographically, byte by byte as unsigned values.
///
/// An empty slice sorts before any non-empty slice, and a slice that is a
/// strict prefix of another sorts before it (e.g. `b"a" < b"ab"`).
pub fn ord_binary<'a>(a: &'a [u8], b: &'a [u8]) -> std::cmp::Ordering {
    // The standard slice ordering is exactly this comparison, including the
    // length tie-break when one input is a prefix of the other.
    a.cmp(b)
}
148
149#[inline]
150pub fn decode<T: NativeType>(chunk: &[u8]) -> T {
151 assert!(chunk.len() >= size_of::<<T as NativeType>::Bytes>());
152 unsafe { decode_unchecked(chunk) }
153}
154
155#[inline]
161pub unsafe fn decode_unchecked<T: NativeType>(chunk: &[u8]) -> T {
162 let chunk: <T as NativeType>::Bytes = unsafe { chunk.try_into().unwrap_unchecked() };
163 T::from_le_bytes(chunk)
164}