arrow2/io/parquet/write/
fixed_len_bytes.rs1use parquet2::{
2 encoding::Encoding,
3 page::DataPage,
4 schema::types::PrimitiveType,
5 statistics::{serialize_statistics, FixedLenStatistics},
6};
7
8use super::{binary::ord_binary, utils, WriteOptions};
9use crate::types::i256;
10use crate::{
11 array::{Array, FixedSizeBinaryArray, PrimitiveArray},
12 error::Result,
13 io::parquet::read::schema::is_nullable,
14};
15
16pub(crate) fn encode_plain(array: &FixedSizeBinaryArray, is_optional: bool, buffer: &mut Vec<u8>) {
17 if is_optional {
19 array.iter().for_each(|x| {
20 if let Some(x) = x {
21 buffer.extend_from_slice(x);
22 }
23 })
24 } else {
25 buffer.extend_from_slice(array.values());
26 }
27}
28
29pub fn array_to_page(
30 array: &FixedSizeBinaryArray,
31 options: WriteOptions,
32 type_: PrimitiveType,
33 statistics: Option<FixedLenStatistics>,
34) -> Result<DataPage> {
35 let is_optional = is_nullable(&type_.field_info);
36 let validity = array.validity();
37
38 let mut buffer = vec![];
39 utils::write_def_levels(
40 &mut buffer,
41 is_optional,
42 validity,
43 array.len(),
44 options.version,
45 )?;
46
47 let definition_levels_byte_length = buffer.len();
48
49 encode_plain(array, is_optional, &mut buffer);
50
51 utils::build_plain_page(
52 buffer,
53 array.len(),
54 array.len(),
55 array.null_count(),
56 0,
57 definition_levels_byte_length,
58 statistics.map(|x| serialize_statistics(&x)),
59 type_,
60 options,
61 Encoding::Plain,
62 )
63}
64
65pub(super) fn build_statistics(
66 array: &FixedSizeBinaryArray,
67 primitive_type: PrimitiveType,
68) -> FixedLenStatistics {
69 FixedLenStatistics {
70 primitive_type,
71 null_count: Some(array.null_count() as i64),
72 distinct_count: None,
73 max_value: array
74 .iter()
75 .flatten()
76 .max_by(|x, y| ord_binary(x, y))
77 .map(|x| x.to_vec()),
78 min_value: array
79 .iter()
80 .flatten()
81 .min_by(|x, y| ord_binary(x, y))
82 .map(|x| x.to_vec()),
83 }
84}
85
86pub(super) fn build_statistics_decimal(
87 array: &PrimitiveArray<i128>,
88 primitive_type: PrimitiveType,
89 size: usize,
90) -> FixedLenStatistics {
91 FixedLenStatistics {
92 primitive_type,
93 null_count: Some(array.null_count() as i64),
94 distinct_count: None,
95 max_value: array
96 .iter()
97 .flatten()
98 .max()
99 .map(|x| x.to_be_bytes()[16 - size..].to_vec()),
100 min_value: array
101 .iter()
102 .flatten()
103 .min()
104 .map(|x| x.to_be_bytes()[16 - size..].to_vec()),
105 }
106}
107
108pub(super) fn build_statistics_decimal256_with_i128(
109 array: &PrimitiveArray<i256>,
110 primitive_type: PrimitiveType,
111 size: usize,
112) -> FixedLenStatistics {
113 FixedLenStatistics {
114 primitive_type,
115 null_count: Some(array.null_count() as i64),
116 distinct_count: None,
117 max_value: array
118 .iter()
119 .flatten()
120 .max()
121 .map(|x| x.0.low().to_be_bytes()[16 - size..].to_vec()),
122 min_value: array
123 .iter()
124 .flatten()
125 .min()
126 .map(|x| x.0.low().to_be_bytes()[16 - size..].to_vec()),
127 }
128}
129
130pub(super) fn build_statistics_decimal256(
131 array: &PrimitiveArray<i256>,
132 primitive_type: PrimitiveType,
133 size: usize,
134) -> FixedLenStatistics {
135 FixedLenStatistics {
136 primitive_type,
137 null_count: Some(array.null_count() as i64),
138 distinct_count: None,
139 max_value: array
140 .iter()
141 .flatten()
142 .max()
143 .map(|x| x.0.to_be_bytes()[32 - size..].to_vec()),
144 min_value: array
145 .iter()
146 .flatten()
147 .min()
148 .map(|x| x.0.to_be_bytes()[32 - size..].to_vec()),
149 }
150}