arrow2/io/parquet/write/
fixed_len_bytes.rs

1use parquet2::{
2    encoding::Encoding,
3    page::DataPage,
4    schema::types::PrimitiveType,
5    statistics::{serialize_statistics, FixedLenStatistics},
6};
7
8use super::{binary::ord_binary, utils, WriteOptions};
9use crate::types::i256;
10use crate::{
11    array::{Array, FixedSizeBinaryArray, PrimitiveArray},
12    error::Result,
13    io::parquet::read::schema::is_nullable,
14};
15
16pub(crate) fn encode_plain(array: &FixedSizeBinaryArray, is_optional: bool, buffer: &mut Vec<u8>) {
17    // append the non-null values
18    if is_optional {
19        array.iter().for_each(|x| {
20            if let Some(x) = x {
21                buffer.extend_from_slice(x);
22            }
23        })
24    } else {
25        buffer.extend_from_slice(array.values());
26    }
27}
28
29pub fn array_to_page(
30    array: &FixedSizeBinaryArray,
31    options: WriteOptions,
32    type_: PrimitiveType,
33    statistics: Option<FixedLenStatistics>,
34) -> Result<DataPage> {
35    let is_optional = is_nullable(&type_.field_info);
36    let validity = array.validity();
37
38    let mut buffer = vec![];
39    utils::write_def_levels(
40        &mut buffer,
41        is_optional,
42        validity,
43        array.len(),
44        options.version,
45    )?;
46
47    let definition_levels_byte_length = buffer.len();
48
49    encode_plain(array, is_optional, &mut buffer);
50
51    utils::build_plain_page(
52        buffer,
53        array.len(),
54        array.len(),
55        array.null_count(),
56        0,
57        definition_levels_byte_length,
58        statistics.map(|x| serialize_statistics(&x)),
59        type_,
60        options,
61        Encoding::Plain,
62    )
63}
64
65pub(super) fn build_statistics(
66    array: &FixedSizeBinaryArray,
67    primitive_type: PrimitiveType,
68) -> FixedLenStatistics {
69    FixedLenStatistics {
70        primitive_type,
71        null_count: Some(array.null_count() as i64),
72        distinct_count: None,
73        max_value: array
74            .iter()
75            .flatten()
76            .max_by(|x, y| ord_binary(x, y))
77            .map(|x| x.to_vec()),
78        min_value: array
79            .iter()
80            .flatten()
81            .min_by(|x, y| ord_binary(x, y))
82            .map(|x| x.to_vec()),
83    }
84}
85
86pub(super) fn build_statistics_decimal(
87    array: &PrimitiveArray<i128>,
88    primitive_type: PrimitiveType,
89    size: usize,
90) -> FixedLenStatistics {
91    FixedLenStatistics {
92        primitive_type,
93        null_count: Some(array.null_count() as i64),
94        distinct_count: None,
95        max_value: array
96            .iter()
97            .flatten()
98            .max()
99            .map(|x| x.to_be_bytes()[16 - size..].to_vec()),
100        min_value: array
101            .iter()
102            .flatten()
103            .min()
104            .map(|x| x.to_be_bytes()[16 - size..].to_vec()),
105    }
106}
107
108pub(super) fn build_statistics_decimal256_with_i128(
109    array: &PrimitiveArray<i256>,
110    primitive_type: PrimitiveType,
111    size: usize,
112) -> FixedLenStatistics {
113    FixedLenStatistics {
114        primitive_type,
115        null_count: Some(array.null_count() as i64),
116        distinct_count: None,
117        max_value: array
118            .iter()
119            .flatten()
120            .max()
121            .map(|x| x.0.low().to_be_bytes()[16 - size..].to_vec()),
122        min_value: array
123            .iter()
124            .flatten()
125            .min()
126            .map(|x| x.0.low().to_be_bytes()[16 - size..].to_vec()),
127    }
128}
129
130pub(super) fn build_statistics_decimal256(
131    array: &PrimitiveArray<i256>,
132    primitive_type: PrimitiveType,
133    size: usize,
134) -> FixedLenStatistics {
135    FixedLenStatistics {
136        primitive_type,
137        null_count: Some(array.null_count() as i64),
138        distinct_count: None,
139        max_value: array
140            .iter()
141            .flatten()
142            .max()
143            .map(|x| x.0.to_be_bytes()[32 - size..].to_vec()),
144        min_value: array
145            .iter()
146            .flatten()
147            .min()
148            .map(|x| x.0.to_be_bytes()[32 - size..].to_vec()),
149    }
150}