polars_parquet_format/
lib.rs

1#![allow(non_camel_case_types)]
2#![forbid(unsafe_code)]
3#![cfg_attr(docsrs, feature(doc_cfg))]
4
5#[allow(clippy::all)]
6mod parquet_format;
7pub use crate::parquet_format::*;
8
9pub mod thrift;
10
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the `Statistics` fixture shared by the column and page fixtures:
    /// legacy `max`/`min` unset, both counts zero, and empty `max_value`/`min_value`.
    fn zeroed_statistics() -> Statistics {
        Statistics {
            max: None,
            min: None,
            null_count: Some(0),
            distinct_count: Some(0),
            max_value: Some(Vec::new()),
            min_value: Some(Vec::new()),
        }
    }

    /// Builds the `FileMetaData` fixture used by the round-trip tests.
    ///
    /// The value is assembled bottom-up: one schema element, one column chunk
    /// (with its column metadata), one row group, and finally the file metadata.
    fn meta() -> FileMetaData {
        let schema_element = SchemaElement {
            type_: Some(Type::INT32),
            type_length: None,
            repetition_type: Some(FieldRepetitionType::REQUIRED),
            name: String::from("aaa"),
            num_children: None,
            converted_type: Some(ConvertedType::DATE),
            scale: None,
            precision: None,
            field_id: None,
            logical_type: Some(LogicalType::DATE(Default::default())),
        };

        // A single key/value pair whose key and value are both empty strings.
        let empty_pair = KeyValue {
            key: String::new(),
            value: Some(String::new()),
        };

        let page_stats = PageEncodingStats {
            page_type: PageType::DATA_PAGE,
            encoding: Encoding::PLAIN,
            count: 1,
        };

        let column_meta = ColumnMetaData {
            type_: Type::INT32,
            encodings: vec![Encoding::PLAIN],
            path_in_schema: Vec::new(),
            codec: CompressionCodec::UNCOMPRESSED,
            num_values: 0,
            total_uncompressed_size: 0,
            total_compressed_size: 0,
            key_value_metadata: Some(vec![empty_pair]),
            data_page_offset: 0,
            index_page_offset: None,
            dictionary_page_offset: None,
            statistics: Some(zeroed_statistics()),
            encoding_stats: Some(vec![page_stats]),
            bloom_filter_offset: None,
        };

        let column_chunk = ColumnChunk {
            file_path: None,
            file_offset: 10,
            meta_data: Some(column_meta),
            offset_index_offset: None,
            offset_index_length: None,
            column_index_offset: None,
            column_index_length: None,
            crypto_metadata: None,
            encrypted_column_metadata: None,
        };

        let sorting = SortingColumn {
            column_idx: 1,
            descending: true,
            nulls_first: true,
        };

        let row_group = RowGroup {
            columns: vec![column_chunk],
            total_byte_size: 10,
            num_rows: 10,
            sorting_columns: Some(vec![sorting]),
            file_offset: Some(10),
            total_compressed_size: Some(10),
            ordinal: None,
        };

        FileMetaData {
            version: 0,
            schema: vec![schema_element],
            num_rows: 0,
            row_groups: vec![row_group],
            key_value_metadata: None,
            created_by: None,
            column_orders: Some(vec![ColumnOrder::TYPEORDER(Default::default())]),
            encryption_algorithm: None,
            footer_signing_key_metadata: None,
        }
    }

    /// Writes the `meta()` fixture through `TCompactOutputProtocol`, reads it back
    /// through `TCompactInputProtocol`, and checks the decoded value equals the fixture.
    #[test]
    fn basic() {
        let expected = meta();

        let mut encoded: Vec<u8> = Vec::new();
        {
            // The output protocol mutably borrows `encoded`; scoping it here ends
            // that borrow before the bytes are read back.
            let mut out = thrift::protocol::TCompactOutputProtocol::new(&mut encoded);
            expected.write_to_out_protocol(&mut out).unwrap();
        }

        // NOTE(review): the second argument is presumably a size limit for the
        // reader — confirm against `TCompactInputProtocol::new`.
        let mut input = thrift::protocol::TCompactInputProtocol::new(&encoded[..], usize::MAX);
        let decoded = FileMetaData::read_from_in_protocol(&mut input).unwrap();

        assert_eq!(decoded, expected);
    }

    /// Same round-trip as `basic`, but through the stream protocol types and the
    /// `async` read/write methods. Only compiled with the `async` feature.
    // NOTE(review): the `needless_return` allow presumably silences a lint raised
    // on the `#[tokio::test]` expansion — confirm before removing.
    #[cfg(feature = "async")]
    #[tokio::test]
    #[allow(clippy::needless_return)]
    async fn async_() {
        let expected = meta();

        let mut encoded: Vec<u8> = Vec::new();
        {
            let mut out = thrift::protocol::TCompactOutputStreamProtocol::new(&mut encoded);
            expected
                .write_to_out_stream_protocol(&mut out)
                .await
                .unwrap();
        }

        let mut input =
            thrift::protocol::TCompactInputStreamProtocol::new(&encoded[..], usize::MAX);
        let decoded = FileMetaData::stream_from_in_protocol(&mut input)
            .await
            .unwrap();

        assert_eq!(decoded, expected);
    }

    /// Builds the `PageHeader` fixture used by the page round-trip tests.
    ///
    /// All four optional sub-headers (data page, index page, dictionary page and
    /// data page v2) are populated at once so each of them is written and read back.
    fn page() -> PageHeader {
        let data_v1 = DataPageHeader {
            num_values: 0,
            encoding: Encoding::PLAIN,
            definition_level_encoding: Encoding::RLE,
            repetition_level_encoding: Encoding::RLE,
            statistics: Some(zeroed_statistics()),
        };

        let dictionary = DictionaryPageHeader {
            num_values: 0,
            encoding: Encoding::PLAIN,
            is_sorted: Some(false),
        };

        let data_v2 = DataPageHeaderV2 {
            num_values: 0,
            num_nulls: 0,
            num_rows: 0,
            encoding: Encoding::PLAIN,
            definition_levels_byte_length: 0,
            repetition_levels_byte_length: 0,
            is_compressed: Some(true),
            statistics: Some(zeroed_statistics()),
        };

        PageHeader {
            type_: PageType::DATA_PAGE,
            uncompressed_page_size: 0,
            compressed_page_size: 0,
            crc: None,
            data_page_header: Some(data_v1),
            index_page_header: Some(IndexPageHeader {}),
            dictionary_page_header: Some(dictionary),
            data_page_header_v2: Some(data_v2),
        }
    }

    /// Writes the `page()` fixture through `TCompactOutputProtocol`, reads it back
    /// through `TCompactInputProtocol`, and checks the decoded value equals the fixture.
    #[test]
    fn basic_page() {
        let expected = page();

        let mut encoded: Vec<u8> = Vec::new();
        {
            let mut out = thrift::protocol::TCompactOutputProtocol::new(&mut encoded);
            expected.write_to_out_protocol(&mut out).unwrap();
        }

        let mut input = thrift::protocol::TCompactInputProtocol::new(&encoded[..], usize::MAX);
        let decoded = PageHeader::read_from_in_protocol(&mut input).unwrap();

        assert_eq!(decoded, expected);
    }

    /// Same round-trip as `basic_page`, but through the stream protocol types and
    /// the `async` read/write methods. Only compiled with the `async` feature.
    // NOTE(review): the `needless_return` allow presumably silences a lint raised
    // on the `#[tokio::test]` expansion — confirm before removing.
    #[cfg(feature = "async")]
    #[tokio::test]
    #[allow(clippy::needless_return)]
    async fn async_page() {
        let expected = page();

        let mut encoded: Vec<u8> = Vec::new();
        {
            let mut out = thrift::protocol::TCompactOutputStreamProtocol::new(&mut encoded);
            expected
                .write_to_out_stream_protocol(&mut out)
                .await
                .unwrap();
        }

        let mut input =
            thrift::protocol::TCompactInputStreamProtocol::new(&encoded[..], usize::MAX);
        let decoded = PageHeader::stream_from_in_protocol(&mut input)
            .await
            .unwrap();

        assert_eq!(decoded, expected);
    }
}