1use thiserror::Error;
2
/// Physical storage type of a Parquet column.
///
/// Values are normally produced by `ParquetType::from_thrift`, which maps the
/// integer codes 0 through 7 onto the variants in declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParquetType {
    /// Thrift code 0.
    Boolean,
    /// Thrift code 1.
    Int32,
    /// Thrift code 2.
    Int64,
    /// Thrift code 3.
    Int96,
    /// Thrift code 4.
    Float,
    /// Thrift code 5.
    Double,
    /// Thrift code 6.
    ByteArray,
    /// Thrift code 7. Carries an `i32` payload.
    ///
    /// NOTE(review): the payload is presumably the fixed byte length of each
    /// value; `from_thrift` always fills it with `0` — confirm who sets the
    /// real length.
    FixedLenByteArray(i32),
}
15
16impl ParquetType {
17 pub fn from_thrift(code: i32) -> Result<Self> {
19 match code {
20 0 => Ok(ParquetType::Boolean),
21 1 => Ok(ParquetType::Int32),
22 2 => Ok(ParquetType::Int64),
23 3 => Ok(ParquetType::Int96),
24 4 => Ok(ParquetType::Float),
25 5 => Ok(ParquetType::Double),
26 6 => Ok(ParquetType::ByteArray),
27 7 => Ok(ParquetType::FixedLenByteArray(0)), _ => Err(ParquetError::UnsupportedType(format!(
29 "Unknown physical type code: {code}"
30 ))),
31 }
32 }
33}
34
/// Value encoding used for a column's pages.
///
/// Values are normally produced by `Encoding::from_thrift`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Encoding {
    /// Thrift code 0.
    Plain,
    /// Thrift code 4.
    RleBitPacked,
    /// Thrift code 5.
    DeltaBinaryPacked,
    /// Thrift code 6.
    DeltaLengthByteArray,
    /// Thrift code 7.
    DeltaByteArray,
}
44
45impl Encoding {
46 pub fn from_thrift(code: i32) -> Result<Self> {
48 match code {
49 0 => Ok(Encoding::Plain),
50 4 => Ok(Encoding::RleBitPacked),
51 5 => Ok(Encoding::DeltaBinaryPacked),
52 6 => Ok(Encoding::DeltaLengthByteArray),
53 7 => Ok(Encoding::DeltaByteArray),
54 _ => Ok(Encoding::Plain),
56 }
57 }
58}
59
/// Compression codec applied to a column's data.
///
/// Values are normally produced by `Compression::from_thrift`, which maps the
/// integer codes 0 through 6 onto the variants in declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compression {
    /// Thrift code 0.
    Uncompressed,
    /// Thrift code 1.
    Snappy,
    /// Thrift code 2.
    Gzip,
    /// Thrift code 3.
    Lzo,
    /// Thrift code 4.
    Brotli,
    /// Thrift code 5.
    Lz4,
    /// Thrift code 6.
    Zstd,
}
71
72impl Compression {
73 pub fn from_thrift(code: i32) -> Result<Self> {
75 match code {
76 0 => Ok(Compression::Uncompressed),
77 1 => Ok(Compression::Snappy),
78 2 => Ok(Compression::Gzip),
79 3 => Ok(Compression::Lzo),
80 4 => Ok(Compression::Brotli),
81 5 => Ok(Compression::Lz4),
82 6 => Ok(Compression::Zstd),
83 _ => Err(ParquetError::UnsupportedCompression(format!(
84 "Unknown compression code: {code}"
85 ))),
86 }
87 }
88}
89
/// Kind of page found inside a column chunk.
///
/// Values are normally produced by `PageType::from_thrift`, which maps the
/// integer codes 0 through 3 onto the variants in declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PageType {
    /// Thrift code 0.
    DataPage,
    /// Thrift code 1.
    IndexPage,
    /// Thrift code 2.
    DictionaryPage,
    /// Thrift code 3.
    DataPageV2,
}
98
99impl PageType {
100 pub fn from_thrift(code: i32) -> Result<Self> {
101 match code {
102 0 => Ok(PageType::DataPage),
103 1 => Ok(PageType::IndexPage),
104 2 => Ok(PageType::DictionaryPage),
105 3 => Ok(PageType::DataPageV2),
106 _ => Err(ParquetError::DataError(format!(
107 "Unknown page type: {code}"
108 ))),
109 }
110 }
111}
112
113#[derive(Debug, Error)]
115pub enum ParquetError {
116 #[error("IO error: {0}")]
117 IoError(#[from] std::io::Error),
118
119 #[error("Invalid Parquet file: {0}")]
120 InvalidFile(String),
121
122 #[error("Unsupported physical type: {0}")]
123 UnsupportedType(String),
124
125 #[error("Unsupported compression: {0}")]
126 UnsupportedCompression(String),
127
128 #[error("Unsupported encoding: {0}")]
129 UnsupportedEncoding(String),
130
131 #[error("Data error: {0}")]
132 DataError(String),
133
134 #[error("Arrow conversion error: {0}")]
135 ArrowError(String),
136
137 #[error("Column index {0} out of range")]
138 ColumnOutOfRange(usize),
139}
140
141pub type Result<T> = std::result::Result<T, ParquetError>;
143
144#[derive(Debug, Clone)]
146pub struct ColumnMetadata {
147 pub name: String,
149 pub physical_type: ParquetType,
151 pub encoding: Encoding,
153 pub compression: Compression,
155 pub num_values: i64,
157 pub data_offset: i64,
159 pub total_compressed_size: i64,
161 pub total_uncompressed_size: i64,
163}
164
165#[derive(Debug, Clone)]
167pub struct RowGroupMetadata {
168 pub columns: Vec<ColumnMetadata>,
170 pub num_rows: i64,
172 pub total_byte_size: i64,
174}
175
176#[derive(Debug, Clone)]
178pub struct ParquetMetadata {
179 pub version: i32,
181 pub num_rows: i64,
183 pub num_columns: usize,
185 pub schema_names: Vec<String>,
187 pub row_groups: Vec<RowGroupMetadata>,
189 pub columns: Vec<ColumnMetadata>,
191 pub created_by: Option<String>,
193}