1use crate::types::*;
2
3#[derive(Debug, Clone)]
5pub struct ColumnSchema {
6 pub name: String,
7 pub physical_type: ParquetType,
8 pub logical_type: LogicalType,
9 pub encoding: Encoding,
10 pub compression: Compression,
11 pub required: bool,
12}
13
/// Logical interpretation attached to a column alongside its physical type.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq`: every payload is
/// an integer or a fieldless enum, so equality is total and values can be
/// used as `HashMap` / `HashSet` keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LogicalType {
    /// Textual data.
    String,
    /// Integer data.
    Integer,
    /// Floating-point data.
    Float,
    /// Boolean data.
    Boolean,
    /// Timestamp expressed in the given [`TimestampUnit`].
    Timestamp(TimestampUnit),
    /// Calendar date.
    Date,
    /// Decimal number with the given precision and scale.
    Decimal { precision: u8, scale: u8 },
}

/// Unit carried by [`LogicalType::Timestamp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimestampUnit {
    /// Milliseconds.
    Millis,
    /// Microseconds.
    Micros,
    /// Nanoseconds.
    Nanos,
}
33
34pub struct SchemaBuilder {
36 columns: Vec<ColumnSchema>,
37}
38
39impl SchemaBuilder {
40 pub fn new() -> Self {
41 SchemaBuilder {
42 columns: Vec::new(),
43 }
44 }
45
46 pub fn add_column(
48 mut self,
49 name: impl Into<String>,
50 physical_type: ParquetType,
51 logical_type: LogicalType,
52 ) -> Self {
53 self.columns.push(ColumnSchema {
54 name: name.into(),
55 physical_type,
56 logical_type,
57 encoding: Encoding::Plain,
58 compression: Compression::Uncompressed,
59 required: true,
60 });
61 self
62 }
63
64 pub fn add_optional_column(
66 mut self,
67 name: impl Into<String>,
68 physical_type: ParquetType,
69 logical_type: LogicalType,
70 ) -> Self {
71 self.columns.push(ColumnSchema {
72 name: name.into(),
73 physical_type,
74 logical_type,
75 encoding: Encoding::Plain,
76 compression: Compression::Uncompressed,
77 required: false,
78 });
79 self
80 }
81
82 pub fn with_compression(mut self, compression: Compression) -> Self {
84 if let Some(col) = self.columns.last_mut() {
85 col.compression = compression;
86 }
87 self
88 }
89
90 pub fn with_encoding(mut self, encoding: Encoding) -> Self {
92 if let Some(col) = self.columns.last_mut() {
93 col.encoding = encoding;
94 }
95 self
96 }
97
98 pub fn build(self) -> Vec<ColumnSchema> {
100 self.columns
101 }
102}
103
104impl Default for SchemaBuilder {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
110impl ColumnSchema {
111 pub fn default_for_type(name: impl Into<String>, parquet_type: ParquetType) -> Self {
113 let logical_type = match parquet_type {
114 ParquetType::Boolean => LogicalType::Boolean,
115 ParquetType::Int32 => LogicalType::Integer,
116 ParquetType::Int64 => LogicalType::Integer,
117 ParquetType::Float => LogicalType::Float,
118 ParquetType::Double => LogicalType::Float,
119 ParquetType::ByteArray => LogicalType::String,
120 ParquetType::FixedLenByteArray(_) => LogicalType::String,
121 ParquetType::Int96 => LogicalType::Timestamp(TimestampUnit::Nanos),
122 };
123
124 ColumnSchema {
125 name: name.into(),
126 physical_type: parquet_type,
127 logical_type,
128 encoding: Encoding::Plain,
129 compression: Compression::Uncompressed,
130 required: true,
131 }
132 }
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// Columns keep insertion order and their `required` flag, and
    /// `with_compression` affects only the most recently added column.
    #[test]
    fn test_schema_builder() {
        let schema = SchemaBuilder::new()
            .add_column("id", ParquetType::Int64, LogicalType::Integer)
            .add_column("name", ParquetType::ByteArray, LogicalType::String)
            .add_optional_column("score", ParquetType::Double, LogicalType::Float)
            .with_compression(Compression::Snappy)
            .build();

        assert_eq!(schema.len(), 3);
        assert_eq!(schema[0].name, "id");
        assert!(schema[0].required);
        assert_eq!(schema[1].name, "name");
        assert!(schema[1].required);
        assert_eq!(schema[2].name, "score");
        assert!(!schema[2].required);
        assert_eq!(schema[2].compression, Compression::Snappy);

        // Earlier columns must keep the default compression.
        assert_eq!(schema[0].compression, Compression::Uncompressed);
        assert_eq!(schema[1].compression, Compression::Uncompressed);
    }

    /// `with_*` modifiers on a builder with no columns do nothing.
    #[test]
    fn test_modifiers_on_empty_builder_are_noops() {
        let schema = SchemaBuilder::default()
            .with_compression(Compression::Snappy)
            .with_encoding(Encoding::Plain)
            .build();

        assert!(schema.is_empty());
    }

    /// `default_for_type` infers the logical type from the physical type and
    /// always yields a required column.
    #[test]
    fn test_default_for_type() {
        let col = ColumnSchema::default_for_type("age", ParquetType::Int32);
        assert_eq!(col.name, "age");
        assert_eq!(col.logical_type, LogicalType::Integer);
        assert!(col.required);

        let col = ColumnSchema::default_for_type("name", ParquetType::ByteArray);
        assert_eq!(col.logical_type, LogicalType::String);

        let col = ColumnSchema::default_for_type("flag", ParquetType::Boolean);
        assert_eq!(col.logical_type, LogicalType::Boolean);

        let col = ColumnSchema::default_for_type("ratio", ParquetType::Double);
        assert_eq!(col.logical_type, LogicalType::Float);

        let col = ColumnSchema::default_for_type("ts", ParquetType::Int96);
        assert_eq!(
            col.logical_type,
            LogicalType::Timestamp(TimestampUnit::Nanos)
        );
    }
}