1use crate::database::datatypes::DataType;
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{Read, Write};
6
/// First line of every file produced by `ColumnarWriter::write`: 28 ASCII
/// characters plus the terminating newline (29 bytes in total).
///
/// NOTE(review): `ColumnarReader::read` skips the first line without comparing
/// it against this constant.
const COLUMNAR_HEADER: [u8; 29] = *b"TABLE COLUMNAR FORMAT HEADER\n";
9
/// File formats a table can be stored in.
pub enum FileFormat {
    /// Presumably the text format handled by `ColumnarWriter` and
    /// `ColumnarReader` in this file — the enum is not referenced by the code
    /// visible here, so confirm against its users.
    SimpleColumnar,
}
34
/// Serializes a table (schema plus column data) into a file.
pub trait Writer {
    /// Writes the whole table to `file_`.
    ///
    /// * `fields` - maps each field name to a `DataType` describing the field.
    /// * `columns` - maps each field name to the values stored in that column.
    /// * `file_` - destination file; ownership is taken by the writer.
    ///
    /// Returns a `usize` on success (`ColumnarWriter` reports the number of
    /// bytes written) or the underlying I/O error.
    fn write(
        &self,
        fields: &HashMap<String, DataType>,
        columns: &HashMap<String, Vec<DataType>>,
        file_: File,
    ) -> Result<usize, std::io::Error>;
    /// Adds table data to `file_`; takes the same arguments as [`Writer::write`].
    ///
    /// NOTE(review): the only implementation in this file (`ColumnarWriter`)
    /// is a stub that writes nothing and returns `Ok(0)`.
    fn append(
        &self,
        fields: &HashMap<String, DataType>,
        columns: &HashMap<String, Vec<DataType>>,
        file_: File,
    ) -> Result<usize, std::io::Error>;
}
53
/// Errors that can occur while reading a table back from a file.
#[derive(Debug)]
pub enum ReadError {
    /// The file contains fewer lines than its header and field metadata require.
    InvalidFileSize,
    /// A field metadata line is malformed; the payload describes the problem.
    InvalidFieldMeta(String),
    /// A metadata number or a column value could not be parsed; the payload
    /// describes the problem.
    FieldParseError(String),
    /// The underlying I/O operation failed.
    StdIoError(std::io::Error),
}

impl std::fmt::Display for ReadError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ReadError::InvalidFileSize => {
                write!(f, "file has fewer lines than its metadata requires")
            }
            ReadError::InvalidFieldMeta(details) => {
                write!(f, "invalid field metadata: {}", details)
            }
            ReadError::FieldParseError(details) => {
                write!(f, "failed to parse field: {}", details)
            }
            ReadError::StdIoError(err) => write!(f, "I/O error: {}", err),
        }
    }
}

impl std::error::Error for ReadError {
    /// Exposes the wrapped I/O error so callers can walk the error chain.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            ReadError::StdIoError(err) => Some(err),
            ReadError::InvalidFileSize
            | ReadError::InvalidFieldMeta(_)
            | ReadError::FieldParseError(_) => None,
        }
    }
}

impl From<std::io::Error> for ReadError {
    /// Lets `?` convert I/O failures into [`ReadError::StdIoError`].
    fn from(err: std::io::Error) -> Self {
        ReadError::StdIoError(err)
    }
}
/// Deserializes a table from a file.
pub trait Reader {
    /// Reads the table stored in `file_`.
    ///
    /// * `file_` - source file; ownership is taken by the reader.
    /// * `select_columns` - names of the columns to load (`ColumnarReader`
    ///   skips every column whose name is not listed here).
    ///
    /// On success returns a pair `(fields, columns)`: a map from field name to
    /// a `DataType` describing the field, and a map from field name to the
    /// values of that column. Failures are reported as a [`ReadError`].
    fn read(
        &self,
        file_: File,
        select_columns: Vec<String>,
    ) -> Result<(HashMap<String, DataType>, HashMap<String, Vec<DataType>>), ReadError>;
}
76
/// Writer for the simple columnar text format; it carries no state.
pub struct ColumnarWriter {}

impl ColumnarWriter {
    /// Builds a heap-allocated `ColumnarWriter`.
    pub fn new() -> Box<ColumnarWriter> {
        let writer = ColumnarWriter {};
        Box::new(writer)
    }
}
87
88impl Writer for ColumnarWriter {
89 fn write(
91 &self,
92 fields: &HashMap<String, DataType>,
93 columns: &HashMap<String, Vec<DataType>>,
94 mut file_: File,
95 ) -> Result<usize, std::io::Error> {
96 if fields.len() == 0 {
97 panic!("Cannot write empty table without schema - TODO: Handle this case, it should propagate an error and not panic");
98 }
99
100 let mut written_bytes: usize = 0;
101
102 written_bytes += file_.write(&COLUMNAR_HEADER)?;
103
104 for (key, value) in fields.iter() {
105 let column = columns.get(key).unwrap();
106 let s = format!(
107 "Field name: {}; Type: {}; Number of elements: {}\n",
108 key,
109 value.name(),
110 column.len()
111 );
112 let b = s.as_bytes();
113 written_bytes += file_.write(b)?;
114
115 for value in column.iter() {
116 match value {
117 DataType::String(str) => {
118 let s = format!("{}\n", str);
119 written_bytes += file_.write(s.as_bytes())?;
120 }
121 DataType::Integer32(str) => {
122 let s = format!("{}\n", str);
123 written_bytes += file_.write(s.as_bytes())?;
124 }
125 DataType::Float32(str) => {
126 let s = format!("{}\n", str);
127 written_bytes += file_.write(s.as_bytes())?;
128 }
129 }
130 }
131 }
132
133 return Ok(written_bytes);
134 }
135 fn append(
137 &self,
138 fields: &HashMap<String, DataType>,
139 columns: &HashMap<String, Vec<DataType>>,
140 file_: File,
141 ) -> Result<usize, std::io::Error> {
142 return Ok(0 as usize);
143 }
144}
145
146pub struct ColumnarReader {}
149impl ColumnarReader {
150 pub fn new() -> Box<ColumnarReader> {
152 return Box::new(ColumnarReader {});
153 }
154 fn read_metadata(line: &str, line_number: i32) -> Result<(String, String, i32), ReadError> {
156 let field_meta: Vec<&str> = line.split(";").collect();
158 if field_meta.len() != 3 {
160 let s = format!(
161 "Error at line: {}. Expected 3 meta fields, found {} instead",
162 line_number,
163 field_meta.len()
164 );
165 return Err(ReadError::InvalidFieldMeta(s));
166 }
167
168 let number_split: Vec<&str> = field_meta.get(2).unwrap().split(":").collect();
170
171 if number_split.len() != 2 {
172 return Err(ReadError::InvalidFieldMeta(format!(
173 "Error at line: {}. Could not split meta 'number of elements'",
174 line_number,
175 )));
176 }
177 let maybe_number = number_split.get(1).unwrap().replace(" ", "").parse::<i32>();
178
179 if maybe_number.is_err() {
180 return Err(ReadError::FieldParseError(format!(
181 "Error at line: {}. Could not read meta 'number of elements'. Error: {}",
182 line_number,
183 maybe_number.unwrap_err()
184 )));
185 }
186
187 let field_number_of_elements = maybe_number.unwrap();
188
189 let type_split: Vec<&str> = field_meta.get(1).unwrap().split(":").collect();
191
192 if type_split.len() != 2 {
193 return Err(ReadError::InvalidFieldMeta(format!(
194 "Error at line: {}. Could not split meta 'type'",
195 line_number,
196 )));
197 }
198
199 let field_type = type_split.get(1).unwrap().replace(" ", "");
200
201 let name_split: Vec<&str> = field_meta.get(0).unwrap().split(":").collect();
203 if name_split.len() != 2 {
204 return Err(ReadError::InvalidFieldMeta(
205 "Could not split meta 'name'".to_string(),
206 ));
207 }
208 let field_name = name_split.get(1).unwrap().replace(" ", "");
209
210 return Ok((field_name, field_type, field_number_of_elements));
211 }
212}
213
214impl Reader for ColumnarReader {
215 fn read(
217 &self,
218 mut file_: File,
219 select_columns: Vec<String>,
220 ) -> Result<(HashMap<String, DataType>, HashMap<String, Vec<DataType>>), ReadError> {
221 let mut fields = HashMap::<String, DataType>::new();
223 let mut columns = HashMap::<String, Vec<DataType>>::new();
224
225 let mut buffer = String::new();
227 let result = file_.read_to_string(&mut buffer);
228 if result.is_err() {
229 return Err(ReadError::StdIoError(result.unwrap_err()));
230 }
231 let lines: Vec<&str> = buffer.split("\n").collect();
233 if lines.len() < 2 {
234 return Err(ReadError::InvalidFileSize);
235 }
236
237 let field_header_line = lines.get(1).unwrap();
238
239 let result = ColumnarReader::read_metadata(field_header_line, 1);
240 if result.is_err() {
241 return Err(result.unwrap_err());
242 }
243 let (mut field_name, mut field_type, mut field_number_of_elements) = result.unwrap();
244
245 let mut line = 2;
247
248 while line < lines.len() as i32 {
250 let block_end = field_number_of_elements + line;
251
252 if (lines.len() as i32) < block_end {
253 return Err(ReadError::InvalidFileSize);
254 }
255
256 if select_columns.contains(&field_name) {
258 let dtype: DataType;
259 if field_type == "i32" {
260 dtype = DataType::Integer32(0);
261 } else if field_type == "f32" {
262 dtype = DataType::Float32(0.0);
263 } else {
264 dtype = DataType::String(field_name.to_string());
265 }
266
267 fields.insert(field_name.to_string(), dtype);
268 columns.insert(field_name.to_string(), vec![]);
269 let column = columns.get_mut(&field_name).unwrap();
270 for i in line..block_end {
271 let line = lines.get(i as usize).unwrap();
272 let val: DataType;
273 if field_type == "i32" {
274 let result = line.parse::<i32>();
275 if result.is_err() {
276 return Err(ReadError::FieldParseError(format!(
277 "Failed to read integer at line {}",
278 i
279 )));
280 }
281 val = DataType::Integer32(result.unwrap());
282 } else if field_type == "f32" {
283 let result = line.parse::<f32>();
284 if result.is_err() {
285 return Err(ReadError::FieldParseError(format!(
286 "Failed to read integer at line {}",
287 i
288 )));
289 }
290 val = DataType::Float32(result.unwrap());
291 } else {
292 val = DataType::String(line.to_string());
293 }
294 column.push(val);
295 }
296 }
297
298 line = block_end;
299 if line >= lines.len() as i32 {
300 break;
302 }
303 let unwrapped_line = lines.get(line as usize).unwrap();
304 if unwrapped_line.len() == 0 {
305 break;
306 }
307
308 let result = ColumnarReader::read_metadata(&unwrapped_line, line);
310 if result.is_err() {
311 return Err(result.unwrap_err());
312 }
313 (field_name, field_type, field_number_of_elements) = result.unwrap();
314 line += 1;
316 }
317
318 return Ok((fields, columns));
319 }
320}