steeldb/database/
file_io.rs

1//! This module defines structs / methods to save/read data to/from disk.
2use crate::database::datatypes::DataType;
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{Read, Write};
6
/// Magic first line of every SimpleColumnar file: the ASCII text
/// `TABLE COLUMNAR FORMAT HEADER` terminated by a single `\n` (29 bytes in total).
const COLUMNAR_HEADER: [u8; 29] = *b"TABLE COLUMNAR FORMAT HEADER\n";
9
10// Enums
/// The on-disk file formats supported by the database.
///
/// The enum is a plain tag without payload, so it is cheap to copy, compare and print.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileFormat {
    /// The only supported format for now: SimpleColumnar, a naive ASCII format.
    ///
    /// The file starts with a fixed header line. Every column follows as one
    /// metadata line (`Field name`, `Type`, `Number of elements`) and then one
    /// value per line. Here is an example of this format:
    /// ```txt
    /// TABLE COLUMNAR FORMAT HEADER
    /// Field name: final_grade; Type: f32; Number of elements: 3
    /// 4.0
    /// 3.2
    /// 5
    /// Field name: name; Type: String; Number of elements: 3
    /// John Man
    /// Lenon
    /// Mary
    /// Field name: annual_salary; Type: i32; Number of elements: 3
    /// 60000
    /// 200000
    /// 3012000
    ///
    /// ```
    /// Notice that the newline at the end is not optional.
    SimpleColumnar,
}
34
35// Traits
36/// The public interface of a table Writer. Used for dynamic dispatching in runtime.
37pub trait Writer {
38    /// Write table to disk.
39    fn write(
40        &self,
41        fields: &HashMap<String, DataType>,
42        columns: &HashMap<String, Vec<DataType>>,
43        file_: File,
44    ) -> Result<usize, std::io::Error>;
45    /// Append data to disk.
46    fn append(
47        &self,
48        fields: &HashMap<String, DataType>,
49        columns: &HashMap<String, Vec<DataType>>,
50        file_: File,
51    ) -> Result<usize, std::io::Error>;
52}
53
/// The errors that might happen when reading a table from disk.
/// This implicitly defines errors for ColumnarFormat only.
///
/// The type implements [`std::fmt::Display`] and [`std::error::Error`], so it can be
/// printed for users and boxed as a `dyn Error`. An [`std::io::Error`] converts
/// into [`ReadError::StdIoError`] through `From`, which makes `?` usable on I/O calls.
#[derive(Debug)]
pub enum ReadError {
    /// The table has an invalid size.
    /// This can happen if columns have different sizes or if the file was incorrectly saved / corrupted / modified.
    InvalidFileSize,
    /// Found a line that should contain field metadata, but it does not comply with the expected format.
    InvalidFieldMeta(String),
    /// Could not parse a data type.
    FieldParseError(String),
    /// Generic fallback standard I/O error.
    StdIoError(std::io::Error),
}

impl std::fmt::Display for ReadError {
    /// Render a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ReadError::InvalidFileSize => write!(f, "invalid file size"),
            ReadError::InvalidFieldMeta(msg) => write!(f, "invalid field metadata: {}", msg),
            ReadError::FieldParseError(msg) => write!(f, "field parse error: {}", msg),
            ReadError::StdIoError(err) => write!(f, "I/O error: {}", err),
        }
    }
}

impl std::error::Error for ReadError {
    /// Only the I/O variant wraps an underlying error; the others carry plain text.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            ReadError::StdIoError(err) => Some(err),
            ReadError::InvalidFileSize
            | ReadError::InvalidFieldMeta(_)
            | ReadError::FieldParseError(_) => None,
        }
    }
}

impl From<std::io::Error> for ReadError {
    /// Wrap a standard I/O error into the generic fallback variant.
    fn from(err: std::io::Error) -> Self {
        ReadError::StdIoError(err)
    }
}
68/// The public interface of a table Reader. Used for dynamic dispatching in runtime.
69pub trait Reader {
70    fn read(
71        &self,
72        file_: File,
73        select_columns: Vec<String>,
74    ) -> Result<(HashMap<String, DataType>, HashMap<String, Vec<DataType>>), ReadError>;
75}
76
77// Writer Implementations
/// Writer for the SimpleColumnar format. Carries no state of its own.
pub struct ColumnarWriter {}

impl ColumnarWriter {
    /// Build a writer already wrapped in a `Box`, so it can be used for
    /// dynamic dispatch at runtime.
    pub fn new() -> Box<ColumnarWriter> {
        Box::new(Self {})
    }
}
87
88impl Writer for ColumnarWriter {
89    /// Write table to disk in columnar format.
90    fn write(
91        &self,
92        fields: &HashMap<String, DataType>,
93        columns: &HashMap<String, Vec<DataType>>,
94        mut file_: File,
95    ) -> Result<usize, std::io::Error> {
96        if fields.len() == 0 {
97            panic!("Cannot write empty table without schema - TODO: Handle this case, it should propagate an error and not panic");
98        }
99
100        let mut written_bytes: usize = 0;
101
102        written_bytes += file_.write(&COLUMNAR_HEADER)?;
103
104        for (key, value) in fields.iter() {
105            let column = columns.get(key).unwrap();
106            let s = format!(
107                "Field name: {}; Type: {}; Number of elements: {}\n",
108                key,
109                value.name(),
110                column.len()
111            );
112            let b = s.as_bytes();
113            written_bytes += file_.write(b)?;
114
115            for value in column.iter() {
116                match value {
117                    DataType::String(str) => {
118                        let s = format!("{}\n", str);
119                        written_bytes += file_.write(s.as_bytes())?;
120                    }
121                    DataType::Integer32(str) => {
122                        let s = format!("{}\n", str);
123                        written_bytes += file_.write(s.as_bytes())?;
124                    }
125                    DataType::Float32(str) => {
126                        let s = format!("{}\n", str);
127                        written_bytes += file_.write(s.as_bytes())?;
128                    }
129                }
130            }
131        }
132
133        return Ok(written_bytes);
134    }
135    /// Not implemented. Should append data to the columnar format.
136    fn append(
137        &self,
138        fields: &HashMap<String, DataType>,
139        columns: &HashMap<String, Vec<DataType>>,
140        file_: File,
141    ) -> Result<usize, std::io::Error> {
142        return Ok(0 as usize);
143    }
144}
145
146// Reader Implementations
147/// The reader for the SimpleColumnar format.
148pub struct ColumnarReader {}
149impl ColumnarReader {
150    /// Constructor wraps into a Box to allow dynamic dispatching in runtime.
151    pub fn new() -> Box<ColumnarReader> {
152        return Box::new(ColumnarReader {});
153    }
154    /// Read a line from disk that should contain field metadata.
155    fn read_metadata(line: &str, line_number: i32) -> Result<(String, String, i32), ReadError> {
156        // "Field name: {:?}; Type: {:?}; Number of elements: {:?}\n",
157        let field_meta: Vec<&str> = line.split(";").collect();
158        // Basic check
159        if field_meta.len() != 3 {
160            let s = format!(
161                "Error at line: {}. Expected 3 meta fields, found {} instead",
162                line_number,
163                field_meta.len()
164            );
165            return Err(ReadError::InvalidFieldMeta(s));
166        }
167
168        // collect number of elements;
169        let number_split: Vec<&str> = field_meta.get(2).unwrap().split(":").collect();
170
171        if number_split.len() != 2 {
172            return Err(ReadError::InvalidFieldMeta(format!(
173                "Error at line: {}. Could not split meta 'number of elements'",
174                line_number,
175            )));
176        }
177        let maybe_number = number_split.get(1).unwrap().replace(" ", "").parse::<i32>();
178
179        if maybe_number.is_err() {
180            return Err(ReadError::FieldParseError(format!(
181                "Error at line: {}. Could not read meta 'number of elements'. Error: {}",
182                line_number,
183                maybe_number.unwrap_err()
184            )));
185        }
186
187        let field_number_of_elements = maybe_number.unwrap();
188
189        // collect field type
190        let type_split: Vec<&str> = field_meta.get(1).unwrap().split(":").collect();
191
192        if type_split.len() != 2 {
193            return Err(ReadError::InvalidFieldMeta(format!(
194                "Error at line: {}. Could not split meta 'type'",
195                line_number,
196            )));
197        }
198
199        let field_type = type_split.get(1).unwrap().replace(" ", "");
200
201        // collect field name
202        let name_split: Vec<&str> = field_meta.get(0).unwrap().split(":").collect();
203        if name_split.len() != 2 {
204            return Err(ReadError::InvalidFieldMeta(
205                "Could not split meta 'name'".to_string(),
206            ));
207        }
208        let field_name = name_split.get(1).unwrap().replace(" ", "");
209
210        return Ok((field_name, field_type, field_number_of_elements));
211    }
212}
213
214impl Reader for ColumnarReader {
215    /// The SimpleColumnar reader method.
216    fn read(
217        &self,
218        mut file_: File,
219        select_columns: Vec<String>,
220    ) -> Result<(HashMap<String, DataType>, HashMap<String, Vec<DataType>>), ReadError> {
221        // Prepare return output
222        let mut fields = HashMap::<String, DataType>::new();
223        let mut columns = HashMap::<String, Vec<DataType>>::new();
224
225        // Read file
226        let mut buffer = String::new();
227        let result = file_.read_to_string(&mut buffer);
228        if result.is_err() {
229            return Err(ReadError::StdIoError(result.unwrap_err()));
230        }
231        // if result
232        let lines: Vec<&str> = buffer.split("\n").collect();
233        if lines.len() < 2 {
234            return Err(ReadError::InvalidFileSize);
235        }
236
237        let field_header_line = lines.get(1).unwrap();
238
239        let result = ColumnarReader::read_metadata(field_header_line, 1);
240        if result.is_err() {
241            return Err(result.unwrap_err());
242        }
243        let (mut field_name, mut field_type, mut field_number_of_elements) = result.unwrap();
244
245        // Start collecting at third line (zero-indexed)
246        let mut line = 2;
247
248        // read loop here
249        while line < lines.len() as i32 {
250            let block_end = field_number_of_elements + line;
251
252            if (lines.len() as i32) < block_end {
253                return Err(ReadError::InvalidFileSize);
254            }
255
256            // collect data only if requested
257            if select_columns.contains(&field_name) {
258                let dtype: DataType;
259                if field_type == "i32" {
260                    dtype = DataType::Integer32(0);
261                } else if field_type == "f32" {
262                    dtype = DataType::Float32(0.0);
263                } else {
264                    dtype = DataType::String(field_name.to_string());
265                }
266
267                fields.insert(field_name.to_string(), dtype);
268                columns.insert(field_name.to_string(), vec![]);
269                let column = columns.get_mut(&field_name).unwrap();
270                for i in line..block_end {
271                    let line = lines.get(i as usize).unwrap();
272                    let val: DataType;
273                    if field_type == "i32" {
274                        let result = line.parse::<i32>();
275                        if result.is_err() {
276                            return Err(ReadError::FieldParseError(format!(
277                                "Failed to read integer at line {}",
278                                i
279                            )));
280                        }
281                        val = DataType::Integer32(result.unwrap());
282                    } else if field_type == "f32" {
283                        let result = line.parse::<f32>();
284                        if result.is_err() {
285                            return Err(ReadError::FieldParseError(format!(
286                                "Failed to read integer at line {}",
287                                i
288                            )));
289                        }
290                        val = DataType::Float32(result.unwrap());
291                    } else {
292                        val = DataType::String(line.to_string());
293                    }
294                    column.push(val);
295                }
296            }
297
298            line = block_end;
299            if line >= lines.len() as i32 {
300                // reached EOF
301                break;
302            }
303            let unwrapped_line = lines.get(line as usize).unwrap();
304            if unwrapped_line.len() == 0 {
305                break;
306            }
307
308            // Read next field metadata
309            let result = ColumnarReader::read_metadata(&unwrapped_line, line);
310            if result.is_err() {
311                return Err(result.unwrap_err());
312            }
313            (field_name, field_type, field_number_of_elements) = result.unwrap();
314            // Prepare to read data
315            line += 1;
316        }
317
318        return Ok((fields, columns));
319    }
320}