rusty_data/
loader.rs

1//! The loader module
2//!
3//! Provides the Loader struct which is used to read data into
4//! DataTables.
5
6use std::io;
7use std::io::prelude::*;
8use std::io::{BufReader, Error, ErrorKind};
9use std::fs::File;
10
11use datatable::*;
12
/// Options used to fine tune the file loading.
///
/// The type derives `Debug`, `Clone` and `PartialEq` so that a configuration
/// can be printed, duplicated and compared.
#[derive(Debug, Clone, PartialEq)]
pub struct LoaderOptions {
    /// True if the first line of the file holds column headers.
    pub has_header: bool,
    /// The character separating the fields of a line.
    pub delimiter: char,
    /// The quote character, or `None` to disable quote handling.
    pub quote_marker: Option<char>,
}
22
23impl Default for LoaderOptions {
24    fn default() -> LoaderOptions {
25        LoaderOptions {
26            has_header: false,
27            delimiter: ',',
28            quote_marker: None,
29        }
30    }
31}
32/// Loader struct
33///
34/// Used to load and process data files into tables.
35pub struct Loader<'a> {
36    file: &'a str,
37    options: LoaderOptions,
38}
39
40impl<'a> Loader<'a> {
41    /// Constructs a new Loader.
42    pub fn new(has_header: bool, file: &str, delimiter: char) -> Loader {
43        let options = LoaderOptions {
44            has_header: has_header,
45            delimiter: delimiter,
46            quote_marker: None,
47        };
48
49        Loader {
50            file: file,
51            options: options,
52        }
53    }
54
55    /// Creates a loader with default settings from a file string.
56    ///
57    /// The default settings are as follows:
58    ///
59    /// - has_header : false
60    /// - delimiter : ','
61    pub fn from_file_string(file_string: &str) -> Loader {
62        Loader {
63            file: file_string,
64            options: LoaderOptions::default(),
65        }
66    }
67
68    /// Load the file from the loader with given delimiter.
69    ///
70    /// Pretty rudimentary with poor error handling.
71    ///
72    /// # Panics
73    ///
74    /// - The input data is not a float.
75    ///
76    /// # Failures
77    ///
78    /// - The input data is malformed (missing data, non-uniform rows etc.)
79    pub fn load_file(self) -> Result<DataTable, io::Error> {
80        let f = try!(File::open(self.file));
81        let reader = BufReader::new(f);
82
83        let mut table = DataTable::empty();
84
85        let mut lines = reader.lines();
86
87        if self.options.has_header {
88            if let Some(line) = lines.next() {
89                let line = try!(line);
90                let values = LineSplitIter::new(line.to_string(),
91                                                self.options.quote_marker,
92                                                self.options.delimiter);
93
94                for val in values {
95                    let mut column = DataColumn::empty();
96                    column.name = Some(val);
97                    table.data_cols.push(column);
98                }
99            }
100        } else {
101            if let Some(line) = lines.next() {
102                let line = try!(line);
103                let values = LineSplitIter::new(line.to_string(),
104                                                self.options.quote_marker,
105                                                self.options.delimiter);
106
107                for val in values {
108                    let mut column = DataColumn::empty();
109                    column.push(val);
110
111                    table.data_cols.push(column);
112                }
113            }
114        }
115
116        for line in lines {
117            let line = try!(line);
118            let values = LineSplitIter::new(line.to_string(),
119                                                self.options.quote_marker,
120                                                self.options.delimiter);
121
122
123            let mut idx = 0usize;
124
125            for (i, val) in values.enumerate() {
126                idx = i;
127                if idx > table.cols() {
128                    return Err(Error::new(ErrorKind::InvalidInput, "Malformed data format."));
129                }
130
131                table.data_cols[idx].push(val);
132            }
133
134            if idx != table.cols() - 1 {
135                return Err(Error::new(ErrorKind::InvalidInput, "Malformed data format."));
136            }
137        }
138
139        table.shrink_to_fit();
140        Ok(table)
141    }
142}
143
/// Iterator to parse a line in a data file.
///
/// Each call to `next` removes the leading field from the stored line and
/// returns it as an owned `String`. When a quote character is configured,
/// delimiters between a pair of quote characters do not split the field and
/// the quote characters surrounding a field are stripped.
///
/// Iteration stops as soon as the remaining text is empty, so an empty line
/// yields no fields and a trailing delimiter does not produce a final empty
/// field.
pub struct LineSplitIter {
    /// The part of the line that has not been consumed yet.
    line: String,
    /// The quote character, or `None` when quoting is disabled.
    quote_char: Option<char>,
    /// The character separating two fields.
    delimiter: char,
}

impl LineSplitIter {
    /// Construct a new LineSplitIter over the specified line using
    /// the given quote character and delimiter.
    pub fn new(line: String, quote_char: Option<char>, delimiter: char) -> LineSplitIter {
        LineSplitIter {
            line: line,
            quote_char: quote_char,
            delimiter: delimiter,
        }
    }
}

impl Iterator for LineSplitIter {
    type Item = String;

    /// Returns the next field of the line, or `None` once the line has been
    /// fully consumed.
    fn next(&mut self) -> Option<Self::Item> {
        if self.line.is_empty() {
            return None;
        }

        let delimiter = self.delimiter;

        // Byte offset of the next delimiter that ends the current field.
        let split_at = match self.quote_char {
            Some(quote_char) => {
                let mut in_quotes = false;

                self.line.find(|c: char| {
                    if c == quote_char {
                        // A quote character toggles the quoted state and
                        // never ends a field itself.
                        in_quotes = !in_quotes;
                        false
                    } else {
                        c == delimiter && !in_quotes
                    }
                })
            }
            None => self.line.find(delimiter),
        };

        let field: String = match split_at {
            Some(offset) => {
                let field: String = self.line.drain(..offset).collect();
                // Drop the delimiter by its encoded width; a fixed width of
                // one byte would split a multi-byte delimiter and panic.
                self.line.drain(..delimiter.len_utf8());
                field
            }
            // No delimiter left: the remainder is the last field.
            None => self.line.drain(..).collect(),
        };

        // Strip quotes from every field, including the last one on the line.
        Some(match self.quote_char {
            Some(quote_char) => field.trim_matches(quote_char).to_string(),
            None => field,
        })
    }
}
204
205/// Load the specified file to a DataTable.
206///
207/// # Examples
208///
209/// ```no_run
210/// use rusty_data::loader::load_file;
211///
212/// let table = load_file("path/to/file.data");
213/// ```
214pub fn load_file(file: &str) -> DataTable {
215    let loader = Loader::from_file_string(file);
216
217    loader.load_file().unwrap()
218
219}