1extern crate arrow;
22
23mod datatables;
24
25use arrow::datatypes::SchemaRef;
26use arrow::record_batch::RecordBatch;
27
28#[derive(Clone, Debug)]
31pub struct DataSet {
32 tables: Vec<DataTable>,
33 doc: Option<String>,
34 name: String,
35}
36
37impl DataSet {
38 pub fn new(name: String, tables: Vec<DataTable>, doc: Option<String>) -> Self {
39 Self { tables, name, doc }
40 }
41
42 pub fn name(&self) -> &str {
43 &self.name
44 }
45
46 pub fn doc(&self) -> Option<&str> {
47 self.doc.as_deref()
48 }
49
50 pub fn tables(&self) -> &[DataTable] {
51 &self.tables
52 }
53}
54
55#[derive(Clone, Debug)]
59pub struct DataTable {
60 batches: Vec<RecordBatch>,
61 doc: Option<String>,
62 name: String,
63}
64
65impl DataTable {
66 pub fn data(&self) -> Vec<RecordBatch> {
71 self.batches.iter().map(|x| x.clone()).collect()
77 }
78
79 pub fn data_ref(&self) -> &[RecordBatch] {
84 &self.batches
85 }
86
87 pub fn name(&self) -> &str {
89 &self.name
90 }
91
92 pub fn num_rows(&self) -> usize {
94 self.batches.iter().map(|x| x.num_rows()).sum()
95 }
96
97 pub fn num_columns(&self) -> usize {
99 self.batches[0].num_columns()
100 }
101
102 pub fn schema(&self) -> SchemaRef {
104 self.batches[0].schema()
105 }
106
107 pub fn doc(&self) -> Option<&str> {
109 self.doc.as_deref()
110 }
111}
112
113struct DataTableBuilder {
114 batches: Option<Vec<RecordBatch>>,
115 doc: Option<String>,
116 name: Option<String>,
117}
118
119impl DataTableBuilder {
120 fn new() -> Self {
121 DataTableBuilder {
122 batches: None,
123 doc: None,
124 name: None,
125 }
126 }
127
128 fn with_name(mut self, name: String) -> Self {
129 self.name = Some(name);
130 self
131 }
132
133 fn with_doc(mut self, doc: String) -> Self {
134 self.doc = Some(doc);
135 self
136 }
137
138 fn with_batches(mut self, batches: Vec<RecordBatch>) -> Self {
139 self.batches = Some(batches);
140 self
141 }
142
143 fn build(self) -> Result<DataTable, String> {
144 let batches = self
145 .batches
146 .ok_or_else(|| String::from("Cannot create DataTable without data/batches"))?;
147 let name = self
148 .name
149 .ok_or_else(|| String::from("Cannot create DataTable without a name."))?;
150
151 let table = DataTable {
152 name,
153 batches,
154 doc: self.doc,
155 };
156
157 Ok(table)
158 }
159}
160
161pub trait Repo {
163 fn load_table(&self, name: &str) -> Result<DataTable, String>;
165
166 fn load_data_set(&self, name: &str) -> Result<DataSet, String>;
168}
169
170impl dyn Repo {
171 pub fn default() -> impl Repo {
179 DefaultRepo {}
180 }
181}
182
183struct DefaultRepo {}
184
185impl Repo for DefaultRepo {
186 fn load_table(&self, name: &str) -> Result<DataTable, String> {
188 match name {
189 "iris" => crate::datatables::iris::load_table(),
190 "boston" => crate::datatables::boston::load_table(),
191 _ => Err(format!("{} could not be found in default-repository", name)),
192 }
193 }
194
195 fn load_data_set(&self, name: &str) -> Result<DataSet, String> {
197 Err(format!("Failed to find dataset {:}", name))
198 }
199}
200
#[cfg(test)]
mod tests {

    use super::*;

    use arrow::datatypes::DataType;

    /// The iris table has 150 rows and five columns with the expected
    /// names and data types.
    #[test]
    fn test_can_load_iris() {
        // `default` is an inherent function on the trait object type, so the
        // type has to be spelled `dyn Repo`; the bare `Repo::default()` form
        // is deprecated and rejected by newer editions.
        let repo = <dyn Repo>::default();
        let table: DataTable = repo.load_table("iris").unwrap();

        assert_eq!(
            table.num_rows(),
            150,
            "Iris is supposed to have 150 observations"
        );
        assert_eq!(
            table.num_columns(),
            5,
            "Iris is supposed to have 5 features"
        );

        let expected_columns = [
            ("sepal_length", DataType::Float64),
            ("sepal_width", DataType::Float64),
            ("petal_length", DataType::Float64),
            ("petal_width", DataType::Float64),
            ("variety", DataType::Utf8),
        ];

        // Fetch the schema once instead of once per assertion.
        let schema = table.schema();
        for (index, (name, data_type)) in expected_columns.iter().enumerate() {
            let field = schema.field(index);
            assert_eq!(
                field.name(),
                *name,
                "unexpected name for column {}",
                index
            );
            assert_eq!(
                field.data_type(),
                data_type,
                "unexpected data type for column {}",
                index
            );
        }
    }

    /// The Boston housing table has 506 rows and 14 columns.
    #[test]
    fn test_can_load_boston_housing() {
        let repo = <dyn Repo>::default();
        let table: DataTable = repo.load_table("boston").unwrap();

        assert_eq!(table.num_rows(), 506);
        assert_eq!(table.num_columns(), 14);
    }
}