1use crate::datatable::{DataColumn, DataRow, DataTable, DataType, DataValue};
2use anyhow::{Context, Result};
3use csv::ReaderBuilder;
4use serde_json::Value as JsonValue;
5use std::collections::HashSet;
6use std::fs::File;
7use std::io::Read;
8use std::path::Path;
9
10pub fn load_csv_to_datatable<P: AsRef<Path>>(path: P, table_name: &str) -> Result<DataTable> {
12 let file = File::open(&path)
13 .with_context(|| format!("Failed to open CSV file: {:?}", path.as_ref()))?;
14
15 let mut reader = ReaderBuilder::new().has_headers(true).from_reader(file);
16
17 let headers = reader.headers()?.clone();
19 let mut table = DataTable::new(table_name);
20
21 table
23 .metadata
24 .insert("source_type".to_string(), "csv".to_string());
25 table.metadata.insert(
26 "source_path".to_string(),
27 path.as_ref().display().to_string(),
28 );
29
30 for header in &headers {
32 table.add_column(DataColumn::new(header));
33 }
34
35 let mut string_rows = Vec::new();
37 for result in reader.records() {
38 let record = result?;
39 let row: Vec<String> = record
40 .iter()
41 .map(std::string::ToString::to_string)
42 .collect();
43 string_rows.push(row);
44 }
45
46 let mut column_types = vec![DataType::Null; headers.len()];
48 let sample_size = string_rows.len().min(100); for row in string_rows.iter().take(sample_size) {
51 for (col_idx, value) in row.iter().enumerate() {
52 if !value.is_empty() {
53 let inferred = DataType::infer_from_string(value);
54 column_types[col_idx] = column_types[col_idx].merge(&inferred);
55 }
56 }
57 }
58
59 for (col_idx, column) in table.columns.iter_mut().enumerate() {
61 column.data_type = column_types[col_idx].clone();
62 }
63
64 for string_row in string_rows {
66 let mut values = Vec::new();
67 for (col_idx, value) in string_row.iter().enumerate() {
68 let data_value = DataValue::from_string(value, &column_types[col_idx]);
69 values.push(data_value);
70 }
71 table
72 .add_row(DataRow::new(values))
73 .map_err(|e| anyhow::anyhow!(e))?;
74 }
75
76 table.infer_column_types();
78
79 Ok(table)
80}
81
82pub fn load_json_to_datatable<P: AsRef<Path>>(path: P, table_name: &str) -> Result<DataTable> {
84 let mut file = File::open(&path)
86 .with_context(|| format!("Failed to open JSON file: {:?}", path.as_ref()))?;
87 let mut json_str = String::new();
88 file.read_to_string(&mut json_str)?;
89
90 let json_data: Vec<JsonValue> =
93 serde_json::from_str(&json_str).with_context(|| "Failed to parse JSON file")?;
94
95 if json_data.is_empty() {
96 return Ok(DataTable::new(table_name));
97 }
98
99 let first_obj = json_data[0]
102 .as_object()
103 .context("JSON data must be an array of objects")?;
104
105 let mut table = DataTable::new(table_name);
106
107 table
109 .metadata
110 .insert("source_type".to_string(), "json".to_string());
111 table.metadata.insert(
112 "source_path".to_string(),
113 path.as_ref().display().to_string(),
114 );
115
116 let column_names: Vec<String> = first_obj.keys().cloned().collect();
119 for name in &column_names {
120 table.add_column(DataColumn::new(name));
121 }
122
123 let mut string_rows = Vec::new();
125 for json_obj in &json_data {
126 if let Some(obj) = json_obj.as_object() {
127 let mut row = Vec::new();
128 for name in &column_names {
129 let value_str = match obj.get(name) {
130 Some(JsonValue::Null) | None => String::new(),
131 Some(JsonValue::Bool(b)) => b.to_string(),
132 Some(JsonValue::Number(n)) => n.to_string(),
133 Some(JsonValue::String(s)) => s.clone(),
134 Some(JsonValue::Array(arr)) => format!("{arr:?}"), Some(JsonValue::Object(obj)) => format!("{obj:?}"), };
137 row.push(value_str);
138 }
139 string_rows.push(row);
140 }
141 }
142
143 let mut column_types = vec![DataType::Null; column_names.len()];
145 let sample_size = string_rows.len().min(100);
146
147 for row in string_rows.iter().take(sample_size) {
148 for (col_idx, value) in row.iter().enumerate() {
149 if !value.is_empty() {
150 let inferred = DataType::infer_from_string(value);
151 column_types[col_idx] = column_types[col_idx].merge(&inferred);
152 }
153 }
154 }
155
156 for (col_idx, column) in table.columns.iter_mut().enumerate() {
158 column.data_type = column_types[col_idx].clone();
159 }
160
161 for string_row in string_rows {
163 let mut values = Vec::new();
164 for (col_idx, value) in string_row.iter().enumerate() {
165 let data_value = DataValue::from_string(value, &column_types[col_idx]);
166 values.push(data_value);
167 }
168 table
169 .add_row(DataRow::new(values))
170 .map_err(|e| anyhow::anyhow!(e))?;
171 }
172
173 table.infer_column_types();
175
176 Ok(table)
177}
178
179pub fn load_json_data_to_datatable(data: Vec<JsonValue>, table_name: &str) -> Result<DataTable> {
181 if data.is_empty() {
182 return Ok(DataTable::new(table_name));
183 }
184
185 let mut all_columns = HashSet::new();
187 for item in &data {
188 if let Some(obj) = item.as_object() {
189 for key in obj.keys() {
190 all_columns.insert(key.clone());
191 }
192 }
193 }
194
195 let column_names: Vec<String> = all_columns.into_iter().collect();
196 let mut table = DataTable::new(table_name);
197
198 table
200 .metadata
201 .insert("source_type".to_string(), "json_data".to_string());
202
203 for name in &column_names {
205 table.add_column(DataColumn::new(name));
206 }
207
208 let mut string_rows = Vec::new();
210 for json_obj in &data {
211 if let Some(obj) = json_obj.as_object() {
212 let mut row = Vec::new();
213 for name in &column_names {
214 let value_str = match obj.get(name) {
215 Some(JsonValue::Null) | None => String::new(),
216 Some(JsonValue::Bool(b)) => b.to_string(),
217 Some(JsonValue::Number(n)) => n.to_string(),
218 Some(JsonValue::String(s)) => s.clone(),
219 Some(JsonValue::Array(arr)) => format!("{arr:?}"),
220 Some(JsonValue::Object(obj)) => format!("{obj:?}"),
221 };
222 row.push(value_str);
223 }
224 string_rows.push(row);
225 }
226 }
227
228 let mut column_types = vec![DataType::Null; column_names.len()];
230 let sample_size = string_rows.len().min(100);
231
232 for row in string_rows.iter().take(sample_size) {
233 for (col_idx, value) in row.iter().enumerate() {
234 if !value.is_empty() {
235 let inferred = DataType::infer_from_string(value);
236 column_types[col_idx] = column_types[col_idx].merge(&inferred);
237 }
238 }
239 }
240
241 for (col_idx, column) in table.columns.iter_mut().enumerate() {
242 column.data_type = column_types[col_idx].clone();
243 }
244
245 for string_row in string_rows {
246 let mut values = Vec::new();
247 for (col_idx, value) in string_row.iter().enumerate() {
248 let data_value = DataValue::from_string(value, &column_types[col_idx]);
249 values.push(data_value);
250 }
251 table
252 .add_row(DataRow::new(values))
253 .map_err(|e| anyhow::anyhow!(e))?;
254 }
255
256 table.infer_column_types();
257
258 Ok(table)
259}
260
261#[cfg(test)]
262mod tests {
263 use super::*;
264 use std::io::Write;
265 use tempfile::NamedTempFile;
266
267 #[test]
268 fn test_load_csv() -> Result<()> {
269 let mut temp_file = NamedTempFile::new()?;
271 writeln!(temp_file, "id,name,price,quantity")?;
272 writeln!(temp_file, "1,Widget,9.99,100")?;
273 writeln!(temp_file, "2,Gadget,19.99,50")?;
274 writeln!(temp_file, "3,Doohickey,5.00,200")?;
275 temp_file.flush()?;
276
277 let table = load_csv_to_datatable(temp_file.path(), "products")?;
278
279 assert_eq!(table.name, "products");
280 assert_eq!(table.column_count(), 4);
281 assert_eq!(table.row_count(), 3);
282
283 assert_eq!(table.columns[0].name, "id");
285 assert_eq!(table.columns[0].data_type, DataType::Integer);
286
287 assert_eq!(table.columns[1].name, "name");
288 assert_eq!(table.columns[1].data_type, DataType::String);
289
290 assert_eq!(table.columns[2].name, "price");
291 assert_eq!(table.columns[2].data_type, DataType::Float);
292
293 assert_eq!(table.columns[3].name, "quantity");
294 assert_eq!(table.columns[3].data_type, DataType::Integer);
295
296 let value = table.get_value_by_name(0, "name").unwrap();
298 assert_eq!(value.to_string(), "Widget");
299
300 Ok(())
301 }
302
303 #[test]
304 fn test_load_json() -> Result<()> {
305 let mut temp_file = NamedTempFile::new()?;
307 writeln!(
308 temp_file,
309 r#"[
310 {{"id": 1, "name": "Alice", "active": true, "score": 95.5}},
311 {{"id": 2, "name": "Bob", "active": false, "score": 87.3}},
312 {{"id": 3, "name": "Charlie", "active": true, "score": null}}
313 ]"#
314 )?;
315 temp_file.flush()?;
316
317 let table = load_json_to_datatable(temp_file.path(), "users")?;
318
319 assert_eq!(table.name, "users");
320 assert_eq!(table.column_count(), 4);
321 assert_eq!(table.row_count(), 3);
322
323 let score = table.get_value_by_name(2, "score").unwrap();
325 assert!(score.is_null());
326
327 Ok(())
328 }
329}