1use crate::datatable::{DataColumn, DataRow, DataTable, DataType, DataValue};
2use anyhow::{Context, Result};
3use csv::ReaderBuilder;
4use serde_json::Value as JsonValue;
5use std::collections::HashSet;
6use std::fs::File;
7use std::io::Read;
8use std::path::Path;
9
10pub fn load_csv_to_datatable<P: AsRef<Path>>(path: P, table_name: &str) -> Result<DataTable> {
12 let file = File::open(&path)
13 .with_context(|| format!("Failed to open CSV file: {:?}", path.as_ref()))?;
14
15 let mut reader = ReaderBuilder::new().has_headers(true).from_reader(file);
16
17 let headers = reader.headers()?.clone();
19 let mut table = DataTable::new(table_name);
20
21 table
23 .metadata
24 .insert("source_type".to_string(), "csv".to_string());
25 table.metadata.insert(
26 "source_path".to_string(),
27 path.as_ref().display().to_string(),
28 );
29
30 for header in headers.iter() {
32 table.add_column(DataColumn::new(header));
33 }
34
35 let mut string_rows = Vec::new();
37 for result in reader.records() {
38 let record = result?;
39 let row: Vec<String> = record.iter().map(|s| s.to_string()).collect();
40 string_rows.push(row);
41 }
42
43 let mut column_types = vec![DataType::Null; headers.len()];
45 let sample_size = string_rows.len().min(100); for row in string_rows.iter().take(sample_size) {
48 for (col_idx, value) in row.iter().enumerate() {
49 if !value.is_empty() {
50 let inferred = DataType::infer_from_string(value);
51 column_types[col_idx] = column_types[col_idx].merge(&inferred);
52 }
53 }
54 }
55
56 for (col_idx, column) in table.columns.iter_mut().enumerate() {
58 column.data_type = column_types[col_idx].clone();
59 }
60
61 for string_row in string_rows {
63 let mut values = Vec::new();
64 for (col_idx, value) in string_row.iter().enumerate() {
65 let data_value = DataValue::from_string(value, &column_types[col_idx]);
66 values.push(data_value);
67 }
68 table
69 .add_row(DataRow::new(values))
70 .map_err(|e| anyhow::anyhow!(e))?;
71 }
72
73 table.infer_column_types();
75
76 Ok(table)
77}
78
79pub fn load_json_to_datatable<P: AsRef<Path>>(path: P, table_name: &str) -> Result<DataTable> {
81 let mut file = File::open(&path)
83 .with_context(|| format!("Failed to open JSON file: {:?}", path.as_ref()))?;
84 let mut json_str = String::new();
85 file.read_to_string(&mut json_str)?;
86
87 let json_data: Vec<JsonValue> =
90 serde_json::from_str(&json_str).with_context(|| "Failed to parse JSON file")?;
91
92 if json_data.is_empty() {
93 return Ok(DataTable::new(table_name));
94 }
95
96 let first_obj = json_data[0]
99 .as_object()
100 .context("JSON data must be an array of objects")?;
101
102 let mut table = DataTable::new(table_name);
103
104 table
106 .metadata
107 .insert("source_type".to_string(), "json".to_string());
108 table.metadata.insert(
109 "source_path".to_string(),
110 path.as_ref().display().to_string(),
111 );
112
113 let column_names: Vec<String> = first_obj.keys().cloned().collect();
116 for name in &column_names {
117 table.add_column(DataColumn::new(name));
118 }
119
120 let mut string_rows = Vec::new();
122 for json_obj in &json_data {
123 if let Some(obj) = json_obj.as_object() {
124 let mut row = Vec::new();
125 for name in &column_names {
126 let value_str = match obj.get(name) {
127 Some(JsonValue::Null) | None => String::new(),
128 Some(JsonValue::Bool(b)) => b.to_string(),
129 Some(JsonValue::Number(n)) => n.to_string(),
130 Some(JsonValue::String(s)) => s.clone(),
131 Some(JsonValue::Array(arr)) => format!("{:?}", arr), Some(JsonValue::Object(obj)) => format!("{:?}", obj), };
134 row.push(value_str);
135 }
136 string_rows.push(row);
137 }
138 }
139
140 let mut column_types = vec![DataType::Null; column_names.len()];
142 let sample_size = string_rows.len().min(100);
143
144 for row in string_rows.iter().take(sample_size) {
145 for (col_idx, value) in row.iter().enumerate() {
146 if !value.is_empty() {
147 let inferred = DataType::infer_from_string(value);
148 column_types[col_idx] = column_types[col_idx].merge(&inferred);
149 }
150 }
151 }
152
153 for (col_idx, column) in table.columns.iter_mut().enumerate() {
155 column.data_type = column_types[col_idx].clone();
156 }
157
158 for string_row in string_rows {
160 let mut values = Vec::new();
161 for (col_idx, value) in string_row.iter().enumerate() {
162 let data_value = DataValue::from_string(value, &column_types[col_idx]);
163 values.push(data_value);
164 }
165 table
166 .add_row(DataRow::new(values))
167 .map_err(|e| anyhow::anyhow!(e))?;
168 }
169
170 table.infer_column_types();
172
173 Ok(table)
174}
175
176pub fn load_json_data_to_datatable(data: Vec<JsonValue>, table_name: &str) -> Result<DataTable> {
178 if data.is_empty() {
179 return Ok(DataTable::new(table_name));
180 }
181
182 let mut all_columns = HashSet::new();
184 for item in &data {
185 if let Some(obj) = item.as_object() {
186 for key in obj.keys() {
187 all_columns.insert(key.clone());
188 }
189 }
190 }
191
192 let column_names: Vec<String> = all_columns.into_iter().collect();
193 let mut table = DataTable::new(table_name);
194
195 table
197 .metadata
198 .insert("source_type".to_string(), "json_data".to_string());
199
200 for name in &column_names {
202 table.add_column(DataColumn::new(name));
203 }
204
205 let mut string_rows = Vec::new();
207 for json_obj in &data {
208 if let Some(obj) = json_obj.as_object() {
209 let mut row = Vec::new();
210 for name in &column_names {
211 let value_str = match obj.get(name) {
212 Some(JsonValue::Null) | None => String::new(),
213 Some(JsonValue::Bool(b)) => b.to_string(),
214 Some(JsonValue::Number(n)) => n.to_string(),
215 Some(JsonValue::String(s)) => s.clone(),
216 Some(JsonValue::Array(arr)) => format!("{:?}", arr),
217 Some(JsonValue::Object(obj)) => format!("{:?}", obj),
218 };
219 row.push(value_str);
220 }
221 string_rows.push(row);
222 }
223 }
224
225 let mut column_types = vec![DataType::Null; column_names.len()];
227 let sample_size = string_rows.len().min(100);
228
229 for row in string_rows.iter().take(sample_size) {
230 for (col_idx, value) in row.iter().enumerate() {
231 if !value.is_empty() {
232 let inferred = DataType::infer_from_string(value);
233 column_types[col_idx] = column_types[col_idx].merge(&inferred);
234 }
235 }
236 }
237
238 for (col_idx, column) in table.columns.iter_mut().enumerate() {
239 column.data_type = column_types[col_idx].clone();
240 }
241
242 for string_row in string_rows {
243 let mut values = Vec::new();
244 for (col_idx, value) in string_row.iter().enumerate() {
245 let data_value = DataValue::from_string(value, &column_types[col_idx]);
246 values.push(data_value);
247 }
248 table
249 .add_row(DataRow::new(values))
250 .map_err(|e| anyhow::anyhow!(e))?;
251 }
252
253 table.infer_column_types();
254
255 Ok(table)
256}
257
258#[cfg(test)]
259mod tests {
260 use super::*;
261 use std::io::Write;
262 use tempfile::NamedTempFile;
263
264 #[test]
265 fn test_load_csv() -> Result<()> {
266 let mut temp_file = NamedTempFile::new()?;
268 writeln!(temp_file, "id,name,price,quantity")?;
269 writeln!(temp_file, "1,Widget,9.99,100")?;
270 writeln!(temp_file, "2,Gadget,19.99,50")?;
271 writeln!(temp_file, "3,Doohickey,5.00,200")?;
272 temp_file.flush()?;
273
274 let table = load_csv_to_datatable(temp_file.path(), "products")?;
275
276 assert_eq!(table.name, "products");
277 assert_eq!(table.column_count(), 4);
278 assert_eq!(table.row_count(), 3);
279
280 assert_eq!(table.columns[0].name, "id");
282 assert_eq!(table.columns[0].data_type, DataType::Integer);
283
284 assert_eq!(table.columns[1].name, "name");
285 assert_eq!(table.columns[1].data_type, DataType::String);
286
287 assert_eq!(table.columns[2].name, "price");
288 assert_eq!(table.columns[2].data_type, DataType::Float);
289
290 assert_eq!(table.columns[3].name, "quantity");
291 assert_eq!(table.columns[3].data_type, DataType::Integer);
292
293 let value = table.get_value_by_name(0, "name").unwrap();
295 assert_eq!(value.to_string(), "Widget");
296
297 Ok(())
298 }
299
300 #[test]
301 fn test_load_json() -> Result<()> {
302 let mut temp_file = NamedTempFile::new()?;
304 writeln!(
305 temp_file,
306 r#"[
307 {{"id": 1, "name": "Alice", "active": true, "score": 95.5}},
308 {{"id": 2, "name": "Bob", "active": false, "score": 87.3}},
309 {{"id": 3, "name": "Charlie", "active": true, "score": null}}
310 ]"#
311 )?;
312 temp_file.flush()?;
313
314 let table = load_json_to_datatable(temp_file.path(), "users")?;
315
316 assert_eq!(table.name, "users");
317 assert_eq!(table.column_count(), 4);
318 assert_eq!(table.row_count(), 3);
319
320 let score = table.get_value_by_name(2, "score").unwrap();
322 assert!(score.is_null());
323
324 Ok(())
325 }
326}