1use crate::datatable::{DataColumn, DataRow, DataTable, DataType, DataValue};
2use anyhow::{Context, Result};
3use csv::ReaderBuilder;
4use serde_json::Value as JsonValue;
5use std::collections::HashSet;
6use std::fs::File;
7use std::io::{BufRead, BufReader, Read};
8use std::path::Path;
9
10fn is_null_field(raw_line: &str, field_index: usize) -> bool {
12 let mut comma_count = 0;
13 let mut in_quotes = false;
14 let mut field_start = 0;
15 let mut prev_char = ' ';
16
17 for (i, ch) in raw_line.chars().enumerate() {
18 if ch == '"' && prev_char != '\\' {
19 in_quotes = !in_quotes;
20 }
21
22 if ch == ',' && !in_quotes {
23 if comma_count == field_index {
24 let field_end = i;
25 let field_content = &raw_line[field_start..field_end].trim();
26 if field_content.is_empty() {
28 return true; }
30 if field_content.starts_with('"')
32 && field_content.ends_with('"')
33 && field_content.len() == 2
34 {
35 return false; }
37 return false; }
39 comma_count += 1;
40 field_start = i + 1;
41 }
42 prev_char = ch;
43 }
44
45 if comma_count == field_index {
47 let field_content = raw_line[field_start..]
48 .trim()
49 .trim_end_matches('\n')
50 .trim_end_matches('\r');
51 if field_content.is_empty() {
53 return true; }
55 if field_content.starts_with('"')
57 && field_content.ends_with('"')
58 && field_content.len() == 2
59 {
60 return false; }
62 return false; }
64
65 false }
67
68pub fn load_csv_to_datatable<P: AsRef<Path>>(path: P, table_name: &str) -> Result<DataTable> {
70 let file = File::open(&path)
71 .with_context(|| format!("Failed to open CSV file: {:?}", path.as_ref()))?;
72
73 let mut reader = ReaderBuilder::new().has_headers(true).from_reader(file);
74
75 let headers = reader.headers()?.clone();
77 let mut table = DataTable::new(table_name);
78
79 table
81 .metadata
82 .insert("source_type".to_string(), "csv".to_string());
83 table.metadata.insert(
84 "source_path".to_string(),
85 path.as_ref().display().to_string(),
86 );
87
88 for header in &headers {
90 table.add_column(DataColumn::new(header));
91 }
92
93 let file2 = File::open(&path).with_context(|| {
95 format!(
96 "Failed to open CSV file for raw reading: {:?}",
97 path.as_ref()
98 )
99 })?;
100 let mut line_reader = BufReader::new(file2);
101 let mut raw_line = String::new();
102 line_reader.read_line(&mut raw_line)?;
104
105 let mut string_rows = Vec::new();
107 let mut raw_lines = Vec::new();
108
109 for result in reader.records() {
110 let record = result?;
111 let row: Vec<String> = record
112 .iter()
113 .map(std::string::ToString::to_string)
114 .collect();
115
116 raw_line.clear();
118 line_reader.read_line(&mut raw_line)?;
119 raw_lines.push(raw_line.clone());
120
121 string_rows.push(row);
122 }
123
124 let mut column_types = vec![DataType::Null; headers.len()];
126 let sample_size = string_rows.len().min(100); for row in string_rows.iter().take(sample_size) {
129 for (col_idx, value) in row.iter().enumerate() {
130 if !value.is_empty() {
131 let inferred = DataType::infer_from_string(value);
132 column_types[col_idx] = column_types[col_idx].merge(&inferred);
133 }
134 }
135 }
136
137 for (col_idx, column) in table.columns.iter_mut().enumerate() {
139 column.data_type = column_types[col_idx].clone();
140 }
141
142 for (row_idx, string_row) in string_rows.iter().enumerate() {
144 let mut values = Vec::new();
145 let raw_line = &raw_lines[row_idx];
146
147 for (col_idx, value) in string_row.iter().enumerate() {
148 let data_value = if value.is_empty() {
149 if is_null_field(raw_line, col_idx) {
151 DataValue::Null
152 } else {
153 DataValue::String(String::new())
154 }
155 } else {
156 DataValue::from_string(value, &column_types[col_idx])
157 };
158 values.push(data_value);
159 }
160 table
161 .add_row(DataRow::new(values))
162 .map_err(|e| anyhow::anyhow!(e))?;
163 }
164
165 table.infer_column_types();
167
168 Ok(table)
169}
170
171pub fn load_json_to_datatable<P: AsRef<Path>>(path: P, table_name: &str) -> Result<DataTable> {
173 let mut file = File::open(&path)
175 .with_context(|| format!("Failed to open JSON file: {:?}", path.as_ref()))?;
176 let mut json_str = String::new();
177 file.read_to_string(&mut json_str)?;
178
179 let json_data: Vec<JsonValue> =
182 serde_json::from_str(&json_str).with_context(|| "Failed to parse JSON file")?;
183
184 if json_data.is_empty() {
185 return Ok(DataTable::new(table_name));
186 }
187
188 let first_obj = json_data[0]
191 .as_object()
192 .context("JSON data must be an array of objects")?;
193
194 let mut table = DataTable::new(table_name);
195
196 table
198 .metadata
199 .insert("source_type".to_string(), "json".to_string());
200 table.metadata.insert(
201 "source_path".to_string(),
202 path.as_ref().display().to_string(),
203 );
204
205 let column_names: Vec<String> = first_obj.keys().cloned().collect();
208 for name in &column_names {
209 table.add_column(DataColumn::new(name));
210 }
211
212 let mut string_rows = Vec::new();
214 for json_obj in &json_data {
215 if let Some(obj) = json_obj.as_object() {
216 let mut row = Vec::new();
217 for name in &column_names {
218 let value_str = match obj.get(name) {
219 Some(JsonValue::Null) | None => String::new(),
220 Some(JsonValue::Bool(b)) => b.to_string(),
221 Some(JsonValue::Number(n)) => n.to_string(),
222 Some(JsonValue::String(s)) => s.clone(),
223 Some(JsonValue::Array(arr)) => format!("{arr:?}"), Some(JsonValue::Object(obj)) => format!("{obj:?}"), };
226 row.push(value_str);
227 }
228 string_rows.push(row);
229 }
230 }
231
232 let mut column_types = vec![DataType::Null; column_names.len()];
234 let sample_size = string_rows.len().min(100);
235
236 for row in string_rows.iter().take(sample_size) {
237 for (col_idx, value) in row.iter().enumerate() {
238 if !value.is_empty() {
239 let inferred = DataType::infer_from_string(value);
240 column_types[col_idx] = column_types[col_idx].merge(&inferred);
241 }
242 }
243 }
244
245 for (col_idx, column) in table.columns.iter_mut().enumerate() {
247 column.data_type = column_types[col_idx].clone();
248 }
249
250 for string_row in string_rows {
252 let mut values = Vec::new();
253 for (col_idx, value) in string_row.iter().enumerate() {
254 let data_value = DataValue::from_string(value, &column_types[col_idx]);
255 values.push(data_value);
256 }
257 table
258 .add_row(DataRow::new(values))
259 .map_err(|e| anyhow::anyhow!(e))?;
260 }
261
262 table.infer_column_types();
264
265 Ok(table)
266}
267
268pub fn load_json_data_to_datatable(data: Vec<JsonValue>, table_name: &str) -> Result<DataTable> {
270 if data.is_empty() {
271 return Ok(DataTable::new(table_name));
272 }
273
274 let mut all_columns = HashSet::new();
276 for item in &data {
277 if let Some(obj) = item.as_object() {
278 for key in obj.keys() {
279 all_columns.insert(key.clone());
280 }
281 }
282 }
283
284 let column_names: Vec<String> = all_columns.into_iter().collect();
285 let mut table = DataTable::new(table_name);
286
287 table
289 .metadata
290 .insert("source_type".to_string(), "json_data".to_string());
291
292 for name in &column_names {
294 table.add_column(DataColumn::new(name));
295 }
296
297 let mut string_rows = Vec::new();
299 for json_obj in &data {
300 if let Some(obj) = json_obj.as_object() {
301 let mut row = Vec::new();
302 for name in &column_names {
303 let value_str = match obj.get(name) {
304 Some(JsonValue::Null) | None => String::new(),
305 Some(JsonValue::Bool(b)) => b.to_string(),
306 Some(JsonValue::Number(n)) => n.to_string(),
307 Some(JsonValue::String(s)) => s.clone(),
308 Some(JsonValue::Array(arr)) => format!("{arr:?}"),
309 Some(JsonValue::Object(obj)) => format!("{obj:?}"),
310 };
311 row.push(value_str);
312 }
313 string_rows.push(row);
314 }
315 }
316
317 let mut column_types = vec![DataType::Null; column_names.len()];
319 let sample_size = string_rows.len().min(100);
320
321 for row in string_rows.iter().take(sample_size) {
322 for (col_idx, value) in row.iter().enumerate() {
323 if !value.is_empty() {
324 let inferred = DataType::infer_from_string(value);
325 column_types[col_idx] = column_types[col_idx].merge(&inferred);
326 }
327 }
328 }
329
330 for (col_idx, column) in table.columns.iter_mut().enumerate() {
331 column.data_type = column_types[col_idx].clone();
332 }
333
334 for string_row in string_rows {
335 let mut values = Vec::new();
336 for (col_idx, value) in string_row.iter().enumerate() {
337 let data_value = DataValue::from_string(value, &column_types[col_idx]);
338 values.push(data_value);
339 }
340 table
341 .add_row(DataRow::new(values))
342 .map_err(|e| anyhow::anyhow!(e))?;
343 }
344
345 table.infer_column_types();
346
347 Ok(table)
348}
349
350#[cfg(test)]
351mod tests {
352 use super::*;
353 use std::io::Write;
354 use tempfile::NamedTempFile;
355
356 #[test]
357 fn test_load_csv() -> Result<()> {
358 let mut temp_file = NamedTempFile::new()?;
360 writeln!(temp_file, "id,name,price,quantity")?;
361 writeln!(temp_file, "1,Widget,9.99,100")?;
362 writeln!(temp_file, "2,Gadget,19.99,50")?;
363 writeln!(temp_file, "3,Doohickey,5.00,200")?;
364 temp_file.flush()?;
365
366 let table = load_csv_to_datatable(temp_file.path(), "products")?;
367
368 assert_eq!(table.name, "products");
369 assert_eq!(table.column_count(), 4);
370 assert_eq!(table.row_count(), 3);
371
372 assert_eq!(table.columns[0].name, "id");
374 assert_eq!(table.columns[0].data_type, DataType::Integer);
375
376 assert_eq!(table.columns[1].name, "name");
377 assert_eq!(table.columns[1].data_type, DataType::String);
378
379 assert_eq!(table.columns[2].name, "price");
380 assert_eq!(table.columns[2].data_type, DataType::Float);
381
382 assert_eq!(table.columns[3].name, "quantity");
383 assert_eq!(table.columns[3].data_type, DataType::Integer);
384
385 let value = table.get_value_by_name(0, "name").unwrap();
387 assert_eq!(value.to_string(), "Widget");
388
389 Ok(())
390 }
391
392 #[test]
393 fn test_load_json() -> Result<()> {
394 let mut temp_file = NamedTempFile::new()?;
396 writeln!(
397 temp_file,
398 r#"[
399 {{"id": 1, "name": "Alice", "active": true, "score": 95.5}},
400 {{"id": 2, "name": "Bob", "active": false, "score": 87.3}},
401 {{"id": 3, "name": "Charlie", "active": true, "score": null}}
402 ]"#
403 )?;
404 temp_file.flush()?;
405
406 let table = load_json_to_datatable(temp_file.path(), "users")?;
407
408 assert_eq!(table.name, "users");
409 assert_eq!(table.column_count(), 4);
410 assert_eq!(table.row_count(), 3);
411
412 let score = table.get_value_by_name(2, "score").unwrap();
414 assert!(score.is_null());
415
416 Ok(())
417 }
418}