sql_cli/data/
datatable_converter.rs1use crate::datatable::{DataColumn, DataRow, DataTable, DataType, DataValue};
2use anyhow::Result;
3use serde_json::Value;
4use std::collections::HashMap;
5use tracing::{debug, info, trace};
6
7pub struct DataTableConverter;
9
10impl DataTableConverter {
11 pub fn from_json_values(values: &[Value], table_name: impl Into<String>) -> Result<DataTable> {
13 let table_name = table_name.into();
14 info!(target: "datatable", "Converting {} JSON values to DataTable '{}'",
15 values.len(), table_name);
16
17 if values.is_empty() {
18 return Ok(DataTable::new(table_name));
19 }
20
21 let column_names = Self::extract_column_names(values)?;
23 debug!(target: "datatable", "Found {} columns: {:?}",
24 column_names.len(), column_names);
25
26 let column_types = Self::infer_column_types(values, &column_names);
28
29 for (name, dtype) in column_names.iter().zip(column_types.iter()) {
31 debug!(target: "datatable", "Column '{}' inferred as {:?}", name, dtype);
32 }
33
34 let mut table = DataTable::new(table_name);
36 for (name, data_type) in column_names.iter().zip(column_types.iter()) {
37 table.add_column(
38 DataColumn::new(name.clone())
39 .with_type(data_type.clone())
40 .with_nullable(true), );
42 }
43
44 for json_row in values {
46 let data_row = Self::convert_json_row(json_row, &column_names, &column_types)?;
47 table
48 .add_row(data_row)
49 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
50 }
51
52 Self::update_column_stats(&mut table);
54
55 info!(target: "datatable", "DataTable '{}' created: {} rows, {} columns",
56 table.name, table.row_count(), table.column_count());
57
58 Ok(table)
59 }
60
61 pub fn from_csv_data(
63 headers: Vec<String>,
64 rows: Vec<Vec<String>>,
65 table_name: impl Into<String>,
66 ) -> Result<DataTable> {
67 let column_types = Self::infer_types_from_strings(&headers, &rows);
69
70 let mut table = DataTable::new(table_name);
72 for (name, data_type) in headers.iter().zip(column_types.iter()) {
73 table.add_column(
74 DataColumn::new(name.clone())
75 .with_type(data_type.clone())
76 .with_nullable(true),
77 );
78 }
79
80 for row_values in rows {
82 let mut data_values = Vec::new();
83 for (value, data_type) in row_values.iter().zip(column_types.iter()) {
84 data_values.push(DataValue::from_string(value, data_type));
85 }
86 table
87 .add_row(DataRow::new(data_values))
88 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
89 }
90
91 Self::update_column_stats(&mut table);
93
94 info!(target: "datatable", "DataTable '{}' created: {} rows, {} columns",
95 table.name, table.row_count(), table.column_count());
96
97 Ok(table)
98 }
99
100 fn extract_column_names(values: &[Value]) -> Result<Vec<String>> {
102 if let Some(first) = values.first() {
103 if let Some(obj) = first.as_object() {
104 Ok(obj.keys().cloned().collect())
105 } else {
106 Err(anyhow::anyhow!("JSON values must be objects"))
107 }
108 } else {
109 Ok(vec![])
110 }
111 }
112
113 fn infer_column_types(values: &[Value], column_names: &[String]) -> Vec<DataType> {
115 let mut column_types = vec![DataType::Null; column_names.len()];
116
117 let sample_size = values.len().min(100);
119 trace!(target: "datatable", "Sampling {} rows for type inference", sample_size);
120
121 for row in values.iter().take(sample_size) {
122 if let Some(obj) = row.as_object() {
123 for (idx, col_name) in column_names.iter().enumerate() {
124 if let Some(value) = obj.get(col_name) {
125 let value_type = Self::infer_json_type(value);
126 column_types[idx] = column_types[idx].merge(&value_type);
127 }
128 }
129 }
130 }
131
132 for col_type in &mut column_types {
134 if *col_type == DataType::Null {
135 *col_type = DataType::String;
136 }
137 }
138
139 column_types
140 }
141
142 fn infer_json_type(value: &Value) -> DataType {
144 match value {
145 Value::Null => DataType::Null,
146 Value::Bool(_) => DataType::Boolean,
147 Value::Number(n) => {
148 if n.is_i64() || n.is_u64() {
149 DataType::Integer
150 } else {
151 DataType::Float
152 }
153 }
154 Value::String(s) => DataType::infer_from_string(s),
155 Value::Array(_) | Value::Object(_) => DataType::String, }
157 }
158
159 fn convert_json_row(
161 json_row: &Value,
162 column_names: &[String],
163 column_types: &[DataType],
164 ) -> Result<DataRow> {
165 let mut values = Vec::new();
166
167 if let Some(obj) = json_row.as_object() {
168 for (col_name, col_type) in column_names.iter().zip(column_types.iter()) {
169 let value = obj
170 .get(col_name)
171 .map_or(DataValue::Null, |v| Self::json_to_datavalue(v, col_type));
172 values.push(value);
173 }
174 } else {
175 return Err(anyhow::anyhow!("JSON row must be an object"));
176 }
177
178 Ok(DataRow::new(values))
179 }
180
181 fn json_to_datavalue(json_val: &Value, expected_type: &DataType) -> DataValue {
183 match json_val {
184 Value::Null => DataValue::Null,
185 Value::Bool(b) => DataValue::Boolean(*b),
186 Value::Number(n) => match expected_type {
187 DataType::Integer => n.as_i64().map_or(DataValue::Null, DataValue::Integer),
188 DataType::Float => n.as_f64().map_or(DataValue::Null, DataValue::Float),
189 _ => DataValue::String(n.to_string()),
190 },
191 Value::String(s) => DataValue::from_string(s, expected_type),
192 Value::Array(_) | Value::Object(_) => {
193 DataValue::String(json_val.to_string())
195 }
196 }
197 }
198
199 fn infer_types_from_strings(headers: &[String], rows: &[Vec<String>]) -> Vec<DataType> {
201 let mut column_types = vec![DataType::Null; headers.len()];
202
203 let sample_size = rows.len().min(100);
205
206 for row in rows.iter().take(sample_size) {
207 for (idx, value) in row.iter().enumerate() {
208 if idx < column_types.len() {
209 let value_type = DataType::infer_from_string(value);
210 column_types[idx] = column_types[idx].merge(&value_type);
211 }
212 }
213 }
214
215 for col_type in &mut column_types {
217 if *col_type == DataType::Null {
218 *col_type = DataType::String;
219 }
220 }
221
222 column_types
223 }
224
225 fn update_column_stats(table: &mut DataTable) {
227 for (col_idx, column) in table.columns.iter_mut().enumerate() {
228 let mut null_count = 0;
229 let mut unique_values = HashMap::new();
230
231 for row in &table.rows {
232 if let Some(value) = row.get(col_idx) {
233 if value.is_null() {
234 null_count += 1;
235 } else {
236 unique_values.insert(value.to_string(), ());
237 }
238 }
239 }
240
241 column.null_count = null_count;
242 column.unique_values = Some(unique_values.len());
243 }
244 }
245
246 pub fn debug_print(table: &DataTable) {
248 println!("DataTable: {}", table.name);
249 println!("Columns: {}", table.column_count());
250 println!("Rows: {}", table.row_count());
251
252 for col in &table.columns {
254 println!(
255 " - {} ({:?}, nulls: {}, unique: {:?})",
256 col.name, col.data_type, col.null_count, col.unique_values
257 );
258 }
259
260 println!("\nFirst 5 rows:");
262 for (idx, row) in table.rows.iter().take(5).enumerate() {
263 print!(" Row {idx}: ");
264 for value in &row.values {
265 print!("{value}, ");
266 }
267 println!();
268 }
269 }
270}