sql_cli/data/
datatable_converter.rs1use crate::datatable::{DataColumn, DataRow, DataTable, DataType, DataValue};
2use anyhow::Result;
3use serde_json::Value;
4use std::collections::HashMap;
5use tracing::{debug, info, trace};
6
7pub struct DataTableConverter;
9
10impl DataTableConverter {
11 pub fn from_json_values(values: &[Value], table_name: impl Into<String>) -> Result<DataTable> {
13 let table_name = table_name.into();
14 info!(target: "datatable", "Converting {} JSON values to DataTable '{}'",
15 values.len(), table_name);
16
17 if values.is_empty() {
18 return Ok(DataTable::new(table_name));
19 }
20
21 let column_names = Self::extract_column_names(values)?;
23 debug!(target: "datatable", "Found {} columns: {:?}",
24 column_names.len(), column_names);
25
26 let column_types = Self::infer_column_types(values, &column_names);
28
29 for (name, dtype) in column_names.iter().zip(column_types.iter()) {
31 debug!(target: "datatable", "Column '{}' inferred as {:?}", name, dtype);
32 }
33
34 let mut table = DataTable::new(table_name);
36 for (name, data_type) in column_names.iter().zip(column_types.iter()) {
37 table.add_column(
38 DataColumn::new(name.clone())
39 .with_type(data_type.clone())
40 .with_nullable(true), );
42 }
43
44 for json_row in values {
46 let data_row = Self::convert_json_row(json_row, &column_names, &column_types)?;
47 table
48 .add_row(data_row)
49 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
50 }
51
52 Self::update_column_stats(&mut table);
54
55 info!(target: "datatable", "DataTable '{}' created: {} rows, {} columns",
56 table.name, table.row_count(), table.column_count());
57
58 Ok(table)
59 }
60
61 pub fn from_csv_data(
63 headers: Vec<String>,
64 rows: Vec<Vec<String>>,
65 table_name: impl Into<String>,
66 ) -> Result<DataTable> {
67 let column_types = Self::infer_types_from_strings(&headers, &rows);
69
70 let mut table = DataTable::new(table_name);
72 for (name, data_type) in headers.iter().zip(column_types.iter()) {
73 table.add_column(
74 DataColumn::new(name.clone())
75 .with_type(data_type.clone())
76 .with_nullable(true),
77 );
78 }
79
80 for row_values in rows {
82 let mut data_values = Vec::new();
83 for (value, data_type) in row_values.iter().zip(column_types.iter()) {
84 data_values.push(DataValue::from_string(value, data_type));
85 }
86 table
87 .add_row(DataRow::new(data_values))
88 .map_err(|e| anyhow::anyhow!("Failed to add row: {}", e))?;
89 }
90
91 Self::update_column_stats(&mut table);
93
94 info!(target: "datatable", "DataTable '{}' created: {} rows, {} columns",
95 table.name, table.row_count(), table.column_count());
96
97 Ok(table)
98 }
99
100 fn extract_column_names(values: &[Value]) -> Result<Vec<String>> {
102 if let Some(first) = values.first() {
103 if let Some(obj) = first.as_object() {
104 Ok(obj.keys().cloned().collect())
105 } else {
106 Err(anyhow::anyhow!("JSON values must be objects"))
107 }
108 } else {
109 Ok(vec![])
110 }
111 }
112
113 fn infer_column_types(values: &[Value], column_names: &[String]) -> Vec<DataType> {
115 let mut column_types = vec![DataType::Null; column_names.len()];
116
117 let sample_size = values.len().min(100);
119 trace!(target: "datatable", "Sampling {} rows for type inference", sample_size);
120
121 for row in values.iter().take(sample_size) {
122 if let Some(obj) = row.as_object() {
123 for (idx, col_name) in column_names.iter().enumerate() {
124 if let Some(value) = obj.get(col_name) {
125 let value_type = Self::infer_json_type(value);
126 column_types[idx] = column_types[idx].merge(&value_type);
127 }
128 }
129 }
130 }
131
132 for col_type in &mut column_types {
134 if *col_type == DataType::Null {
135 *col_type = DataType::String;
136 }
137 }
138
139 column_types
140 }
141
142 fn infer_json_type(value: &Value) -> DataType {
144 match value {
145 Value::Null => DataType::Null,
146 Value::Bool(_) => DataType::Boolean,
147 Value::Number(n) => {
148 if n.is_i64() || n.is_u64() {
149 DataType::Integer
150 } else {
151 DataType::Float
152 }
153 }
154 Value::String(s) => DataType::infer_from_string(s),
155 Value::Array(_) | Value::Object(_) => DataType::String, }
157 }
158
159 fn convert_json_row(
161 json_row: &Value,
162 column_names: &[String],
163 column_types: &[DataType],
164 ) -> Result<DataRow> {
165 let mut values = Vec::new();
166
167 if let Some(obj) = json_row.as_object() {
168 for (col_name, col_type) in column_names.iter().zip(column_types.iter()) {
169 let value = obj
170 .get(col_name)
171 .map(|v| Self::json_to_datavalue(v, col_type))
172 .unwrap_or(DataValue::Null);
173 values.push(value);
174 }
175 } else {
176 return Err(anyhow::anyhow!("JSON row must be an object"));
177 }
178
179 Ok(DataRow::new(values))
180 }
181
182 fn json_to_datavalue(json_val: &Value, expected_type: &DataType) -> DataValue {
184 match json_val {
185 Value::Null => DataValue::Null,
186 Value::Bool(b) => DataValue::Boolean(*b),
187 Value::Number(n) => match expected_type {
188 DataType::Integer => n
189 .as_i64()
190 .map(DataValue::Integer)
191 .unwrap_or(DataValue::Null),
192 DataType::Float => n.as_f64().map(DataValue::Float).unwrap_or(DataValue::Null),
193 _ => DataValue::String(n.to_string()),
194 },
195 Value::String(s) => DataValue::from_string(s, expected_type),
196 Value::Array(_) | Value::Object(_) => {
197 DataValue::String(json_val.to_string())
199 }
200 }
201 }
202
203 fn infer_types_from_strings(headers: &[String], rows: &[Vec<String>]) -> Vec<DataType> {
205 let mut column_types = vec![DataType::Null; headers.len()];
206
207 let sample_size = rows.len().min(100);
209
210 for row in rows.iter().take(sample_size) {
211 for (idx, value) in row.iter().enumerate() {
212 if idx < column_types.len() {
213 let value_type = DataType::infer_from_string(value);
214 column_types[idx] = column_types[idx].merge(&value_type);
215 }
216 }
217 }
218
219 for col_type in &mut column_types {
221 if *col_type == DataType::Null {
222 *col_type = DataType::String;
223 }
224 }
225
226 column_types
227 }
228
229 fn update_column_stats(table: &mut DataTable) {
231 for (col_idx, column) in table.columns.iter_mut().enumerate() {
232 let mut null_count = 0;
233 let mut unique_values = HashMap::new();
234
235 for row in &table.rows {
236 if let Some(value) = row.get(col_idx) {
237 if value.is_null() {
238 null_count += 1;
239 } else {
240 unique_values.insert(value.to_string(), ());
241 }
242 }
243 }
244
245 column.null_count = null_count;
246 column.unique_values = Some(unique_values.len());
247 }
248 }
249
250 pub fn debug_print(table: &DataTable) {
252 println!("DataTable: {}", table.name);
253 println!("Columns: {}", table.column_count());
254 println!("Rows: {}", table.row_count());
255
256 for col in &table.columns {
258 println!(
259 " - {} ({:?}, nulls: {}, unique: {:?})",
260 col.name, col.data_type, col.null_count, col.unique_values
261 );
262 }
263
264 println!("\nFirst 5 rows:");
266 for (idx, row) in table.rows.iter().take(5).enumerate() {
267 print!(" Row {}: ", idx);
268 for value in &row.values {
269 print!("{}, ", value);
270 }
271 println!();
272 }
273 }
274}