1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::{Deserialize, Serialize};
5use serde_json::Value as JsonValue;
6use std::collections::HashMap;
7use std::fmt;
8use std::sync::Arc;
9use tracing::debug;
10
11#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
13pub enum DataType {
14 String,
15 Integer,
16 Float,
17 Boolean,
18 DateTime,
19 Null,
20 Mixed, }
22
23impl DataType {
24 #[must_use]
26 pub fn infer_from_string(value: &str) -> Self {
27 if value.eq_ignore_ascii_case("null") {
29 return DataType::Null;
30 }
31
32 match TypeInference::infer_from_string(value) {
34 InferredType::Null => DataType::Null,
35 InferredType::Boolean => DataType::Boolean,
36 InferredType::Integer => DataType::Integer,
37 InferredType::Float => DataType::Float,
38 InferredType::DateTime => DataType::DateTime,
39 InferredType::String => DataType::String,
40 }
41 }
42
43 fn looks_like_datetime(value: &str) -> bool {
46 TypeInference::looks_like_datetime(value)
47 }
48
49 #[must_use]
51 pub fn merge(&self, other: &DataType) -> DataType {
52 if self == other {
53 return self.clone();
54 }
55
56 match (self, other) {
57 (DataType::Null, t) | (t, DataType::Null) => t.clone(),
58 (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
59 DataType::Float
60 }
61 _ => DataType::Mixed,
62 }
63 }
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct DataColumn {
69 pub name: String,
70 pub data_type: DataType,
71 pub nullable: bool,
72 pub unique_values: Option<usize>,
73 pub null_count: usize,
74 pub metadata: HashMap<String, String>,
75 pub qualified_name: Option<String>,
77 pub source_table: Option<String>,
79}
80
81impl DataColumn {
82 pub fn new(name: impl Into<String>) -> Self {
83 Self {
84 name: name.into(),
85 data_type: DataType::String,
86 nullable: true,
87 unique_values: None,
88 null_count: 0,
89 metadata: HashMap::new(),
90 qualified_name: None,
91 source_table: None,
92 }
93 }
94
95 #[must_use]
96 pub fn with_type(mut self, data_type: DataType) -> Self {
97 self.data_type = data_type;
98 self
99 }
100
101 #[must_use]
103 pub fn with_qualified_name(mut self, table_name: &str) -> Self {
104 self.qualified_name = Some(format!("{}.{}", table_name, self.name));
105 self.source_table = Some(table_name.to_string());
106 self
107 }
108
109 pub fn get_qualified_or_simple_name(&self) -> &str {
111 self.qualified_name.as_deref().unwrap_or(&self.name)
112 }
113
114 #[must_use]
115 pub fn with_nullable(mut self, nullable: bool) -> Self {
116 self.nullable = nullable;
117 self
118 }
119}
120
/// A single cell value.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    /// Owned text.
    String(String),
    /// Text held behind a shared, reference-counted pointer.
    InternedString(Arc<String>),
    /// 64-bit signed integer.
    Integer(i64),
    /// 64-bit floating-point number.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Date/time kept in its textual form.
    DateTime(String),
    /// Missing value.
    Null,
}
132
133impl std::hash::Hash for DataValue {
135 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
136 match self {
137 DataValue::String(s) => {
138 0u8.hash(state);
139 s.hash(state);
140 }
141 DataValue::InternedString(s) => {
142 1u8.hash(state);
143 s.hash(state);
144 }
145 DataValue::Integer(i) => {
146 2u8.hash(state);
147 i.hash(state);
148 }
149 DataValue::Float(f) => {
150 3u8.hash(state);
151 f.to_bits().hash(state);
153 }
154 DataValue::Boolean(b) => {
155 4u8.hash(state);
156 b.hash(state);
157 }
158 DataValue::DateTime(dt) => {
159 5u8.hash(state);
160 dt.hash(state);
161 }
162 DataValue::Null => {
163 6u8.hash(state);
164 }
165 }
166 }
167}
168
169impl Eq for DataValue {}
171
172impl DataValue {
173 pub fn from_string(s: &str, data_type: &DataType) -> Self {
174 if s.is_empty() || s.eq_ignore_ascii_case("null") {
175 return DataValue::Null;
176 }
177
178 match data_type {
179 DataType::String => DataValue::String(s.to_string()),
180 DataType::Integer => s
181 .parse::<i64>()
182 .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Integer),
183 DataType::Float => s
184 .parse::<f64>()
185 .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
186 DataType::Boolean => {
187 let lower = s.to_lowercase();
188 DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
189 }
190 DataType::DateTime => DataValue::DateTime(s.to_string()),
191 DataType::Null => DataValue::Null,
192 DataType::Mixed => {
193 let inferred = DataType::infer_from_string(s);
195 Self::from_string(s, &inferred)
196 }
197 }
198 }
199
200 #[must_use]
201 pub fn is_null(&self) -> bool {
202 matches!(self, DataValue::Null)
203 }
204
205 #[must_use]
206 pub fn data_type(&self) -> DataType {
207 match self {
208 DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
209 DataValue::Integer(_) => DataType::Integer,
210 DataValue::Float(_) => DataType::Float,
211 DataValue::Boolean(_) => DataType::Boolean,
212 DataValue::DateTime(_) => DataType::DateTime,
213 DataValue::Null => DataType::Null,
214 }
215 }
216
217 #[must_use]
220 pub fn to_string_optimized(&self) -> String {
221 match self {
222 DataValue::String(s) => s.clone(), DataValue::InternedString(s) => s.as_ref().clone(), DataValue::DateTime(s) => s.clone(), DataValue::Integer(i) => i.to_string(),
226 DataValue::Float(f) => f.to_string(),
227 DataValue::Boolean(b) => {
228 if *b {
229 "true".to_string()
230 } else {
231 "false".to_string()
232 }
233 }
234 DataValue::Null => String::new(), }
236 }
237}
238
239impl fmt::Display for DataValue {
240 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
241 match self {
242 DataValue::String(s) => write!(f, "{s}"),
243 DataValue::InternedString(s) => write!(f, "{s}"),
244 DataValue::Integer(i) => write!(f, "{i}"),
245 DataValue::Float(fl) => write!(f, "{fl}"),
246 DataValue::Boolean(b) => write!(f, "{b}"),
247 DataValue::DateTime(dt) => write!(f, "{dt}"),
248 DataValue::Null => write!(f, ""),
249 }
250 }
251}
252
253#[derive(Debug, Clone)]
255pub struct DataRow {
256 pub values: Vec<DataValue>,
257}
258
259impl DataRow {
260 #[must_use]
261 pub fn new(values: Vec<DataValue>) -> Self {
262 Self { values }
263 }
264
265 #[must_use]
266 pub fn get(&self, index: usize) -> Option<&DataValue> {
267 self.values.get(index)
268 }
269
270 pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
271 self.values.get_mut(index)
272 }
273
274 #[must_use]
275 pub fn len(&self) -> usize {
276 self.values.len()
277 }
278
279 #[must_use]
280 pub fn is_empty(&self) -> bool {
281 self.values.is_empty()
282 }
283}
284
285#[derive(Debug, Clone)]
287pub struct DataTable {
288 pub name: String,
289 pub columns: Vec<DataColumn>,
290 pub rows: Vec<DataRow>,
291 pub metadata: HashMap<String, String>,
292}
293
294impl DataTable {
295 pub fn new(name: impl Into<String>) -> Self {
296 Self {
297 name: name.into(),
298 columns: Vec::new(),
299 rows: Vec::new(),
300 metadata: HashMap::new(),
301 }
302 }
303
304 #[must_use]
307 pub fn dual() -> Self {
308 let mut table = DataTable::new("DUAL");
309 table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
310 table
311 .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
312 .unwrap();
313 table
314 }
315
316 pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
317 self.columns.push(column);
318 self
319 }
320
321 pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
322 if row.len() != self.columns.len() {
323 return Err(format!(
324 "Row has {} values but table has {} columns",
325 row.len(),
326 self.columns.len()
327 ));
328 }
329 self.rows.push(row);
330 Ok(())
331 }
332
333 #[must_use]
334 pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
335 self.columns.iter().find(|c| c.name == name)
336 }
337
338 #[must_use]
339 pub fn get_column_index(&self, name: &str) -> Option<usize> {
340 self.columns.iter().position(|c| c.name == name)
341 }
342
343 #[must_use]
345 pub fn find_column_by_qualified_name(&self, qualified_name: &str) -> Option<usize> {
346 self.columns
347 .iter()
348 .position(|c| c.qualified_name.as_deref() == Some(qualified_name))
349 }
350
351 #[must_use]
354 pub fn find_column_flexible(&self, name: &str, table_prefix: Option<&str>) -> Option<usize> {
355 if let Some(prefix) = table_prefix {
357 let qualified = format!("{}.{}", prefix, name);
358 if let Some(idx) = self.find_column_by_qualified_name(&qualified) {
359 return Some(idx);
360 }
361 }
362
363 self.get_column_index(name)
365 }
366
367 pub fn enrich_columns_with_qualified_names(&mut self, table_name: &str) {
369 for column in &mut self.columns {
370 column.qualified_name = Some(format!("{}.{}", table_name, column.name));
371 column.source_table = Some(table_name.to_string());
372 }
373 }
374
375 #[must_use]
376 pub fn column_count(&self) -> usize {
377 self.columns.len()
378 }
379
380 #[must_use]
381 pub fn row_count(&self) -> usize {
382 self.rows.len()
383 }
384
385 #[must_use]
386 pub fn is_empty(&self) -> bool {
387 self.rows.is_empty()
388 }
389
390 #[must_use]
392 pub fn column_names(&self) -> Vec<String> {
393 self.columns.iter().map(|c| c.name.clone()).collect()
394 }
395
396 pub fn columns_mut(&mut self) -> &mut [DataColumn] {
398 &mut self.columns
399 }
400
401 pub fn infer_column_types(&mut self) {
403 for (col_idx, column) in self.columns.iter_mut().enumerate() {
404 let mut inferred_type = DataType::Null;
405 let mut null_count = 0;
406 let mut unique_values = std::collections::HashSet::new();
407
408 for row in &self.rows {
409 if let Some(value) = row.get(col_idx) {
410 if value.is_null() {
411 null_count += 1;
412 } else {
413 let value_type = value.data_type();
414 inferred_type = inferred_type.merge(&value_type);
415 unique_values.insert(value.to_string());
416 }
417 }
418 }
419
420 column.data_type = inferred_type;
421 column.null_count = null_count;
422 column.nullable = null_count > 0;
423 column.unique_values = Some(unique_values.len());
424 }
425 }
426
427 #[must_use]
429 pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
430 self.rows.get(row)?.get(col)
431 }
432
433 #[must_use]
435 pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
436 let col_idx = self.get_column_index(col_name)?;
437 self.get_value(row, col_idx)
438 }
439
440 #[must_use]
442 pub fn to_string_table(&self) -> Vec<Vec<String>> {
443 self.rows
444 .iter()
445 .map(|row| {
446 row.values
447 .iter()
448 .map(DataValue::to_string_optimized)
449 .collect()
450 })
451 .collect()
452 }
453
454 #[must_use]
456 pub fn get_stats(&self) -> DataTableStats {
457 DataTableStats {
458 row_count: self.row_count(),
459 column_count: self.column_count(),
460 memory_size: self.estimate_memory_size(),
461 null_count: self.columns.iter().map(|c| c.null_count).sum(),
462 }
463 }
464
465 #[must_use]
467 pub fn debug_dump(&self) -> String {
468 let mut output = String::new();
469
470 output.push_str(&format!("DataTable: {}\n", self.name));
471 output.push_str(&format!(
472 "Rows: {} | Columns: {}\n",
473 self.row_count(),
474 self.column_count()
475 ));
476
477 if !self.metadata.is_empty() {
478 output.push_str("Metadata:\n");
479 for (key, value) in &self.metadata {
480 output.push_str(&format!(" {key}: {value}\n"));
481 }
482 }
483
484 output.push_str("\nColumns:\n");
485 for column in &self.columns {
486 output.push_str(&format!(" {} ({:?})", column.name, column.data_type));
487 if column.nullable {
488 output.push_str(&format!(" - nullable, {} nulls", column.null_count));
489 }
490 if let Some(unique) = column.unique_values {
491 output.push_str(&format!(", {unique} unique"));
492 }
493 output.push('\n');
494 }
495
496 if self.row_count() > 0 {
498 let sample_size = 5.min(self.row_count());
499 output.push_str(&format!("\nFirst {sample_size} rows:\n"));
500
501 for row_idx in 0..sample_size {
502 output.push_str(&format!(" [{row_idx}]: "));
503 for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
504 if col_idx > 0 {
505 output.push_str(", ");
506 }
507 output.push_str(&value.to_string());
508 }
509 output.push('\n');
510 }
511 }
512
513 output
514 }
515
516 #[must_use]
517 pub fn estimate_memory_size(&self) -> usize {
518 let mut size = std::mem::size_of::<Self>();
520
521 size += self.columns.len() * std::mem::size_of::<DataColumn>();
523 for col in &self.columns {
524 size += col.name.len();
525 }
526
527 size += self.rows.len() * std::mem::size_of::<DataRow>();
529
530 for row in &self.rows {
532 for value in &row.values {
533 size += std::mem::size_of::<DataValue>();
535 match value {
537 DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
538 _ => {} }
540 }
541 }
542
543 size
544 }
545
546 pub fn to_csv(&self) -> String {
548 let mut csv_output = String::new();
549
550 let headers: Vec<String> = self
552 .columns
553 .iter()
554 .map(|col| {
555 if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
556 format!("\"{}\"", col.name.replace('"', "\"\""))
557 } else {
558 col.name.clone()
559 }
560 })
561 .collect();
562 csv_output.push_str(&headers.join(","));
563 csv_output.push('\n');
564
565 for row in &self.rows {
567 let row_values: Vec<String> = row
568 .values
569 .iter()
570 .map(|value| {
571 let str_val = value.to_string();
572 if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
573 format!("\"{}\"", str_val.replace('"', "\"\""))
574 } else {
575 str_val
576 }
577 })
578 .collect();
579 csv_output.push_str(&row_values.join(","));
580 csv_output.push('\n');
581 }
582
583 csv_output
584 }
585
586 pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
589 debug!(
590 "V46: Converting QueryResponse to DataTable for table '{}'",
591 table_name
592 );
593
594 crate::utils::memory_tracker::track_memory("start_from_query_response");
596
597 let mut table = DataTable::new(table_name);
598
599 if let Some(first_row) = response.data.first() {
601 if let Some(obj) = first_row.as_object() {
602 for key in obj.keys() {
604 let column = DataColumn::new(key.clone());
605 table.add_column(column);
606 }
607
608 for json_row in &response.data {
610 if let Some(row_obj) = json_row.as_object() {
611 let mut values = Vec::new();
612
613 for column in &table.columns {
615 let value = row_obj
616 .get(&column.name)
617 .map_or(DataValue::Null, json_value_to_data_value);
618 values.push(value);
619 }
620
621 table.add_row(DataRow::new(values))?;
622 }
623 }
624
625 table.infer_column_types();
627
628 if let Some(source) = &response.source {
630 table.metadata.insert("source".to_string(), source.clone());
631 }
632 if let Some(cached) = response.cached {
633 table
634 .metadata
635 .insert("cached".to_string(), cached.to_string());
636 }
637 table
638 .metadata
639 .insert("original_count".to_string(), response.count.to_string());
640
641 debug!(
642 "V46: Created DataTable with {} columns and {} rows",
643 table.column_count(),
644 table.row_count()
645 );
646 } else {
647 table.add_column(DataColumn::new("value"));
649 for json_value in &response.data {
650 let value = json_value_to_data_value(json_value);
651 table.add_row(DataRow::new(vec![value]))?;
652 }
653 }
654 }
655
656 Ok(table)
657 }
658
659 #[must_use]
661 pub fn get_row(&self, index: usize) -> Option<&DataRow> {
662 self.rows.get(index)
663 }
664
665 #[must_use]
667 pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
668 self.rows.get(index).map(|row| {
669 row.values
670 .iter()
671 .map(DataValue::to_string_optimized)
672 .collect()
673 })
674 }
675
676 #[must_use]
678 pub fn pretty_print(&self) -> String {
679 let mut output = String::new();
680
681 output.push_str("╔═══════════════════════════════════════════════════════╗\n");
683 output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
684 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
685
686 output.push_str(&format!(
688 "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
689 self.row_count(),
690 self.column_count(),
691 self.get_stats().memory_size
692 ));
693
694 if !self.metadata.is_empty() {
696 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
697 output.push_str("║ Metadata: ║\n");
698 for (key, value) in &self.metadata {
699 let truncated_value = if value.len() > 35 {
700 format!("{}...", &value[..32])
701 } else {
702 value.clone()
703 };
704 output.push_str(&format!(
705 "║ {:15} : {:35} ║\n",
706 Self::truncate_string(key, 15),
707 truncated_value
708 ));
709 }
710 }
711
712 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
714 output.push_str("║ Columns: ║\n");
715 output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
716 output.push_str("║ Name │ Type │ Nullable│ Nulls│Unique║\n");
717 output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
718
719 for column in &self.columns {
720 let type_str = match &column.data_type {
721 DataType::String => "String",
722 DataType::Integer => "Integer",
723 DataType::Float => "Float",
724 DataType::Boolean => "Boolean",
725 DataType::DateTime => "DateTime",
726 DataType::Null => "Null",
727 DataType::Mixed => "Mixed",
728 };
729
730 output.push_str(&format!(
731 "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
732 Self::truncate_string(&column.name, 17),
733 type_str,
734 if column.nullable { "Yes" } else { "No" },
735 column.null_count,
736 column.unique_values.unwrap_or(0)
737 ));
738 }
739
740 output.push_str("╚═══════════════════════════════════════════════════════╝\n");
741
742 output.push_str("\nSample Data (first 5 rows):\n");
744 let sample_count = self.rows.len().min(5);
745
746 if sample_count > 0 {
747 output.push('┌');
749 for (i, _col) in self.columns.iter().enumerate() {
750 if i > 0 {
751 output.push('┬');
752 }
753 output.push_str(&"─".repeat(20));
754 }
755 output.push_str("┐\n");
756
757 output.push('│');
758 for col in &self.columns {
759 output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
760 }
761 output.push('\n');
762
763 output.push('├');
764 for (i, _) in self.columns.iter().enumerate() {
765 if i > 0 {
766 output.push('┼');
767 }
768 output.push_str(&"─".repeat(20));
769 }
770 output.push_str("┤\n");
771
772 for row_idx in 0..sample_count {
774 if let Some(row) = self.rows.get(row_idx) {
775 output.push('│');
776 for value in &row.values {
777 let value_str = value.to_string();
778 output
779 .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
780 }
781 output.push('\n');
782 }
783 }
784
785 output.push('└');
786 for (i, _) in self.columns.iter().enumerate() {
787 if i > 0 {
788 output.push('┴');
789 }
790 output.push_str(&"─".repeat(20));
791 }
792 output.push_str("┘\n");
793 }
794
795 output
796 }
797
798 fn truncate_string(s: &str, max_len: usize) -> String {
799 if s.len() > max_len {
800 format!("{}...", &s[..max_len - 3])
801 } else {
802 s.to_string()
803 }
804 }
805
806 #[must_use]
808 pub fn get_schema_summary(&self) -> String {
809 let mut summary = String::new();
810 summary.push_str(&format!(
811 "DataTable Schema ({} columns, {} rows):\n",
812 self.columns.len(),
813 self.rows.len()
814 ));
815
816 for (idx, column) in self.columns.iter().enumerate() {
817 let type_str = match &column.data_type {
818 DataType::String => "String",
819 DataType::Integer => "Integer",
820 DataType::Float => "Float",
821 DataType::Boolean => "Boolean",
822 DataType::DateTime => "DateTime",
823 DataType::Null => "Null",
824 DataType::Mixed => "Mixed",
825 };
826
827 let nullable_str = if column.nullable {
828 "nullable"
829 } else {
830 "not null"
831 };
832 let null_info = if column.null_count > 0 {
833 format!(", {} nulls", column.null_count)
834 } else {
835 String::new()
836 };
837
838 summary.push_str(&format!(
839 " [{:3}] {} : {} ({}{})\n",
840 idx, column.name, type_str, nullable_str, null_info
841 ));
842 }
843
844 summary
845 }
846
847 #[must_use]
849 pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
850 self.columns
851 .iter()
852 .map(|col| {
853 let type_name = format!("{:?}", col.data_type);
854 (col.name.clone(), type_name, col.nullable, col.null_count)
855 })
856 .collect()
857 }
858
859 pub fn reserve_rows(&mut self, additional: usize) {
861 self.rows.reserve(additional);
862 }
863
864 pub fn shrink_to_fit(&mut self) {
866 self.rows.shrink_to_fit();
867 for _column in &mut self.columns {
868 }
870 }
871
872 #[must_use]
874 pub fn get_memory_usage(&self) -> usize {
875 let mut size = std::mem::size_of::<Self>();
876
877 size += self.name.capacity();
879
880 size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
882 for col in &self.columns {
883 size += col.name.capacity();
884 }
885
886 size += self.rows.capacity() * std::mem::size_of::<DataRow>();
888
889 for row in &self.rows {
891 size += row.values.capacity() * std::mem::size_of::<DataValue>();
892 for value in &row.values {
893 match value {
894 DataValue::String(s) => size += s.capacity(),
895 DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
896 DataValue::DateTime(s) => size += s.capacity(),
897 _ => {} }
899 }
900 }
901
902 size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
904 for (k, v) in &self.metadata {
905 size += k.capacity() + v.capacity();
906 }
907
908 size
909 }
910}
911
912fn json_value_to_data_value(json: &JsonValue) -> DataValue {
914 match json {
915 JsonValue::Null => DataValue::Null,
916 JsonValue::Bool(b) => DataValue::Boolean(*b),
917 JsonValue::Number(n) => {
918 if let Some(i) = n.as_i64() {
919 DataValue::Integer(i)
920 } else if let Some(f) = n.as_f64() {
921 DataValue::Float(f)
922 } else {
923 DataValue::String(n.to_string())
924 }
925 }
926 JsonValue::String(s) => {
927 if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
929 DataValue::DateTime(s.clone())
931 } else {
932 DataValue::String(s.clone())
933 }
934 }
935 JsonValue::Array(_) | JsonValue::Object(_) => {
936 DataValue::String(json.to_string())
938 }
939 }
940}
941
/// Summary figures for a `DataTable`, as produced by `DataTable::get_stats`.
#[derive(Debug, Clone)]
pub struct DataTableStats {
    /// Number of rows.
    pub row_count: usize,
    /// Number of columns.
    pub column_count: usize,
    /// Estimated memory footprint in bytes.
    pub memory_size: usize,
    /// Sum of the per-column null counts.
    pub null_count: usize,
}
950
951impl DataProvider for DataTable {
954 fn get_row(&self, index: usize) -> Option<Vec<String>> {
955 self.rows.get(index).map(|row| {
956 row.values
957 .iter()
958 .map(DataValue::to_string_optimized)
959 .collect()
960 })
961 }
962
963 fn get_column_names(&self) -> Vec<String> {
964 self.column_names()
965 }
966
967 fn get_row_count(&self) -> usize {
968 self.row_count()
969 }
970
971 fn get_column_count(&self) -> usize {
972 self.column_count()
973 }
974}
975
#[cfg(test)]
mod tests {
    use super::*;

    /// String-level inference recognises each scalar kind and treats the
    /// empty string as null.
    #[test]
    fn test_data_type_inference() {
        let cases = [
            ("123", DataType::Integer),
            ("123.45", DataType::Float),
            ("true", DataType::Boolean),
            ("hello", DataType::String),
            ("", DataType::Null),
            ("2024-01-01", DataType::DateTime),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(&DataType::infer_from_string(input), expected);
        }
    }

    /// Columns and rows can be added and a cell can be read back by name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        table
            .add_column(DataColumn::new("id").with_type(DataType::Integer))
            .add_column(DataColumn::new("name").with_type(DataType::String))
            .add_column(DataColumn::new("active").with_type(DataType::Boolean));

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let cells = vec![
            DataValue::Integer(1),
            DataValue::String("Alice".to_string()),
            DataValue::Boolean(true),
        ];
        table.add_row(DataRow::new(cells)).unwrap();
        assert_eq!(table.row_count(), 1);

        let value = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(value.to_string(), "Alice");
    }

    /// Integer + Float widens to Float, and nulls are counted but do not
    /// affect the inferred type.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");
        table.add_column(DataColumn::new("mixed"));

        for cell in vec![
            DataValue::Integer(1),
            DataValue::Float(2.5),
            DataValue::Null,
        ] {
            table.add_row(DataRow::new(vec![cell])).unwrap();
        }

        table.infer_column_types();

        let column = &table.columns[0];
        assert_eq!(column.data_type, DataType::Float);
        assert_eq!(column.null_count, 1);
        assert!(column.nullable);
    }

    /// An object-shaped query response becomes a table with one column per
    /// key, typed cells, and provenance metadata.
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        let rows = vec![
            json!({ "id": 1, "name": "Alice", "age": 30 }),
            json!({ "id": 2, "name": "Bob", "age": 25 }),
            json!({ "id": 3, "name": "Carol", "age": null }),
        ];

        let response = QueryResponse {
            query: QueryInfo {
                select: vec!["id".to_string(), "name".to_string(), "age".to_string()],
                where_clause: None,
                order_by: None,
            },
            data: rows,
            count: 3,
            source: Some("test.csv".to_string()),
            table: Some("test".to_string()),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        // Column order follows the JSON map's key order, so only check
        // membership.
        let col_names = table.column_names();
        for expected in &["id", "name", "age"] {
            assert!(col_names.contains(&expected.to_string()));
        }

        assert_eq!(table.metadata.get("source"), Some(&"test.csv".to_string()));
        assert_eq!(table.metadata.get("cached"), Some(&"false".to_string()));

        assert_eq!(
            table.get_value_by_name(0, "id"),
            Some(&DataValue::Integer(1))
        );
        assert_eq!(
            table.get_value_by_name(0, "name"),
            Some(&DataValue::String("Alice".to_string()))
        );
        assert_eq!(
            table.get_value_by_name(0, "age"),
            Some(&DataValue::Integer(30))
        );

        // JSON null maps to DataValue::Null.
        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}