1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::de::{VariantAccess, Visitor};
5use serde::{Deserialize, Serialize};
6use serde_json::Value as JsonValue;
7use std::collections::HashMap;
8use std::fmt;
9use std::sync::Arc;
10use tracing::debug;
11
/// Logical type attached to a column or to a single cell value.
///
/// The variant order is part of the derived serde representation, so new variants
/// should be appended rather than inserted.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum DataType {
    /// Text.
    String,
    /// Integer number.
    Integer,
    /// Floating-point number.
    Float,
    /// Boolean flag.
    Boolean,
    /// Date/time value.
    DateTime,
    /// No value.
    Null,
    /// Produced by `DataType::merge` when two types cannot be reconciled.
    Mixed,
}
23
24impl DataType {
25 #[must_use]
27 pub fn infer_from_string(value: &str) -> Self {
28 if value.eq_ignore_ascii_case("null") {
30 return DataType::Null;
31 }
32
33 match TypeInference::infer_from_string(value) {
35 InferredType::Null => DataType::Null,
36 InferredType::Boolean => DataType::Boolean,
37 InferredType::Integer => DataType::Integer,
38 InferredType::Float => DataType::Float,
39 InferredType::DateTime => DataType::DateTime,
40 InferredType::String => DataType::String,
41 }
42 }
43
44 fn looks_like_datetime(value: &str) -> bool {
47 TypeInference::looks_like_datetime(value)
48 }
49
50 #[must_use]
52 pub fn merge(&self, other: &DataType) -> DataType {
53 if self == other {
54 return self.clone();
55 }
56
57 match (self, other) {
58 (DataType::Null, t) | (t, DataType::Null) => t.clone(),
59 (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
60 DataType::Float
61 }
62 _ => DataType::Mixed,
63 }
64 }
65}
66
/// Description of a single table column together with the statistics gathered for it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataColumn {
    /// Unqualified column name.
    pub name: String,
    /// Type assigned to the column (`DataColumn::new` starts with `DataType::String`).
    pub data_type: DataType,
    /// Whether the column is considered nullable.
    pub nullable: bool,
    /// Number of distinct non-null values, when it has been computed.
    pub unique_values: Option<usize>,
    /// Number of null cells counted for this column.
    pub null_count: usize,
    /// Free-form key/value annotations.
    pub metadata: HashMap<String, String>,
    /// `table.column` form of the name, when a source table has been attached.
    pub qualified_name: Option<String>,
    /// Name of the table the column came from, when known.
    pub source_table: Option<String>,
}
81
82impl DataColumn {
83 pub fn new(name: impl Into<String>) -> Self {
84 Self {
85 name: name.into(),
86 data_type: DataType::String,
87 nullable: true,
88 unique_values: None,
89 null_count: 0,
90 metadata: HashMap::new(),
91 qualified_name: None,
92 source_table: None,
93 }
94 }
95
96 #[must_use]
97 pub fn with_type(mut self, data_type: DataType) -> Self {
98 self.data_type = data_type;
99 self
100 }
101
102 #[must_use]
104 pub fn with_qualified_name(mut self, table_name: &str) -> Self {
105 self.qualified_name = Some(format!("{}.{}", table_name, self.name));
106 self.source_table = Some(table_name.to_string());
107 self
108 }
109
110 pub fn get_qualified_or_simple_name(&self) -> &str {
112 self.qualified_name.as_deref().unwrap_or(&self.name)
113 }
114
115 #[must_use]
116 pub fn with_nullable(mut self, nullable: bool) -> Self {
117 self.nullable = nullable;
118 self
119 }
120}
121
/// A single cell value.
///
/// The variant order matters: the derived `PartialOrd` orders values of different
/// variants by declaration order, and the hand-written `Serialize`/`Deserialize`
/// impls in this file number the variants 0..=6 in this same order.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    /// Owned text.
    String(String),
    /// Text held behind an `Arc` so that clones share one allocation.
    InternedString(Arc<String>),
    /// 64-bit signed integer.
    Integer(i64),
    /// 64-bit float.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Date/time kept in its textual form.
    DateTime(String),
    /// Absent value.
    Null,
}
133
134impl std::hash::Hash for DataValue {
136 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
137 match self {
138 DataValue::String(s) => {
139 0u8.hash(state);
140 s.hash(state);
141 }
142 DataValue::InternedString(s) => {
143 1u8.hash(state);
144 s.hash(state);
145 }
146 DataValue::Integer(i) => {
147 2u8.hash(state);
148 i.hash(state);
149 }
150 DataValue::Float(f) => {
151 3u8.hash(state);
152 f.to_bits().hash(state);
154 }
155 DataValue::Boolean(b) => {
156 4u8.hash(state);
157 b.hash(state);
158 }
159 DataValue::DateTime(dt) => {
160 5u8.hash(state);
161 dt.hash(state);
162 }
163 DataValue::Null => {
164 6u8.hash(state);
165 }
166 }
167 }
168}
169
170impl Serialize for DataValue {
172 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
173 where
174 S: serde::Serializer,
175 {
176 match self {
177 DataValue::String(s) => {
178 serializer.serialize_newtype_variant("DataValue", 0, "String", s)
179 }
180 DataValue::InternedString(arc_s) => {
181 serializer.serialize_newtype_variant(
183 "DataValue",
184 1,
185 "InternedString",
186 arc_s.as_ref(),
187 )
188 }
189 DataValue::Integer(i) => {
190 serializer.serialize_newtype_variant("DataValue", 2, "Integer", i)
191 }
192 DataValue::Float(f) => serializer.serialize_newtype_variant("DataValue", 3, "Float", f),
193 DataValue::Boolean(b) => {
194 serializer.serialize_newtype_variant("DataValue", 4, "Boolean", b)
195 }
196 DataValue::DateTime(dt) => {
197 serializer.serialize_newtype_variant("DataValue", 5, "DateTime", dt)
198 }
199 DataValue::Null => serializer.serialize_unit_variant("DataValue", 6, "Null"),
200 }
201 }
202}
203
204impl<'de> Deserialize<'de> for DataValue {
206 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
207 where
208 D: serde::Deserializer<'de>,
209 {
210 #[derive(Deserialize)]
211 #[serde(field_identifier, rename_all = "PascalCase")]
212 enum Field {
213 String,
214 InternedString,
215 Integer,
216 Float,
217 Boolean,
218 DateTime,
219 Null,
220 }
221
222 struct DataValueVisitor;
223
224 impl<'de> Visitor<'de> for DataValueVisitor {
225 type Value = DataValue;
226
227 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
228 formatter.write_str("enum DataValue")
229 }
230
231 fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
232 where
233 A: serde::de::EnumAccess<'de>,
234 {
235 let (field, variant) = data.variant()?;
236 match field {
237 Field::String => {
238 let s: String = variant.newtype_variant()?;
239 Ok(DataValue::String(s))
240 }
241 Field::InternedString => {
242 let s: String = variant.newtype_variant()?;
243 Ok(DataValue::InternedString(Arc::new(s)))
244 }
245 Field::Integer => {
246 let i: i64 = variant.newtype_variant()?;
247 Ok(DataValue::Integer(i))
248 }
249 Field::Float => {
250 let f: f64 = variant.newtype_variant()?;
251 Ok(DataValue::Float(f))
252 }
253 Field::Boolean => {
254 let b: bool = variant.newtype_variant()?;
255 Ok(DataValue::Boolean(b))
256 }
257 Field::DateTime => {
258 let dt: String = variant.newtype_variant()?;
259 Ok(DataValue::DateTime(dt))
260 }
261 Field::Null => {
262 variant.unit_variant()?;
263 Ok(DataValue::Null)
264 }
265 }
266 }
267 }
268
269 deserializer.deserialize_enum(
270 "DataValue",
271 &[
272 "String",
273 "InternedString",
274 "Integer",
275 "Float",
276 "Boolean",
277 "DateTime",
278 "Null",
279 ],
280 DataValueVisitor,
281 )
282 }
283}
284
// Marker impl that lets `DataValue` be used where `Eq` is required (e.g. as a hash key).
// NOTE(review): `DataValue::Float` wraps an `f64`, and NaN is not equal to itself under
// the derived `PartialEq`, so the reflexivity `Eq` promises does not hold for NaN
// payloads. Confirm that NaN values are never used as keys.
impl Eq for DataValue {}
287
288impl DataValue {
289 pub fn from_string(s: &str, data_type: &DataType) -> Self {
290 if s.is_empty() || s.eq_ignore_ascii_case("null") {
291 return DataValue::Null;
292 }
293
294 match data_type {
295 DataType::String => DataValue::String(s.to_string()),
296 DataType::Integer => s
297 .parse::<i64>()
298 .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Integer),
299 DataType::Float => s
300 .parse::<f64>()
301 .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
302 DataType::Boolean => {
303 let lower = s.to_lowercase();
304 DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
305 }
306 DataType::DateTime => DataValue::DateTime(s.to_string()),
307 DataType::Null => DataValue::Null,
308 DataType::Mixed => {
309 let inferred = DataType::infer_from_string(s);
311 Self::from_string(s, &inferred)
312 }
313 }
314 }
315
316 #[must_use]
317 pub fn is_null(&self) -> bool {
318 matches!(self, DataValue::Null)
319 }
320
321 #[must_use]
322 pub fn data_type(&self) -> DataType {
323 match self {
324 DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
325 DataValue::Integer(_) => DataType::Integer,
326 DataValue::Float(_) => DataType::Float,
327 DataValue::Boolean(_) => DataType::Boolean,
328 DataValue::DateTime(_) => DataType::DateTime,
329 DataValue::Null => DataType::Null,
330 }
331 }
332
333 #[must_use]
336 pub fn to_string_optimized(&self) -> String {
337 match self {
338 DataValue::String(s) => s.clone(), DataValue::InternedString(s) => s.as_ref().clone(), DataValue::DateTime(s) => s.clone(), DataValue::Integer(i) => i.to_string(),
342 DataValue::Float(f) => f.to_string(),
343 DataValue::Boolean(b) => {
344 if *b {
345 "true".to_string()
346 } else {
347 "false".to_string()
348 }
349 }
350 DataValue::Null => String::new(), }
352 }
353}
354
355impl fmt::Display for DataValue {
356 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
357 match self {
358 DataValue::String(s) => write!(f, "{s}"),
359 DataValue::InternedString(s) => write!(f, "{s}"),
360 DataValue::Integer(i) => write!(f, "{i}"),
361 DataValue::Float(fl) => write!(f, "{fl}"),
362 DataValue::Boolean(b) => write!(f, "{b}"),
363 DataValue::DateTime(dt) => write!(f, "{dt}"),
364 DataValue::Null => write!(f, ""),
365 }
366 }
367}
368
/// One table row: cell values stored positionally, one per column.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataRow {
    /// Cell values in column order.
    pub values: Vec<DataValue>,
}
374
375impl DataRow {
376 #[must_use]
377 pub fn new(values: Vec<DataValue>) -> Self {
378 Self { values }
379 }
380
381 #[must_use]
382 pub fn get(&self, index: usize) -> Option<&DataValue> {
383 self.values.get(index)
384 }
385
386 pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
387 self.values.get_mut(index)
388 }
389
390 #[must_use]
391 pub fn len(&self) -> usize {
392 self.values.len()
393 }
394
395 #[must_use]
396 pub fn is_empty(&self) -> bool {
397 self.values.is_empty()
398 }
399}
400
/// An in-memory table: a named set of column descriptions plus positional rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataTable {
    /// Table name.
    pub name: String,
    /// Column descriptions, in display order.
    pub columns: Vec<DataColumn>,
    /// Rows. `DataTable::add_row` only accepts rows with one value per column, but
    /// the field is public and can be modified directly.
    pub rows: Vec<DataRow>,
    /// Free-form key/value annotations about the table.
    pub metadata: HashMap<String, String>,
}
409
410impl DataTable {
411 pub fn new(name: impl Into<String>) -> Self {
412 Self {
413 name: name.into(),
414 columns: Vec::new(),
415 rows: Vec::new(),
416 metadata: HashMap::new(),
417 }
418 }
419
420 #[must_use]
423 pub fn dual() -> Self {
424 let mut table = DataTable::new("DUAL");
425 table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
426 table
427 .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
428 .unwrap();
429 table
430 }
431
432 pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
433 self.columns.push(column);
434 self
435 }
436
437 pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
438 if row.len() != self.columns.len() {
439 return Err(format!(
440 "Row has {} values but table has {} columns",
441 row.len(),
442 self.columns.len()
443 ));
444 }
445 self.rows.push(row);
446 Ok(())
447 }
448
449 #[must_use]
450 pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
451 self.columns.iter().find(|c| c.name == name)
452 }
453
454 #[must_use]
455 pub fn get_column_index(&self, name: &str) -> Option<usize> {
456 self.columns.iter().position(|c| c.name == name)
457 }
458
459 #[must_use]
461 pub fn find_column_by_qualified_name(&self, qualified_name: &str) -> Option<usize> {
462 self.columns
463 .iter()
464 .position(|c| c.qualified_name.as_deref() == Some(qualified_name))
465 }
466
467 #[must_use]
470 pub fn find_column_flexible(&self, name: &str, table_prefix: Option<&str>) -> Option<usize> {
471 if let Some(prefix) = table_prefix {
473 let qualified = format!("{}.{}", prefix, name);
474 if let Some(idx) = self.find_column_by_qualified_name(&qualified) {
475 return Some(idx);
476 }
477 }
478
479 self.get_column_index(name)
481 }
482
483 pub fn enrich_columns_with_qualified_names(&mut self, table_name: &str) {
485 for column in &mut self.columns {
486 column.qualified_name = Some(format!("{}.{}", table_name, column.name));
487 column.source_table = Some(table_name.to_string());
488 }
489 }
490
491 #[must_use]
492 pub fn column_count(&self) -> usize {
493 self.columns.len()
494 }
495
496 #[must_use]
497 pub fn row_count(&self) -> usize {
498 self.rows.len()
499 }
500
501 #[must_use]
502 pub fn is_empty(&self) -> bool {
503 self.rows.is_empty()
504 }
505
506 #[must_use]
508 pub fn column_names(&self) -> Vec<String> {
509 self.columns.iter().map(|c| c.name.clone()).collect()
510 }
511
512 pub fn columns_mut(&mut self) -> &mut [DataColumn] {
514 &mut self.columns
515 }
516
517 pub fn infer_column_types(&mut self) {
519 for (col_idx, column) in self.columns.iter_mut().enumerate() {
520 let mut inferred_type = DataType::Null;
521 let mut null_count = 0;
522 let mut unique_values = std::collections::HashSet::new();
523
524 for row in &self.rows {
525 if let Some(value) = row.get(col_idx) {
526 if value.is_null() {
527 null_count += 1;
528 } else {
529 let value_type = value.data_type();
530 inferred_type = inferred_type.merge(&value_type);
531 unique_values.insert(value.to_string());
532 }
533 }
534 }
535
536 column.data_type = inferred_type;
537 column.null_count = null_count;
538 column.nullable = null_count > 0;
539 column.unique_values = Some(unique_values.len());
540 }
541 }
542
543 #[must_use]
545 pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
546 self.rows.get(row)?.get(col)
547 }
548
549 #[must_use]
551 pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
552 let col_idx = self.get_column_index(col_name)?;
553 self.get_value(row, col_idx)
554 }
555
556 #[must_use]
558 pub fn to_string_table(&self) -> Vec<Vec<String>> {
559 self.rows
560 .iter()
561 .map(|row| {
562 row.values
563 .iter()
564 .map(DataValue::to_string_optimized)
565 .collect()
566 })
567 .collect()
568 }
569
570 #[must_use]
572 pub fn get_stats(&self) -> DataTableStats {
573 DataTableStats {
574 row_count: self.row_count(),
575 column_count: self.column_count(),
576 memory_size: self.estimate_memory_size(),
577 null_count: self.columns.iter().map(|c| c.null_count).sum(),
578 }
579 }
580
581 #[must_use]
583 pub fn debug_dump(&self) -> String {
584 let mut output = String::new();
585
586 output.push_str(&format!("DataTable: {}\n", self.name));
587 output.push_str(&format!(
588 "Rows: {} | Columns: {}\n",
589 self.row_count(),
590 self.column_count()
591 ));
592
593 if !self.metadata.is_empty() {
594 output.push_str("Metadata:\n");
595 for (key, value) in &self.metadata {
596 output.push_str(&format!(" {key}: {value}\n"));
597 }
598 }
599
600 output.push_str("\nColumns:\n");
601 for column in &self.columns {
602 output.push_str(&format!(" {} ({:?})", column.name, column.data_type));
603 if column.nullable {
604 output.push_str(&format!(" - nullable, {} nulls", column.null_count));
605 }
606 if let Some(unique) = column.unique_values {
607 output.push_str(&format!(", {unique} unique"));
608 }
609 output.push('\n');
610 }
611
612 if self.row_count() > 0 {
614 let sample_size = 5.min(self.row_count());
615 output.push_str(&format!("\nFirst {sample_size} rows:\n"));
616
617 for row_idx in 0..sample_size {
618 output.push_str(&format!(" [{row_idx}]: "));
619 for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
620 if col_idx > 0 {
621 output.push_str(", ");
622 }
623 output.push_str(&value.to_string());
624 }
625 output.push('\n');
626 }
627 }
628
629 output
630 }
631
632 #[must_use]
633 pub fn estimate_memory_size(&self) -> usize {
634 let mut size = std::mem::size_of::<Self>();
636
637 size += self.columns.len() * std::mem::size_of::<DataColumn>();
639 for col in &self.columns {
640 size += col.name.len();
641 }
642
643 size += self.rows.len() * std::mem::size_of::<DataRow>();
645
646 for row in &self.rows {
648 for value in &row.values {
649 size += std::mem::size_of::<DataValue>();
651 match value {
653 DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
654 _ => {} }
656 }
657 }
658
659 size
660 }
661
662 pub fn to_csv(&self) -> String {
664 let mut csv_output = String::new();
665
666 let headers: Vec<String> = self
668 .columns
669 .iter()
670 .map(|col| {
671 if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
672 format!("\"{}\"", col.name.replace('"', "\"\""))
673 } else {
674 col.name.clone()
675 }
676 })
677 .collect();
678 csv_output.push_str(&headers.join(","));
679 csv_output.push('\n');
680
681 for row in &self.rows {
683 let row_values: Vec<String> = row
684 .values
685 .iter()
686 .map(|value| {
687 let str_val = value.to_string();
688 if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
689 format!("\"{}\"", str_val.replace('"', "\"\""))
690 } else {
691 str_val
692 }
693 })
694 .collect();
695 csv_output.push_str(&row_values.join(","));
696 csv_output.push('\n');
697 }
698
699 csv_output
700 }
701
702 pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
705 debug!(
706 "V46: Converting QueryResponse to DataTable for table '{}'",
707 table_name
708 );
709
710 crate::utils::memory_tracker::track_memory("start_from_query_response");
712
713 let mut table = DataTable::new(table_name);
714
715 if let Some(first_row) = response.data.first() {
717 if let Some(obj) = first_row.as_object() {
718 for key in obj.keys() {
720 let column = DataColumn::new(key.clone());
721 table.add_column(column);
722 }
723
724 for json_row in &response.data {
726 if let Some(row_obj) = json_row.as_object() {
727 let mut values = Vec::new();
728
729 for column in &table.columns {
731 let value = row_obj
732 .get(&column.name)
733 .map_or(DataValue::Null, json_value_to_data_value);
734 values.push(value);
735 }
736
737 table.add_row(DataRow::new(values))?;
738 }
739 }
740
741 table.infer_column_types();
743
744 if let Some(source) = &response.source {
746 table.metadata.insert("source".to_string(), source.clone());
747 }
748 if let Some(cached) = response.cached {
749 table
750 .metadata
751 .insert("cached".to_string(), cached.to_string());
752 }
753 table
754 .metadata
755 .insert("original_count".to_string(), response.count.to_string());
756
757 debug!(
758 "V46: Created DataTable with {} columns and {} rows",
759 table.column_count(),
760 table.row_count()
761 );
762 } else {
763 table.add_column(DataColumn::new("value"));
765 for json_value in &response.data {
766 let value = json_value_to_data_value(json_value);
767 table.add_row(DataRow::new(vec![value]))?;
768 }
769 }
770 }
771
772 Ok(table)
773 }
774
775 #[must_use]
777 pub fn get_row(&self, index: usize) -> Option<&DataRow> {
778 self.rows.get(index)
779 }
780
781 #[must_use]
783 pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
784 self.rows.get(index).map(|row| {
785 row.values
786 .iter()
787 .map(DataValue::to_string_optimized)
788 .collect()
789 })
790 }
791
792 #[must_use]
794 pub fn pretty_print(&self) -> String {
795 let mut output = String::new();
796
797 output.push_str("╔═══════════════════════════════════════════════════════╗\n");
799 output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
800 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
801
802 output.push_str(&format!(
804 "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
805 self.row_count(),
806 self.column_count(),
807 self.get_stats().memory_size
808 ));
809
810 if !self.metadata.is_empty() {
812 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
813 output.push_str("║ Metadata: ║\n");
814 for (key, value) in &self.metadata {
815 let truncated_value = if value.len() > 35 {
816 format!("{}...", &value[..32])
817 } else {
818 value.clone()
819 };
820 output.push_str(&format!(
821 "║ {:15} : {:35} ║\n",
822 Self::truncate_string(key, 15),
823 truncated_value
824 ));
825 }
826 }
827
828 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
830 output.push_str("║ Columns: ║\n");
831 output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
832 output.push_str("║ Name │ Type │ Nullable│ Nulls│Unique║\n");
833 output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
834
835 for column in &self.columns {
836 let type_str = match &column.data_type {
837 DataType::String => "String",
838 DataType::Integer => "Integer",
839 DataType::Float => "Float",
840 DataType::Boolean => "Boolean",
841 DataType::DateTime => "DateTime",
842 DataType::Null => "Null",
843 DataType::Mixed => "Mixed",
844 };
845
846 output.push_str(&format!(
847 "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
848 Self::truncate_string(&column.name, 17),
849 type_str,
850 if column.nullable { "Yes" } else { "No" },
851 column.null_count,
852 column.unique_values.unwrap_or(0)
853 ));
854 }
855
856 output.push_str("╚═══════════════════════════════════════════════════════╝\n");
857
858 output.push_str("\nSample Data (first 5 rows):\n");
860 let sample_count = self.rows.len().min(5);
861
862 if sample_count > 0 {
863 output.push('┌');
865 for (i, _col) in self.columns.iter().enumerate() {
866 if i > 0 {
867 output.push('┬');
868 }
869 output.push_str(&"─".repeat(20));
870 }
871 output.push_str("┐\n");
872
873 output.push('│');
874 for col in &self.columns {
875 output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
876 }
877 output.push('\n');
878
879 output.push('├');
880 for (i, _) in self.columns.iter().enumerate() {
881 if i > 0 {
882 output.push('┼');
883 }
884 output.push_str(&"─".repeat(20));
885 }
886 output.push_str("┤\n");
887
888 for row_idx in 0..sample_count {
890 if let Some(row) = self.rows.get(row_idx) {
891 output.push('│');
892 for value in &row.values {
893 let value_str = value.to_string();
894 output
895 .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
896 }
897 output.push('\n');
898 }
899 }
900
901 output.push('└');
902 for (i, _) in self.columns.iter().enumerate() {
903 if i > 0 {
904 output.push('┴');
905 }
906 output.push_str(&"─".repeat(20));
907 }
908 output.push_str("┘\n");
909 }
910
911 output
912 }
913
914 fn truncate_string(s: &str, max_len: usize) -> String {
915 if s.len() > max_len {
916 format!("{}...", &s[..max_len - 3])
917 } else {
918 s.to_string()
919 }
920 }
921
922 #[must_use]
924 pub fn get_schema_summary(&self) -> String {
925 let mut summary = String::new();
926 summary.push_str(&format!(
927 "DataTable Schema ({} columns, {} rows):\n",
928 self.columns.len(),
929 self.rows.len()
930 ));
931
932 for (idx, column) in self.columns.iter().enumerate() {
933 let type_str = match &column.data_type {
934 DataType::String => "String",
935 DataType::Integer => "Integer",
936 DataType::Float => "Float",
937 DataType::Boolean => "Boolean",
938 DataType::DateTime => "DateTime",
939 DataType::Null => "Null",
940 DataType::Mixed => "Mixed",
941 };
942
943 let nullable_str = if column.nullable {
944 "nullable"
945 } else {
946 "not null"
947 };
948 let null_info = if column.null_count > 0 {
949 format!(", {} nulls", column.null_count)
950 } else {
951 String::new()
952 };
953
954 summary.push_str(&format!(
955 " [{:3}] {} : {} ({}{})\n",
956 idx, column.name, type_str, nullable_str, null_info
957 ));
958 }
959
960 summary
961 }
962
963 #[must_use]
965 pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
966 self.columns
967 .iter()
968 .map(|col| {
969 let type_name = format!("{:?}", col.data_type);
970 (col.name.clone(), type_name, col.nullable, col.null_count)
971 })
972 .collect()
973 }
974
975 pub fn reserve_rows(&mut self, additional: usize) {
977 self.rows.reserve(additional);
978 }
979
980 pub fn shrink_to_fit(&mut self) {
982 self.rows.shrink_to_fit();
983 for _column in &mut self.columns {
984 }
986 }
987
988 #[must_use]
990 pub fn get_memory_usage(&self) -> usize {
991 let mut size = std::mem::size_of::<Self>();
992
993 size += self.name.capacity();
995
996 size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
998 for col in &self.columns {
999 size += col.name.capacity();
1000 }
1001
1002 size += self.rows.capacity() * std::mem::size_of::<DataRow>();
1004
1005 for row in &self.rows {
1007 size += row.values.capacity() * std::mem::size_of::<DataValue>();
1008 for value in &row.values {
1009 match value {
1010 DataValue::String(s) => size += s.capacity(),
1011 DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
1012 DataValue::DateTime(s) => size += s.capacity(),
1013 _ => {} }
1015 }
1016 }
1017
1018 size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
1020 for (k, v) in &self.metadata {
1021 size += k.capacity() + v.capacity();
1022 }
1023
1024 size
1025 }
1026
1027 pub fn to_parquet_bytes(&self) -> Result<Vec<u8>, String> {
1029 rmp_serde::to_vec(self).map_err(|e| format!("Failed to serialize DataTable: {}", e))
1032 }
1033
1034 pub fn from_parquet_bytes(bytes: &[u8]) -> Result<Self, String> {
1036 rmp_serde::from_slice(bytes).map_err(|e| format!("Failed to deserialize DataTable: {}", e))
1039 }
1040}
1041
1042fn json_value_to_data_value(json: &JsonValue) -> DataValue {
1044 match json {
1045 JsonValue::Null => DataValue::Null,
1046 JsonValue::Bool(b) => DataValue::Boolean(*b),
1047 JsonValue::Number(n) => {
1048 if let Some(i) = n.as_i64() {
1049 DataValue::Integer(i)
1050 } else if let Some(f) = n.as_f64() {
1051 DataValue::Float(f)
1052 } else {
1053 DataValue::String(n.to_string())
1054 }
1055 }
1056 JsonValue::String(s) => {
1057 if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
1059 DataValue::DateTime(s.clone())
1061 } else {
1062 DataValue::String(s.clone())
1063 }
1064 }
1065 JsonValue::Array(_) | JsonValue::Object(_) => {
1066 DataValue::String(json.to_string())
1068 }
1069 }
1070}
1071
/// Summary figures for a `DataTable`, as produced by `DataTable::get_stats`.
#[derive(Debug, Clone)]
pub struct DataTableStats {
    /// Number of rows.
    pub row_count: usize,
    /// Number of columns.
    pub column_count: usize,
    /// Estimated size in bytes (from `DataTable::estimate_memory_size`).
    pub memory_size: usize,
    /// Sum of the per-column `null_count` values.
    pub null_count: usize,
}
1080
1081impl DataProvider for DataTable {
1084 fn get_row(&self, index: usize) -> Option<Vec<String>> {
1085 self.rows.get(index).map(|row| {
1086 row.values
1087 .iter()
1088 .map(DataValue::to_string_optimized)
1089 .collect()
1090 })
1091 }
1092
1093 fn get_column_names(&self) -> Vec<String> {
1094 self.column_names()
1095 }
1096
1097 fn get_row_count(&self) -> usize {
1098 self.row_count()
1099 }
1100
1101 fn get_column_count(&self) -> usize {
1102 self.column_count()
1103 }
1104}
1105
#[cfg(test)]
mod tests {
    use super::*;

    /// Representative literals map to the expected inferred type.
    #[test]
    fn test_data_type_inference() {
        let cases = [
            ("123", DataType::Integer),
            ("123.45", DataType::Float),
            ("true", DataType::Boolean),
            ("hello", DataType::String),
            ("", DataType::Null),
            ("2024-01-01", DataType::DateTime),
        ];

        for (input, expected) in &cases {
            assert_eq!(&DataType::infer_from_string(input), expected);
        }
    }

    /// Columns and rows can be added, counted and read back by column name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        table
            .add_column(DataColumn::new("id").with_type(DataType::Integer))
            .add_column(DataColumn::new("name").with_type(DataType::String))
            .add_column(DataColumn::new("active").with_type(DataType::Boolean));

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let first = DataRow::new(vec![
            DataValue::Integer(1),
            DataValue::String("Alice".to_string()),
            DataValue::Boolean(true),
        ]);
        table.add_row(first).unwrap();
        assert_eq!(table.row_count(), 1);

        let name_cell = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(name_cell.to_string(), "Alice");
    }

    /// Integer + Float + Null in one column infers `Float` and records the null.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");
        table.add_column(DataColumn::new("mixed"));

        let cells = vec![
            DataValue::Integer(1),
            DataValue::Float(2.5),
            DataValue::Null,
        ];
        for cell in cells {
            table.add_row(DataRow::new(vec![cell])).unwrap();
        }

        table.infer_column_types();

        let column = &table.columns[0];
        assert_eq!(column.data_type, DataType::Float);
        assert_eq!(column.null_count, 1);
        assert!(column.nullable);
    }

    /// A response made of JSON objects becomes a table with matching schema, values
    /// and metadata.
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        let select: Vec<String> = ["id", "name", "age"]
            .iter()
            .map(|column| column.to_string())
            .collect();

        let rows = vec![
            json!({ "id": 1, "name": "Alice", "age": 30 }),
            json!({ "id": 2, "name": "Bob", "age": 25 }),
            json!({ "id": 3, "name": "Carol", "age": null }),
        ];

        let response = QueryResponse {
            query: QueryInfo {
                select,
                where_clause: None,
                order_by: None,
            },
            data: rows,
            count: 3,
            source: Some("test.csv".to_string()),
            table: Some("test".to_string()),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        let col_names = table.column_names();
        for expected in &["id", "name", "age"] {
            assert!(col_names.contains(&expected.to_string()));
        }

        assert_eq!(table.metadata.get("source"), Some(&"test.csv".to_string()));
        assert_eq!(table.metadata.get("cached"), Some(&"false".to_string()));

        assert_eq!(
            table.get_value_by_name(0, "id"),
            Some(&DataValue::Integer(1))
        );
        assert_eq!(
            table.get_value_by_name(0, "name"),
            Some(&DataValue::String("Alice".to_string()))
        );
        assert_eq!(
            table.get_value_by_name(0, "age"),
            Some(&DataValue::Integer(30))
        );

        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}