1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::de::{VariantAccess, Visitor};
5use serde::{Deserialize, Serialize};
6use serde_json::Value as JsonValue;
7use std::collections::HashMap;
8use std::fmt;
9use std::sync::Arc;
10use tracing::debug;
11
12#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
14pub enum DataType {
15 String,
16 Integer,
17 Float,
18 Boolean,
19 DateTime,
20 Null,
21 Mixed, }
23
24impl DataType {
25 #[must_use]
27 pub fn infer_from_string(value: &str) -> Self {
28 if value.eq_ignore_ascii_case("null") {
30 return DataType::Null;
31 }
32
33 match TypeInference::infer_from_string(value) {
35 InferredType::Null => DataType::Null,
36 InferredType::Boolean => DataType::Boolean,
37 InferredType::Integer => DataType::Integer,
38 InferredType::Float => DataType::Float,
39 InferredType::DateTime => DataType::DateTime,
40 InferredType::String => DataType::String,
41 }
42 }
43
44 fn looks_like_datetime(value: &str) -> bool {
47 TypeInference::looks_like_datetime(value)
48 }
49
50 #[must_use]
52 pub fn merge(&self, other: &DataType) -> DataType {
53 if self == other {
54 return self.clone();
55 }
56
57 match (self, other) {
58 (DataType::Null, t) | (t, DataType::Null) => t.clone(),
59 (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
60 DataType::Float
61 }
62 _ => DataType::Mixed,
63 }
64 }
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct DataColumn {
70 pub name: String,
71 pub data_type: DataType,
72 pub nullable: bool,
73 pub unique_values: Option<usize>,
74 pub null_count: usize,
75 pub metadata: HashMap<String, String>,
76 pub qualified_name: Option<String>,
78 pub source_table: Option<String>,
80}
81
82impl DataColumn {
83 pub fn new(name: impl Into<String>) -> Self {
84 Self {
85 name: name.into(),
86 data_type: DataType::String,
87 nullable: true,
88 unique_values: None,
89 null_count: 0,
90 metadata: HashMap::new(),
91 qualified_name: None,
92 source_table: None,
93 }
94 }
95
96 #[must_use]
97 pub fn with_type(mut self, data_type: DataType) -> Self {
98 self.data_type = data_type;
99 self
100 }
101
102 #[must_use]
104 pub fn with_qualified_name(mut self, table_name: &str) -> Self {
105 self.qualified_name = Some(format!("{}.{}", table_name, self.name));
106 self.source_table = Some(table_name.to_string());
107 self
108 }
109
110 pub fn get_qualified_or_simple_name(&self) -> &str {
112 self.qualified_name.as_deref().unwrap_or(&self.name)
113 }
114
115 #[must_use]
116 pub fn with_nullable(mut self, nullable: bool) -> Self {
117 self.nullable = nullable;
118 self
119 }
120}
121
/// A single cell value.
///
/// The variant order is significant: the derived `PartialOrd` orders values of
/// different variants by their declaration order.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    /// Owned text.
    String(String),
    /// Text held behind a shared `Arc`.
    InternedString(Arc<String>),
    /// 64-bit signed integer.
    Integer(i64),
    /// 64-bit float.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Date/time kept in its textual form.
    DateTime(String),
    /// Sequence of floats.
    Vector(Vec<f64>),
    /// Absent value.
    Null,
}
134
135impl std::hash::Hash for DataValue {
137 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
138 match self {
139 DataValue::String(s) => {
140 0u8.hash(state);
141 s.hash(state);
142 }
143 DataValue::InternedString(s) => {
144 1u8.hash(state);
145 s.hash(state);
146 }
147 DataValue::Integer(i) => {
148 2u8.hash(state);
149 i.hash(state);
150 }
151 DataValue::Float(f) => {
152 3u8.hash(state);
153 f.to_bits().hash(state);
155 }
156 DataValue::Boolean(b) => {
157 4u8.hash(state);
158 b.hash(state);
159 }
160 DataValue::DateTime(dt) => {
161 5u8.hash(state);
162 dt.hash(state);
163 }
164 DataValue::Vector(v) => {
165 6u8.hash(state);
166 for f in v {
168 f.to_bits().hash(state);
169 }
170 }
171 DataValue::Null => {
172 7u8.hash(state);
173 }
174 }
175 }
176}
177
178impl Serialize for DataValue {
180 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
181 where
182 S: serde::Serializer,
183 {
184 match self {
185 DataValue::String(s) => {
186 serializer.serialize_newtype_variant("DataValue", 0, "String", s)
187 }
188 DataValue::InternedString(arc_s) => {
189 serializer.serialize_newtype_variant(
191 "DataValue",
192 1,
193 "InternedString",
194 arc_s.as_ref(),
195 )
196 }
197 DataValue::Integer(i) => {
198 serializer.serialize_newtype_variant("DataValue", 2, "Integer", i)
199 }
200 DataValue::Float(f) => serializer.serialize_newtype_variant("DataValue", 3, "Float", f),
201 DataValue::Boolean(b) => {
202 serializer.serialize_newtype_variant("DataValue", 4, "Boolean", b)
203 }
204 DataValue::DateTime(dt) => {
205 serializer.serialize_newtype_variant("DataValue", 5, "DateTime", dt)
206 }
207 DataValue::Vector(v) => {
208 serializer.serialize_newtype_variant("DataValue", 6, "Vector", v)
209 }
210 DataValue::Null => serializer.serialize_unit_variant("DataValue", 7, "Null"),
211 }
212 }
213}
214
215impl<'de> Deserialize<'de> for DataValue {
217 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
218 where
219 D: serde::Deserializer<'de>,
220 {
221 #[derive(Deserialize)]
222 #[serde(field_identifier, rename_all = "PascalCase")]
223 enum Field {
224 String,
225 InternedString,
226 Integer,
227 Float,
228 Boolean,
229 DateTime,
230 Vector,
231 Null,
232 }
233
234 struct DataValueVisitor;
235
236 impl<'de> Visitor<'de> for DataValueVisitor {
237 type Value = DataValue;
238
239 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
240 formatter.write_str("enum DataValue")
241 }
242
243 fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
244 where
245 A: serde::de::EnumAccess<'de>,
246 {
247 let (field, variant) = data.variant()?;
248 match field {
249 Field::String => {
250 let s: String = variant.newtype_variant()?;
251 Ok(DataValue::String(s))
252 }
253 Field::InternedString => {
254 let s: String = variant.newtype_variant()?;
255 Ok(DataValue::InternedString(Arc::new(s)))
256 }
257 Field::Integer => {
258 let i: i64 = variant.newtype_variant()?;
259 Ok(DataValue::Integer(i))
260 }
261 Field::Float => {
262 let f: f64 = variant.newtype_variant()?;
263 Ok(DataValue::Float(f))
264 }
265 Field::Boolean => {
266 let b: bool = variant.newtype_variant()?;
267 Ok(DataValue::Boolean(b))
268 }
269 Field::DateTime => {
270 let dt: String = variant.newtype_variant()?;
271 Ok(DataValue::DateTime(dt))
272 }
273 Field::Vector => {
274 let v: Vec<f64> = variant.newtype_variant()?;
275 Ok(DataValue::Vector(v))
276 }
277 Field::Null => {
278 variant.unit_variant()?;
279 Ok(DataValue::Null)
280 }
281 }
282 }
283 }
284
285 deserializer.deserialize_enum(
286 "DataValue",
287 &[
288 "String",
289 "InternedString",
290 "Integer",
291 "Float",
292 "Boolean",
293 "DateTime",
294 "Vector",
295 "Null",
296 ],
297 DataValueVisitor,
298 )
299 }
300}
301
// Marker impl: lets `DataValue` satisfy `Eq` bounds on top of its derived `PartialEq`.
// NOTE(review): the `Float` and `Vector` variants carry `f64`, so a NaN payload
// compares unequal to itself under the derived `PartialEq` — confirm callers
// never rely on reflexivity for NaN values.
impl Eq for DataValue {}
304
305impl DataValue {
306 pub fn from_string(s: &str, data_type: &DataType) -> Self {
307 if s.is_empty() || s.eq_ignore_ascii_case("null") {
308 return DataValue::Null;
309 }
310
311 match data_type {
312 DataType::String => DataValue::String(s.to_string()),
313 DataType::Integer => s.parse::<i64>().map_or_else(
314 |_| {
320 s.parse::<f64>()
321 .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float)
322 },
323 DataValue::Integer,
324 ),
325 DataType::Float => s
326 .parse::<f64>()
327 .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
328 DataType::Boolean => {
329 let lower = s.to_lowercase();
330 DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
331 }
332 DataType::DateTime => DataValue::DateTime(s.to_string()),
333 DataType::Null => DataValue::Null,
334 DataType::Mixed => {
335 let inferred = DataType::infer_from_string(s);
337 Self::from_string(s, &inferred)
338 }
339 }
340 }
341
342 #[must_use]
343 pub fn is_null(&self) -> bool {
344 matches!(self, DataValue::Null)
345 }
346
347 #[must_use]
348 pub fn data_type(&self) -> DataType {
349 match self {
350 DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
351 DataValue::Integer(_) => DataType::Integer,
352 DataValue::Float(_) => DataType::Float,
353 DataValue::Boolean(_) => DataType::Boolean,
354 DataValue::DateTime(_) => DataType::DateTime,
355 DataValue::Vector(_) => DataType::String, DataValue::Null => DataType::Null,
357 }
358 }
359
360 #[must_use]
363 pub fn to_string_optimized(&self) -> String {
364 match self {
365 DataValue::String(s) => s.clone(), DataValue::InternedString(s) => s.as_ref().clone(), DataValue::DateTime(s) => s.clone(), DataValue::Integer(i) => i.to_string(),
369 DataValue::Float(f) => f.to_string(),
370 DataValue::Boolean(b) => {
371 if *b {
372 "true".to_string()
373 } else {
374 "false".to_string()
375 }
376 }
377 DataValue::Vector(v) => {
378 let components: Vec<String> = v.iter().map(|f| f.to_string()).collect();
380 format!("[{}]", components.join(","))
381 }
382 DataValue::Null => String::new(), }
384 }
385}
386
387impl fmt::Display for DataValue {
388 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
389 match self {
390 DataValue::String(s) => write!(f, "{s}"),
391 DataValue::InternedString(s) => write!(f, "{s}"),
392 DataValue::Integer(i) => write!(f, "{i}"),
393 DataValue::Float(fl) => write!(f, "{fl}"),
394 DataValue::Boolean(b) => write!(f, "{b}"),
395 DataValue::DateTime(dt) => write!(f, "{dt}"),
396 DataValue::Vector(v) => {
397 let components: Vec<String> = v.iter().map(|fl| fl.to_string()).collect();
398 write!(f, "[{}]", components.join(","))
399 }
400 DataValue::Null => write!(f, ""),
401 }
402 }
403}
404
405#[derive(Debug, Clone, Serialize, Deserialize)]
407pub struct DataRow {
408 pub values: Vec<DataValue>,
409}
410
411impl DataRow {
412 #[must_use]
413 pub fn new(values: Vec<DataValue>) -> Self {
414 Self { values }
415 }
416
417 #[must_use]
418 pub fn get(&self, index: usize) -> Option<&DataValue> {
419 self.values.get(index)
420 }
421
422 pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
423 self.values.get_mut(index)
424 }
425
426 #[must_use]
427 pub fn len(&self) -> usize {
428 self.values.len()
429 }
430
431 #[must_use]
432 pub fn is_empty(&self) -> bool {
433 self.values.is_empty()
434 }
435}
436
437#[derive(Debug, Clone, Serialize, Deserialize)]
439pub struct DataTable {
440 pub name: String,
441 pub columns: Vec<DataColumn>,
442 pub rows: Vec<DataRow>,
443 pub metadata: HashMap<String, String>,
444}
445
446impl DataTable {
447 pub fn new(name: impl Into<String>) -> Self {
448 Self {
449 name: name.into(),
450 columns: Vec::new(),
451 rows: Vec::new(),
452 metadata: HashMap::new(),
453 }
454 }
455
456 #[must_use]
459 pub fn dual() -> Self {
460 let mut table = DataTable::new("DUAL");
461 table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
462 table
463 .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
464 .unwrap();
465 table
466 }
467
468 pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
469 self.columns.push(column);
470 self
471 }
472
473 pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
474 if row.len() != self.columns.len() {
475 return Err(format!(
476 "Row has {} values but table has {} columns",
477 row.len(),
478 self.columns.len()
479 ));
480 }
481 self.rows.push(row);
482 Ok(())
483 }
484
485 #[must_use]
486 pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
487 self.columns.iter().find(|c| c.name == name)
488 }
489
490 #[must_use]
491 pub fn get_column_index(&self, name: &str) -> Option<usize> {
492 self.columns.iter().position(|c| c.name == name)
493 }
494
495 #[must_use]
497 pub fn find_column_by_qualified_name(&self, qualified_name: &str) -> Option<usize> {
498 self.columns
499 .iter()
500 .position(|c| c.qualified_name.as_deref() == Some(qualified_name))
501 }
502
503 #[must_use]
506 pub fn find_column_flexible(&self, name: &str, table_prefix: Option<&str>) -> Option<usize> {
507 if let Some(prefix) = table_prefix {
509 let qualified = format!("{}.{}", prefix, name);
510 if let Some(idx) = self.find_column_by_qualified_name(&qualified) {
511 return Some(idx);
512 }
513 }
514
515 self.get_column_index(name)
517 }
518
519 pub fn enrich_columns_with_qualified_names(&mut self, table_name: &str) {
521 for column in &mut self.columns {
522 column.qualified_name = Some(format!("{}.{}", table_name, column.name));
523 column.source_table = Some(table_name.to_string());
524 }
525 }
526
527 #[must_use]
528 pub fn column_count(&self) -> usize {
529 self.columns.len()
530 }
531
532 #[must_use]
533 pub fn row_count(&self) -> usize {
534 self.rows.len()
535 }
536
537 #[must_use]
538 pub fn is_empty(&self) -> bool {
539 self.rows.is_empty()
540 }
541
542 #[must_use]
544 pub fn column_names(&self) -> Vec<String> {
545 self.columns.iter().map(|c| c.name.clone()).collect()
546 }
547
548 pub fn columns_mut(&mut self) -> &mut [DataColumn] {
550 &mut self.columns
551 }
552
553 pub fn infer_column_types(&mut self) {
555 for (col_idx, column) in self.columns.iter_mut().enumerate() {
556 let mut inferred_type = DataType::Null;
557 let mut null_count = 0;
558 let mut unique_values = std::collections::HashSet::new();
559
560 for row in &self.rows {
561 if let Some(value) = row.get(col_idx) {
562 if value.is_null() {
563 null_count += 1;
564 } else {
565 let value_type = value.data_type();
566 inferred_type = inferred_type.merge(&value_type);
567 unique_values.insert(value.to_string());
568 }
569 }
570 }
571
572 column.data_type = inferred_type;
573 column.null_count = null_count;
574 column.nullable = null_count > 0;
575 column.unique_values = Some(unique_values.len());
576 }
577 }
578
579 #[must_use]
581 pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
582 self.rows.get(row)?.get(col)
583 }
584
585 #[must_use]
587 pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
588 let col_idx = self.get_column_index(col_name)?;
589 self.get_value(row, col_idx)
590 }
591
592 #[must_use]
594 pub fn to_string_table(&self) -> Vec<Vec<String>> {
595 self.rows
596 .iter()
597 .map(|row| {
598 row.values
599 .iter()
600 .map(DataValue::to_string_optimized)
601 .collect()
602 })
603 .collect()
604 }
605
606 #[must_use]
608 pub fn get_stats(&self) -> DataTableStats {
609 DataTableStats {
610 row_count: self.row_count(),
611 column_count: self.column_count(),
612 memory_size: self.estimate_memory_size(),
613 null_count: self.columns.iter().map(|c| c.null_count).sum(),
614 }
615 }
616
617 #[must_use]
619 pub fn debug_dump(&self) -> String {
620 let mut output = String::new();
621
622 output.push_str(&format!("DataTable: {}\n", self.name));
623 output.push_str(&format!(
624 "Rows: {} | Columns: {}\n",
625 self.row_count(),
626 self.column_count()
627 ));
628
629 if !self.metadata.is_empty() {
630 output.push_str("Metadata:\n");
631 for (key, value) in &self.metadata {
632 output.push_str(&format!(" {key}: {value}\n"));
633 }
634 }
635
636 output.push_str("\nColumns:\n");
637 for column in &self.columns {
638 output.push_str(&format!(" {} ({:?})", column.name, column.data_type));
639 if column.nullable {
640 output.push_str(&format!(" - nullable, {} nulls", column.null_count));
641 }
642 if let Some(unique) = column.unique_values {
643 output.push_str(&format!(", {unique} unique"));
644 }
645 output.push('\n');
646 }
647
648 if self.row_count() > 0 {
650 let sample_size = 5.min(self.row_count());
651 output.push_str(&format!("\nFirst {sample_size} rows:\n"));
652
653 for row_idx in 0..sample_size {
654 output.push_str(&format!(" [{row_idx}]: "));
655 for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
656 if col_idx > 0 {
657 output.push_str(", ");
658 }
659 output.push_str(&value.to_string());
660 }
661 output.push('\n');
662 }
663 }
664
665 output
666 }
667
668 #[must_use]
669 pub fn estimate_memory_size(&self) -> usize {
670 let mut size = std::mem::size_of::<Self>();
672
673 size += self.columns.len() * std::mem::size_of::<DataColumn>();
675 for col in &self.columns {
676 size += col.name.len();
677 }
678
679 size += self.rows.len() * std::mem::size_of::<DataRow>();
681
682 for row in &self.rows {
684 for value in &row.values {
685 size += std::mem::size_of::<DataValue>();
687 match value {
689 DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
690 DataValue::Vector(v) => size += v.len() * std::mem::size_of::<f64>(),
691 _ => {} }
693 }
694 }
695
696 size
697 }
698
699 pub fn to_csv(&self) -> String {
701 let mut csv_output = String::new();
702
703 let headers: Vec<String> = self
705 .columns
706 .iter()
707 .map(|col| {
708 if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
709 format!("\"{}\"", col.name.replace('"', "\"\""))
710 } else {
711 col.name.clone()
712 }
713 })
714 .collect();
715 csv_output.push_str(&headers.join(","));
716 csv_output.push('\n');
717
718 for row in &self.rows {
720 let row_values: Vec<String> = row
721 .values
722 .iter()
723 .map(|value| {
724 let str_val = value.to_string();
725 if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
726 format!("\"{}\"", str_val.replace('"', "\"\""))
727 } else {
728 str_val
729 }
730 })
731 .collect();
732 csv_output.push_str(&row_values.join(","));
733 csv_output.push('\n');
734 }
735
736 csv_output
737 }
738
739 pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
742 debug!(
743 "V46: Converting QueryResponse to DataTable for table '{}'",
744 table_name
745 );
746
747 crate::utils::memory_tracker::track_memory("start_from_query_response");
749
750 let mut table = DataTable::new(table_name);
751
752 if let Some(first_row) = response.data.first() {
754 if let Some(obj) = first_row.as_object() {
755 for key in obj.keys() {
757 let column = DataColumn::new(key.clone());
758 table.add_column(column);
759 }
760
761 for json_row in &response.data {
763 if let Some(row_obj) = json_row.as_object() {
764 let mut values = Vec::new();
765
766 for column in &table.columns {
768 let value = row_obj
769 .get(&column.name)
770 .map_or(DataValue::Null, json_value_to_data_value);
771 values.push(value);
772 }
773
774 table.add_row(DataRow::new(values))?;
775 }
776 }
777
778 table.infer_column_types();
780
781 if let Some(source) = &response.source {
783 table.metadata.insert("source".to_string(), source.clone());
784 }
785 if let Some(cached) = response.cached {
786 table
787 .metadata
788 .insert("cached".to_string(), cached.to_string());
789 }
790 table
791 .metadata
792 .insert("original_count".to_string(), response.count.to_string());
793
794 debug!(
795 "V46: Created DataTable with {} columns and {} rows",
796 table.column_count(),
797 table.row_count()
798 );
799 } else {
800 table.add_column(DataColumn::new("value"));
802 for json_value in &response.data {
803 let value = json_value_to_data_value(json_value);
804 table.add_row(DataRow::new(vec![value]))?;
805 }
806 }
807 }
808
809 Ok(table)
810 }
811
812 #[must_use]
814 pub fn get_row(&self, index: usize) -> Option<&DataRow> {
815 self.rows.get(index)
816 }
817
818 #[must_use]
820 pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
821 self.rows.get(index).map(|row| {
822 row.values
823 .iter()
824 .map(DataValue::to_string_optimized)
825 .collect()
826 })
827 }
828
829 #[must_use]
831 pub fn pretty_print(&self) -> String {
832 let mut output = String::new();
833
834 output.push_str("╔═══════════════════════════════════════════════════════╗\n");
836 output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
837 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
838
839 output.push_str(&format!(
841 "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
842 self.row_count(),
843 self.column_count(),
844 self.get_stats().memory_size
845 ));
846
847 if !self.metadata.is_empty() {
849 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
850 output.push_str("║ Metadata: ║\n");
851 for (key, value) in &self.metadata {
852 let truncated_value = if value.len() > 35 {
853 format!("{}...", &value[..32])
854 } else {
855 value.clone()
856 };
857 output.push_str(&format!(
858 "║ {:15} : {:35} ║\n",
859 Self::truncate_string(key, 15),
860 truncated_value
861 ));
862 }
863 }
864
865 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
867 output.push_str("║ Columns: ║\n");
868 output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
869 output.push_str("║ Name │ Type │ Nullable│ Nulls│Unique║\n");
870 output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
871
872 for column in &self.columns {
873 let type_str = match &column.data_type {
874 DataType::String => "String",
875 DataType::Integer => "Integer",
876 DataType::Float => "Float",
877 DataType::Boolean => "Boolean",
878 DataType::DateTime => "DateTime",
879 DataType::Null => "Null",
880 DataType::Mixed => "Mixed",
881 };
882
883 output.push_str(&format!(
884 "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
885 Self::truncate_string(&column.name, 17),
886 type_str,
887 if column.nullable { "Yes" } else { "No" },
888 column.null_count,
889 column.unique_values.unwrap_or(0)
890 ));
891 }
892
893 output.push_str("╚═══════════════════════════════════════════════════════╝\n");
894
895 output.push_str("\nSample Data (first 5 rows):\n");
897 let sample_count = self.rows.len().min(5);
898
899 if sample_count > 0 {
900 output.push('┌');
902 for (i, _col) in self.columns.iter().enumerate() {
903 if i > 0 {
904 output.push('┬');
905 }
906 output.push_str(&"─".repeat(20));
907 }
908 output.push_str("┐\n");
909
910 output.push('│');
911 for col in &self.columns {
912 output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
913 }
914 output.push('\n');
915
916 output.push('├');
917 for (i, _) in self.columns.iter().enumerate() {
918 if i > 0 {
919 output.push('┼');
920 }
921 output.push_str(&"─".repeat(20));
922 }
923 output.push_str("┤\n");
924
925 for row_idx in 0..sample_count {
927 if let Some(row) = self.rows.get(row_idx) {
928 output.push('│');
929 for value in &row.values {
930 let value_str = value.to_string();
931 output
932 .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
933 }
934 output.push('\n');
935 }
936 }
937
938 output.push('└');
939 for (i, _) in self.columns.iter().enumerate() {
940 if i > 0 {
941 output.push('┴');
942 }
943 output.push_str(&"─".repeat(20));
944 }
945 output.push_str("┘\n");
946 }
947
948 output
949 }
950
/// Shortens `s` to at most `max_len` characters for display.
///
/// Strings that already fit are returned unchanged. Longer strings are cut to
/// `max_len - 3` characters followed by `...`. When `max_len` is smaller than
/// the ellipsis itself, the string is simply cut to `max_len` characters.
///
/// Length is measured in characters rather than bytes, so multi-byte text is
/// never split inside a character (which would panic when slicing) and strings
/// that fit on screen are not truncated just because they use more bytes.
fn truncate_string(s: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    if s.chars().count() <= max_len {
        return s.to_string();
    }
    if max_len < ELLIPSIS.len() {
        // No room for the ellipsis; a plain cut avoids the `max_len - 3` underflow.
        return s.chars().take(max_len).collect();
    }

    let mut shortened: String = s.chars().take(max_len - ELLIPSIS.len()).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
958
959 #[must_use]
961 pub fn get_schema_summary(&self) -> String {
962 let mut summary = String::new();
963 summary.push_str(&format!(
964 "DataTable Schema ({} columns, {} rows):\n",
965 self.columns.len(),
966 self.rows.len()
967 ));
968
969 for (idx, column) in self.columns.iter().enumerate() {
970 let type_str = match &column.data_type {
971 DataType::String => "String",
972 DataType::Integer => "Integer",
973 DataType::Float => "Float",
974 DataType::Boolean => "Boolean",
975 DataType::DateTime => "DateTime",
976 DataType::Null => "Null",
977 DataType::Mixed => "Mixed",
978 };
979
980 let nullable_str = if column.nullable {
981 "nullable"
982 } else {
983 "not null"
984 };
985 let null_info = if column.null_count > 0 {
986 format!(", {} nulls", column.null_count)
987 } else {
988 String::new()
989 };
990
991 summary.push_str(&format!(
992 " [{:3}] {} : {} ({}{})\n",
993 idx, column.name, type_str, nullable_str, null_info
994 ));
995 }
996
997 summary
998 }
999
1000 #[must_use]
1002 pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
1003 self.columns
1004 .iter()
1005 .map(|col| {
1006 let type_name = format!("{:?}", col.data_type);
1007 (col.name.clone(), type_name, col.nullable, col.null_count)
1008 })
1009 .collect()
1010 }
1011
1012 pub fn reserve_rows(&mut self, additional: usize) {
1014 self.rows.reserve(additional);
1015 }
1016
1017 pub fn shrink_to_fit(&mut self) {
1019 self.rows.shrink_to_fit();
1020 for _column in &mut self.columns {
1021 }
1023 }
1024
1025 #[must_use]
1027 pub fn get_memory_usage(&self) -> usize {
1028 let mut size = std::mem::size_of::<Self>();
1029
1030 size += self.name.capacity();
1032
1033 size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
1035 for col in &self.columns {
1036 size += col.name.capacity();
1037 }
1038
1039 size += self.rows.capacity() * std::mem::size_of::<DataRow>();
1041
1042 for row in &self.rows {
1044 size += row.values.capacity() * std::mem::size_of::<DataValue>();
1045 for value in &row.values {
1046 match value {
1047 DataValue::String(s) => size += s.capacity(),
1048 DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
1049 DataValue::DateTime(s) => size += s.capacity(),
1050 DataValue::Vector(v) => size += v.capacity() * std::mem::size_of::<f64>(),
1051 _ => {} }
1053 }
1054 }
1055
1056 size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
1058 for (k, v) in &self.metadata {
1059 size += k.capacity() + v.capacity();
1060 }
1061
1062 size
1063 }
1064
1065 pub fn to_parquet_bytes(&self) -> Result<Vec<u8>, String> {
1067 rmp_serde::to_vec(self).map_err(|e| format!("Failed to serialize DataTable: {}", e))
1070 }
1071
1072 pub fn from_parquet_bytes(bytes: &[u8]) -> Result<Self, String> {
1074 rmp_serde::from_slice(bytes).map_err(|e| format!("Failed to deserialize DataTable: {}", e))
1077 }
1078}
1079
1080fn json_value_to_data_value(json: &JsonValue) -> DataValue {
1082 match json {
1083 JsonValue::Null => DataValue::Null,
1084 JsonValue::Bool(b) => DataValue::Boolean(*b),
1085 JsonValue::Number(n) => {
1086 if let Some(i) = n.as_i64() {
1087 DataValue::Integer(i)
1088 } else if let Some(f) = n.as_f64() {
1089 DataValue::Float(f)
1090 } else {
1091 DataValue::String(n.to_string())
1092 }
1093 }
1094 JsonValue::String(s) => {
1095 if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
1097 DataValue::DateTime(s.clone())
1099 } else {
1100 DataValue::String(s.clone())
1101 }
1102 }
1103 JsonValue::Array(_) | JsonValue::Object(_) => {
1104 DataValue::String(json.to_string())
1106 }
1107 }
1108}
1109
/// Summary figures for a `DataTable`, as produced by `DataTable::get_stats`.
#[derive(Debug, Clone)]
pub struct DataTableStats {
    /// Number of rows.
    pub row_count: usize,
    /// Number of columns.
    pub column_count: usize,
    /// Estimated size in bytes (from `DataTable::estimate_memory_size`).
    pub memory_size: usize,
    /// Sum of the per-column null counts.
    pub null_count: usize,
}
1118
1119impl DataProvider for DataTable {
1122 fn get_row(&self, index: usize) -> Option<Vec<String>> {
1123 self.rows.get(index).map(|row| {
1124 row.values
1125 .iter()
1126 .map(DataValue::to_string_optimized)
1127 .collect()
1128 })
1129 }
1130
1131 fn get_column_names(&self) -> Vec<String> {
1132 self.column_names()
1133 }
1134
1135 fn get_row_count(&self) -> usize {
1136 self.row_count()
1137 }
1138
1139 fn get_column_count(&self) -> usize {
1140 self.column_count()
1141 }
1142}
1143
#[cfg(test)]
mod tests {
    use super::*;

    /// Raw strings are classified into the expected `DataType`.
    #[test]
    fn test_data_type_inference() {
        let cases = [
            ("123", DataType::Integer),
            ("123.45", DataType::Float),
            ("true", DataType::Boolean),
            ("hello", DataType::String),
            ("", DataType::Null),
            ("2024-01-01", DataType::DateTime),
        ];

        for (raw, expected) in cases {
            assert_eq!(DataType::infer_from_string(raw), expected);
        }
    }

    /// Columns and rows can be added and a cell can be read back by name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        let schema = [
            ("id", DataType::Integer),
            ("name", DataType::String),
            ("active", DataType::Boolean),
        ];
        for (column_name, column_type) in schema {
            table.add_column(DataColumn::new(column_name).with_type(column_type));
        }

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let alice = DataRow::new(vec![
            DataValue::Integer(1),
            DataValue::String("Alice".to_string()),
            DataValue::Boolean(true),
        ]);
        table.add_row(alice).unwrap();
        assert_eq!(table.row_count(), 1);

        let name_cell = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(name_cell.to_string(), "Alice");
    }

    /// Integer + Float + Null in one column infers a nullable Float column.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");
        table.add_column(DataColumn::new("mixed"));

        for cell in [
            DataValue::Integer(1),
            DataValue::Float(2.5),
            DataValue::Null,
        ] {
            table.add_row(DataRow::new(vec![cell])).unwrap();
        }

        table.infer_column_types();

        let column = &table.columns[0];
        assert_eq!(column.data_type, DataType::Float);
        assert_eq!(column.null_count, 1);
        assert!(column.nullable);
    }

    /// A response with object rows yields matching columns, rows and metadata.
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        let response = QueryResponse {
            query: QueryInfo {
                select: vec!["id".to_string(), "name".to_string(), "age".to_string()],
                where_clause: None,
                order_by: None,
            },
            data: vec![
                json!({ "id": 1, "name": "Alice", "age": 30 }),
                json!({ "id": 2, "name": "Bob", "age": 25 }),
                json!({ "id": 3, "name": "Carol", "age": null }),
            ],
            count: 3,
            source: Some("test.csv".to_string()),
            table: Some("test".to_string()),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        // Column order is not asserted, only presence.
        let col_names = table.column_names();
        for expected in ["id", "name", "age"] {
            assert!(col_names.contains(&expected.to_string()));
        }

        assert_eq!(table.metadata.get("source"), Some(&"test.csv".to_string()));
        assert_eq!(table.metadata.get("cached"), Some(&"false".to_string()));

        assert_eq!(
            table.get_value_by_name(0, "id"),
            Some(&DataValue::Integer(1))
        );
        assert_eq!(
            table.get_value_by_name(0, "name"),
            Some(&DataValue::String("Alice".to_string()))
        );
        assert_eq!(
            table.get_value_by_name(0, "age"),
            Some(&DataValue::Integer(30))
        );

        // JSON null maps to DataValue::Null.
        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}