1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::de::{VariantAccess, Visitor};
5use serde::{Deserialize, Serialize};
6use serde_json::Value as JsonValue;
7use std::collections::HashMap;
8use std::fmt;
9use std::sync::Arc;
10use tracing::debug;
11
/// Logical type attached to a column or reported for a single value.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum DataType {
    /// Free-form text; also the default type of a freshly created column.
    String,
    /// Whole number.
    Integer,
    /// Floating-point number; also the result of merging `Integer` with `Float`.
    Float,
    /// Boolean flag.
    Boolean,
    /// Date/time value.
    DateTime,
    /// No typed value seen; `merge` lets any other type win over it.
    Null,
    /// Result of merging two types that have no common representation.
    Mixed,
}
23
24impl DataType {
25 #[must_use]
27 pub fn infer_from_string(value: &str) -> Self {
28 if value.eq_ignore_ascii_case("null") {
30 return DataType::Null;
31 }
32
33 match TypeInference::infer_from_string(value) {
35 InferredType::Null => DataType::Null,
36 InferredType::Boolean => DataType::Boolean,
37 InferredType::Integer => DataType::Integer,
38 InferredType::Float => DataType::Float,
39 InferredType::DateTime => DataType::DateTime,
40 InferredType::String => DataType::String,
41 }
42 }
43
44 fn looks_like_datetime(value: &str) -> bool {
47 TypeInference::looks_like_datetime(value)
48 }
49
50 #[must_use]
52 pub fn merge(&self, other: &DataType) -> DataType {
53 if self == other {
54 return self.clone();
55 }
56
57 match (self, other) {
58 (DataType::Null, t) | (t, DataType::Null) => t.clone(),
59 (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
60 DataType::Float
61 }
62 _ => DataType::Mixed,
63 }
64 }
65}
66
/// Description of one column of a `DataTable`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataColumn {
    /// Simple (unqualified) column name.
    pub name: String,
    /// Type of the column's values; `DataType::String` until set or inferred.
    pub data_type: DataType,
    /// Whether the column may hold nulls.
    pub nullable: bool,
    /// Number of distinct non-null values, when it has been computed.
    pub unique_values: Option<usize>,
    /// Number of null values counted in the column.
    pub null_count: usize,
    /// Free-form key/value annotations.
    pub metadata: HashMap<String, String>,
    /// `table.column` form of the name, when the column has been qualified.
    pub qualified_name: Option<String>,
    /// Name of the table used to qualify the column, if any.
    pub source_table: Option<String>,
}
81
82impl DataColumn {
83 pub fn new(name: impl Into<String>) -> Self {
84 Self {
85 name: name.into(),
86 data_type: DataType::String,
87 nullable: true,
88 unique_values: None,
89 null_count: 0,
90 metadata: HashMap::new(),
91 qualified_name: None,
92 source_table: None,
93 }
94 }
95
96 #[must_use]
97 pub fn with_type(mut self, data_type: DataType) -> Self {
98 self.data_type = data_type;
99 self
100 }
101
102 #[must_use]
104 pub fn with_qualified_name(mut self, table_name: &str) -> Self {
105 self.qualified_name = Some(format!("{}.{}", table_name, self.name));
106 self.source_table = Some(table_name.to_string());
107 self
108 }
109
110 pub fn get_qualified_or_simple_name(&self) -> &str {
112 self.qualified_name.as_deref().unwrap_or(&self.name)
113 }
114
115 #[must_use]
116 pub fn with_nullable(mut self, nullable: bool) -> Self {
117 self.nullable = nullable;
118 self
119 }
120}
121
/// A single cell value stored in a `DataRow`.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    /// Owned text.
    String(String),
    /// Text held behind a shared `Arc`.
    InternedString(Arc<String>),
    /// 64-bit signed integer.
    Integer(i64),
    /// 64-bit float.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Date/time kept in its textual form.
    DateTime(String),
    /// List of floats.
    Vector(Vec<f64>),
    /// Absent value.
    Null,
}
134
135impl std::hash::Hash for DataValue {
137 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
138 match self {
139 DataValue::String(s) => {
140 0u8.hash(state);
141 s.hash(state);
142 }
143 DataValue::InternedString(s) => {
144 1u8.hash(state);
145 s.hash(state);
146 }
147 DataValue::Integer(i) => {
148 2u8.hash(state);
149 i.hash(state);
150 }
151 DataValue::Float(f) => {
152 3u8.hash(state);
153 f.to_bits().hash(state);
155 }
156 DataValue::Boolean(b) => {
157 4u8.hash(state);
158 b.hash(state);
159 }
160 DataValue::DateTime(dt) => {
161 5u8.hash(state);
162 dt.hash(state);
163 }
164 DataValue::Vector(v) => {
165 6u8.hash(state);
166 for f in v {
168 f.to_bits().hash(state);
169 }
170 }
171 DataValue::Null => {
172 7u8.hash(state);
173 }
174 }
175 }
176}
177
178impl Serialize for DataValue {
180 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
181 where
182 S: serde::Serializer,
183 {
184 match self {
185 DataValue::String(s) => {
186 serializer.serialize_newtype_variant("DataValue", 0, "String", s)
187 }
188 DataValue::InternedString(arc_s) => {
189 serializer.serialize_newtype_variant(
191 "DataValue",
192 1,
193 "InternedString",
194 arc_s.as_ref(),
195 )
196 }
197 DataValue::Integer(i) => {
198 serializer.serialize_newtype_variant("DataValue", 2, "Integer", i)
199 }
200 DataValue::Float(f) => serializer.serialize_newtype_variant("DataValue", 3, "Float", f),
201 DataValue::Boolean(b) => {
202 serializer.serialize_newtype_variant("DataValue", 4, "Boolean", b)
203 }
204 DataValue::DateTime(dt) => {
205 serializer.serialize_newtype_variant("DataValue", 5, "DateTime", dt)
206 }
207 DataValue::Vector(v) => {
208 serializer.serialize_newtype_variant("DataValue", 6, "Vector", v)
209 }
210 DataValue::Null => serializer.serialize_unit_variant("DataValue", 7, "Null"),
211 }
212 }
213}
214
/// Hand-written counterpart of the `Serialize` impl: reads an enum named
/// `DataValue` and rebuilds the matching variant.
impl<'de> Deserialize<'de> for DataValue {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Identifier type used to decode which variant is present.
        #[derive(Deserialize)]
        #[serde(field_identifier, rename_all = "PascalCase")]
        enum Field {
            String,
            InternedString,
            Integer,
            Float,
            Boolean,
            DateTime,
            Vector,
            Null,
        }

        struct DataValueVisitor;

        impl<'de> Visitor<'de> for DataValueVisitor {
            type Value = DataValue;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("enum DataValue")
            }

            // Decodes the variant tag first, then the payload of that variant.
            fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::EnumAccess<'de>,
            {
                let (field, variant) = data.variant()?;
                match field {
                    Field::String => {
                        let s: String = variant.newtype_variant()?;
                        Ok(DataValue::String(s))
                    }
                    Field::InternedString => {
                        // The text is read as a plain string and wrapped in a new `Arc`.
                        let s: String = variant.newtype_variant()?;
                        Ok(DataValue::InternedString(Arc::new(s)))
                    }
                    Field::Integer => {
                        let i: i64 = variant.newtype_variant()?;
                        Ok(DataValue::Integer(i))
                    }
                    Field::Float => {
                        let f: f64 = variant.newtype_variant()?;
                        Ok(DataValue::Float(f))
                    }
                    Field::Boolean => {
                        let b: bool = variant.newtype_variant()?;
                        Ok(DataValue::Boolean(b))
                    }
                    Field::DateTime => {
                        let dt: String = variant.newtype_variant()?;
                        Ok(DataValue::DateTime(dt))
                    }
                    Field::Vector => {
                        let v: Vec<f64> = variant.newtype_variant()?;
                        Ok(DataValue::Vector(v))
                    }
                    Field::Null => {
                        variant.unit_variant()?;
                        Ok(DataValue::Null)
                    }
                }
            }
        }

        // Variant names are listed in the same order as the indices used when serializing.
        deserializer.deserialize_enum(
            "DataValue",
            &[
                "String",
                "InternedString",
                "Integer",
                "Float",
                "Boolean",
                "DateTime",
                "Vector",
                "Null",
            ],
            DataValueVisitor,
        )
    }
}
301
// Marker impl on top of the derived `PartialEq`.
// NOTE(review): `Float` and `Vector` carry `f64`, so a NaN payload is not
// equal to itself — confirm callers never rely on reflexivity for such values.
impl Eq for DataValue {}
304
305impl DataValue {
306 pub fn from_string(s: &str, data_type: &DataType) -> Self {
307 if s.is_empty() || s.eq_ignore_ascii_case("null") {
308 return DataValue::Null;
309 }
310
311 match data_type {
312 DataType::String => DataValue::String(s.to_string()),
313 DataType::Integer => s
314 .parse::<i64>()
315 .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Integer),
316 DataType::Float => s
317 .parse::<f64>()
318 .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
319 DataType::Boolean => {
320 let lower = s.to_lowercase();
321 DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
322 }
323 DataType::DateTime => DataValue::DateTime(s.to_string()),
324 DataType::Null => DataValue::Null,
325 DataType::Mixed => {
326 let inferred = DataType::infer_from_string(s);
328 Self::from_string(s, &inferred)
329 }
330 }
331 }
332
    /// Returns `true` only for the `Null` variant.
    #[must_use]
    pub fn is_null(&self) -> bool {
        matches!(self, DataValue::Null)
    }
337
338 #[must_use]
339 pub fn data_type(&self) -> DataType {
340 match self {
341 DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
342 DataValue::Integer(_) => DataType::Integer,
343 DataValue::Float(_) => DataType::Float,
344 DataValue::Boolean(_) => DataType::Boolean,
345 DataValue::DateTime(_) => DataType::DateTime,
346 DataValue::Vector(_) => DataType::String, DataValue::Null => DataType::Null,
348 }
349 }
350
351 #[must_use]
354 pub fn to_string_optimized(&self) -> String {
355 match self {
356 DataValue::String(s) => s.clone(), DataValue::InternedString(s) => s.as_ref().clone(), DataValue::DateTime(s) => s.clone(), DataValue::Integer(i) => i.to_string(),
360 DataValue::Float(f) => f.to_string(),
361 DataValue::Boolean(b) => {
362 if *b {
363 "true".to_string()
364 } else {
365 "false".to_string()
366 }
367 }
368 DataValue::Vector(v) => {
369 let components: Vec<String> = v.iter().map(|f| f.to_string()).collect();
371 format!("[{}]", components.join(","))
372 }
373 DataValue::Null => String::new(), }
375 }
376}
377
378impl fmt::Display for DataValue {
379 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
380 match self {
381 DataValue::String(s) => write!(f, "{s}"),
382 DataValue::InternedString(s) => write!(f, "{s}"),
383 DataValue::Integer(i) => write!(f, "{i}"),
384 DataValue::Float(fl) => write!(f, "{fl}"),
385 DataValue::Boolean(b) => write!(f, "{b}"),
386 DataValue::DateTime(dt) => write!(f, "{dt}"),
387 DataValue::Vector(v) => {
388 let components: Vec<String> = v.iter().map(|fl| fl.to_string()).collect();
389 write!(f, "[{}]", components.join(","))
390 }
391 DataValue::Null => write!(f, ""),
392 }
393 }
394}
395
/// One row of a `DataTable`: an ordered list of cell values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataRow {
    /// Cell values; `DataTable::add_row` requires one per table column.
    pub values: Vec<DataValue>,
}
401
402impl DataRow {
403 #[must_use]
404 pub fn new(values: Vec<DataValue>) -> Self {
405 Self { values }
406 }
407
408 #[must_use]
409 pub fn get(&self, index: usize) -> Option<&DataValue> {
410 self.values.get(index)
411 }
412
413 pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
414 self.values.get_mut(index)
415 }
416
417 #[must_use]
418 pub fn len(&self) -> usize {
419 self.values.len()
420 }
421
422 #[must_use]
423 pub fn is_empty(&self) -> bool {
424 self.values.is_empty()
425 }
426}
427
/// In-memory table: named columns plus rows of `DataValue`s.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataTable {
    /// Table name.
    pub name: String,
    /// Column descriptions, in positional order.
    pub columns: Vec<DataColumn>,
    /// Rows; `add_row` only accepts rows with one value per column.
    pub rows: Vec<DataRow>,
    /// Free-form key/value annotations about the table.
    pub metadata: HashMap<String, String>,
}
436
437impl DataTable {
    /// Creates an empty table called `name`: no columns, no rows, no metadata.
    pub fn new(name: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            columns: Vec::new(),
            rows: Vec::new(),
            metadata: HashMap::new(),
        }
    }
446
447 #[must_use]
450 pub fn dual() -> Self {
451 let mut table = DataTable::new("DUAL");
452 table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
453 table
454 .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
455 .unwrap();
456 table
457 }
458
    /// Appends `column` and returns `self` so calls can be chained.
    /// Existing rows are not touched.
    pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
        self.columns.push(column);
        self
    }
463
464 pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
465 if row.len() != self.columns.len() {
466 return Err(format!(
467 "Row has {} values but table has {} columns",
468 row.len(),
469 self.columns.len()
470 ));
471 }
472 self.rows.push(row);
473 Ok(())
474 }
475
476 #[must_use]
477 pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
478 self.columns.iter().find(|c| c.name == name)
479 }
480
481 #[must_use]
482 pub fn get_column_index(&self, name: &str) -> Option<usize> {
483 self.columns.iter().position(|c| c.name == name)
484 }
485
486 #[must_use]
488 pub fn find_column_by_qualified_name(&self, qualified_name: &str) -> Option<usize> {
489 self.columns
490 .iter()
491 .position(|c| c.qualified_name.as_deref() == Some(qualified_name))
492 }
493
494 #[must_use]
497 pub fn find_column_flexible(&self, name: &str, table_prefix: Option<&str>) -> Option<usize> {
498 if let Some(prefix) = table_prefix {
500 let qualified = format!("{}.{}", prefix, name);
501 if let Some(idx) = self.find_column_by_qualified_name(&qualified) {
502 return Some(idx);
503 }
504 }
505
506 self.get_column_index(name)
508 }
509
510 pub fn enrich_columns_with_qualified_names(&mut self, table_name: &str) {
512 for column in &mut self.columns {
513 column.qualified_name = Some(format!("{}.{}", table_name, column.name));
514 column.source_table = Some(table_name.to_string());
515 }
516 }
517
    /// Number of columns in the table.
    #[must_use]
    pub fn column_count(&self) -> usize {
        self.columns.len()
    }
522
    /// Number of rows in the table.
    #[must_use]
    pub fn row_count(&self) -> usize {
        self.rows.len()
    }
527
    /// `true` when the table has no rows (columns are not considered).
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.rows.is_empty()
    }
532
533 #[must_use]
535 pub fn column_names(&self) -> Vec<String> {
536 self.columns.iter().map(|c| c.name.clone()).collect()
537 }
538
539 pub fn columns_mut(&mut self) -> &mut [DataColumn] {
541 &mut self.columns
542 }
543
544 pub fn infer_column_types(&mut self) {
546 for (col_idx, column) in self.columns.iter_mut().enumerate() {
547 let mut inferred_type = DataType::Null;
548 let mut null_count = 0;
549 let mut unique_values = std::collections::HashSet::new();
550
551 for row in &self.rows {
552 if let Some(value) = row.get(col_idx) {
553 if value.is_null() {
554 null_count += 1;
555 } else {
556 let value_type = value.data_type();
557 inferred_type = inferred_type.merge(&value_type);
558 unique_values.insert(value.to_string());
559 }
560 }
561 }
562
563 column.data_type = inferred_type;
564 column.null_count = null_count;
565 column.nullable = null_count > 0;
566 column.unique_values = Some(unique_values.len());
567 }
568 }
569
570 #[must_use]
572 pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
573 self.rows.get(row)?.get(col)
574 }
575
576 #[must_use]
578 pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
579 let col_idx = self.get_column_index(col_name)?;
580 self.get_value(row, col_idx)
581 }
582
583 #[must_use]
585 pub fn to_string_table(&self) -> Vec<Vec<String>> {
586 self.rows
587 .iter()
588 .map(|row| {
589 row.values
590 .iter()
591 .map(DataValue::to_string_optimized)
592 .collect()
593 })
594 .collect()
595 }
596
597 #[must_use]
599 pub fn get_stats(&self) -> DataTableStats {
600 DataTableStats {
601 row_count: self.row_count(),
602 column_count: self.column_count(),
603 memory_size: self.estimate_memory_size(),
604 null_count: self.columns.iter().map(|c| c.null_count).sum(),
605 }
606 }
607
608 #[must_use]
610 pub fn debug_dump(&self) -> String {
611 let mut output = String::new();
612
613 output.push_str(&format!("DataTable: {}\n", self.name));
614 output.push_str(&format!(
615 "Rows: {} | Columns: {}\n",
616 self.row_count(),
617 self.column_count()
618 ));
619
620 if !self.metadata.is_empty() {
621 output.push_str("Metadata:\n");
622 for (key, value) in &self.metadata {
623 output.push_str(&format!(" {key}: {value}\n"));
624 }
625 }
626
627 output.push_str("\nColumns:\n");
628 for column in &self.columns {
629 output.push_str(&format!(" {} ({:?})", column.name, column.data_type));
630 if column.nullable {
631 output.push_str(&format!(" - nullable, {} nulls", column.null_count));
632 }
633 if let Some(unique) = column.unique_values {
634 output.push_str(&format!(", {unique} unique"));
635 }
636 output.push('\n');
637 }
638
639 if self.row_count() > 0 {
641 let sample_size = 5.min(self.row_count());
642 output.push_str(&format!("\nFirst {sample_size} rows:\n"));
643
644 for row_idx in 0..sample_size {
645 output.push_str(&format!(" [{row_idx}]: "));
646 for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
647 if col_idx > 0 {
648 output.push_str(", ");
649 }
650 output.push_str(&value.to_string());
651 }
652 output.push('\n');
653 }
654 }
655
656 output
657 }
658
659 #[must_use]
660 pub fn estimate_memory_size(&self) -> usize {
661 let mut size = std::mem::size_of::<Self>();
663
664 size += self.columns.len() * std::mem::size_of::<DataColumn>();
666 for col in &self.columns {
667 size += col.name.len();
668 }
669
670 size += self.rows.len() * std::mem::size_of::<DataRow>();
672
673 for row in &self.rows {
675 for value in &row.values {
676 size += std::mem::size_of::<DataValue>();
678 match value {
680 DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
681 DataValue::Vector(v) => size += v.len() * std::mem::size_of::<f64>(),
682 _ => {} }
684 }
685 }
686
687 size
688 }
689
690 pub fn to_csv(&self) -> String {
692 let mut csv_output = String::new();
693
694 let headers: Vec<String> = self
696 .columns
697 .iter()
698 .map(|col| {
699 if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
700 format!("\"{}\"", col.name.replace('"', "\"\""))
701 } else {
702 col.name.clone()
703 }
704 })
705 .collect();
706 csv_output.push_str(&headers.join(","));
707 csv_output.push('\n');
708
709 for row in &self.rows {
711 let row_values: Vec<String> = row
712 .values
713 .iter()
714 .map(|value| {
715 let str_val = value.to_string();
716 if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
717 format!("\"{}\"", str_val.replace('"', "\"\""))
718 } else {
719 str_val
720 }
721 })
722 .collect();
723 csv_output.push_str(&row_values.join(","));
724 csv_output.push('\n');
725 }
726
727 csv_output
728 }
729
730 pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
733 debug!(
734 "V46: Converting QueryResponse to DataTable for table '{}'",
735 table_name
736 );
737
738 crate::utils::memory_tracker::track_memory("start_from_query_response");
740
741 let mut table = DataTable::new(table_name);
742
743 if let Some(first_row) = response.data.first() {
745 if let Some(obj) = first_row.as_object() {
746 for key in obj.keys() {
748 let column = DataColumn::new(key.clone());
749 table.add_column(column);
750 }
751
752 for json_row in &response.data {
754 if let Some(row_obj) = json_row.as_object() {
755 let mut values = Vec::new();
756
757 for column in &table.columns {
759 let value = row_obj
760 .get(&column.name)
761 .map_or(DataValue::Null, json_value_to_data_value);
762 values.push(value);
763 }
764
765 table.add_row(DataRow::new(values))?;
766 }
767 }
768
769 table.infer_column_types();
771
772 if let Some(source) = &response.source {
774 table.metadata.insert("source".to_string(), source.clone());
775 }
776 if let Some(cached) = response.cached {
777 table
778 .metadata
779 .insert("cached".to_string(), cached.to_string());
780 }
781 table
782 .metadata
783 .insert("original_count".to_string(), response.count.to_string());
784
785 debug!(
786 "V46: Created DataTable with {} columns and {} rows",
787 table.column_count(),
788 table.row_count()
789 );
790 } else {
791 table.add_column(DataColumn::new("value"));
793 for json_value in &response.data {
794 let value = json_value_to_data_value(json_value);
795 table.add_row(DataRow::new(vec![value]))?;
796 }
797 }
798 }
799
800 Ok(table)
801 }
802
    /// Borrows the row at `index`, or `None` when the index is out of range.
    #[must_use]
    pub fn get_row(&self, index: usize) -> Option<&DataRow> {
        self.rows.get(index)
    }
808
809 #[must_use]
811 pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
812 self.rows.get(index).map(|row| {
813 row.values
814 .iter()
815 .map(DataValue::to_string_optimized)
816 .collect()
817 })
818 }
819
820 #[must_use]
822 pub fn pretty_print(&self) -> String {
823 let mut output = String::new();
824
825 output.push_str("╔═══════════════════════════════════════════════════════╗\n");
827 output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
828 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
829
830 output.push_str(&format!(
832 "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
833 self.row_count(),
834 self.column_count(),
835 self.get_stats().memory_size
836 ));
837
838 if !self.metadata.is_empty() {
840 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
841 output.push_str("║ Metadata: ║\n");
842 for (key, value) in &self.metadata {
843 let truncated_value = if value.len() > 35 {
844 format!("{}...", &value[..32])
845 } else {
846 value.clone()
847 };
848 output.push_str(&format!(
849 "║ {:15} : {:35} ║\n",
850 Self::truncate_string(key, 15),
851 truncated_value
852 ));
853 }
854 }
855
856 output.push_str("╠═══════════════════════════════════════════════════════╣\n");
858 output.push_str("║ Columns: ║\n");
859 output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
860 output.push_str("║ Name │ Type │ Nullable│ Nulls│Unique║\n");
861 output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
862
863 for column in &self.columns {
864 let type_str = match &column.data_type {
865 DataType::String => "String",
866 DataType::Integer => "Integer",
867 DataType::Float => "Float",
868 DataType::Boolean => "Boolean",
869 DataType::DateTime => "DateTime",
870 DataType::Null => "Null",
871 DataType::Mixed => "Mixed",
872 };
873
874 output.push_str(&format!(
875 "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
876 Self::truncate_string(&column.name, 17),
877 type_str,
878 if column.nullable { "Yes" } else { "No" },
879 column.null_count,
880 column.unique_values.unwrap_or(0)
881 ));
882 }
883
884 output.push_str("╚═══════════════════════════════════════════════════════╝\n");
885
886 output.push_str("\nSample Data (first 5 rows):\n");
888 let sample_count = self.rows.len().min(5);
889
890 if sample_count > 0 {
891 output.push('┌');
893 for (i, _col) in self.columns.iter().enumerate() {
894 if i > 0 {
895 output.push('┬');
896 }
897 output.push_str(&"─".repeat(20));
898 }
899 output.push_str("┐\n");
900
901 output.push('│');
902 for col in &self.columns {
903 output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
904 }
905 output.push('\n');
906
907 output.push('├');
908 for (i, _) in self.columns.iter().enumerate() {
909 if i > 0 {
910 output.push('┼');
911 }
912 output.push_str(&"─".repeat(20));
913 }
914 output.push_str("┤\n");
915
916 for row_idx in 0..sample_count {
918 if let Some(row) = self.rows.get(row_idx) {
919 output.push('│');
920 for value in &row.values {
921 let value_str = value.to_string();
922 output
923 .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
924 }
925 output.push('\n');
926 }
927 }
928
929 output.push('└');
930 for (i, _) in self.columns.iter().enumerate() {
931 if i > 0 {
932 output.push('┴');
933 }
934 output.push_str(&"─".repeat(20));
935 }
936 output.push_str("┘\n");
937 }
938
939 output
940 }
941
942 fn truncate_string(s: &str, max_len: usize) -> String {
943 if s.len() > max_len {
944 format!("{}...", &s[..max_len - 3])
945 } else {
946 s.to_string()
947 }
948 }
949
950 #[must_use]
952 pub fn get_schema_summary(&self) -> String {
953 let mut summary = String::new();
954 summary.push_str(&format!(
955 "DataTable Schema ({} columns, {} rows):\n",
956 self.columns.len(),
957 self.rows.len()
958 ));
959
960 for (idx, column) in self.columns.iter().enumerate() {
961 let type_str = match &column.data_type {
962 DataType::String => "String",
963 DataType::Integer => "Integer",
964 DataType::Float => "Float",
965 DataType::Boolean => "Boolean",
966 DataType::DateTime => "DateTime",
967 DataType::Null => "Null",
968 DataType::Mixed => "Mixed",
969 };
970
971 let nullable_str = if column.nullable {
972 "nullable"
973 } else {
974 "not null"
975 };
976 let null_info = if column.null_count > 0 {
977 format!(", {} nulls", column.null_count)
978 } else {
979 String::new()
980 };
981
982 summary.push_str(&format!(
983 " [{:3}] {} : {} ({}{})\n",
984 idx, column.name, type_str, nullable_str, null_info
985 ));
986 }
987
988 summary
989 }
990
991 #[must_use]
993 pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
994 self.columns
995 .iter()
996 .map(|col| {
997 let type_name = format!("{:?}", col.data_type);
998 (col.name.clone(), type_name, col.nullable, col.null_count)
999 })
1000 .collect()
1001 }
1002
    /// Reserves capacity for at least `additional` more rows.
    pub fn reserve_rows(&mut self, additional: usize) {
        self.rows.reserve(additional);
    }
1007
1008 pub fn shrink_to_fit(&mut self) {
1010 self.rows.shrink_to_fit();
1011 for _column in &mut self.columns {
1012 }
1014 }
1015
1016 #[must_use]
1018 pub fn get_memory_usage(&self) -> usize {
1019 let mut size = std::mem::size_of::<Self>();
1020
1021 size += self.name.capacity();
1023
1024 size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
1026 for col in &self.columns {
1027 size += col.name.capacity();
1028 }
1029
1030 size += self.rows.capacity() * std::mem::size_of::<DataRow>();
1032
1033 for row in &self.rows {
1035 size += row.values.capacity() * std::mem::size_of::<DataValue>();
1036 for value in &row.values {
1037 match value {
1038 DataValue::String(s) => size += s.capacity(),
1039 DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
1040 DataValue::DateTime(s) => size += s.capacity(),
1041 DataValue::Vector(v) => size += v.capacity() * std::mem::size_of::<f64>(),
1042 _ => {} }
1044 }
1045 }
1046
1047 size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
1049 for (k, v) in &self.metadata {
1050 size += k.capacity() + v.capacity();
1051 }
1052
1053 size
1054 }
1055
1056 pub fn to_parquet_bytes(&self) -> Result<Vec<u8>, String> {
1058 rmp_serde::to_vec(self).map_err(|e| format!("Failed to serialize DataTable: {}", e))
1061 }
1062
1063 pub fn from_parquet_bytes(bytes: &[u8]) -> Result<Self, String> {
1065 rmp_serde::from_slice(bytes).map_err(|e| format!("Failed to deserialize DataTable: {}", e))
1068 }
1069}
1070
1071fn json_value_to_data_value(json: &JsonValue) -> DataValue {
1073 match json {
1074 JsonValue::Null => DataValue::Null,
1075 JsonValue::Bool(b) => DataValue::Boolean(*b),
1076 JsonValue::Number(n) => {
1077 if let Some(i) = n.as_i64() {
1078 DataValue::Integer(i)
1079 } else if let Some(f) = n.as_f64() {
1080 DataValue::Float(f)
1081 } else {
1082 DataValue::String(n.to_string())
1083 }
1084 }
1085 JsonValue::String(s) => {
1086 if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
1088 DataValue::DateTime(s.clone())
1090 } else {
1091 DataValue::String(s.clone())
1092 }
1093 }
1094 JsonValue::Array(_) | JsonValue::Object(_) => {
1095 DataValue::String(json.to_string())
1097 }
1098 }
1099}
1100
/// Summary figures returned by `DataTable::get_stats`.
#[derive(Debug, Clone)]
pub struct DataTableStats {
    /// Number of rows in the table.
    pub row_count: usize,
    /// Number of columns in the table.
    pub column_count: usize,
    /// Estimated size in bytes, as computed by `DataTable::estimate_memory_size`.
    pub memory_size: usize,
    /// Sum of the `null_count` fields of all columns.
    pub null_count: usize,
}
1109
1110impl DataProvider for DataTable {
1113 fn get_row(&self, index: usize) -> Option<Vec<String>> {
1114 self.rows.get(index).map(|row| {
1115 row.values
1116 .iter()
1117 .map(DataValue::to_string_optimized)
1118 .collect()
1119 })
1120 }
1121
1122 fn get_column_names(&self) -> Vec<String> {
1123 self.column_names()
1124 }
1125
1126 fn get_row_count(&self) -> usize {
1127 self.row_count()
1128 }
1129
1130 fn get_column_count(&self) -> usize {
1131 self.column_count()
1132 }
1133}
1134
// Unit tests for type inference, table construction and JSON conversion.
#[cfg(test)]
mod tests {
    use super::*;

    // String classification: numbers, booleans, text, empty input and dates.
    #[test]
    fn test_data_type_inference() {
        assert_eq!(DataType::infer_from_string("123"), DataType::Integer);
        assert_eq!(DataType::infer_from_string("123.45"), DataType::Float);
        assert_eq!(DataType::infer_from_string("true"), DataType::Boolean);
        assert_eq!(DataType::infer_from_string("hello"), DataType::String);
        assert_eq!(DataType::infer_from_string(""), DataType::Null);
        assert_eq!(
            DataType::infer_from_string("2024-01-01"),
            DataType::DateTime
        );
    }

    // Columns and rows can be added and a cell read back by column name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        table.add_column(DataColumn::new("id").with_type(DataType::Integer));
        table.add_column(DataColumn::new("name").with_type(DataType::String));
        table.add_column(DataColumn::new("active").with_type(DataType::Boolean));

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let row = DataRow::new(vec![
            DataValue::Integer(1),
            DataValue::String("Alice".to_string()),
            DataValue::Boolean(true),
        ]);

        table.add_row(row).unwrap();
        assert_eq!(table.row_count(), 1);

        let value = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(value.to_string(), "Alice");
    }

    // Integer + Float + Null in one column widens to Float and counts one null.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");

        table.add_column(DataColumn::new("mixed"));

        table
            .add_row(DataRow::new(vec![DataValue::Integer(1)]))
            .unwrap();
        table
            .add_row(DataRow::new(vec![DataValue::Float(2.5)]))
            .unwrap();
        table.add_row(DataRow::new(vec![DataValue::Null])).unwrap();

        table.infer_column_types();

        assert_eq!(table.columns[0].data_type, DataType::Float);
        assert_eq!(table.columns[0].null_count, 1);
        assert!(table.columns[0].nullable);
    }

    // Object-shaped JSON rows become typed cells; metadata is copied over.
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        let response = QueryResponse {
            query: QueryInfo {
                select: vec!["id".to_string(), "name".to_string(), "age".to_string()],
                where_clause: None,
                order_by: None,
            },
            data: vec![
                json!({
                    "id": 1,
                    "name": "Alice",
                    "age": 30
                }),
                json!({
                    "id": 2,
                    "name": "Bob",
                    "age": 25
                }),
                json!({
                    "id": 3,
                    "name": "Carol",
                    "age": null
                }),
            ],
            count: 3,
            source: Some("test.csv".to_string()),
            table: Some("test".to_string()),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        // Only membership is checked, not the order of the column names.
        let col_names = table.column_names();
        assert!(col_names.contains(&"id".to_string()));
        assert!(col_names.contains(&"name".to_string()));
        assert!(col_names.contains(&"age".to_string()));

        assert_eq!(table.metadata.get("source"), Some(&"test.csv".to_string()));
        assert_eq!(table.metadata.get("cached"), Some(&"false".to_string()));

        assert_eq!(
            table.get_value_by_name(0, "id"),
            Some(&DataValue::Integer(1))
        );
        assert_eq!(
            table.get_value_by_name(0, "name"),
            Some(&DataValue::String("Alice".to_string()))
        );
        assert_eq!(
            table.get_value_by_name(0, "age"),
            Some(&DataValue::Integer(30))
        );

        // A JSON null stays a Null cell.
        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}