sql_cli/data/
datatable.rs

1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::{Deserialize, Serialize};
5use serde_json::Value as JsonValue;
6use std::collections::HashMap;
7use std::fmt;
8use std::sync::Arc;
9use tracing::debug;
10
11/// Represents the data type of a column
12#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
13pub enum DataType {
14    String,
15    Integer,
16    Float,
17    Boolean,
18    DateTime,
19    Null,
20    Mixed, // For columns with mixed types
21}
22
23impl DataType {
24    /// Infer type from a string value
25    #[must_use]
26    pub fn infer_from_string(value: &str) -> Self {
27        // Handle explicit null string
28        if value.eq_ignore_ascii_case("null") {
29            return DataType::Null;
30        }
31
32        // Use the shared type inference logic
33        match TypeInference::infer_from_string(value) {
34            InferredType::Null => DataType::Null,
35            InferredType::Boolean => DataType::Boolean,
36            InferredType::Integer => DataType::Integer,
37            InferredType::Float => DataType::Float,
38            InferredType::DateTime => DataType::DateTime,
39            InferredType::String => DataType::String,
40        }
41    }
42
43    /// Check if a string looks like a datetime value
44    /// Delegates to shared type inference logic
45    fn looks_like_datetime(value: &str) -> bool {
46        TypeInference::looks_like_datetime(value)
47    }
48
49    /// Merge two types (for columns with mixed types)
50    #[must_use]
51    pub fn merge(&self, other: &DataType) -> DataType {
52        if self == other {
53            return self.clone();
54        }
55
56        match (self, other) {
57            (DataType::Null, t) | (t, DataType::Null) => t.clone(),
58            (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
59                DataType::Float
60            }
61            _ => DataType::Mixed,
62        }
63    }
64}
65
66/// Column metadata and definition
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct DataColumn {
69    pub name: String,
70    pub data_type: DataType,
71    pub nullable: bool,
72    pub unique_values: Option<usize>,
73    pub null_count: usize,
74    pub metadata: HashMap<String, String>,
75}
76
77impl DataColumn {
78    pub fn new(name: impl Into<String>) -> Self {
79        Self {
80            name: name.into(),
81            data_type: DataType::String,
82            nullable: true,
83            unique_values: None,
84            null_count: 0,
85            metadata: HashMap::new(),
86        }
87    }
88
89    #[must_use]
90    pub fn with_type(mut self, data_type: DataType) -> Self {
91        self.data_type = data_type;
92        self
93    }
94
95    #[must_use]
96    pub fn with_nullable(mut self, nullable: bool) -> Self {
97        self.nullable = nullable;
98        self
99    }
100}
101
/// A single cell value in the table.
///
/// Equality and ordering are derived, so values of different variants are
/// never equal (e.g. `String("a") != InternedString("a")`) and floats compare
/// with IEEE semantics (`0.0 == -0.0`, `NaN != NaN`).
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    /// Owned text.
    String(String),
    /// Shared text, for strings repeated many times (e.g. status, trader names).
    InternedString(Arc<String>),
    /// Signed 64-bit integer.
    Integer(i64),
    /// 64-bit floating-point number.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Date/time kept as its textual (ISO 8601) form for now.
    DateTime(String),
    /// Missing value.
    Null,
}

/// Manual `Hash` because `f64` does not implement it.
///
/// Each variant feeds a distinct one-byte tag followed by its payload, so the
/// hash is consistent with the derived `PartialEq`: values that compare equal
/// always hash equally.
impl std::hash::Hash for DataValue {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Bring the trait into scope locally so the `.hash(..)` calls below do
        // not depend on a module-level import.
        use std::hash::Hash as _;

        match self {
            DataValue::String(s) => {
                0u8.hash(state);
                s.hash(state);
            }
            DataValue::InternedString(s) => {
                1u8.hash(state);
                s.hash(state);
            }
            DataValue::Integer(i) => {
                2u8.hash(state);
                i.hash(state);
            }
            DataValue::Float(f) => {
                3u8.hash(state);
                // `0.0 == -0.0` under `PartialEq`, but their bit patterns
                // differ; collapse both zeros to `+0.0` so equal values hash
                // equally. All other floats hash by their exact bits.
                let canonical = if *f == 0.0 { 0.0_f64 } else { *f };
                canonical.to_bits().hash(state);
            }
            DataValue::Boolean(b) => {
                4u8.hash(state);
                b.hash(state);
            }
            DataValue::DateTime(dt) => {
                5u8.hash(state);
                dt.hash(state);
            }
            DataValue::Null => {
                6u8.hash(state);
            }
        }
    }
}

// Marker impl so `DataValue` can be used as a hash-map / hash-set key.
// NOTE: `Float(NaN)` is not equal to itself under the derived `PartialEq`, so
// NaN values are not reflexive; avoid relying on them as keys.
impl Eq for DataValue {}
152
153impl DataValue {
154    pub fn from_string(s: &str, data_type: &DataType) -> Self {
155        if s.is_empty() || s.eq_ignore_ascii_case("null") {
156            return DataValue::Null;
157        }
158
159        match data_type {
160            DataType::String => DataValue::String(s.to_string()),
161            DataType::Integer => s
162                .parse::<i64>()
163                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Integer),
164            DataType::Float => s
165                .parse::<f64>()
166                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
167            DataType::Boolean => {
168                let lower = s.to_lowercase();
169                DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
170            }
171            DataType::DateTime => DataValue::DateTime(s.to_string()),
172            DataType::Null => DataValue::Null,
173            DataType::Mixed => {
174                // Try to infer for mixed columns
175                let inferred = DataType::infer_from_string(s);
176                Self::from_string(s, &inferred)
177            }
178        }
179    }
180
181    #[must_use]
182    pub fn is_null(&self) -> bool {
183        matches!(self, DataValue::Null)
184    }
185
186    #[must_use]
187    pub fn data_type(&self) -> DataType {
188        match self {
189            DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
190            DataValue::Integer(_) => DataType::Integer,
191            DataValue::Float(_) => DataType::Float,
192            DataValue::Boolean(_) => DataType::Boolean,
193            DataValue::DateTime(_) => DataType::DateTime,
194            DataValue::Null => DataType::Null,
195        }
196    }
197
198    /// Get string representation without allocation when possible
199    /// Returns owned String for compatibility but tries to reuse existing strings
200    #[must_use]
201    pub fn to_string_optimized(&self) -> String {
202        match self {
203            DataValue::String(s) => s.clone(), // Clone existing string
204            DataValue::InternedString(s) => s.as_ref().clone(), // Clone from Rc
205            DataValue::DateTime(s) => s.clone(), // Clone existing string
206            DataValue::Integer(i) => i.to_string(),
207            DataValue::Float(f) => f.to_string(),
208            DataValue::Boolean(b) => {
209                if *b {
210                    "true".to_string()
211                } else {
212                    "false".to_string()
213                }
214            }
215            DataValue::Null => String::new(), // Empty string, minimal allocation
216        }
217    }
218}
219
220impl fmt::Display for DataValue {
221    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
222        match self {
223            DataValue::String(s) => write!(f, "{s}"),
224            DataValue::InternedString(s) => write!(f, "{s}"),
225            DataValue::Integer(i) => write!(f, "{i}"),
226            DataValue::Float(fl) => write!(f, "{fl}"),
227            DataValue::Boolean(b) => write!(f, "{b}"),
228            DataValue::DateTime(dt) => write!(f, "{dt}"),
229            DataValue::Null => write!(f, ""),
230        }
231    }
232}
233
234/// A row of data in the table
235#[derive(Debug, Clone)]
236pub struct DataRow {
237    pub values: Vec<DataValue>,
238}
239
240impl DataRow {
241    #[must_use]
242    pub fn new(values: Vec<DataValue>) -> Self {
243        Self { values }
244    }
245
246    #[must_use]
247    pub fn get(&self, index: usize) -> Option<&DataValue> {
248        self.values.get(index)
249    }
250
251    pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
252        self.values.get_mut(index)
253    }
254
255    #[must_use]
256    pub fn len(&self) -> usize {
257        self.values.len()
258    }
259
260    #[must_use]
261    pub fn is_empty(&self) -> bool {
262        self.values.is_empty()
263    }
264}
265
266/// The main `DataTable` structure
267#[derive(Debug, Clone)]
268pub struct DataTable {
269    pub name: String,
270    pub columns: Vec<DataColumn>,
271    pub rows: Vec<DataRow>,
272    pub metadata: HashMap<String, String>,
273}
274
275impl DataTable {
276    pub fn new(name: impl Into<String>) -> Self {
277        Self {
278            name: name.into(),
279            columns: Vec::new(),
280            rows: Vec::new(),
281            metadata: HashMap::new(),
282        }
283    }
284
285    /// Create a DUAL table (similar to Oracle's DUAL) with one row and one column
286    /// Used for evaluating expressions without a data source
287    #[must_use]
288    pub fn dual() -> Self {
289        let mut table = DataTable::new("DUAL");
290        table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
291        table
292            .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
293            .unwrap();
294        table
295    }
296
297    pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
298        self.columns.push(column);
299        self
300    }
301
302    pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
303        if row.len() != self.columns.len() {
304            return Err(format!(
305                "Row has {} values but table has {} columns",
306                row.len(),
307                self.columns.len()
308            ));
309        }
310        self.rows.push(row);
311        Ok(())
312    }
313
314    #[must_use]
315    pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
316        self.columns.iter().find(|c| c.name == name)
317    }
318
319    #[must_use]
320    pub fn get_column_index(&self, name: &str) -> Option<usize> {
321        self.columns.iter().position(|c| c.name == name)
322    }
323
324    #[must_use]
325    pub fn column_count(&self) -> usize {
326        self.columns.len()
327    }
328
329    #[must_use]
330    pub fn row_count(&self) -> usize {
331        self.rows.len()
332    }
333
334    #[must_use]
335    pub fn is_empty(&self) -> bool {
336        self.rows.is_empty()
337    }
338
339    /// Get column names as a vector
340    #[must_use]
341    pub fn column_names(&self) -> Vec<String> {
342        self.columns.iter().map(|c| c.name.clone()).collect()
343    }
344
345    /// Infer and update column types based on data
346    pub fn infer_column_types(&mut self) {
347        for (col_idx, column) in self.columns.iter_mut().enumerate() {
348            let mut inferred_type = DataType::Null;
349            let mut null_count = 0;
350            let mut unique_values = std::collections::HashSet::new();
351
352            for row in &self.rows {
353                if let Some(value) = row.get(col_idx) {
354                    if value.is_null() {
355                        null_count += 1;
356                    } else {
357                        let value_type = value.data_type();
358                        inferred_type = inferred_type.merge(&value_type);
359                        unique_values.insert(value.to_string());
360                    }
361                }
362            }
363
364            column.data_type = inferred_type;
365            column.null_count = null_count;
366            column.nullable = null_count > 0;
367            column.unique_values = Some(unique_values.len());
368        }
369    }
370
371    /// Get a value at specific row and column
372    #[must_use]
373    pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
374        self.rows.get(row)?.get(col)
375    }
376
377    /// Get a value by row index and column name
378    #[must_use]
379    pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
380        let col_idx = self.get_column_index(col_name)?;
381        self.get_value(row, col_idx)
382    }
383
384    /// Convert to a vector of string vectors (for display/compatibility)
385    #[must_use]
386    pub fn to_string_table(&self) -> Vec<Vec<String>> {
387        self.rows
388            .iter()
389            .map(|row| {
390                row.values
391                    .iter()
392                    .map(DataValue::to_string_optimized)
393                    .collect()
394            })
395            .collect()
396    }
397
398    /// Get table statistics
399    #[must_use]
400    pub fn get_stats(&self) -> DataTableStats {
401        DataTableStats {
402            row_count: self.row_count(),
403            column_count: self.column_count(),
404            memory_size: self.estimate_memory_size(),
405            null_count: self.columns.iter().map(|c| c.null_count).sum(),
406        }
407    }
408
409    /// Generate a debug dump string for display
410    #[must_use]
411    pub fn debug_dump(&self) -> String {
412        let mut output = String::new();
413
414        output.push_str(&format!("DataTable: {}\n", self.name));
415        output.push_str(&format!(
416            "Rows: {} | Columns: {}\n",
417            self.row_count(),
418            self.column_count()
419        ));
420
421        if !self.metadata.is_empty() {
422            output.push_str("Metadata:\n");
423            for (key, value) in &self.metadata {
424                output.push_str(&format!("  {key}: {value}\n"));
425            }
426        }
427
428        output.push_str("\nColumns:\n");
429        for column in &self.columns {
430            output.push_str(&format!("  {} ({:?})", column.name, column.data_type));
431            if column.nullable {
432                output.push_str(&format!(" - nullable, {} nulls", column.null_count));
433            }
434            if let Some(unique) = column.unique_values {
435                output.push_str(&format!(", {unique} unique"));
436            }
437            output.push('\n');
438        }
439
440        // Show first few rows
441        if self.row_count() > 0 {
442            let sample_size = 5.min(self.row_count());
443            output.push_str(&format!("\nFirst {sample_size} rows:\n"));
444
445            for row_idx in 0..sample_size {
446                output.push_str(&format!("  [{row_idx}]: "));
447                for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
448                    if col_idx > 0 {
449                        output.push_str(", ");
450                    }
451                    output.push_str(&value.to_string());
452                }
453                output.push('\n');
454            }
455        }
456
457        output
458    }
459
460    #[must_use]
461    pub fn estimate_memory_size(&self) -> usize {
462        // Base structure size
463        let mut size = std::mem::size_of::<Self>();
464
465        // Column metadata
466        size += self.columns.len() * std::mem::size_of::<DataColumn>();
467        for col in &self.columns {
468            size += col.name.len();
469        }
470
471        // Row structure overhead
472        size += self.rows.len() * std::mem::size_of::<DataRow>();
473
474        // Actual data values
475        for row in &self.rows {
476            for value in &row.values {
477                // Base enum size
478                size += std::mem::size_of::<DataValue>();
479                // Add string content size
480                match value {
481                    DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
482                    _ => {} // Numbers and booleans are inline
483                }
484            }
485        }
486
487        size
488    }
489
490    /// Convert DataTable to CSV format
491    pub fn to_csv(&self) -> String {
492        let mut csv_output = String::new();
493
494        // Write headers
495        let headers: Vec<String> = self
496            .columns
497            .iter()
498            .map(|col| {
499                if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
500                    format!("\"{}\"", col.name.replace('"', "\"\""))
501                } else {
502                    col.name.clone()
503                }
504            })
505            .collect();
506        csv_output.push_str(&headers.join(","));
507        csv_output.push('\n');
508
509        // Write data rows
510        for row in &self.rows {
511            let row_values: Vec<String> = row
512                .values
513                .iter()
514                .map(|value| {
515                    let str_val = value.to_string();
516                    if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
517                        format!("\"{}\"", str_val.replace('"', "\"\""))
518                    } else {
519                        str_val
520                    }
521                })
522                .collect();
523            csv_output.push_str(&row_values.join(","));
524            csv_output.push('\n');
525        }
526
527        csv_output
528    }
529
530    /// V46: Create `DataTable` from `QueryResponse`
531    /// This is the key conversion function that bridges old and new systems
532    pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
533        debug!(
534            "V46: Converting QueryResponse to DataTable for table '{}'",
535            table_name
536        );
537
538        // Track memory before conversion
539        crate::utils::memory_tracker::track_memory("start_from_query_response");
540
541        let mut table = DataTable::new(table_name);
542
543        // Extract column names and types from first row
544        if let Some(first_row) = response.data.first() {
545            if let Some(obj) = first_row.as_object() {
546                // Create columns based on the keys in the JSON object
547                for key in obj.keys() {
548                    let column = DataColumn::new(key.clone());
549                    table.add_column(column);
550                }
551
552                // Now convert all rows
553                for json_row in &response.data {
554                    if let Some(row_obj) = json_row.as_object() {
555                        let mut values = Vec::new();
556
557                        // Ensure we get values in the same order as columns
558                        for column in &table.columns {
559                            let value = row_obj
560                                .get(&column.name)
561                                .map_or(DataValue::Null, json_value_to_data_value);
562                            values.push(value);
563                        }
564
565                        table.add_row(DataRow::new(values))?;
566                    }
567                }
568
569                // Infer column types from the data
570                table.infer_column_types();
571
572                // Add metadata
573                if let Some(source) = &response.source {
574                    table.metadata.insert("source".to_string(), source.clone());
575                }
576                if let Some(cached) = response.cached {
577                    table
578                        .metadata
579                        .insert("cached".to_string(), cached.to_string());
580                }
581                table
582                    .metadata
583                    .insert("original_count".to_string(), response.count.to_string());
584
585                debug!(
586                    "V46: Created DataTable with {} columns and {} rows",
587                    table.column_count(),
588                    table.row_count()
589                );
590            } else {
591                // Handle non-object JSON (single values)
592                table.add_column(DataColumn::new("value"));
593                for json_value in &response.data {
594                    let value = json_value_to_data_value(json_value);
595                    table.add_row(DataRow::new(vec![value]))?;
596                }
597            }
598        }
599
600        Ok(table)
601    }
602
603    /// Get a single row by index
604    #[must_use]
605    pub fn get_row(&self, index: usize) -> Option<&DataRow> {
606        self.rows.get(index)
607    }
608
609    /// V50: Get a single row as strings
610    #[must_use]
611    pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
612        self.rows.get(index).map(|row| {
613            row.values
614                .iter()
615                .map(DataValue::to_string_optimized)
616                .collect()
617        })
618    }
619
620    /// Pretty print the `DataTable` with a nice box drawing
621    #[must_use]
622    pub fn pretty_print(&self) -> String {
623        let mut output = String::new();
624
625        // Header
626        output.push_str("╔═══════════════════════════════════════════════════════╗\n");
627        output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
628        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
629
630        // Summary stats
631        output.push_str(&format!(
632            "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
633            self.row_count(),
634            self.column_count(),
635            self.get_stats().memory_size
636        ));
637
638        // Metadata if any
639        if !self.metadata.is_empty() {
640            output.push_str("╠═══════════════════════════════════════════════════════╣\n");
641            output.push_str("║ Metadata:                                             ║\n");
642            for (key, value) in &self.metadata {
643                let truncated_value = if value.len() > 35 {
644                    format!("{}...", &value[..32])
645                } else {
646                    value.clone()
647                };
648                output.push_str(&format!(
649                    "║   {:15} : {:35} ║\n",
650                    Self::truncate_string(key, 15),
651                    truncated_value
652                ));
653            }
654        }
655
656        // Column details
657        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
658        output.push_str("║ Columns:                                              ║\n");
659        output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
660        output.push_str("║ Name              │ Type     │ Nullable│ Nulls│Unique║\n");
661        output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
662
663        for column in &self.columns {
664            let type_str = match &column.data_type {
665                DataType::String => "String",
666                DataType::Integer => "Integer",
667                DataType::Float => "Float",
668                DataType::Boolean => "Boolean",
669                DataType::DateTime => "DateTime",
670                DataType::Null => "Null",
671                DataType::Mixed => "Mixed",
672            };
673
674            output.push_str(&format!(
675                "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
676                Self::truncate_string(&column.name, 17),
677                type_str,
678                if column.nullable { "Yes" } else { "No" },
679                column.null_count,
680                column.unique_values.unwrap_or(0)
681            ));
682        }
683
684        output.push_str("╚═══════════════════════════════════════════════════════╝\n");
685
686        // Sample data (first 5 rows)
687        output.push_str("\nSample Data (first 5 rows):\n");
688        let sample_count = self.rows.len().min(5);
689
690        if sample_count > 0 {
691            // Column headers
692            output.push('┌');
693            for (i, _col) in self.columns.iter().enumerate() {
694                if i > 0 {
695                    output.push('┬');
696                }
697                output.push_str(&"─".repeat(20));
698            }
699            output.push_str("┐\n");
700
701            output.push('│');
702            for col in &self.columns {
703                output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
704            }
705            output.push('\n');
706
707            output.push('├');
708            for (i, _) in self.columns.iter().enumerate() {
709                if i > 0 {
710                    output.push('┼');
711                }
712                output.push_str(&"─".repeat(20));
713            }
714            output.push_str("┤\n");
715
716            // Data rows
717            for row_idx in 0..sample_count {
718                if let Some(row) = self.rows.get(row_idx) {
719                    output.push('│');
720                    for value in &row.values {
721                        let value_str = value.to_string();
722                        output
723                            .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
724                    }
725                    output.push('\n');
726                }
727            }
728
729            output.push('└');
730            for (i, _) in self.columns.iter().enumerate() {
731                if i > 0 {
732                    output.push('┴');
733                }
734                output.push_str(&"─".repeat(20));
735            }
736            output.push_str("┘\n");
737        }
738
739        output
740    }
741
742    fn truncate_string(s: &str, max_len: usize) -> String {
743        if s.len() > max_len {
744            format!("{}...", &s[..max_len - 3])
745        } else {
746            s.to_string()
747        }
748    }
749
750    /// Get a schema summary of the `DataTable`
751    #[must_use]
752    pub fn get_schema_summary(&self) -> String {
753        let mut summary = String::new();
754        summary.push_str(&format!(
755            "DataTable Schema ({} columns, {} rows):\n",
756            self.columns.len(),
757            self.rows.len()
758        ));
759
760        for (idx, column) in self.columns.iter().enumerate() {
761            let type_str = match &column.data_type {
762                DataType::String => "String",
763                DataType::Integer => "Integer",
764                DataType::Float => "Float",
765                DataType::Boolean => "Boolean",
766                DataType::DateTime => "DateTime",
767                DataType::Null => "Null",
768                DataType::Mixed => "Mixed",
769            };
770
771            let nullable_str = if column.nullable {
772                "nullable"
773            } else {
774                "not null"
775            };
776            let null_info = if column.null_count > 0 {
777                format!(", {} nulls", column.null_count)
778            } else {
779                String::new()
780            };
781
782            summary.push_str(&format!(
783                "  [{:3}] {} : {} ({}{})\n",
784                idx, column.name, type_str, nullable_str, null_info
785            ));
786        }
787
788        summary
789    }
790
791    /// Get detailed schema information as a structured format
792    #[must_use]
793    pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
794        self.columns
795            .iter()
796            .map(|col| {
797                let type_name = format!("{:?}", col.data_type);
798                (col.name.clone(), type_name, col.nullable, col.null_count)
799            })
800            .collect()
801    }
802
803    /// Reserve capacity for rows to avoid reallocations
804    pub fn reserve_rows(&mut self, additional: usize) {
805        self.rows.reserve(additional);
806    }
807
808    /// Shrink vectors to fit actual data (removes excess capacity)
809    pub fn shrink_to_fit(&mut self) {
810        self.rows.shrink_to_fit();
811        for _column in &mut self.columns {
812            // Shrink any column-specific data if needed
813        }
814    }
815
816    /// Get actual memory usage estimate (more accurate than `estimate_memory_size`)
817    #[must_use]
818    pub fn get_memory_usage(&self) -> usize {
819        let mut size = std::mem::size_of::<Self>();
820
821        // Account for string allocations
822        size += self.name.capacity();
823
824        // Account for columns
825        size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
826        for col in &self.columns {
827            size += col.name.capacity();
828        }
829
830        // Account for rows and their capacity
831        size += self.rows.capacity() * std::mem::size_of::<DataRow>();
832
833        // Account for actual data values
834        for row in &self.rows {
835            size += row.values.capacity() * std::mem::size_of::<DataValue>();
836            for value in &row.values {
837                match value {
838                    DataValue::String(s) => size += s.capacity(),
839                    DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
840                    DataValue::DateTime(s) => size += s.capacity(),
841                    _ => {} // Other types are inline
842                }
843            }
844        }
845
846        // Account for metadata
847        size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
848        for (k, v) in &self.metadata {
849            size += k.capacity() + v.capacity();
850        }
851
852        size
853    }
854}
855
856/// V46: Helper function to convert JSON value to `DataValue`
857fn json_value_to_data_value(json: &JsonValue) -> DataValue {
858    match json {
859        JsonValue::Null => DataValue::Null,
860        JsonValue::Bool(b) => DataValue::Boolean(*b),
861        JsonValue::Number(n) => {
862            if let Some(i) = n.as_i64() {
863                DataValue::Integer(i)
864            } else if let Some(f) = n.as_f64() {
865                DataValue::Float(f)
866            } else {
867                DataValue::String(n.to_string())
868            }
869        }
870        JsonValue::String(s) => {
871            // Try to detect if it's a date/time
872            if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
873                // Simple heuristic for dates
874                DataValue::DateTime(s.clone())
875            } else {
876                DataValue::String(s.clone())
877            }
878        }
879        JsonValue::Array(_) | JsonValue::Object(_) => {
880            // Store complex types as JSON string
881            DataValue::String(json.to_string())
882        }
883    }
884}
885
/// Size figures for a `DataTable`, as returned by `DataTable::get_stats`.
#[derive(Debug, Clone)]
pub struct DataTableStats {
    /// Number of rows in the table.
    pub row_count: usize,
    /// Number of columns in the table.
    pub column_count: usize,
    /// Estimated memory footprint in bytes.
    pub memory_size: usize,
    /// Sum of the per-column null counts.
    pub null_count: usize,
}
894
895/// Implementation of `DataProvider` for `DataTable`
896/// This allows `DataTable` to be used wherever `DataProvider` trait is expected
897impl DataProvider for DataTable {
898    fn get_row(&self, index: usize) -> Option<Vec<String>> {
899        self.rows.get(index).map(|row| {
900            row.values
901                .iter()
902                .map(DataValue::to_string_optimized)
903                .collect()
904        })
905    }
906
907    fn get_column_names(&self) -> Vec<String> {
908        self.column_names()
909    }
910
911    fn get_row_count(&self) -> usize {
912        self.row_count()
913    }
914
915    fn get_column_count(&self) -> usize {
916        self.column_count()
917    }
918}
919
#[cfg(test)]
mod tests {
    use super::*;

    /// `DataType::infer_from_string` maps representative literals to the expected type.
    #[test]
    fn test_data_type_inference() {
        let cases = [
            ("123", DataType::Integer),
            ("123.45", DataType::Float),
            ("true", DataType::Boolean),
            ("hello", DataType::String),
            ("", DataType::Null),
            ("2024-01-01", DataType::DateTime),
        ];

        for (input, expected) in &cases {
            assert_eq!(
                DataType::infer_from_string(input),
                *expected,
                "unexpected type inferred for {:?}",
                input
            );
        }
    }

    /// A table can be given typed columns, accept a row, and return a cell by column name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        let columns = vec![
            DataColumn::new("id").with_type(DataType::Integer),
            DataColumn::new("name").with_type(DataType::String),
            DataColumn::new("active").with_type(DataType::Boolean),
        ];
        for column in columns {
            table.add_column(column);
        }

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let alice = DataRow::new(vec![
            DataValue::Integer(1),
            DataValue::String("Alice".to_string()),
            DataValue::Boolean(true),
        ]);
        table.add_row(alice).unwrap();
        assert_eq!(table.row_count(), 1);

        let name_cell = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(name_cell.to_string(), "Alice");
    }

    /// Integer + Float + Null values in one untyped column infer as a nullable Float column.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");

        // The column starts without an explicit type
        table.add_column(DataColumn::new("mixed"));

        // One row per value, each of a different kind
        let values = vec![
            DataValue::Integer(1),
            DataValue::Float(2.5),
            DataValue::Null,
        ];
        for value in values {
            table.add_row(DataRow::new(vec![value])).unwrap();
        }

        table.infer_column_types();

        // Integer and Float together widen to Float; the Null is only counted
        let column = &table.columns[0];
        assert_eq!(column.data_type, DataType::Float);
        assert_eq!(column.null_count, 1);
        assert!(column.nullable);
    }

    /// A `QueryResponse` converts into a table with matching shape, metadata and cell values.
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        let query = QueryInfo {
            select: vec!["id".to_string(), "name".to_string(), "age".to_string()],
            where_clause: None,
            order_by: None,
        };
        let data = vec![
            json!({ "id": 1, "name": "Alice", "age": 30 }),
            json!({ "id": 2, "name": "Bob", "age": 25 }),
            json!({ "id": 3, "name": "Carol", "age": null }),
        ];
        let response = QueryResponse {
            query,
            data,
            count: 3,
            source: Some("test.csv".to_string()),
            table: Some("test".to_string()),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        // Every selected column must be present (order is not checked)
        let col_names = table.column_names();
        for expected in &["id", "name", "age"] {
            assert!(col_names.contains(&expected.to_string()));
        }

        // Response metadata is carried over as strings
        assert_eq!(
            table.metadata.get("source").map(String::as_str),
            Some("test.csv")
        );
        assert_eq!(
            table.metadata.get("cached").map(String::as_str),
            Some("false")
        );

        // First row values
        let first_row = [
            ("id", DataValue::Integer(1)),
            ("name", DataValue::String("Alice".to_string())),
            ("age", DataValue::Integer(30)),
        ];
        for (column, expected) in &first_row {
            assert_eq!(table.get_value_by_name(0, column), Some(expected));
        }

        // JSON null becomes DataValue::Null
        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}