sql_cli/data/
datatable.rs

1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::{Deserialize, Serialize};
5use serde_json::Value as JsonValue;
6use std::collections::HashMap;
7use std::fmt;
8use std::sync::Arc;
9use tracing::debug;
10
11/// Represents the data type of a column
12#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
13pub enum DataType {
14    String,
15    Integer,
16    Float,
17    Boolean,
18    DateTime,
19    Null,
20    Mixed, // For columns with mixed types
21}
22
23impl DataType {
24    /// Infer type from a string value
25    #[must_use]
26    pub fn infer_from_string(value: &str) -> Self {
27        // Handle explicit null string
28        if value.eq_ignore_ascii_case("null") {
29            return DataType::Null;
30        }
31
32        // Use the shared type inference logic
33        match TypeInference::infer_from_string(value) {
34            InferredType::Null => DataType::Null,
35            InferredType::Boolean => DataType::Boolean,
36            InferredType::Integer => DataType::Integer,
37            InferredType::Float => DataType::Float,
38            InferredType::DateTime => DataType::DateTime,
39            InferredType::String => DataType::String,
40        }
41    }
42
43    /// Check if a string looks like a datetime value
44    /// Delegates to shared type inference logic
45    fn looks_like_datetime(value: &str) -> bool {
46        TypeInference::looks_like_datetime(value)
47    }
48
49    /// Merge two types (for columns with mixed types)
50    #[must_use]
51    pub fn merge(&self, other: &DataType) -> DataType {
52        if self == other {
53            return self.clone();
54        }
55
56        match (self, other) {
57            (DataType::Null, t) | (t, DataType::Null) => t.clone(),
58            (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
59                DataType::Float
60            }
61            _ => DataType::Mixed,
62        }
63    }
64}
65
66/// Column metadata and definition
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct DataColumn {
69    pub name: String,
70    pub data_type: DataType,
71    pub nullable: bool,
72    pub unique_values: Option<usize>,
73    pub null_count: usize,
74    pub metadata: HashMap<String, String>,
75    /// Qualified name with table prefix (e.g., "messages.field_name")
76    pub qualified_name: Option<String>,
77    /// Source table or CTE name
78    pub source_table: Option<String>,
79}
80
81impl DataColumn {
82    pub fn new(name: impl Into<String>) -> Self {
83        Self {
84            name: name.into(),
85            data_type: DataType::String,
86            nullable: true,
87            unique_values: None,
88            null_count: 0,
89            metadata: HashMap::new(),
90            qualified_name: None,
91            source_table: None,
92        }
93    }
94
95    #[must_use]
96    pub fn with_type(mut self, data_type: DataType) -> Self {
97        self.data_type = data_type;
98        self
99    }
100
101    /// Set the qualified name (table.column format)
102    #[must_use]
103    pub fn with_qualified_name(mut self, table_name: &str) -> Self {
104        self.qualified_name = Some(format!("{}.{}", table_name, self.name));
105        self.source_table = Some(table_name.to_string());
106        self
107    }
108
109    /// Get the qualified name if available, otherwise return the simple name
110    pub fn get_qualified_or_simple_name(&self) -> &str {
111        self.qualified_name.as_deref().unwrap_or(&self.name)
112    }
113
114    #[must_use]
115    pub fn with_nullable(mut self, nullable: bool) -> Self {
116        self.nullable = nullable;
117        self
118    }
119}
120
/// A single cell value in the table.
///
/// Equality and ordering are derived, so two values of different variants are
/// never equal — for example `String("a")` and `InternedString("a")` are
/// distinct values.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    /// Owned text.
    String(String),
    /// Shared text, for strings that repeat often (e.g., status, trader names).
    InternedString(Arc<String>),
    /// Whole number.
    Integer(i64),
    /// Floating-point number.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Date/time kept as its textual form (ISO 8601 string for now).
    DateTime(String),
    /// Missing value.
    Null,
}

// `Hash` is written by hand because `f64` does not implement it.
impl std::hash::Hash for DataValue {
    /// Feed a per-variant tag followed by the payload into `state`.
    ///
    /// The tag keeps equal payloads held by different variants apart, which
    /// matches the derived `PartialEq` (different variants are never equal).
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        match self {
            DataValue::String(s) => {
                0u8.hash(state);
                s.hash(state);
            }
            DataValue::InternedString(s) => {
                1u8.hash(state);
                s.hash(state);
            }
            DataValue::Integer(i) => {
                2u8.hash(state);
                i.hash(state);
            }
            DataValue::Float(f) => {
                3u8.hash(state);
                // `0.0 == -0.0` under the derived `PartialEq`, but their bit
                // patterns differ. Collapse both zeros onto `+0.0` so that
                // equal values always produce equal hashes.
                let canonical = if *f == 0.0 { 0.0_f64 } else { *f };
                canonical.to_bits().hash(state);
            }
            DataValue::Boolean(b) => {
                4u8.hash(state);
                b.hash(state);
            }
            DataValue::DateTime(dt) => {
                5u8.hash(state);
                dt.hash(state);
            }
            DataValue::Null => {
                6u8.hash(state);
            }
        }
    }
}

// Marker impl so `DataValue` can be used as a hash-map / hash-set key.
// NOTE(review): `Float(NaN)` is not equal to itself under the derived
// `PartialEq`, so NaN cells do not satisfy the reflexivity `Eq` promises;
// avoid relying on NaN keys being found again.
impl Eq for DataValue {}
171
172impl DataValue {
173    pub fn from_string(s: &str, data_type: &DataType) -> Self {
174        if s.is_empty() || s.eq_ignore_ascii_case("null") {
175            return DataValue::Null;
176        }
177
178        match data_type {
179            DataType::String => DataValue::String(s.to_string()),
180            DataType::Integer => s
181                .parse::<i64>()
182                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Integer),
183            DataType::Float => s
184                .parse::<f64>()
185                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
186            DataType::Boolean => {
187                let lower = s.to_lowercase();
188                DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
189            }
190            DataType::DateTime => DataValue::DateTime(s.to_string()),
191            DataType::Null => DataValue::Null,
192            DataType::Mixed => {
193                // Try to infer for mixed columns
194                let inferred = DataType::infer_from_string(s);
195                Self::from_string(s, &inferred)
196            }
197        }
198    }
199
200    #[must_use]
201    pub fn is_null(&self) -> bool {
202        matches!(self, DataValue::Null)
203    }
204
205    #[must_use]
206    pub fn data_type(&self) -> DataType {
207        match self {
208            DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
209            DataValue::Integer(_) => DataType::Integer,
210            DataValue::Float(_) => DataType::Float,
211            DataValue::Boolean(_) => DataType::Boolean,
212            DataValue::DateTime(_) => DataType::DateTime,
213            DataValue::Null => DataType::Null,
214        }
215    }
216
217    /// Get string representation without allocation when possible
218    /// Returns owned String for compatibility but tries to reuse existing strings
219    #[must_use]
220    pub fn to_string_optimized(&self) -> String {
221        match self {
222            DataValue::String(s) => s.clone(), // Clone existing string
223            DataValue::InternedString(s) => s.as_ref().clone(), // Clone from Rc
224            DataValue::DateTime(s) => s.clone(), // Clone existing string
225            DataValue::Integer(i) => i.to_string(),
226            DataValue::Float(f) => f.to_string(),
227            DataValue::Boolean(b) => {
228                if *b {
229                    "true".to_string()
230                } else {
231                    "false".to_string()
232                }
233            }
234            DataValue::Null => String::new(), // Empty string, minimal allocation
235        }
236    }
237}
238
239impl fmt::Display for DataValue {
240    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
241        match self {
242            DataValue::String(s) => write!(f, "{s}"),
243            DataValue::InternedString(s) => write!(f, "{s}"),
244            DataValue::Integer(i) => write!(f, "{i}"),
245            DataValue::Float(fl) => write!(f, "{fl}"),
246            DataValue::Boolean(b) => write!(f, "{b}"),
247            DataValue::DateTime(dt) => write!(f, "{dt}"),
248            DataValue::Null => write!(f, ""),
249        }
250    }
251}
252
253/// A row of data in the table
254#[derive(Debug, Clone)]
255pub struct DataRow {
256    pub values: Vec<DataValue>,
257}
258
259impl DataRow {
260    #[must_use]
261    pub fn new(values: Vec<DataValue>) -> Self {
262        Self { values }
263    }
264
265    #[must_use]
266    pub fn get(&self, index: usize) -> Option<&DataValue> {
267        self.values.get(index)
268    }
269
270    pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
271        self.values.get_mut(index)
272    }
273
274    #[must_use]
275    pub fn len(&self) -> usize {
276        self.values.len()
277    }
278
279    #[must_use]
280    pub fn is_empty(&self) -> bool {
281        self.values.is_empty()
282    }
283}
284
/// The main `DataTable` structure: a named, row-oriented table.
///
/// Columns describe the schema and rows hold the cell values. Rows added
/// through `add_row` are required to carry exactly one value per column; the
/// fields are public, so direct mutation bypasses that check.
#[derive(Debug, Clone)]
pub struct DataTable {
    /// Table name.
    pub name: String,
    /// Column definitions, in display order.
    pub columns: Vec<DataColumn>,
    /// Row data; each row's values are indexed by column position.
    pub rows: Vec<DataRow>,
    /// Free-form key/value annotations about the table.
    pub metadata: HashMap<String, String>,
}
293
294impl DataTable {
295    pub fn new(name: impl Into<String>) -> Self {
296        Self {
297            name: name.into(),
298            columns: Vec::new(),
299            rows: Vec::new(),
300            metadata: HashMap::new(),
301        }
302    }
303
304    /// Create a DUAL table (similar to Oracle's DUAL) with one row and one column
305    /// Used for evaluating expressions without a data source
306    #[must_use]
307    pub fn dual() -> Self {
308        let mut table = DataTable::new("DUAL");
309        table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
310        table
311            .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
312            .unwrap();
313        table
314    }
315
316    pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
317        self.columns.push(column);
318        self
319    }
320
321    pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
322        if row.len() != self.columns.len() {
323            return Err(format!(
324                "Row has {} values but table has {} columns",
325                row.len(),
326                self.columns.len()
327            ));
328        }
329        self.rows.push(row);
330        Ok(())
331    }
332
333    #[must_use]
334    pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
335        self.columns.iter().find(|c| c.name == name)
336    }
337
338    #[must_use]
339    pub fn get_column_index(&self, name: &str) -> Option<usize> {
340        self.columns.iter().position(|c| c.name == name)
341    }
342
343    /// Find column index by qualified name (e.g., "messages.field_name")
344    #[must_use]
345    pub fn find_column_by_qualified_name(&self, qualified_name: &str) -> Option<usize> {
346        self.columns
347            .iter()
348            .position(|c| c.qualified_name.as_deref() == Some(qualified_name))
349    }
350
351    /// Find column by either qualified or simple name
352    /// First tries qualified match, then falls back to simple name
353    #[must_use]
354    pub fn find_column_flexible(&self, name: &str, table_prefix: Option<&str>) -> Option<usize> {
355        // If table prefix provided, try qualified match first
356        if let Some(prefix) = table_prefix {
357            let qualified = format!("{}.{}", prefix, name);
358            if let Some(idx) = self.find_column_by_qualified_name(&qualified) {
359                return Some(idx);
360            }
361        }
362
363        // Fall back to simple name match
364        self.get_column_index(name)
365    }
366
367    /// Enrich all columns with qualified names based on the table name
368    pub fn enrich_columns_with_qualified_names(&mut self, table_name: &str) {
369        for column in &mut self.columns {
370            column.qualified_name = Some(format!("{}.{}", table_name, column.name));
371            column.source_table = Some(table_name.to_string());
372        }
373    }
374
375    #[must_use]
376    pub fn column_count(&self) -> usize {
377        self.columns.len()
378    }
379
380    #[must_use]
381    pub fn row_count(&self) -> usize {
382        self.rows.len()
383    }
384
385    #[must_use]
386    pub fn is_empty(&self) -> bool {
387        self.rows.is_empty()
388    }
389
390    /// Get column names as a vector
391    #[must_use]
392    pub fn column_names(&self) -> Vec<String> {
393        self.columns.iter().map(|c| c.name.clone()).collect()
394    }
395
396    /// Get mutable access to columns for enrichment
397    pub fn columns_mut(&mut self) -> &mut [DataColumn] {
398        &mut self.columns
399    }
400
401    /// Infer and update column types based on data
402    pub fn infer_column_types(&mut self) {
403        for (col_idx, column) in self.columns.iter_mut().enumerate() {
404            let mut inferred_type = DataType::Null;
405            let mut null_count = 0;
406            let mut unique_values = std::collections::HashSet::new();
407
408            for row in &self.rows {
409                if let Some(value) = row.get(col_idx) {
410                    if value.is_null() {
411                        null_count += 1;
412                    } else {
413                        let value_type = value.data_type();
414                        inferred_type = inferred_type.merge(&value_type);
415                        unique_values.insert(value.to_string());
416                    }
417                }
418            }
419
420            column.data_type = inferred_type;
421            column.null_count = null_count;
422            column.nullable = null_count > 0;
423            column.unique_values = Some(unique_values.len());
424        }
425    }
426
427    /// Get a value at specific row and column
428    #[must_use]
429    pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
430        self.rows.get(row)?.get(col)
431    }
432
433    /// Get a value by row index and column name
434    #[must_use]
435    pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
436        let col_idx = self.get_column_index(col_name)?;
437        self.get_value(row, col_idx)
438    }
439
440    /// Convert to a vector of string vectors (for display/compatibility)
441    #[must_use]
442    pub fn to_string_table(&self) -> Vec<Vec<String>> {
443        self.rows
444            .iter()
445            .map(|row| {
446                row.values
447                    .iter()
448                    .map(DataValue::to_string_optimized)
449                    .collect()
450            })
451            .collect()
452    }
453
454    /// Get table statistics
455    #[must_use]
456    pub fn get_stats(&self) -> DataTableStats {
457        DataTableStats {
458            row_count: self.row_count(),
459            column_count: self.column_count(),
460            memory_size: self.estimate_memory_size(),
461            null_count: self.columns.iter().map(|c| c.null_count).sum(),
462        }
463    }
464
465    /// Generate a debug dump string for display
466    #[must_use]
467    pub fn debug_dump(&self) -> String {
468        let mut output = String::new();
469
470        output.push_str(&format!("DataTable: {}\n", self.name));
471        output.push_str(&format!(
472            "Rows: {} | Columns: {}\n",
473            self.row_count(),
474            self.column_count()
475        ));
476
477        if !self.metadata.is_empty() {
478            output.push_str("Metadata:\n");
479            for (key, value) in &self.metadata {
480                output.push_str(&format!("  {key}: {value}\n"));
481            }
482        }
483
484        output.push_str("\nColumns:\n");
485        for column in &self.columns {
486            output.push_str(&format!("  {} ({:?})", column.name, column.data_type));
487            if column.nullable {
488                output.push_str(&format!(" - nullable, {} nulls", column.null_count));
489            }
490            if let Some(unique) = column.unique_values {
491                output.push_str(&format!(", {unique} unique"));
492            }
493            output.push('\n');
494        }
495
496        // Show first few rows
497        if self.row_count() > 0 {
498            let sample_size = 5.min(self.row_count());
499            output.push_str(&format!("\nFirst {sample_size} rows:\n"));
500
501            for row_idx in 0..sample_size {
502                output.push_str(&format!("  [{row_idx}]: "));
503                for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
504                    if col_idx > 0 {
505                        output.push_str(", ");
506                    }
507                    output.push_str(&value.to_string());
508                }
509                output.push('\n');
510            }
511        }
512
513        output
514    }
515
516    #[must_use]
517    pub fn estimate_memory_size(&self) -> usize {
518        // Base structure size
519        let mut size = std::mem::size_of::<Self>();
520
521        // Column metadata
522        size += self.columns.len() * std::mem::size_of::<DataColumn>();
523        for col in &self.columns {
524            size += col.name.len();
525        }
526
527        // Row structure overhead
528        size += self.rows.len() * std::mem::size_of::<DataRow>();
529
530        // Actual data values
531        for row in &self.rows {
532            for value in &row.values {
533                // Base enum size
534                size += std::mem::size_of::<DataValue>();
535                // Add string content size
536                match value {
537                    DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
538                    _ => {} // Numbers and booleans are inline
539                }
540            }
541        }
542
543        size
544    }
545
546    /// Convert DataTable to CSV format
547    pub fn to_csv(&self) -> String {
548        let mut csv_output = String::new();
549
550        // Write headers
551        let headers: Vec<String> = self
552            .columns
553            .iter()
554            .map(|col| {
555                if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
556                    format!("\"{}\"", col.name.replace('"', "\"\""))
557                } else {
558                    col.name.clone()
559                }
560            })
561            .collect();
562        csv_output.push_str(&headers.join(","));
563        csv_output.push('\n');
564
565        // Write data rows
566        for row in &self.rows {
567            let row_values: Vec<String> = row
568                .values
569                .iter()
570                .map(|value| {
571                    let str_val = value.to_string();
572                    if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
573                        format!("\"{}\"", str_val.replace('"', "\"\""))
574                    } else {
575                        str_val
576                    }
577                })
578                .collect();
579            csv_output.push_str(&row_values.join(","));
580            csv_output.push('\n');
581        }
582
583        csv_output
584    }
585
586    /// V46: Create `DataTable` from `QueryResponse`
587    /// This is the key conversion function that bridges old and new systems
588    pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
589        debug!(
590            "V46: Converting QueryResponse to DataTable for table '{}'",
591            table_name
592        );
593
594        // Track memory before conversion
595        crate::utils::memory_tracker::track_memory("start_from_query_response");
596
597        let mut table = DataTable::new(table_name);
598
599        // Extract column names and types from first row
600        if let Some(first_row) = response.data.first() {
601            if let Some(obj) = first_row.as_object() {
602                // Create columns based on the keys in the JSON object
603                for key in obj.keys() {
604                    let column = DataColumn::new(key.clone());
605                    table.add_column(column);
606                }
607
608                // Now convert all rows
609                for json_row in &response.data {
610                    if let Some(row_obj) = json_row.as_object() {
611                        let mut values = Vec::new();
612
613                        // Ensure we get values in the same order as columns
614                        for column in &table.columns {
615                            let value = row_obj
616                                .get(&column.name)
617                                .map_or(DataValue::Null, json_value_to_data_value);
618                            values.push(value);
619                        }
620
621                        table.add_row(DataRow::new(values))?;
622                    }
623                }
624
625                // Infer column types from the data
626                table.infer_column_types();
627
628                // Add metadata
629                if let Some(source) = &response.source {
630                    table.metadata.insert("source".to_string(), source.clone());
631                }
632                if let Some(cached) = response.cached {
633                    table
634                        .metadata
635                        .insert("cached".to_string(), cached.to_string());
636                }
637                table
638                    .metadata
639                    .insert("original_count".to_string(), response.count.to_string());
640
641                debug!(
642                    "V46: Created DataTable with {} columns and {} rows",
643                    table.column_count(),
644                    table.row_count()
645                );
646            } else {
647                // Handle non-object JSON (single values)
648                table.add_column(DataColumn::new("value"));
649                for json_value in &response.data {
650                    let value = json_value_to_data_value(json_value);
651                    table.add_row(DataRow::new(vec![value]))?;
652                }
653            }
654        }
655
656        Ok(table)
657    }
658
659    /// Get a single row by index
660    #[must_use]
661    pub fn get_row(&self, index: usize) -> Option<&DataRow> {
662        self.rows.get(index)
663    }
664
665    /// V50: Get a single row as strings
666    #[must_use]
667    pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
668        self.rows.get(index).map(|row| {
669            row.values
670                .iter()
671                .map(DataValue::to_string_optimized)
672                .collect()
673        })
674    }
675
676    /// Pretty print the `DataTable` with a nice box drawing
677    #[must_use]
678    pub fn pretty_print(&self) -> String {
679        let mut output = String::new();
680
681        // Header
682        output.push_str("╔═══════════════════════════════════════════════════════╗\n");
683        output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
684        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
685
686        // Summary stats
687        output.push_str(&format!(
688            "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
689            self.row_count(),
690            self.column_count(),
691            self.get_stats().memory_size
692        ));
693
694        // Metadata if any
695        if !self.metadata.is_empty() {
696            output.push_str("╠═══════════════════════════════════════════════════════╣\n");
697            output.push_str("║ Metadata:                                             ║\n");
698            for (key, value) in &self.metadata {
699                let truncated_value = if value.len() > 35 {
700                    format!("{}...", &value[..32])
701                } else {
702                    value.clone()
703                };
704                output.push_str(&format!(
705                    "║   {:15} : {:35} ║\n",
706                    Self::truncate_string(key, 15),
707                    truncated_value
708                ));
709            }
710        }
711
712        // Column details
713        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
714        output.push_str("║ Columns:                                              ║\n");
715        output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
716        output.push_str("║ Name              │ Type     │ Nullable│ Nulls│Unique║\n");
717        output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
718
719        for column in &self.columns {
720            let type_str = match &column.data_type {
721                DataType::String => "String",
722                DataType::Integer => "Integer",
723                DataType::Float => "Float",
724                DataType::Boolean => "Boolean",
725                DataType::DateTime => "DateTime",
726                DataType::Null => "Null",
727                DataType::Mixed => "Mixed",
728            };
729
730            output.push_str(&format!(
731                "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
732                Self::truncate_string(&column.name, 17),
733                type_str,
734                if column.nullable { "Yes" } else { "No" },
735                column.null_count,
736                column.unique_values.unwrap_or(0)
737            ));
738        }
739
740        output.push_str("╚═══════════════════════════════════════════════════════╝\n");
741
742        // Sample data (first 5 rows)
743        output.push_str("\nSample Data (first 5 rows):\n");
744        let sample_count = self.rows.len().min(5);
745
746        if sample_count > 0 {
747            // Column headers
748            output.push('┌');
749            for (i, _col) in self.columns.iter().enumerate() {
750                if i > 0 {
751                    output.push('┬');
752                }
753                output.push_str(&"─".repeat(20));
754            }
755            output.push_str("┐\n");
756
757            output.push('│');
758            for col in &self.columns {
759                output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
760            }
761            output.push('\n');
762
763            output.push('├');
764            for (i, _) in self.columns.iter().enumerate() {
765                if i > 0 {
766                    output.push('┼');
767                }
768                output.push_str(&"─".repeat(20));
769            }
770            output.push_str("┤\n");
771
772            // Data rows
773            for row_idx in 0..sample_count {
774                if let Some(row) = self.rows.get(row_idx) {
775                    output.push('│');
776                    for value in &row.values {
777                        let value_str = value.to_string();
778                        output
779                            .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
780                    }
781                    output.push('\n');
782                }
783            }
784
785            output.push('└');
786            for (i, _) in self.columns.iter().enumerate() {
787                if i > 0 {
788                    output.push('┴');
789                }
790                output.push_str(&"─".repeat(20));
791            }
792            output.push_str("┘\n");
793        }
794
795        output
796    }
797
798    fn truncate_string(s: &str, max_len: usize) -> String {
799        if s.len() > max_len {
800            format!("{}...", &s[..max_len - 3])
801        } else {
802            s.to_string()
803        }
804    }
805
806    /// Get a schema summary of the `DataTable`
807    #[must_use]
808    pub fn get_schema_summary(&self) -> String {
809        let mut summary = String::new();
810        summary.push_str(&format!(
811            "DataTable Schema ({} columns, {} rows):\n",
812            self.columns.len(),
813            self.rows.len()
814        ));
815
816        for (idx, column) in self.columns.iter().enumerate() {
817            let type_str = match &column.data_type {
818                DataType::String => "String",
819                DataType::Integer => "Integer",
820                DataType::Float => "Float",
821                DataType::Boolean => "Boolean",
822                DataType::DateTime => "DateTime",
823                DataType::Null => "Null",
824                DataType::Mixed => "Mixed",
825            };
826
827            let nullable_str = if column.nullable {
828                "nullable"
829            } else {
830                "not null"
831            };
832            let null_info = if column.null_count > 0 {
833                format!(", {} nulls", column.null_count)
834            } else {
835                String::new()
836            };
837
838            summary.push_str(&format!(
839                "  [{:3}] {} : {} ({}{})\n",
840                idx, column.name, type_str, nullable_str, null_info
841            ));
842        }
843
844        summary
845    }
846
847    /// Get detailed schema information as a structured format
848    #[must_use]
849    pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
850        self.columns
851            .iter()
852            .map(|col| {
853                let type_name = format!("{:?}", col.data_type);
854                (col.name.clone(), type_name, col.nullable, col.null_count)
855            })
856            .collect()
857    }
858
859    /// Reserve capacity for rows to avoid reallocations
860    pub fn reserve_rows(&mut self, additional: usize) {
861        self.rows.reserve(additional);
862    }
863
864    /// Shrink vectors to fit actual data (removes excess capacity)
865    pub fn shrink_to_fit(&mut self) {
866        self.rows.shrink_to_fit();
867        for _column in &mut self.columns {
868            // Shrink any column-specific data if needed
869        }
870    }
871
872    /// Get actual memory usage estimate (more accurate than `estimate_memory_size`)
873    #[must_use]
874    pub fn get_memory_usage(&self) -> usize {
875        let mut size = std::mem::size_of::<Self>();
876
877        // Account for string allocations
878        size += self.name.capacity();
879
880        // Account for columns
881        size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
882        for col in &self.columns {
883            size += col.name.capacity();
884        }
885
886        // Account for rows and their capacity
887        size += self.rows.capacity() * std::mem::size_of::<DataRow>();
888
889        // Account for actual data values
890        for row in &self.rows {
891            size += row.values.capacity() * std::mem::size_of::<DataValue>();
892            for value in &row.values {
893                match value {
894                    DataValue::String(s) => size += s.capacity(),
895                    DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
896                    DataValue::DateTime(s) => size += s.capacity(),
897                    _ => {} // Other types are inline
898                }
899            }
900        }
901
902        // Account for metadata
903        size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
904        for (k, v) in &self.metadata {
905            size += k.capacity() + v.capacity();
906        }
907
908        size
909    }
910}
911
912/// V46: Helper function to convert JSON value to `DataValue`
913fn json_value_to_data_value(json: &JsonValue) -> DataValue {
914    match json {
915        JsonValue::Null => DataValue::Null,
916        JsonValue::Bool(b) => DataValue::Boolean(*b),
917        JsonValue::Number(n) => {
918            if let Some(i) = n.as_i64() {
919                DataValue::Integer(i)
920            } else if let Some(f) = n.as_f64() {
921                DataValue::Float(f)
922            } else {
923                DataValue::String(n.to_string())
924            }
925        }
926        JsonValue::String(s) => {
927            // Try to detect if it's a date/time
928            if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
929                // Simple heuristic for dates
930                DataValue::DateTime(s.clone())
931            } else {
932                DataValue::String(s.clone())
933            }
934        }
935        JsonValue::Array(_) | JsonValue::Object(_) => {
936            // Store complex types as JSON string
937            DataValue::String(json.to_string())
938        }
939    }
940}
941
/// Summary counters describing a `DataTable`.
#[derive(Clone, Debug)]
pub struct DataTableStats {
    /// Row count reported for the table.
    pub row_count: usize,
    /// Column count reported for the table.
    pub column_count: usize,
    /// Memory size figure for the table (presumably bytes; confirm against the producer).
    pub memory_size: usize,
    /// Null count reported for the table.
    pub null_count: usize,
}
950
951/// Implementation of `DataProvider` for `DataTable`
952/// This allows `DataTable` to be used wherever `DataProvider` trait is expected
953impl DataProvider for DataTable {
954    fn get_row(&self, index: usize) -> Option<Vec<String>> {
955        self.rows.get(index).map(|row| {
956            row.values
957                .iter()
958                .map(DataValue::to_string_optimized)
959                .collect()
960        })
961    }
962
963    fn get_column_names(&self) -> Vec<String> {
964        self.column_names()
965    }
966
967    fn get_row_count(&self) -> usize {
968        self.row_count()
969    }
970
971    fn get_column_count(&self) -> usize {
972        self.column_count()
973    }
974}
975
#[cfg(test)]
mod tests {
    use super::*;

    /// Each sample literal is inferred as the expected `DataType`.
    #[test]
    fn test_data_type_inference() {
        let cases = [
            ("123", DataType::Integer),
            ("123.45", DataType::Float),
            ("true", DataType::Boolean),
            ("hello", DataType::String),
            ("", DataType::Null),
            ("2024-01-01", DataType::DateTime),
        ];

        for (input, expected) in cases {
            assert_eq!(DataType::infer_from_string(input), expected);
        }
    }

    /// Columns and a row can be added, then read back by column name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        let typed_columns = [
            ("id", DataType::Integer),
            ("name", DataType::String),
            ("active", DataType::Boolean),
        ];
        for (column_name, column_type) in typed_columns {
            table.add_column(DataColumn::new(column_name).with_type(column_type));
        }

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let alice = DataRow::new(vec![
            DataValue::Integer(1),
            DataValue::String(String::from("Alice")),
            DataValue::Boolean(true),
        ]);
        table.add_row(alice).unwrap();
        assert_eq!(table.row_count(), 1);

        let name_cell = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(name_cell.to_string(), "Alice");
    }

    /// An untyped column holding an integer, a float and a null is inferred
    /// as a nullable `Float` column with one null.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");

        // Column is added without an explicit type
        table.add_column(DataColumn::new("mixed"));

        // One row per differently-typed value
        for cell in [DataValue::Integer(1), DataValue::Float(2.5), DataValue::Null] {
            table.add_row(DataRow::new(vec![cell])).unwrap();
        }

        table.infer_column_types();

        // Integer + Float should widen to Float
        let column = &table.columns[0];
        assert_eq!(column.data_type, DataType::Float);
        assert_eq!(column.null_count, 1);
        assert!(column.nullable);
    }

    /// A `QueryResponse` converts into a table with matching shape,
    /// column names, metadata and cell values.
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        let query = QueryInfo {
            select: ["id", "name", "age"]
                .iter()
                .map(|column| column.to_string())
                .collect(),
            where_clause: None,
            order_by: None,
        };
        let records = vec![
            json!({ "id": 1, "name": "Alice", "age": 30 }),
            json!({ "id": 2, "name": "Bob", "age": 25 }),
            json!({ "id": 3, "name": "Carol", "age": null }),
        ];
        let response = QueryResponse {
            query,
            data: records,
            count: 3,
            source: Some(String::from("test.csv")),
            table: Some(String::from("test")),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        // Every selected column is present
        let col_names = table.column_names();
        for expected in ["id", "name", "age"] {
            assert!(col_names.contains(&expected.to_string()));
        }

        // Response fields are carried over as metadata
        assert_eq!(
            table.metadata.get("source").map(String::as_str),
            Some("test.csv")
        );
        assert_eq!(
            table.metadata.get("cached").map(String::as_str),
            Some("false")
        );

        // Values of the first row
        let first_row = [
            ("id", DataValue::Integer(1)),
            ("name", DataValue::String(String::from("Alice"))),
            ("age", DataValue::Integer(30)),
        ];
        for (column, expected) in first_row {
            assert_eq!(table.get_value_by_name(0, column), Some(&expected));
        }

        // JSON null is preserved as a null value
        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}