sql_cli/data/
datatable.rs

1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::de::{VariantAccess, Visitor};
5use serde::{Deserialize, Serialize};
6use serde_json::Value as JsonValue;
7use std::collections::HashMap;
8use std::fmt;
9use std::sync::Arc;
10use tracing::debug;
11
/// Represents the data type of a column
///
/// Values are produced by [`DataType::infer_from_string`] and reported by
/// [`DataValue::data_type`]; [`DataType::merge`] combines the types seen
/// across the rows of a column.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum DataType {
    /// Textual data.
    String,
    /// Whole numbers.
    Integer,
    /// Floating-point numbers.
    Float,
    /// Boolean values.
    Boolean,
    /// Date/time values.
    DateTime,
    /// No value; `merge` treats this as the neutral element.
    Null,
    Mixed, // For columns with mixed types
}
23
24impl DataType {
25    /// Infer type from a string value
26    #[must_use]
27    pub fn infer_from_string(value: &str) -> Self {
28        // Handle explicit null string
29        if value.eq_ignore_ascii_case("null") {
30            return DataType::Null;
31        }
32
33        // Use the shared type inference logic
34        match TypeInference::infer_from_string(value) {
35            InferredType::Null => DataType::Null,
36            InferredType::Boolean => DataType::Boolean,
37            InferredType::Integer => DataType::Integer,
38            InferredType::Float => DataType::Float,
39            InferredType::DateTime => DataType::DateTime,
40            InferredType::String => DataType::String,
41        }
42    }
43
44    /// Check if a string looks like a datetime value
45    /// Delegates to shared type inference logic
46    fn looks_like_datetime(value: &str) -> bool {
47        TypeInference::looks_like_datetime(value)
48    }
49
50    /// Merge two types (for columns with mixed types)
51    #[must_use]
52    pub fn merge(&self, other: &DataType) -> DataType {
53        if self == other {
54            return self.clone();
55        }
56
57        match (self, other) {
58            (DataType::Null, t) | (t, DataType::Null) => t.clone(),
59            (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
60                DataType::Float
61            }
62            _ => DataType::Mixed,
63        }
64    }
65}
66
/// Column metadata and definition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataColumn {
    /// Simple (unqualified) column name.
    pub name: String,
    /// Data type of the column; `DataColumn::new` starts it as `DataType::String`.
    pub data_type: DataType,
    /// Whether the column may hold nulls; `DataColumn::new` defaults this to `true`.
    pub nullable: bool,
    /// Number of distinct non-null values, or `None` until it has been computed.
    pub unique_values: Option<usize>,
    /// Number of null values counted in this column.
    pub null_count: usize,
    /// Free-form key/value annotations attached to the column.
    pub metadata: HashMap<String, String>,
    /// Qualified name with table prefix (e.g., "messages.field_name")
    pub qualified_name: Option<String>,
    /// Source table or CTE name
    pub source_table: Option<String>,
}
81
82impl DataColumn {
83    pub fn new(name: impl Into<String>) -> Self {
84        Self {
85            name: name.into(),
86            data_type: DataType::String,
87            nullable: true,
88            unique_values: None,
89            null_count: 0,
90            metadata: HashMap::new(),
91            qualified_name: None,
92            source_table: None,
93        }
94    }
95
96    #[must_use]
97    pub fn with_type(mut self, data_type: DataType) -> Self {
98        self.data_type = data_type;
99        self
100    }
101
102    /// Set the qualified name (table.column format)
103    #[must_use]
104    pub fn with_qualified_name(mut self, table_name: &str) -> Self {
105        self.qualified_name = Some(format!("{}.{}", table_name, self.name));
106        self.source_table = Some(table_name.to_string());
107        self
108    }
109
110    /// Get the qualified name if available, otherwise return the simple name
111    pub fn get_qualified_or_simple_name(&self) -> &str {
112        self.qualified_name.as_deref().unwrap_or(&self.name)
113    }
114
115    #[must_use]
116    pub fn with_nullable(mut self, nullable: bool) -> Self {
117        self.nullable = nullable;
118        self
119    }
120}
121
/// A single cell value in the table
///
/// `Hash`, `Serialize`, `Deserialize`, `Eq` and `Display` are implemented by
/// hand further down in this file rather than derived.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    /// Owned text.
    String(String),
    InternedString(Arc<String>), // For repeated strings (e.g., status, trader names)
    /// 64-bit signed integer.
    Integer(i64),
    /// 64-bit floating-point number.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    DateTime(String), // Store as ISO 8601 string for now
    /// Absent value.
    Null,
}
133
134// Custom Hash implementation for DataValue to handle f64
135impl std::hash::Hash for DataValue {
136    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
137        match self {
138            DataValue::String(s) => {
139                0u8.hash(state);
140                s.hash(state);
141            }
142            DataValue::InternedString(s) => {
143                1u8.hash(state);
144                s.hash(state);
145            }
146            DataValue::Integer(i) => {
147                2u8.hash(state);
148                i.hash(state);
149            }
150            DataValue::Float(f) => {
151                3u8.hash(state);
152                // Hash the bits of the float for consistency
153                f.to_bits().hash(state);
154            }
155            DataValue::Boolean(b) => {
156                4u8.hash(state);
157                b.hash(state);
158            }
159            DataValue::DateTime(dt) => {
160                5u8.hash(state);
161                dt.hash(state);
162            }
163            DataValue::Null => {
164                6u8.hash(state);
165            }
166        }
167    }
168}
169
170// Custom Serialize implementation for DataValue to handle Arc<String>
171impl Serialize for DataValue {
172    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
173    where
174        S: serde::Serializer,
175    {
176        match self {
177            DataValue::String(s) => {
178                serializer.serialize_newtype_variant("DataValue", 0, "String", s)
179            }
180            DataValue::InternedString(arc_s) => {
181                // Serialize the Arc<String> as just the String content
182                serializer.serialize_newtype_variant(
183                    "DataValue",
184                    1,
185                    "InternedString",
186                    arc_s.as_ref(),
187                )
188            }
189            DataValue::Integer(i) => {
190                serializer.serialize_newtype_variant("DataValue", 2, "Integer", i)
191            }
192            DataValue::Float(f) => serializer.serialize_newtype_variant("DataValue", 3, "Float", f),
193            DataValue::Boolean(b) => {
194                serializer.serialize_newtype_variant("DataValue", 4, "Boolean", b)
195            }
196            DataValue::DateTime(dt) => {
197                serializer.serialize_newtype_variant("DataValue", 5, "DateTime", dt)
198            }
199            DataValue::Null => serializer.serialize_unit_variant("DataValue", 6, "Null"),
200        }
201    }
202}
203
204// Custom Deserialize implementation for DataValue to handle Arc<String>
205impl<'de> Deserialize<'de> for DataValue {
206    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
207    where
208        D: serde::Deserializer<'de>,
209    {
210        #[derive(Deserialize)]
211        #[serde(field_identifier, rename_all = "PascalCase")]
212        enum Field {
213            String,
214            InternedString,
215            Integer,
216            Float,
217            Boolean,
218            DateTime,
219            Null,
220        }
221
222        struct DataValueVisitor;
223
224        impl<'de> Visitor<'de> for DataValueVisitor {
225            type Value = DataValue;
226
227            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
228                formatter.write_str("enum DataValue")
229            }
230
231            fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
232            where
233                A: serde::de::EnumAccess<'de>,
234            {
235                let (field, variant) = data.variant()?;
236                match field {
237                    Field::String => {
238                        let s: String = variant.newtype_variant()?;
239                        Ok(DataValue::String(s))
240                    }
241                    Field::InternedString => {
242                        let s: String = variant.newtype_variant()?;
243                        Ok(DataValue::InternedString(Arc::new(s)))
244                    }
245                    Field::Integer => {
246                        let i: i64 = variant.newtype_variant()?;
247                        Ok(DataValue::Integer(i))
248                    }
249                    Field::Float => {
250                        let f: f64 = variant.newtype_variant()?;
251                        Ok(DataValue::Float(f))
252                    }
253                    Field::Boolean => {
254                        let b: bool = variant.newtype_variant()?;
255                        Ok(DataValue::Boolean(b))
256                    }
257                    Field::DateTime => {
258                        let dt: String = variant.newtype_variant()?;
259                        Ok(DataValue::DateTime(dt))
260                    }
261                    Field::Null => {
262                        variant.unit_variant()?;
263                        Ok(DataValue::Null)
264                    }
265                }
266            }
267        }
268
269        deserializer.deserialize_enum(
270            "DataValue",
271            &[
272                "String",
273                "InternedString",
274                "Integer",
275                "Float",
276                "Boolean",
277                "DateTime",
278                "Null",
279            ],
280            DataValueVisitor,
281        )
282    }
283}
284
// Custom Eq implementation for DataValue
// `Eq` cannot be derived because the `Float` variant holds an `f64`; this
// marker impl asserts total equality on top of the derived `PartialEq`.
// NOTE(review): a `Float(NaN)` never compares equal to itself, so such a
// value would not be found again if used as a map/set key — confirm NaN
// never reaches keyed collections.
impl Eq for DataValue {}
287
288impl DataValue {
289    pub fn from_string(s: &str, data_type: &DataType) -> Self {
290        if s.is_empty() || s.eq_ignore_ascii_case("null") {
291            return DataValue::Null;
292        }
293
294        match data_type {
295            DataType::String => DataValue::String(s.to_string()),
296            DataType::Integer => s
297                .parse::<i64>()
298                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Integer),
299            DataType::Float => s
300                .parse::<f64>()
301                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
302            DataType::Boolean => {
303                let lower = s.to_lowercase();
304                DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
305            }
306            DataType::DateTime => DataValue::DateTime(s.to_string()),
307            DataType::Null => DataValue::Null,
308            DataType::Mixed => {
309                // Try to infer for mixed columns
310                let inferred = DataType::infer_from_string(s);
311                Self::from_string(s, &inferred)
312            }
313        }
314    }
315
316    #[must_use]
317    pub fn is_null(&self) -> bool {
318        matches!(self, DataValue::Null)
319    }
320
321    #[must_use]
322    pub fn data_type(&self) -> DataType {
323        match self {
324            DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
325            DataValue::Integer(_) => DataType::Integer,
326            DataValue::Float(_) => DataType::Float,
327            DataValue::Boolean(_) => DataType::Boolean,
328            DataValue::DateTime(_) => DataType::DateTime,
329            DataValue::Null => DataType::Null,
330        }
331    }
332
333    /// Get string representation without allocation when possible
334    /// Returns owned String for compatibility but tries to reuse existing strings
335    #[must_use]
336    pub fn to_string_optimized(&self) -> String {
337        match self {
338            DataValue::String(s) => s.clone(), // Clone existing string
339            DataValue::InternedString(s) => s.as_ref().clone(), // Clone from Rc
340            DataValue::DateTime(s) => s.clone(), // Clone existing string
341            DataValue::Integer(i) => i.to_string(),
342            DataValue::Float(f) => f.to_string(),
343            DataValue::Boolean(b) => {
344                if *b {
345                    "true".to_string()
346                } else {
347                    "false".to_string()
348                }
349            }
350            DataValue::Null => String::new(), // Empty string, minimal allocation
351        }
352    }
353}
354
355impl fmt::Display for DataValue {
356    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
357        match self {
358            DataValue::String(s) => write!(f, "{s}"),
359            DataValue::InternedString(s) => write!(f, "{s}"),
360            DataValue::Integer(i) => write!(f, "{i}"),
361            DataValue::Float(fl) => write!(f, "{fl}"),
362            DataValue::Boolean(b) => write!(f, "{b}"),
363            DataValue::DateTime(dt) => write!(f, "{dt}"),
364            DataValue::Null => write!(f, ""),
365        }
366    }
367}
368
/// A row of data in the table
///
/// Wraps the cell values in column order; `DataTable::add_row` rejects rows
/// whose length differs from the table's column count.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataRow {
    /// Cell values, one per column.
    pub values: Vec<DataValue>,
}
374
375impl DataRow {
376    #[must_use]
377    pub fn new(values: Vec<DataValue>) -> Self {
378        Self { values }
379    }
380
381    #[must_use]
382    pub fn get(&self, index: usize) -> Option<&DataValue> {
383        self.values.get(index)
384    }
385
386    pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
387        self.values.get_mut(index)
388    }
389
390    #[must_use]
391    pub fn len(&self) -> usize {
392        self.values.len()
393    }
394
395    #[must_use]
396    pub fn is_empty(&self) -> bool {
397        self.values.is_empty()
398    }
399}
400
/// The main `DataTable` structure
///
/// Holds column definitions alongside row-oriented data; every row added
/// through `add_row` has exactly one value per column.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataTable {
    /// Table name.
    pub name: String,
    /// Column definitions, in positional order.
    pub columns: Vec<DataColumn>,
    /// Data rows; cell `i` of a row belongs to `columns[i]`.
    pub rows: Vec<DataRow>,
    /// Free-form key/value annotations (e.g. `source`, `cached`,
    /// `original_count` as set by `from_query_response`).
    pub metadata: HashMap<String, String>,
}
409
410impl DataTable {
411    pub fn new(name: impl Into<String>) -> Self {
412        Self {
413            name: name.into(),
414            columns: Vec::new(),
415            rows: Vec::new(),
416            metadata: HashMap::new(),
417        }
418    }
419
420    /// Create a DUAL table (similar to Oracle's DUAL) with one row and one column
421    /// Used for evaluating expressions without a data source
422    #[must_use]
423    pub fn dual() -> Self {
424        let mut table = DataTable::new("DUAL");
425        table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
426        table
427            .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
428            .unwrap();
429        table
430    }
431
    /// Append a column definition to the end of the column list.
    ///
    /// Existing rows are left untouched. Returns `&mut Self` so calls can be
    /// chained.
    pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
        self.columns.push(column);
        self
    }
436
437    pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
438        if row.len() != self.columns.len() {
439            return Err(format!(
440                "Row has {} values but table has {} columns",
441                row.len(),
442                self.columns.len()
443            ));
444        }
445        self.rows.push(row);
446        Ok(())
447    }
448
449    #[must_use]
450    pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
451        self.columns.iter().find(|c| c.name == name)
452    }
453
454    #[must_use]
455    pub fn get_column_index(&self, name: &str) -> Option<usize> {
456        self.columns.iter().position(|c| c.name == name)
457    }
458
459    /// Find column index by qualified name (e.g., "messages.field_name")
460    #[must_use]
461    pub fn find_column_by_qualified_name(&self, qualified_name: &str) -> Option<usize> {
462        self.columns
463            .iter()
464            .position(|c| c.qualified_name.as_deref() == Some(qualified_name))
465    }
466
467    /// Find column by either qualified or simple name
468    /// First tries qualified match, then falls back to simple name
469    #[must_use]
470    pub fn find_column_flexible(&self, name: &str, table_prefix: Option<&str>) -> Option<usize> {
471        // If table prefix provided, try qualified match first
472        if let Some(prefix) = table_prefix {
473            let qualified = format!("{}.{}", prefix, name);
474            if let Some(idx) = self.find_column_by_qualified_name(&qualified) {
475                return Some(idx);
476            }
477        }
478
479        // Fall back to simple name match
480        self.get_column_index(name)
481    }
482
483    /// Enrich all columns with qualified names based on the table name
484    pub fn enrich_columns_with_qualified_names(&mut self, table_name: &str) {
485        for column in &mut self.columns {
486            column.qualified_name = Some(format!("{}.{}", table_name, column.name));
487            column.source_table = Some(table_name.to_string());
488        }
489    }
490
    /// Number of column definitions in the table.
    #[must_use]
    pub fn column_count(&self) -> usize {
        self.columns.len()
    }
495
    /// Number of data rows in the table.
    #[must_use]
    pub fn row_count(&self) -> usize {
        self.rows.len()
    }
500
    /// Whether the table has no rows.
    ///
    /// Only rows are considered: a table with columns defined but no data
    /// still counts as empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.rows.is_empty()
    }
505
506    /// Get column names as a vector
507    #[must_use]
508    pub fn column_names(&self) -> Vec<String> {
509        self.columns.iter().map(|c| c.name.clone()).collect()
510    }
511
    /// Get mutable access to columns for enrichment
    ///
    /// A slice is returned rather than the `Vec`, so callers can edit column
    /// definitions in place but cannot add or remove columns through it.
    pub fn columns_mut(&mut self) -> &mut [DataColumn] {
        &mut self.columns
    }
516
517    /// Infer and update column types based on data
518    pub fn infer_column_types(&mut self) {
519        for (col_idx, column) in self.columns.iter_mut().enumerate() {
520            let mut inferred_type = DataType::Null;
521            let mut null_count = 0;
522            let mut unique_values = std::collections::HashSet::new();
523
524            for row in &self.rows {
525                if let Some(value) = row.get(col_idx) {
526                    if value.is_null() {
527                        null_count += 1;
528                    } else {
529                        let value_type = value.data_type();
530                        inferred_type = inferred_type.merge(&value_type);
531                        unique_values.insert(value.to_string());
532                    }
533                }
534            }
535
536            column.data_type = inferred_type;
537            column.null_count = null_count;
538            column.nullable = null_count > 0;
539            column.unique_values = Some(unique_values.len());
540        }
541    }
542
543    /// Get a value at specific row and column
544    #[must_use]
545    pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
546        self.rows.get(row)?.get(col)
547    }
548
549    /// Get a value by row index and column name
550    #[must_use]
551    pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
552        let col_idx = self.get_column_index(col_name)?;
553        self.get_value(row, col_idx)
554    }
555
556    /// Convert to a vector of string vectors (for display/compatibility)
557    #[must_use]
558    pub fn to_string_table(&self) -> Vec<Vec<String>> {
559        self.rows
560            .iter()
561            .map(|row| {
562                row.values
563                    .iter()
564                    .map(DataValue::to_string_optimized)
565                    .collect()
566            })
567            .collect()
568    }
569
570    /// Get table statistics
571    #[must_use]
572    pub fn get_stats(&self) -> DataTableStats {
573        DataTableStats {
574            row_count: self.row_count(),
575            column_count: self.column_count(),
576            memory_size: self.estimate_memory_size(),
577            null_count: self.columns.iter().map(|c| c.null_count).sum(),
578        }
579    }
580
581    /// Generate a debug dump string for display
582    #[must_use]
583    pub fn debug_dump(&self) -> String {
584        let mut output = String::new();
585
586        output.push_str(&format!("DataTable: {}\n", self.name));
587        output.push_str(&format!(
588            "Rows: {} | Columns: {}\n",
589            self.row_count(),
590            self.column_count()
591        ));
592
593        if !self.metadata.is_empty() {
594            output.push_str("Metadata:\n");
595            for (key, value) in &self.metadata {
596                output.push_str(&format!("  {key}: {value}\n"));
597            }
598        }
599
600        output.push_str("\nColumns:\n");
601        for column in &self.columns {
602            output.push_str(&format!("  {} ({:?})", column.name, column.data_type));
603            if column.nullable {
604                output.push_str(&format!(" - nullable, {} nulls", column.null_count));
605            }
606            if let Some(unique) = column.unique_values {
607                output.push_str(&format!(", {unique} unique"));
608            }
609            output.push('\n');
610        }
611
612        // Show first few rows
613        if self.row_count() > 0 {
614            let sample_size = 5.min(self.row_count());
615            output.push_str(&format!("\nFirst {sample_size} rows:\n"));
616
617            for row_idx in 0..sample_size {
618                output.push_str(&format!("  [{row_idx}]: "));
619                for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
620                    if col_idx > 0 {
621                        output.push_str(", ");
622                    }
623                    output.push_str(&value.to_string());
624                }
625                output.push('\n');
626            }
627        }
628
629        output
630    }
631
632    #[must_use]
633    pub fn estimate_memory_size(&self) -> usize {
634        // Base structure size
635        let mut size = std::mem::size_of::<Self>();
636
637        // Column metadata
638        size += self.columns.len() * std::mem::size_of::<DataColumn>();
639        for col in &self.columns {
640            size += col.name.len();
641        }
642
643        // Row structure overhead
644        size += self.rows.len() * std::mem::size_of::<DataRow>();
645
646        // Actual data values
647        for row in &self.rows {
648            for value in &row.values {
649                // Base enum size
650                size += std::mem::size_of::<DataValue>();
651                // Add string content size
652                match value {
653                    DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
654                    _ => {} // Numbers and booleans are inline
655                }
656            }
657        }
658
659        size
660    }
661
662    /// Convert DataTable to CSV format
663    pub fn to_csv(&self) -> String {
664        let mut csv_output = String::new();
665
666        // Write headers
667        let headers: Vec<String> = self
668            .columns
669            .iter()
670            .map(|col| {
671                if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
672                    format!("\"{}\"", col.name.replace('"', "\"\""))
673                } else {
674                    col.name.clone()
675                }
676            })
677            .collect();
678        csv_output.push_str(&headers.join(","));
679        csv_output.push('\n');
680
681        // Write data rows
682        for row in &self.rows {
683            let row_values: Vec<String> = row
684                .values
685                .iter()
686                .map(|value| {
687                    let str_val = value.to_string();
688                    if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
689                        format!("\"{}\"", str_val.replace('"', "\"\""))
690                    } else {
691                        str_val
692                    }
693                })
694                .collect();
695            csv_output.push_str(&row_values.join(","));
696            csv_output.push('\n');
697        }
698
699        csv_output
700    }
701
702    /// V46: Create `DataTable` from `QueryResponse`
703    /// This is the key conversion function that bridges old and new systems
704    pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
705        debug!(
706            "V46: Converting QueryResponse to DataTable for table '{}'",
707            table_name
708        );
709
710        // Track memory before conversion
711        crate::utils::memory_tracker::track_memory("start_from_query_response");
712
713        let mut table = DataTable::new(table_name);
714
715        // Extract column names and types from first row
716        if let Some(first_row) = response.data.first() {
717            if let Some(obj) = first_row.as_object() {
718                // Create columns based on the keys in the JSON object
719                for key in obj.keys() {
720                    let column = DataColumn::new(key.clone());
721                    table.add_column(column);
722                }
723
724                // Now convert all rows
725                for json_row in &response.data {
726                    if let Some(row_obj) = json_row.as_object() {
727                        let mut values = Vec::new();
728
729                        // Ensure we get values in the same order as columns
730                        for column in &table.columns {
731                            let value = row_obj
732                                .get(&column.name)
733                                .map_or(DataValue::Null, json_value_to_data_value);
734                            values.push(value);
735                        }
736
737                        table.add_row(DataRow::new(values))?;
738                    }
739                }
740
741                // Infer column types from the data
742                table.infer_column_types();
743
744                // Add metadata
745                if let Some(source) = &response.source {
746                    table.metadata.insert("source".to_string(), source.clone());
747                }
748                if let Some(cached) = response.cached {
749                    table
750                        .metadata
751                        .insert("cached".to_string(), cached.to_string());
752                }
753                table
754                    .metadata
755                    .insert("original_count".to_string(), response.count.to_string());
756
757                debug!(
758                    "V46: Created DataTable with {} columns and {} rows",
759                    table.column_count(),
760                    table.row_count()
761                );
762            } else {
763                // Handle non-object JSON (single values)
764                table.add_column(DataColumn::new("value"));
765                for json_value in &response.data {
766                    let value = json_value_to_data_value(json_value);
767                    table.add_row(DataRow::new(vec![value]))?;
768                }
769            }
770        }
771
772        Ok(table)
773    }
774
    /// Get a single row by index
    ///
    /// Returns `None` when `index` is past the last row.
    #[must_use]
    pub fn get_row(&self, index: usize) -> Option<&DataRow> {
        self.rows.get(index)
    }
780
781    /// V50: Get a single row as strings
782    #[must_use]
783    pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
784        self.rows.get(index).map(|row| {
785            row.values
786                .iter()
787                .map(DataValue::to_string_optimized)
788                .collect()
789        })
790    }
791
792    /// Pretty print the `DataTable` with a nice box drawing
793    #[must_use]
794    pub fn pretty_print(&self) -> String {
795        let mut output = String::new();
796
797        // Header
798        output.push_str("╔═══════════════════════════════════════════════════════╗\n");
799        output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
800        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
801
802        // Summary stats
803        output.push_str(&format!(
804            "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
805            self.row_count(),
806            self.column_count(),
807            self.get_stats().memory_size
808        ));
809
810        // Metadata if any
811        if !self.metadata.is_empty() {
812            output.push_str("╠═══════════════════════════════════════════════════════╣\n");
813            output.push_str("║ Metadata:                                             ║\n");
814            for (key, value) in &self.metadata {
815                let truncated_value = if value.len() > 35 {
816                    format!("{}...", &value[..32])
817                } else {
818                    value.clone()
819                };
820                output.push_str(&format!(
821                    "║   {:15} : {:35} ║\n",
822                    Self::truncate_string(key, 15),
823                    truncated_value
824                ));
825            }
826        }
827
828        // Column details
829        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
830        output.push_str("║ Columns:                                              ║\n");
831        output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
832        output.push_str("║ Name              │ Type     │ Nullable│ Nulls│Unique║\n");
833        output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
834
835        for column in &self.columns {
836            let type_str = match &column.data_type {
837                DataType::String => "String",
838                DataType::Integer => "Integer",
839                DataType::Float => "Float",
840                DataType::Boolean => "Boolean",
841                DataType::DateTime => "DateTime",
842                DataType::Null => "Null",
843                DataType::Mixed => "Mixed",
844            };
845
846            output.push_str(&format!(
847                "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
848                Self::truncate_string(&column.name, 17),
849                type_str,
850                if column.nullable { "Yes" } else { "No" },
851                column.null_count,
852                column.unique_values.unwrap_or(0)
853            ));
854        }
855
856        output.push_str("╚═══════════════════════════════════════════════════════╝\n");
857
858        // Sample data (first 5 rows)
859        output.push_str("\nSample Data (first 5 rows):\n");
860        let sample_count = self.rows.len().min(5);
861
862        if sample_count > 0 {
863            // Column headers
864            output.push('┌');
865            for (i, _col) in self.columns.iter().enumerate() {
866                if i > 0 {
867                    output.push('┬');
868                }
869                output.push_str(&"─".repeat(20));
870            }
871            output.push_str("┐\n");
872
873            output.push('│');
874            for col in &self.columns {
875                output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
876            }
877            output.push('\n');
878
879            output.push('├');
880            for (i, _) in self.columns.iter().enumerate() {
881                if i > 0 {
882                    output.push('┼');
883                }
884                output.push_str(&"─".repeat(20));
885            }
886            output.push_str("┤\n");
887
888            // Data rows
889            for row_idx in 0..sample_count {
890                if let Some(row) = self.rows.get(row_idx) {
891                    output.push('│');
892                    for value in &row.values {
893                        let value_str = value.to_string();
894                        output
895                            .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
896                    }
897                    output.push('\n');
898                }
899            }
900
901            output.push('└');
902            for (i, _) in self.columns.iter().enumerate() {
903                if i > 0 {
904                    output.push('┴');
905                }
906                output.push_str(&"─".repeat(20));
907            }
908            output.push_str("┘\n");
909        }
910
911        output
912    }
913
914    fn truncate_string(s: &str, max_len: usize) -> String {
915        if s.len() > max_len {
916            format!("{}...", &s[..max_len - 3])
917        } else {
918            s.to_string()
919        }
920    }
921
922    /// Get a schema summary of the `DataTable`
923    #[must_use]
924    pub fn get_schema_summary(&self) -> String {
925        let mut summary = String::new();
926        summary.push_str(&format!(
927            "DataTable Schema ({} columns, {} rows):\n",
928            self.columns.len(),
929            self.rows.len()
930        ));
931
932        for (idx, column) in self.columns.iter().enumerate() {
933            let type_str = match &column.data_type {
934                DataType::String => "String",
935                DataType::Integer => "Integer",
936                DataType::Float => "Float",
937                DataType::Boolean => "Boolean",
938                DataType::DateTime => "DateTime",
939                DataType::Null => "Null",
940                DataType::Mixed => "Mixed",
941            };
942
943            let nullable_str = if column.nullable {
944                "nullable"
945            } else {
946                "not null"
947            };
948            let null_info = if column.null_count > 0 {
949                format!(", {} nulls", column.null_count)
950            } else {
951                String::new()
952            };
953
954            summary.push_str(&format!(
955                "  [{:3}] {} : {} ({}{})\n",
956                idx, column.name, type_str, nullable_str, null_info
957            ));
958        }
959
960        summary
961    }
962
963    /// Get detailed schema information as a structured format
964    #[must_use]
965    pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
966        self.columns
967            .iter()
968            .map(|col| {
969                let type_name = format!("{:?}", col.data_type);
970                (col.name.clone(), type_name, col.nullable, col.null_count)
971            })
972            .collect()
973    }
974
    /// Reserve capacity for rows to avoid reallocations
    ///
    /// Forwards `additional` to `reserve` on the row storage, so it is a
    /// request for room for that many *more* rows, not a total capacity.
    pub fn reserve_rows(&mut self, additional: usize) {
        self.rows.reserve(additional);
    }
979
980    /// Shrink vectors to fit actual data (removes excess capacity)
981    pub fn shrink_to_fit(&mut self) {
982        self.rows.shrink_to_fit();
983        for _column in &mut self.columns {
984            // Shrink any column-specific data if needed
985        }
986    }
987
988    /// Get actual memory usage estimate (more accurate than `estimate_memory_size`)
989    #[must_use]
990    pub fn get_memory_usage(&self) -> usize {
991        let mut size = std::mem::size_of::<Self>();
992
993        // Account for string allocations
994        size += self.name.capacity();
995
996        // Account for columns
997        size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
998        for col in &self.columns {
999            size += col.name.capacity();
1000        }
1001
1002        // Account for rows and their capacity
1003        size += self.rows.capacity() * std::mem::size_of::<DataRow>();
1004
1005        // Account for actual data values
1006        for row in &self.rows {
1007            size += row.values.capacity() * std::mem::size_of::<DataValue>();
1008            for value in &row.values {
1009                match value {
1010                    DataValue::String(s) => size += s.capacity(),
1011                    DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
1012                    DataValue::DateTime(s) => size += s.capacity(),
1013                    _ => {} // Other types are inline
1014                }
1015            }
1016        }
1017
1018        // Account for metadata
1019        size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
1020        for (k, v) in &self.metadata {
1021            size += k.capacity() + v.capacity();
1022        }
1023
1024        size
1025    }
1026
    /// Serialize DataTable to bytes for caching (using MessagePack for now, can be upgraded to Parquet)
    ///
    /// Despite the name, the bytes are currently produced by
    /// `rmp_serde::to_vec` (MessagePack), not by a Parquet writer.
    ///
    /// # Errors
    /// Returns the encoder's error rendered as a `String`, prefixed with
    /// `"Failed to serialize DataTable: "`.
    pub fn to_parquet_bytes(&self) -> Result<Vec<u8>, String> {
        // For now, use MessagePack which is binary-safe and fast
        // Later we can upgrade to actual Parquet format
        rmp_serde::to_vec(self).map_err(|e| format!("Failed to serialize DataTable: {}", e))
    }
1033
    /// Deserialize DataTable from cached bytes
    ///
    /// Despite the name, `bytes` is decoded with `rmp_serde::from_slice`
    /// (MessagePack), matching what `to_parquet_bytes` writes today.
    ///
    /// # Errors
    /// Returns the decoder's error rendered as a `String`, prefixed with
    /// `"Failed to deserialize DataTable: "`.
    pub fn from_parquet_bytes(bytes: &[u8]) -> Result<Self, String> {
        // For now, use MessagePack
        // Later we can upgrade to actual Parquet format
        rmp_serde::from_slice(bytes).map_err(|e| format!("Failed to deserialize DataTable: {}", e))
    }
1040}
1041
1042/// V46: Helper function to convert JSON value to `DataValue`
1043fn json_value_to_data_value(json: &JsonValue) -> DataValue {
1044    match json {
1045        JsonValue::Null => DataValue::Null,
1046        JsonValue::Bool(b) => DataValue::Boolean(*b),
1047        JsonValue::Number(n) => {
1048            if let Some(i) = n.as_i64() {
1049                DataValue::Integer(i)
1050            } else if let Some(f) = n.as_f64() {
1051                DataValue::Float(f)
1052            } else {
1053                DataValue::String(n.to_string())
1054            }
1055        }
1056        JsonValue::String(s) => {
1057            // Try to detect if it's a date/time
1058            if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
1059                // Simple heuristic for dates
1060                DataValue::DateTime(s.clone())
1061            } else {
1062                DataValue::String(s.clone())
1063            }
1064        }
1065        JsonValue::Array(_) | JsonValue::Object(_) => {
1066            // Store complex types as JSON string
1067            DataValue::String(json.to_string())
1068        }
1069    }
1070}
1071
/// Statistics about a `DataTable`
///
/// Plain value struct of four counters. The code that fills it in is not part
/// of this section, so the field notes below follow the field names.
#[derive(Debug, Clone)]
pub struct DataTableStats {
    /// Number of rows in the table.
    pub row_count: usize,
    /// Number of columns in the table.
    pub column_count: usize,
    /// Memory size figure for the table.
    /// NOTE(review): presumably bytes, and presumably an estimate — confirm
    /// against the code that produces this value.
    pub memory_size: usize,
    /// Number of null values.
    /// NOTE(review): presumably the total across all columns — confirm.
    pub null_count: usize,
}
1080
1081/// Implementation of `DataProvider` for `DataTable`
1082/// This allows `DataTable` to be used wherever `DataProvider` trait is expected
1083impl DataProvider for DataTable {
1084    fn get_row(&self, index: usize) -> Option<Vec<String>> {
1085        self.rows.get(index).map(|row| {
1086            row.values
1087                .iter()
1088                .map(DataValue::to_string_optimized)
1089                .collect()
1090        })
1091    }
1092
1093    fn get_column_names(&self) -> Vec<String> {
1094        self.column_names()
1095    }
1096
1097    fn get_row_count(&self) -> usize {
1098        self.row_count()
1099    }
1100
1101    fn get_column_count(&self) -> usize {
1102        self.column_count()
1103    }
1104}
1105
// Unit tests for type inference, table construction, and building a table
// from a `QueryResponse`.
#[cfg(test)]
mod tests {
    use super::*;

    // One representative literal per `DataType` that string inference can yield.
    #[test]
    fn test_data_type_inference() {
        assert_eq!(DataType::infer_from_string("123"), DataType::Integer);
        assert_eq!(DataType::infer_from_string("123.45"), DataType::Float);
        assert_eq!(DataType::infer_from_string("true"), DataType::Boolean);
        assert_eq!(DataType::infer_from_string("hello"), DataType::String);
        // The empty string is expected to infer as Null.
        assert_eq!(DataType::infer_from_string(""), DataType::Null);
        assert_eq!(
            DataType::infer_from_string("2024-01-01"),
            DataType::DateTime
        );
    }

    // Builds a three-column table, appends one row, and reads a value back by
    // column name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        table.add_column(DataColumn::new("id").with_type(DataType::Integer));
        table.add_column(DataColumn::new("name").with_type(DataType::String));
        table.add_column(DataColumn::new("active").with_type(DataType::Boolean));

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let row = DataRow::new(vec![
            DataValue::Integer(1),
            DataValue::String("Alice".to_string()),
            DataValue::Boolean(true),
        ]);

        table.add_row(row).unwrap();
        assert_eq!(table.row_count(), 1);

        let value = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(value.to_string(), "Alice");
    }

    // A column holding Integer, Float and Null values should come out as a
    // nullable Float column with one recorded null.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");

        // Add columns without types
        table.add_column(DataColumn::new("mixed"));

        // Add rows with different types
        table
            .add_row(DataRow::new(vec![DataValue::Integer(1)]))
            .unwrap();
        table
            .add_row(DataRow::new(vec![DataValue::Float(2.5)]))
            .unwrap();
        table.add_row(DataRow::new(vec![DataValue::Null])).unwrap();

        table.infer_column_types();

        // Should infer Float since we have both Integer and Float
        assert_eq!(table.columns[0].data_type, DataType::Float);
        assert_eq!(table.columns[0].null_count, 1);
        assert!(table.columns[0].nullable);
    }

    // Converts a three-row `QueryResponse` and checks the table name, shape,
    // column names, metadata, first-row values, and JSON null handling.
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        let response = QueryResponse {
            query: QueryInfo {
                select: vec!["id".to_string(), "name".to_string(), "age".to_string()],
                where_clause: None,
                order_by: None,
            },
            data: vec![
                json!({
                    "id": 1,
                    "name": "Alice",
                    "age": 30
                }),
                json!({
                    "id": 2,
                    "name": "Bob",
                    "age": 25
                }),
                json!({
                    "id": 3,
                    "name": "Carol",
                    "age": null
                }),
            ],
            count: 3,
            source: Some("test.csv".to_string()),
            table: Some("test".to_string()),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        // Check column names
        // (membership only; the column order is deliberately not asserted)
        let col_names = table.column_names();
        assert!(col_names.contains(&"id".to_string()));
        assert!(col_names.contains(&"name".to_string()));
        assert!(col_names.contains(&"age".to_string()));

        // Check metadata
        assert_eq!(table.metadata.get("source"), Some(&"test.csv".to_string()));
        assert_eq!(table.metadata.get("cached"), Some(&"false".to_string()));

        // Check first row values
        assert_eq!(
            table.get_value_by_name(0, "id"),
            Some(&DataValue::Integer(1))
        );
        assert_eq!(
            table.get_value_by_name(0, "name"),
            Some(&DataValue::String("Alice".to_string()))
        );
        assert_eq!(
            table.get_value_by_name(0, "age"),
            Some(&DataValue::Integer(30))
        );

        // Check null handling
        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}