// sql_cli/data/datatable.rs

1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::de::{VariantAccess, Visitor};
5use serde::{Deserialize, Serialize};
6use serde_json::Value as JsonValue;
7use std::collections::HashMap;
8use std::fmt;
9use std::sync::Arc;
10use tracing::debug;
11
12/// Represents the data type of a column
13#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
14pub enum DataType {
15    String,
16    Integer,
17    Float,
18    Boolean,
19    DateTime,
20    Null,
21    Mixed, // For columns with mixed types
22}
23
24impl DataType {
25    /// Infer type from a string value
26    #[must_use]
27    pub fn infer_from_string(value: &str) -> Self {
28        // Handle explicit null string
29        if value.eq_ignore_ascii_case("null") {
30            return DataType::Null;
31        }
32
33        // Use the shared type inference logic
34        match TypeInference::infer_from_string(value) {
35            InferredType::Null => DataType::Null,
36            InferredType::Boolean => DataType::Boolean,
37            InferredType::Integer => DataType::Integer,
38            InferredType::Float => DataType::Float,
39            InferredType::DateTime => DataType::DateTime,
40            InferredType::String => DataType::String,
41        }
42    }
43
44    /// Check if a string looks like a datetime value
45    /// Delegates to shared type inference logic
46    fn looks_like_datetime(value: &str) -> bool {
47        TypeInference::looks_like_datetime(value)
48    }
49
50    /// Merge two types (for columns with mixed types)
51    #[must_use]
52    pub fn merge(&self, other: &DataType) -> DataType {
53        if self == other {
54            return self.clone();
55        }
56
57        match (self, other) {
58            (DataType::Null, t) | (t, DataType::Null) => t.clone(),
59            (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
60                DataType::Float
61            }
62            _ => DataType::Mixed,
63        }
64    }
65}
66
67/// Column metadata and definition
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct DataColumn {
70    pub name: String,
71    pub data_type: DataType,
72    pub nullable: bool,
73    pub unique_values: Option<usize>,
74    pub null_count: usize,
75    pub metadata: HashMap<String, String>,
76    /// Qualified name with table prefix (e.g., "messages.field_name")
77    pub qualified_name: Option<String>,
78    /// Source table or CTE name
79    pub source_table: Option<String>,
80}
81
82impl DataColumn {
83    pub fn new(name: impl Into<String>) -> Self {
84        Self {
85            name: name.into(),
86            data_type: DataType::String,
87            nullable: true,
88            unique_values: None,
89            null_count: 0,
90            metadata: HashMap::new(),
91            qualified_name: None,
92            source_table: None,
93        }
94    }
95
96    #[must_use]
97    pub fn with_type(mut self, data_type: DataType) -> Self {
98        self.data_type = data_type;
99        self
100    }
101
102    /// Set the qualified name (table.column format)
103    #[must_use]
104    pub fn with_qualified_name(mut self, table_name: &str) -> Self {
105        self.qualified_name = Some(format!("{}.{}", table_name, self.name));
106        self.source_table = Some(table_name.to_string());
107        self
108    }
109
110    /// Get the qualified name if available, otherwise return the simple name
111    pub fn get_qualified_or_simple_name(&self) -> &str {
112        self.qualified_name.as_deref().unwrap_or(&self.name)
113    }
114
115    #[must_use]
116    pub fn with_nullable(mut self, nullable: bool) -> Self {
117        self.nullable = nullable;
118        self
119    }
120}
121
/// A single cell value in the table.
///
/// Equality and ordering are derived, so two values of different variants
/// are never equal (a `String` is not equal to an `InternedString` holding
/// the same text).
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    String(String),
    /// For repeated strings (e.g., status, trader names).
    InternedString(Arc<String>),
    Integer(i64),
    Float(f64),
    Boolean(bool),
    /// Stored as an ISO 8601 string for now.
    DateTime(String),
    /// For vector mathematics (physics, geometry, etc.).
    Vector(Vec<f64>),
    Null,
}

// Custom Hash implementation for DataValue to handle f64
impl std::hash::Hash for DataValue {
    /// Feeds a per-variant tag byte followed by the payload into `state`.
    ///
    /// Floats are hashed through their bit pattern. Because the derived
    /// `PartialEq` treats `0.0` and `-0.0` as equal while their bit patterns
    /// differ, both zeros are collapsed onto the same pattern first; otherwise
    /// two equal values could hash differently, which hash-based collections
    /// do not tolerate.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Bit pattern used for hashing; maps -0.0 onto +0.0.
        fn float_bits(value: f64) -> u64 {
            if value == 0.0 {
                0.0_f64.to_bits()
            } else {
                value.to_bits()
            }
        }

        match self {
            DataValue::String(s) => {
                0u8.hash(state);
                s.hash(state);
            }
            DataValue::InternedString(s) => {
                1u8.hash(state);
                s.hash(state);
            }
            DataValue::Integer(i) => {
                2u8.hash(state);
                i.hash(state);
            }
            DataValue::Float(f) => {
                3u8.hash(state);
                float_bits(*f).hash(state);
            }
            DataValue::Boolean(b) => {
                4u8.hash(state);
                b.hash(state);
            }
            DataValue::DateTime(dt) => {
                5u8.hash(state);
                dt.hash(state);
            }
            DataValue::Vector(v) => {
                6u8.hash(state);
                // Same zero normalisation as for a scalar float, per component.
                for f in v {
                    float_bits(*f).hash(state);
                }
            }
            DataValue::Null => {
                7u8.hash(state);
            }
        }
    }
}
177
178// Custom Serialize implementation for DataValue to handle Arc<String>
179impl Serialize for DataValue {
180    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
181    where
182        S: serde::Serializer,
183    {
184        match self {
185            DataValue::String(s) => {
186                serializer.serialize_newtype_variant("DataValue", 0, "String", s)
187            }
188            DataValue::InternedString(arc_s) => {
189                // Serialize the Arc<String> as just the String content
190                serializer.serialize_newtype_variant(
191                    "DataValue",
192                    1,
193                    "InternedString",
194                    arc_s.as_ref(),
195                )
196            }
197            DataValue::Integer(i) => {
198                serializer.serialize_newtype_variant("DataValue", 2, "Integer", i)
199            }
200            DataValue::Float(f) => serializer.serialize_newtype_variant("DataValue", 3, "Float", f),
201            DataValue::Boolean(b) => {
202                serializer.serialize_newtype_variant("DataValue", 4, "Boolean", b)
203            }
204            DataValue::DateTime(dt) => {
205                serializer.serialize_newtype_variant("DataValue", 5, "DateTime", dt)
206            }
207            DataValue::Vector(v) => {
208                serializer.serialize_newtype_variant("DataValue", 6, "Vector", v)
209            }
210            DataValue::Null => serializer.serialize_unit_variant("DataValue", 7, "Null"),
211        }
212    }
213}
214
215// Custom Deserialize implementation for DataValue to handle Arc<String>
216impl<'de> Deserialize<'de> for DataValue {
217    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
218    where
219        D: serde::Deserializer<'de>,
220    {
221        #[derive(Deserialize)]
222        #[serde(field_identifier, rename_all = "PascalCase")]
223        enum Field {
224            String,
225            InternedString,
226            Integer,
227            Float,
228            Boolean,
229            DateTime,
230            Vector,
231            Null,
232        }
233
234        struct DataValueVisitor;
235
236        impl<'de> Visitor<'de> for DataValueVisitor {
237            type Value = DataValue;
238
239            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
240                formatter.write_str("enum DataValue")
241            }
242
243            fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
244            where
245                A: serde::de::EnumAccess<'de>,
246            {
247                let (field, variant) = data.variant()?;
248                match field {
249                    Field::String => {
250                        let s: String = variant.newtype_variant()?;
251                        Ok(DataValue::String(s))
252                    }
253                    Field::InternedString => {
254                        let s: String = variant.newtype_variant()?;
255                        Ok(DataValue::InternedString(Arc::new(s)))
256                    }
257                    Field::Integer => {
258                        let i: i64 = variant.newtype_variant()?;
259                        Ok(DataValue::Integer(i))
260                    }
261                    Field::Float => {
262                        let f: f64 = variant.newtype_variant()?;
263                        Ok(DataValue::Float(f))
264                    }
265                    Field::Boolean => {
266                        let b: bool = variant.newtype_variant()?;
267                        Ok(DataValue::Boolean(b))
268                    }
269                    Field::DateTime => {
270                        let dt: String = variant.newtype_variant()?;
271                        Ok(DataValue::DateTime(dt))
272                    }
273                    Field::Vector => {
274                        let v: Vec<f64> = variant.newtype_variant()?;
275                        Ok(DataValue::Vector(v))
276                    }
277                    Field::Null => {
278                        variant.unit_variant()?;
279                        Ok(DataValue::Null)
280                    }
281                }
282            }
283        }
284
285        deserializer.deserialize_enum(
286            "DataValue",
287            &[
288                "String",
289                "InternedString",
290                "Integer",
291                "Float",
292                "Boolean",
293                "DateTime",
294                "Vector",
295                "Null",
296            ],
297            DataValueVisitor,
298        )
299    }
300}
301
// Custom Eq implementation for DataValue: a marker impl on top of the derived
// `PartialEq`, which lets `DataValue` satisfy `Eq` bounds.
// NOTE(review): `Float` and `Vector` hold `f64`, and the derived `PartialEq`
// makes a NaN payload unequal to itself, which `Eq` formally forbids. Confirm
// NaN values never reach code that relies on `Eq` (e.g. hash-map keys).
impl Eq for DataValue {}
304
305impl DataValue {
306    pub fn from_string(s: &str, data_type: &DataType) -> Self {
307        if s.is_empty() || s.eq_ignore_ascii_case("null") {
308            return DataValue::Null;
309        }
310
311        match data_type {
312            DataType::String => DataValue::String(s.to_string()),
313            DataType::Integer => s.parse::<i64>().map_or_else(
314                // The column was inferred as Integer (type inference only samples
315                // the first N rows, so a fractional value further down can be
316                // missed). Promote to Float rather than demoting to String, which
317                // would corrupt numeric sorting (String sorts after all numbers).
318                // The final infer_column_types() pass re-merges the column to Float.
319                |_| {
320                    s.parse::<f64>()
321                        .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float)
322                },
323                DataValue::Integer,
324            ),
325            DataType::Float => s
326                .parse::<f64>()
327                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
328            DataType::Boolean => {
329                let lower = s.to_lowercase();
330                DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
331            }
332            DataType::DateTime => DataValue::DateTime(s.to_string()),
333            DataType::Null => DataValue::Null,
334            DataType::Mixed => {
335                // Try to infer for mixed columns
336                let inferred = DataType::infer_from_string(s);
337                Self::from_string(s, &inferred)
338            }
339        }
340    }
341
342    #[must_use]
343    pub fn is_null(&self) -> bool {
344        matches!(self, DataValue::Null)
345    }
346
347    #[must_use]
348    pub fn data_type(&self) -> DataType {
349        match self {
350            DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
351            DataValue::Integer(_) => DataType::Integer,
352            DataValue::Float(_) => DataType::Float,
353            DataValue::Boolean(_) => DataType::Boolean,
354            DataValue::DateTime(_) => DataType::DateTime,
355            DataValue::Vector(_) => DataType::String, // Display as string "[x,y,z]"
356            DataValue::Null => DataType::Null,
357        }
358    }
359
360    /// Get string representation without allocation when possible
361    /// Returns owned String for compatibility but tries to reuse existing strings
362    #[must_use]
363    pub fn to_string_optimized(&self) -> String {
364        match self {
365            DataValue::String(s) => s.clone(), // Clone existing string
366            DataValue::InternedString(s) => s.as_ref().clone(), // Clone from Rc
367            DataValue::DateTime(s) => s.clone(), // Clone existing string
368            DataValue::Integer(i) => i.to_string(),
369            DataValue::Float(f) => f.to_string(),
370            DataValue::Boolean(b) => {
371                if *b {
372                    "true".to_string()
373                } else {
374                    "false".to_string()
375                }
376            }
377            DataValue::Vector(v) => {
378                // Format as "[x,y,z]"
379                let components: Vec<String> = v.iter().map(|f| f.to_string()).collect();
380                format!("[{}]", components.join(","))
381            }
382            DataValue::Null => String::new(), // Empty string, minimal allocation
383        }
384    }
385}
386
387impl fmt::Display for DataValue {
388    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
389        match self {
390            DataValue::String(s) => write!(f, "{s}"),
391            DataValue::InternedString(s) => write!(f, "{s}"),
392            DataValue::Integer(i) => write!(f, "{i}"),
393            DataValue::Float(fl) => write!(f, "{fl}"),
394            DataValue::Boolean(b) => write!(f, "{b}"),
395            DataValue::DateTime(dt) => write!(f, "{dt}"),
396            DataValue::Vector(v) => {
397                let components: Vec<String> = v.iter().map(|fl| fl.to_string()).collect();
398                write!(f, "[{}]", components.join(","))
399            }
400            DataValue::Null => write!(f, ""),
401        }
402    }
403}
404
405/// A row of data in the table
406#[derive(Debug, Clone, Serialize, Deserialize)]
407pub struct DataRow {
408    pub values: Vec<DataValue>,
409}
410
411impl DataRow {
412    #[must_use]
413    pub fn new(values: Vec<DataValue>) -> Self {
414        Self { values }
415    }
416
417    #[must_use]
418    pub fn get(&self, index: usize) -> Option<&DataValue> {
419        self.values.get(index)
420    }
421
422    pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
423        self.values.get_mut(index)
424    }
425
426    #[must_use]
427    pub fn len(&self) -> usize {
428        self.values.len()
429    }
430
431    #[must_use]
432    pub fn is_empty(&self) -> bool {
433        self.values.is_empty()
434    }
435}
436
/// The main `DataTable` structure: a named table made of column definitions,
/// rows of cell values and string metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataTable {
    /// Table name.
    pub name: String,
    /// Column definitions; `add_row` only accepts rows with exactly this many values.
    pub columns: Vec<DataColumn>,
    /// Row data, in insertion order.
    pub rows: Vec<DataRow>,
    /// String key/value annotations about the table (`from_query_response`
    /// stores "source", "cached" and "original_count" here).
    pub metadata: HashMap<String, String>,
}
445
446impl DataTable {
447    pub fn new(name: impl Into<String>) -> Self {
448        Self {
449            name: name.into(),
450            columns: Vec::new(),
451            rows: Vec::new(),
452            metadata: HashMap::new(),
453        }
454    }
455
456    /// Create a DUAL table (similar to Oracle's DUAL) with one row and one column
457    /// Used for evaluating expressions without a data source
458    #[must_use]
459    pub fn dual() -> Self {
460        let mut table = DataTable::new("DUAL");
461        table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
462        table
463            .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
464            .unwrap();
465        table
466    }
467
468    pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
469        self.columns.push(column);
470        self
471    }
472
473    pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
474        if row.len() != self.columns.len() {
475            return Err(format!(
476                "Row has {} values but table has {} columns",
477                row.len(),
478                self.columns.len()
479            ));
480        }
481        self.rows.push(row);
482        Ok(())
483    }
484
485    #[must_use]
486    pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
487        self.columns.iter().find(|c| c.name == name)
488    }
489
490    #[must_use]
491    pub fn get_column_index(&self, name: &str) -> Option<usize> {
492        self.columns.iter().position(|c| c.name == name)
493    }
494
495    /// Find column index by qualified name (e.g., "messages.field_name")
496    #[must_use]
497    pub fn find_column_by_qualified_name(&self, qualified_name: &str) -> Option<usize> {
498        self.columns
499            .iter()
500            .position(|c| c.qualified_name.as_deref() == Some(qualified_name))
501    }
502
503    /// Find column by either qualified or simple name
504    /// First tries qualified match, then falls back to simple name
505    #[must_use]
506    pub fn find_column_flexible(&self, name: &str, table_prefix: Option<&str>) -> Option<usize> {
507        // If table prefix provided, try qualified match first
508        if let Some(prefix) = table_prefix {
509            let qualified = format!("{}.{}", prefix, name);
510            if let Some(idx) = self.find_column_by_qualified_name(&qualified) {
511                return Some(idx);
512            }
513        }
514
515        // Fall back to simple name match
516        self.get_column_index(name)
517    }
518
519    /// Enrich all columns with qualified names based on the table name
520    pub fn enrich_columns_with_qualified_names(&mut self, table_name: &str) {
521        for column in &mut self.columns {
522            column.qualified_name = Some(format!("{}.{}", table_name, column.name));
523            column.source_table = Some(table_name.to_string());
524        }
525    }
526
527    #[must_use]
528    pub fn column_count(&self) -> usize {
529        self.columns.len()
530    }
531
532    #[must_use]
533    pub fn row_count(&self) -> usize {
534        self.rows.len()
535    }
536
537    #[must_use]
538    pub fn is_empty(&self) -> bool {
539        self.rows.is_empty()
540    }
541
542    /// Get column names as a vector
543    #[must_use]
544    pub fn column_names(&self) -> Vec<String> {
545        self.columns.iter().map(|c| c.name.clone()).collect()
546    }
547
548    /// Get mutable access to columns for enrichment
549    pub fn columns_mut(&mut self) -> &mut [DataColumn] {
550        &mut self.columns
551    }
552
553    /// Infer and update column types based on data
554    pub fn infer_column_types(&mut self) {
555        for (col_idx, column) in self.columns.iter_mut().enumerate() {
556            let mut inferred_type = DataType::Null;
557            let mut null_count = 0;
558            let mut unique_values = std::collections::HashSet::new();
559
560            for row in &self.rows {
561                if let Some(value) = row.get(col_idx) {
562                    if value.is_null() {
563                        null_count += 1;
564                    } else {
565                        let value_type = value.data_type();
566                        inferred_type = inferred_type.merge(&value_type);
567                        unique_values.insert(value.to_string());
568                    }
569                }
570            }
571
572            column.data_type = inferred_type;
573            column.null_count = null_count;
574            column.nullable = null_count > 0;
575            column.unique_values = Some(unique_values.len());
576        }
577    }
578
579    /// Get a value at specific row and column
580    #[must_use]
581    pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
582        self.rows.get(row)?.get(col)
583    }
584
585    /// Get a value by row index and column name
586    #[must_use]
587    pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
588        let col_idx = self.get_column_index(col_name)?;
589        self.get_value(row, col_idx)
590    }
591
592    /// Convert to a vector of string vectors (for display/compatibility)
593    #[must_use]
594    pub fn to_string_table(&self) -> Vec<Vec<String>> {
595        self.rows
596            .iter()
597            .map(|row| {
598                row.values
599                    .iter()
600                    .map(DataValue::to_string_optimized)
601                    .collect()
602            })
603            .collect()
604    }
605
606    /// Get table statistics
607    #[must_use]
608    pub fn get_stats(&self) -> DataTableStats {
609        DataTableStats {
610            row_count: self.row_count(),
611            column_count: self.column_count(),
612            memory_size: self.estimate_memory_size(),
613            null_count: self.columns.iter().map(|c| c.null_count).sum(),
614        }
615    }
616
617    /// Generate a debug dump string for display
618    #[must_use]
619    pub fn debug_dump(&self) -> String {
620        let mut output = String::new();
621
622        output.push_str(&format!("DataTable: {}\n", self.name));
623        output.push_str(&format!(
624            "Rows: {} | Columns: {}\n",
625            self.row_count(),
626            self.column_count()
627        ));
628
629        if !self.metadata.is_empty() {
630            output.push_str("Metadata:\n");
631            for (key, value) in &self.metadata {
632                output.push_str(&format!("  {key}: {value}\n"));
633            }
634        }
635
636        output.push_str("\nColumns:\n");
637        for column in &self.columns {
638            output.push_str(&format!("  {} ({:?})", column.name, column.data_type));
639            if column.nullable {
640                output.push_str(&format!(" - nullable, {} nulls", column.null_count));
641            }
642            if let Some(unique) = column.unique_values {
643                output.push_str(&format!(", {unique} unique"));
644            }
645            output.push('\n');
646        }
647
648        // Show first few rows
649        if self.row_count() > 0 {
650            let sample_size = 5.min(self.row_count());
651            output.push_str(&format!("\nFirst {sample_size} rows:\n"));
652
653            for row_idx in 0..sample_size {
654                output.push_str(&format!("  [{row_idx}]: "));
655                for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
656                    if col_idx > 0 {
657                        output.push_str(", ");
658                    }
659                    output.push_str(&value.to_string());
660                }
661                output.push('\n');
662            }
663        }
664
665        output
666    }
667
668    #[must_use]
669    pub fn estimate_memory_size(&self) -> usize {
670        // Base structure size
671        let mut size = std::mem::size_of::<Self>();
672
673        // Column metadata
674        size += self.columns.len() * std::mem::size_of::<DataColumn>();
675        for col in &self.columns {
676            size += col.name.len();
677        }
678
679        // Row structure overhead
680        size += self.rows.len() * std::mem::size_of::<DataRow>();
681
682        // Actual data values
683        for row in &self.rows {
684            for value in &row.values {
685                // Base enum size
686                size += std::mem::size_of::<DataValue>();
687                // Add string content size
688                match value {
689                    DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
690                    DataValue::Vector(v) => size += v.len() * std::mem::size_of::<f64>(),
691                    _ => {} // Numbers and booleans are inline
692                }
693            }
694        }
695
696        size
697    }
698
699    /// Convert DataTable to CSV format
700    pub fn to_csv(&self) -> String {
701        let mut csv_output = String::new();
702
703        // Write headers
704        let headers: Vec<String> = self
705            .columns
706            .iter()
707            .map(|col| {
708                if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
709                    format!("\"{}\"", col.name.replace('"', "\"\""))
710                } else {
711                    col.name.clone()
712                }
713            })
714            .collect();
715        csv_output.push_str(&headers.join(","));
716        csv_output.push('\n');
717
718        // Write data rows
719        for row in &self.rows {
720            let row_values: Vec<String> = row
721                .values
722                .iter()
723                .map(|value| {
724                    let str_val = value.to_string();
725                    if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
726                        format!("\"{}\"", str_val.replace('"', "\"\""))
727                    } else {
728                        str_val
729                    }
730                })
731                .collect();
732            csv_output.push_str(&row_values.join(","));
733            csv_output.push('\n');
734        }
735
736        csv_output
737    }
738
739    /// V46: Create `DataTable` from `QueryResponse`
740    /// This is the key conversion function that bridges old and new systems
741    pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
742        debug!(
743            "V46: Converting QueryResponse to DataTable for table '{}'",
744            table_name
745        );
746
747        // Track memory before conversion
748        crate::utils::memory_tracker::track_memory("start_from_query_response");
749
750        let mut table = DataTable::new(table_name);
751
752        // Extract column names and types from first row
753        if let Some(first_row) = response.data.first() {
754            if let Some(obj) = first_row.as_object() {
755                // Create columns based on the keys in the JSON object
756                for key in obj.keys() {
757                    let column = DataColumn::new(key.clone());
758                    table.add_column(column);
759                }
760
761                // Now convert all rows
762                for json_row in &response.data {
763                    if let Some(row_obj) = json_row.as_object() {
764                        let mut values = Vec::new();
765
766                        // Ensure we get values in the same order as columns
767                        for column in &table.columns {
768                            let value = row_obj
769                                .get(&column.name)
770                                .map_or(DataValue::Null, json_value_to_data_value);
771                            values.push(value);
772                        }
773
774                        table.add_row(DataRow::new(values))?;
775                    }
776                }
777
778                // Infer column types from the data
779                table.infer_column_types();
780
781                // Add metadata
782                if let Some(source) = &response.source {
783                    table.metadata.insert("source".to_string(), source.clone());
784                }
785                if let Some(cached) = response.cached {
786                    table
787                        .metadata
788                        .insert("cached".to_string(), cached.to_string());
789                }
790                table
791                    .metadata
792                    .insert("original_count".to_string(), response.count.to_string());
793
794                debug!(
795                    "V46: Created DataTable with {} columns and {} rows",
796                    table.column_count(),
797                    table.row_count()
798                );
799            } else {
800                // Handle non-object JSON (single values)
801                table.add_column(DataColumn::new("value"));
802                for json_value in &response.data {
803                    let value = json_value_to_data_value(json_value);
804                    table.add_row(DataRow::new(vec![value]))?;
805                }
806            }
807        }
808
809        Ok(table)
810    }
811
812    /// Get a single row by index
813    #[must_use]
814    pub fn get_row(&self, index: usize) -> Option<&DataRow> {
815        self.rows.get(index)
816    }
817
818    /// V50: Get a single row as strings
819    #[must_use]
820    pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
821        self.rows.get(index).map(|row| {
822            row.values
823                .iter()
824                .map(DataValue::to_string_optimized)
825                .collect()
826        })
827    }
828
829    /// Pretty print the `DataTable` with a nice box drawing
830    #[must_use]
831    pub fn pretty_print(&self) -> String {
832        let mut output = String::new();
833
834        // Header
835        output.push_str("╔═══════════════════════════════════════════════════════╗\n");
836        output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
837        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
838
839        // Summary stats
840        output.push_str(&format!(
841            "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
842            self.row_count(),
843            self.column_count(),
844            self.get_stats().memory_size
845        ));
846
847        // Metadata if any
848        if !self.metadata.is_empty() {
849            output.push_str("╠═══════════════════════════════════════════════════════╣\n");
850            output.push_str("║ Metadata:                                             ║\n");
851            for (key, value) in &self.metadata {
852                let truncated_value = if value.len() > 35 {
853                    format!("{}...", &value[..32])
854                } else {
855                    value.clone()
856                };
857                output.push_str(&format!(
858                    "║   {:15} : {:35} ║\n",
859                    Self::truncate_string(key, 15),
860                    truncated_value
861                ));
862            }
863        }
864
865        // Column details
866        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
867        output.push_str("║ Columns:                                              ║\n");
868        output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
869        output.push_str("║ Name              │ Type     │ Nullable│ Nulls│Unique║\n");
870        output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
871
872        for column in &self.columns {
873            let type_str = match &column.data_type {
874                DataType::String => "String",
875                DataType::Integer => "Integer",
876                DataType::Float => "Float",
877                DataType::Boolean => "Boolean",
878                DataType::DateTime => "DateTime",
879                DataType::Null => "Null",
880                DataType::Mixed => "Mixed",
881            };
882
883            output.push_str(&format!(
884                "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
885                Self::truncate_string(&column.name, 17),
886                type_str,
887                if column.nullable { "Yes" } else { "No" },
888                column.null_count,
889                column.unique_values.unwrap_or(0)
890            ));
891        }
892
893        output.push_str("╚═══════════════════════════════════════════════════════╝\n");
894
895        // Sample data (first 5 rows)
896        output.push_str("\nSample Data (first 5 rows):\n");
897        let sample_count = self.rows.len().min(5);
898
899        if sample_count > 0 {
900            // Column headers
901            output.push('┌');
902            for (i, _col) in self.columns.iter().enumerate() {
903                if i > 0 {
904                    output.push('┬');
905                }
906                output.push_str(&"─".repeat(20));
907            }
908            output.push_str("┐\n");
909
910            output.push('│');
911            for col in &self.columns {
912                output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
913            }
914            output.push('\n');
915
916            output.push('├');
917            for (i, _) in self.columns.iter().enumerate() {
918                if i > 0 {
919                    output.push('┼');
920                }
921                output.push_str(&"─".repeat(20));
922            }
923            output.push_str("┤\n");
924
925            // Data rows
926            for row_idx in 0..sample_count {
927                if let Some(row) = self.rows.get(row_idx) {
928                    output.push('│');
929                    for value in &row.values {
930                        let value_str = value.to_string();
931                        output
932                            .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
933                    }
934                    output.push('\n');
935                }
936            }
937
938            output.push('└');
939            for (i, _) in self.columns.iter().enumerate() {
940                if i > 0 {
941                    output.push('┴');
942                }
943                output.push_str(&"─".repeat(20));
944            }
945            output.push_str("┘\n");
946        }
947
948        output
949    }
950
951    fn truncate_string(s: &str, max_len: usize) -> String {
952        if s.len() > max_len {
953            format!("{}...", &s[..max_len - 3])
954        } else {
955            s.to_string()
956        }
957    }
958
959    /// Get a schema summary of the `DataTable`
960    #[must_use]
961    pub fn get_schema_summary(&self) -> String {
962        let mut summary = String::new();
963        summary.push_str(&format!(
964            "DataTable Schema ({} columns, {} rows):\n",
965            self.columns.len(),
966            self.rows.len()
967        ));
968
969        for (idx, column) in self.columns.iter().enumerate() {
970            let type_str = match &column.data_type {
971                DataType::String => "String",
972                DataType::Integer => "Integer",
973                DataType::Float => "Float",
974                DataType::Boolean => "Boolean",
975                DataType::DateTime => "DateTime",
976                DataType::Null => "Null",
977                DataType::Mixed => "Mixed",
978            };
979
980            let nullable_str = if column.nullable {
981                "nullable"
982            } else {
983                "not null"
984            };
985            let null_info = if column.null_count > 0 {
986                format!(", {} nulls", column.null_count)
987            } else {
988                String::new()
989            };
990
991            summary.push_str(&format!(
992                "  [{:3}] {} : {} ({}{})\n",
993                idx, column.name, type_str, nullable_str, null_info
994            ));
995        }
996
997        summary
998    }
999
1000    /// Get detailed schema information as a structured format
1001    #[must_use]
1002    pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
1003        self.columns
1004            .iter()
1005            .map(|col| {
1006                let type_name = format!("{:?}", col.data_type);
1007                (col.name.clone(), type_name, col.nullable, col.null_count)
1008            })
1009            .collect()
1010    }
1011
1012    /// Reserve capacity for rows to avoid reallocations
1013    pub fn reserve_rows(&mut self, additional: usize) {
1014        self.rows.reserve(additional);
1015    }
1016
1017    /// Shrink vectors to fit actual data (removes excess capacity)
1018    pub fn shrink_to_fit(&mut self) {
1019        self.rows.shrink_to_fit();
1020        for _column in &mut self.columns {
1021            // Shrink any column-specific data if needed
1022        }
1023    }
1024
1025    /// Get actual memory usage estimate (more accurate than `estimate_memory_size`)
1026    #[must_use]
1027    pub fn get_memory_usage(&self) -> usize {
1028        let mut size = std::mem::size_of::<Self>();
1029
1030        // Account for string allocations
1031        size += self.name.capacity();
1032
1033        // Account for columns
1034        size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
1035        for col in &self.columns {
1036            size += col.name.capacity();
1037        }
1038
1039        // Account for rows and their capacity
1040        size += self.rows.capacity() * std::mem::size_of::<DataRow>();
1041
1042        // Account for actual data values
1043        for row in &self.rows {
1044            size += row.values.capacity() * std::mem::size_of::<DataValue>();
1045            for value in &row.values {
1046                match value {
1047                    DataValue::String(s) => size += s.capacity(),
1048                    DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
1049                    DataValue::DateTime(s) => size += s.capacity(),
1050                    DataValue::Vector(v) => size += v.capacity() * std::mem::size_of::<f64>(),
1051                    _ => {} // Other types are inline
1052                }
1053            }
1054        }
1055
1056        // Account for metadata
1057        size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
1058        for (k, v) in &self.metadata {
1059            size += k.capacity() + v.capacity();
1060        }
1061
1062        size
1063    }
1064
1065    /// Serialize DataTable to bytes for caching (using MessagePack for now, can be upgraded to Parquet)
1066    pub fn to_parquet_bytes(&self) -> Result<Vec<u8>, String> {
1067        // For now, use MessagePack which is binary-safe and fast
1068        // Later we can upgrade to actual Parquet format
1069        rmp_serde::to_vec(self).map_err(|e| format!("Failed to serialize DataTable: {}", e))
1070    }
1071
1072    /// Deserialize DataTable from cached bytes
1073    pub fn from_parquet_bytes(bytes: &[u8]) -> Result<Self, String> {
1074        // For now, use MessagePack
1075        // Later we can upgrade to actual Parquet format
1076        rmp_serde::from_slice(bytes).map_err(|e| format!("Failed to deserialize DataTable: {}", e))
1077    }
1078}
1079
1080/// V46: Helper function to convert JSON value to `DataValue`
1081fn json_value_to_data_value(json: &JsonValue) -> DataValue {
1082    match json {
1083        JsonValue::Null => DataValue::Null,
1084        JsonValue::Bool(b) => DataValue::Boolean(*b),
1085        JsonValue::Number(n) => {
1086            if let Some(i) = n.as_i64() {
1087                DataValue::Integer(i)
1088            } else if let Some(f) = n.as_f64() {
1089                DataValue::Float(f)
1090            } else {
1091                DataValue::String(n.to_string())
1092            }
1093        }
1094        JsonValue::String(s) => {
1095            // Try to detect if it's a date/time
1096            if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
1097                // Simple heuristic for dates
1098                DataValue::DateTime(s.clone())
1099            } else {
1100                DataValue::String(s.clone())
1101            }
1102        }
1103        JsonValue::Array(_) | JsonValue::Object(_) => {
1104            // Store complex types as JSON string
1105            DataValue::String(json.to_string())
1106        }
1107    }
1108}
1109
/// Statistics about a `DataTable`
///
/// `DataTable::pretty_print` reads `memory_size` from the value returned by
/// `get_stats()` and prints it as an approximate byte count.
#[derive(Debug, Clone)]
pub struct DataTableStats {
    /// Number of rows (presumably the table's row count; confirm in `get_stats`).
    pub row_count: usize,
    /// Number of columns (presumably the table's column count; confirm in `get_stats`).
    pub column_count: usize,
    /// Memory figure, displayed by `pretty_print` as "~N bytes".
    pub memory_size: usize,
    /// Count of null values (presumably totalled across all columns; confirm in `get_stats`).
    pub null_count: usize,
}
1118
1119/// Implementation of `DataProvider` for `DataTable`
1120/// This allows `DataTable` to be used wherever `DataProvider` trait is expected
1121impl DataProvider for DataTable {
1122    fn get_row(&self, index: usize) -> Option<Vec<String>> {
1123        self.rows.get(index).map(|row| {
1124            row.values
1125                .iter()
1126                .map(DataValue::to_string_optimized)
1127                .collect()
1128        })
1129    }
1130
1131    fn get_column_names(&self) -> Vec<String> {
1132        self.column_names()
1133    }
1134
1135    fn get_row_count(&self) -> usize {
1136        self.row_count()
1137    }
1138
1139    fn get_column_count(&self) -> usize {
1140        self.column_count()
1141    }
1142}
1143
// Unit tests for type inference, table construction and conversion from a
// query response.
#[cfg(test)]
mod tests {
    use super::*;

    // `DataType::infer_from_string` maps representative literals to the
    // expected type; the empty string is treated as Null.
    #[test]
    fn test_data_type_inference() {
        assert_eq!(DataType::infer_from_string("123"), DataType::Integer);
        assert_eq!(DataType::infer_from_string("123.45"), DataType::Float);
        assert_eq!(DataType::infer_from_string("true"), DataType::Boolean);
        assert_eq!(DataType::infer_from_string("hello"), DataType::String);
        assert_eq!(DataType::infer_from_string(""), DataType::Null);
        assert_eq!(
            DataType::infer_from_string("2024-01-01"),
            DataType::DateTime
        );
    }

    // Columns and rows can be added to a fresh table and a value can be read
    // back by row index and column name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        table.add_column(DataColumn::new("id").with_type(DataType::Integer));
        table.add_column(DataColumn::new("name").with_type(DataType::String));
        table.add_column(DataColumn::new("active").with_type(DataType::Boolean));

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let row = DataRow::new(vec![
            DataValue::Integer(1),
            DataValue::String("Alice".to_string()),
            DataValue::Boolean(true),
        ]);

        table.add_row(row).unwrap();
        assert_eq!(table.row_count(), 1);

        let value = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(value.to_string(), "Alice");
    }

    // `infer_column_types` widens Integer + Float to Float and records the
    // null count and nullability of the column.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");

        // Add columns without types
        table.add_column(DataColumn::new("mixed"));

        // Add rows with different types
        table
            .add_row(DataRow::new(vec![DataValue::Integer(1)]))
            .unwrap();
        table
            .add_row(DataRow::new(vec![DataValue::Float(2.5)]))
            .unwrap();
        table.add_row(DataRow::new(vec![DataValue::Null])).unwrap();

        table.infer_column_types();

        // Should infer Float since we have both Integer and Float
        assert_eq!(table.columns[0].data_type, DataType::Float);
        assert_eq!(table.columns[0].null_count, 1);
        assert!(table.columns[0].nullable);
    }

    // `DataTable::from_query_response` carries over the table name, rows,
    // columns, metadata and JSON nulls from a `QueryResponse`.
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        // Three records; the last one has a JSON null for "age".
        let response = QueryResponse {
            query: QueryInfo {
                select: vec!["id".to_string(), "name".to_string(), "age".to_string()],
                where_clause: None,
                order_by: None,
            },
            data: vec![
                json!({
                    "id": 1,
                    "name": "Alice",
                    "age": 30
                }),
                json!({
                    "id": 2,
                    "name": "Bob",
                    "age": 25
                }),
                json!({
                    "id": 3,
                    "name": "Carol",
                    "age": null
                }),
            ],
            count: 3,
            source: Some("test.csv".to_string()),
            table: Some("test".to_string()),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        // Check column names
        let col_names = table.column_names();
        assert!(col_names.contains(&"id".to_string()));
        assert!(col_names.contains(&"name".to_string()));
        assert!(col_names.contains(&"age".to_string()));

        // Check metadata
        assert_eq!(table.metadata.get("source"), Some(&"test.csv".to_string()));
        assert_eq!(table.metadata.get("cached"), Some(&"false".to_string()));

        // Check first row values
        assert_eq!(
            table.get_value_by_name(0, "id"),
            Some(&DataValue::Integer(1))
        );
        assert_eq!(
            table.get_value_by_name(0, "name"),
            Some(&DataValue::String("Alice".to_string()))
        );
        assert_eq!(
            table.get_value_by_name(0, "age"),
            Some(&DataValue::Integer(30))
        );

        // Check null handling
        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}