sql_cli/data/
datatable.rs

1use crate::api_client::QueryResponse;
2use crate::data::data_provider::DataProvider;
3use crate::data::type_inference::{InferredType, TypeInference};
4use serde::de::{VariantAccess, Visitor};
5use serde::{Deserialize, Serialize};
6use serde_json::Value as JsonValue;
7use std::collections::HashMap;
8use std::fmt;
9use std::sync::Arc;
10use tracing::debug;
11
12/// Represents the data type of a column
13#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
14pub enum DataType {
15    String,
16    Integer,
17    Float,
18    Boolean,
19    DateTime,
20    Null,
21    Mixed, // For columns with mixed types
22}
23
24impl DataType {
25    /// Infer type from a string value
26    #[must_use]
27    pub fn infer_from_string(value: &str) -> Self {
28        // Handle explicit null string
29        if value.eq_ignore_ascii_case("null") {
30            return DataType::Null;
31        }
32
33        // Use the shared type inference logic
34        match TypeInference::infer_from_string(value) {
35            InferredType::Null => DataType::Null,
36            InferredType::Boolean => DataType::Boolean,
37            InferredType::Integer => DataType::Integer,
38            InferredType::Float => DataType::Float,
39            InferredType::DateTime => DataType::DateTime,
40            InferredType::String => DataType::String,
41        }
42    }
43
44    /// Check if a string looks like a datetime value
45    /// Delegates to shared type inference logic
46    fn looks_like_datetime(value: &str) -> bool {
47        TypeInference::looks_like_datetime(value)
48    }
49
50    /// Merge two types (for columns with mixed types)
51    #[must_use]
52    pub fn merge(&self, other: &DataType) -> DataType {
53        if self == other {
54            return self.clone();
55        }
56
57        match (self, other) {
58            (DataType::Null, t) | (t, DataType::Null) => t.clone(),
59            (DataType::Integer, DataType::Float) | (DataType::Float, DataType::Integer) => {
60                DataType::Float
61            }
62            _ => DataType::Mixed,
63        }
64    }
65}
66
67/// Column metadata and definition
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct DataColumn {
70    pub name: String,
71    pub data_type: DataType,
72    pub nullable: bool,
73    pub unique_values: Option<usize>,
74    pub null_count: usize,
75    pub metadata: HashMap<String, String>,
76    /// Qualified name with table prefix (e.g., "messages.field_name")
77    pub qualified_name: Option<String>,
78    /// Source table or CTE name
79    pub source_table: Option<String>,
80}
81
82impl DataColumn {
83    pub fn new(name: impl Into<String>) -> Self {
84        Self {
85            name: name.into(),
86            data_type: DataType::String,
87            nullable: true,
88            unique_values: None,
89            null_count: 0,
90            metadata: HashMap::new(),
91            qualified_name: None,
92            source_table: None,
93        }
94    }
95
96    #[must_use]
97    pub fn with_type(mut self, data_type: DataType) -> Self {
98        self.data_type = data_type;
99        self
100    }
101
102    /// Set the qualified name (table.column format)
103    #[must_use]
104    pub fn with_qualified_name(mut self, table_name: &str) -> Self {
105        self.qualified_name = Some(format!("{}.{}", table_name, self.name));
106        self.source_table = Some(table_name.to_string());
107        self
108    }
109
110    /// Get the qualified name if available, otherwise return the simple name
111    pub fn get_qualified_or_simple_name(&self) -> &str {
112        self.qualified_name.as_deref().unwrap_or(&self.name)
113    }
114
115    #[must_use]
116    pub fn with_nullable(mut self, nullable: bool) -> Self {
117        self.nullable = nullable;
118        self
119    }
120}
121
/// A single cell value in the table.
///
/// Equality and ordering are derived, so floats compare with IEEE-754
/// semantics (`0.0 == -0.0`, and NaN is not equal to itself).
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum DataValue {
    /// Owned text.
    String(String),
    /// Shared text, for strings repeated across many cells (e.g., status, trader names).
    InternedString(Arc<String>),
    /// Signed 64-bit integer.
    Integer(i64),
    /// 64-bit float.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Date/time kept in textual form (ISO 8601 string for now).
    DateTime(String),
    /// Numeric vector, for vector mathematics (physics, geometry, etc.).
    Vector(Vec<f64>),
    /// Absent value.
    Null,
}

// Custom Hash implementation for DataValue: `f64` does not implement `Hash`,
// so floats are hashed through their bit patterns.
impl std::hash::Hash for DataValue {
    /// Feed a per-variant tag byte followed by the payload into `state`.
    ///
    /// Values that compare equal must hash equally, so `-0.0` is folded into
    /// `+0.0` before its bits are hashed (the two compare equal but differ in
    /// the sign bit). NaN payloads are hashed by their raw bits; NaN never
    /// compares equal to anything, so no equal pair is affected.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        /// Bit pattern used for hashing, with both zeros mapped to `+0.0`.
        fn hashable_bits(value: f64) -> u64 {
            if value == 0.0 {
                0.0_f64.to_bits()
            } else {
                value.to_bits()
            }
        }

        match self {
            DataValue::String(s) => {
                0u8.hash(state);
                s.hash(state);
            }
            DataValue::InternedString(s) => {
                1u8.hash(state);
                s.hash(state);
            }
            DataValue::Integer(i) => {
                2u8.hash(state);
                i.hash(state);
            }
            DataValue::Float(f) => {
                3u8.hash(state);
                hashable_bits(*f).hash(state);
            }
            DataValue::Boolean(b) => {
                4u8.hash(state);
                b.hash(state);
            }
            DataValue::DateTime(dt) => {
                5u8.hash(state);
                dt.hash(state);
            }
            DataValue::Vector(v) => {
                6u8.hash(state);
                // Length prefix keeps adjacent values distinguishable when
                // several `DataValue`s are hashed in sequence.
                v.len().hash(state);
                for f in v {
                    hashable_bits(*f).hash(state);
                }
            }
            DataValue::Null => {
                7u8.hash(state);
            }
        }
    }
}
177
178// Custom Serialize implementation for DataValue to handle Arc<String>
179impl Serialize for DataValue {
180    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
181    where
182        S: serde::Serializer,
183    {
184        match self {
185            DataValue::String(s) => {
186                serializer.serialize_newtype_variant("DataValue", 0, "String", s)
187            }
188            DataValue::InternedString(arc_s) => {
189                // Serialize the Arc<String> as just the String content
190                serializer.serialize_newtype_variant(
191                    "DataValue",
192                    1,
193                    "InternedString",
194                    arc_s.as_ref(),
195                )
196            }
197            DataValue::Integer(i) => {
198                serializer.serialize_newtype_variant("DataValue", 2, "Integer", i)
199            }
200            DataValue::Float(f) => serializer.serialize_newtype_variant("DataValue", 3, "Float", f),
201            DataValue::Boolean(b) => {
202                serializer.serialize_newtype_variant("DataValue", 4, "Boolean", b)
203            }
204            DataValue::DateTime(dt) => {
205                serializer.serialize_newtype_variant("DataValue", 5, "DateTime", dt)
206            }
207            DataValue::Vector(v) => {
208                serializer.serialize_newtype_variant("DataValue", 6, "Vector", v)
209            }
210            DataValue::Null => serializer.serialize_unit_variant("DataValue", 7, "Null"),
211        }
212    }
213}
214
215// Custom Deserialize implementation for DataValue to handle Arc<String>
216impl<'de> Deserialize<'de> for DataValue {
217    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
218    where
219        D: serde::Deserializer<'de>,
220    {
221        #[derive(Deserialize)]
222        #[serde(field_identifier, rename_all = "PascalCase")]
223        enum Field {
224            String,
225            InternedString,
226            Integer,
227            Float,
228            Boolean,
229            DateTime,
230            Vector,
231            Null,
232        }
233
234        struct DataValueVisitor;
235
236        impl<'de> Visitor<'de> for DataValueVisitor {
237            type Value = DataValue;
238
239            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
240                formatter.write_str("enum DataValue")
241            }
242
243            fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
244            where
245                A: serde::de::EnumAccess<'de>,
246            {
247                let (field, variant) = data.variant()?;
248                match field {
249                    Field::String => {
250                        let s: String = variant.newtype_variant()?;
251                        Ok(DataValue::String(s))
252                    }
253                    Field::InternedString => {
254                        let s: String = variant.newtype_variant()?;
255                        Ok(DataValue::InternedString(Arc::new(s)))
256                    }
257                    Field::Integer => {
258                        let i: i64 = variant.newtype_variant()?;
259                        Ok(DataValue::Integer(i))
260                    }
261                    Field::Float => {
262                        let f: f64 = variant.newtype_variant()?;
263                        Ok(DataValue::Float(f))
264                    }
265                    Field::Boolean => {
266                        let b: bool = variant.newtype_variant()?;
267                        Ok(DataValue::Boolean(b))
268                    }
269                    Field::DateTime => {
270                        let dt: String = variant.newtype_variant()?;
271                        Ok(DataValue::DateTime(dt))
272                    }
273                    Field::Vector => {
274                        let v: Vec<f64> = variant.newtype_variant()?;
275                        Ok(DataValue::Vector(v))
276                    }
277                    Field::Null => {
278                        variant.unit_variant()?;
279                        Ok(DataValue::Null)
280                    }
281                }
282            }
283        }
284
285        deserializer.deserialize_enum(
286            "DataValue",
287            &[
288                "String",
289                "InternedString",
290                "Integer",
291                "Float",
292                "Boolean",
293                "DateTime",
294                "Vector",
295                "Null",
296            ],
297            DataValueVisitor,
298        )
299    }
300}
301
302// Custom Eq implementation for DataValue
303impl Eq for DataValue {}
304
305impl DataValue {
306    pub fn from_string(s: &str, data_type: &DataType) -> Self {
307        if s.is_empty() || s.eq_ignore_ascii_case("null") {
308            return DataValue::Null;
309        }
310
311        match data_type {
312            DataType::String => DataValue::String(s.to_string()),
313            DataType::Integer => s
314                .parse::<i64>()
315                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Integer),
316            DataType::Float => s
317                .parse::<f64>()
318                .map_or_else(|_| DataValue::String(s.to_string()), DataValue::Float),
319            DataType::Boolean => {
320                let lower = s.to_lowercase();
321                DataValue::Boolean(lower == "true" || lower == "1" || lower == "yes")
322            }
323            DataType::DateTime => DataValue::DateTime(s.to_string()),
324            DataType::Null => DataValue::Null,
325            DataType::Mixed => {
326                // Try to infer for mixed columns
327                let inferred = DataType::infer_from_string(s);
328                Self::from_string(s, &inferred)
329            }
330        }
331    }
332
333    #[must_use]
334    pub fn is_null(&self) -> bool {
335        matches!(self, DataValue::Null)
336    }
337
338    #[must_use]
339    pub fn data_type(&self) -> DataType {
340        match self {
341            DataValue::String(_) | DataValue::InternedString(_) => DataType::String,
342            DataValue::Integer(_) => DataType::Integer,
343            DataValue::Float(_) => DataType::Float,
344            DataValue::Boolean(_) => DataType::Boolean,
345            DataValue::DateTime(_) => DataType::DateTime,
346            DataValue::Vector(_) => DataType::String, // Display as string "[x,y,z]"
347            DataValue::Null => DataType::Null,
348        }
349    }
350
351    /// Get string representation without allocation when possible
352    /// Returns owned String for compatibility but tries to reuse existing strings
353    #[must_use]
354    pub fn to_string_optimized(&self) -> String {
355        match self {
356            DataValue::String(s) => s.clone(), // Clone existing string
357            DataValue::InternedString(s) => s.as_ref().clone(), // Clone from Rc
358            DataValue::DateTime(s) => s.clone(), // Clone existing string
359            DataValue::Integer(i) => i.to_string(),
360            DataValue::Float(f) => f.to_string(),
361            DataValue::Boolean(b) => {
362                if *b {
363                    "true".to_string()
364                } else {
365                    "false".to_string()
366                }
367            }
368            DataValue::Vector(v) => {
369                // Format as "[x,y,z]"
370                let components: Vec<String> = v.iter().map(|f| f.to_string()).collect();
371                format!("[{}]", components.join(","))
372            }
373            DataValue::Null => String::new(), // Empty string, minimal allocation
374        }
375    }
376}
377
378impl fmt::Display for DataValue {
379    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
380        match self {
381            DataValue::String(s) => write!(f, "{s}"),
382            DataValue::InternedString(s) => write!(f, "{s}"),
383            DataValue::Integer(i) => write!(f, "{i}"),
384            DataValue::Float(fl) => write!(f, "{fl}"),
385            DataValue::Boolean(b) => write!(f, "{b}"),
386            DataValue::DateTime(dt) => write!(f, "{dt}"),
387            DataValue::Vector(v) => {
388                let components: Vec<String> = v.iter().map(|fl| fl.to_string()).collect();
389                write!(f, "[{}]", components.join(","))
390            }
391            DataValue::Null => write!(f, ""),
392        }
393    }
394}
395
396/// A row of data in the table
397#[derive(Debug, Clone, Serialize, Deserialize)]
398pub struct DataRow {
399    pub values: Vec<DataValue>,
400}
401
402impl DataRow {
403    #[must_use]
404    pub fn new(values: Vec<DataValue>) -> Self {
405        Self { values }
406    }
407
408    #[must_use]
409    pub fn get(&self, index: usize) -> Option<&DataValue> {
410        self.values.get(index)
411    }
412
413    pub fn get_mut(&mut self, index: usize) -> Option<&mut DataValue> {
414        self.values.get_mut(index)
415    }
416
417    #[must_use]
418    pub fn len(&self) -> usize {
419        self.values.len()
420    }
421
422    #[must_use]
423    pub fn is_empty(&self) -> bool {
424        self.values.is_empty()
425    }
426}
427
428/// The main `DataTable` structure
429#[derive(Debug, Clone, Serialize, Deserialize)]
430pub struct DataTable {
431    pub name: String,
432    pub columns: Vec<DataColumn>,
433    pub rows: Vec<DataRow>,
434    pub metadata: HashMap<String, String>,
435}
436
437impl DataTable {
438    pub fn new(name: impl Into<String>) -> Self {
439        Self {
440            name: name.into(),
441            columns: Vec::new(),
442            rows: Vec::new(),
443            metadata: HashMap::new(),
444        }
445    }
446
447    /// Create a DUAL table (similar to Oracle's DUAL) with one row and one column
448    /// Used for evaluating expressions without a data source
449    #[must_use]
450    pub fn dual() -> Self {
451        let mut table = DataTable::new("DUAL");
452        table.add_column(DataColumn::new("DUMMY").with_type(DataType::String));
453        table
454            .add_row(DataRow::new(vec![DataValue::String("X".to_string())]))
455            .unwrap();
456        table
457    }
458
459    pub fn add_column(&mut self, column: DataColumn) -> &mut Self {
460        self.columns.push(column);
461        self
462    }
463
464    pub fn add_row(&mut self, row: DataRow) -> Result<(), String> {
465        if row.len() != self.columns.len() {
466            return Err(format!(
467                "Row has {} values but table has {} columns",
468                row.len(),
469                self.columns.len()
470            ));
471        }
472        self.rows.push(row);
473        Ok(())
474    }
475
476    #[must_use]
477    pub fn get_column(&self, name: &str) -> Option<&DataColumn> {
478        self.columns.iter().find(|c| c.name == name)
479    }
480
481    #[must_use]
482    pub fn get_column_index(&self, name: &str) -> Option<usize> {
483        self.columns.iter().position(|c| c.name == name)
484    }
485
486    /// Find column index by qualified name (e.g., "messages.field_name")
487    #[must_use]
488    pub fn find_column_by_qualified_name(&self, qualified_name: &str) -> Option<usize> {
489        self.columns
490            .iter()
491            .position(|c| c.qualified_name.as_deref() == Some(qualified_name))
492    }
493
494    /// Find column by either qualified or simple name
495    /// First tries qualified match, then falls back to simple name
496    #[must_use]
497    pub fn find_column_flexible(&self, name: &str, table_prefix: Option<&str>) -> Option<usize> {
498        // If table prefix provided, try qualified match first
499        if let Some(prefix) = table_prefix {
500            let qualified = format!("{}.{}", prefix, name);
501            if let Some(idx) = self.find_column_by_qualified_name(&qualified) {
502                return Some(idx);
503            }
504        }
505
506        // Fall back to simple name match
507        self.get_column_index(name)
508    }
509
510    /// Enrich all columns with qualified names based on the table name
511    pub fn enrich_columns_with_qualified_names(&mut self, table_name: &str) {
512        for column in &mut self.columns {
513            column.qualified_name = Some(format!("{}.{}", table_name, column.name));
514            column.source_table = Some(table_name.to_string());
515        }
516    }
517
518    #[must_use]
519    pub fn column_count(&self) -> usize {
520        self.columns.len()
521    }
522
523    #[must_use]
524    pub fn row_count(&self) -> usize {
525        self.rows.len()
526    }
527
528    #[must_use]
529    pub fn is_empty(&self) -> bool {
530        self.rows.is_empty()
531    }
532
533    /// Get column names as a vector
534    #[must_use]
535    pub fn column_names(&self) -> Vec<String> {
536        self.columns.iter().map(|c| c.name.clone()).collect()
537    }
538
539    /// Get mutable access to columns for enrichment
540    pub fn columns_mut(&mut self) -> &mut [DataColumn] {
541        &mut self.columns
542    }
543
544    /// Infer and update column types based on data
545    pub fn infer_column_types(&mut self) {
546        for (col_idx, column) in self.columns.iter_mut().enumerate() {
547            let mut inferred_type = DataType::Null;
548            let mut null_count = 0;
549            let mut unique_values = std::collections::HashSet::new();
550
551            for row in &self.rows {
552                if let Some(value) = row.get(col_idx) {
553                    if value.is_null() {
554                        null_count += 1;
555                    } else {
556                        let value_type = value.data_type();
557                        inferred_type = inferred_type.merge(&value_type);
558                        unique_values.insert(value.to_string());
559                    }
560                }
561            }
562
563            column.data_type = inferred_type;
564            column.null_count = null_count;
565            column.nullable = null_count > 0;
566            column.unique_values = Some(unique_values.len());
567        }
568    }
569
570    /// Get a value at specific row and column
571    #[must_use]
572    pub fn get_value(&self, row: usize, col: usize) -> Option<&DataValue> {
573        self.rows.get(row)?.get(col)
574    }
575
576    /// Get a value by row index and column name
577    #[must_use]
578    pub fn get_value_by_name(&self, row: usize, col_name: &str) -> Option<&DataValue> {
579        let col_idx = self.get_column_index(col_name)?;
580        self.get_value(row, col_idx)
581    }
582
583    /// Convert to a vector of string vectors (for display/compatibility)
584    #[must_use]
585    pub fn to_string_table(&self) -> Vec<Vec<String>> {
586        self.rows
587            .iter()
588            .map(|row| {
589                row.values
590                    .iter()
591                    .map(DataValue::to_string_optimized)
592                    .collect()
593            })
594            .collect()
595    }
596
597    /// Get table statistics
598    #[must_use]
599    pub fn get_stats(&self) -> DataTableStats {
600        DataTableStats {
601            row_count: self.row_count(),
602            column_count: self.column_count(),
603            memory_size: self.estimate_memory_size(),
604            null_count: self.columns.iter().map(|c| c.null_count).sum(),
605        }
606    }
607
608    /// Generate a debug dump string for display
609    #[must_use]
610    pub fn debug_dump(&self) -> String {
611        let mut output = String::new();
612
613        output.push_str(&format!("DataTable: {}\n", self.name));
614        output.push_str(&format!(
615            "Rows: {} | Columns: {}\n",
616            self.row_count(),
617            self.column_count()
618        ));
619
620        if !self.metadata.is_empty() {
621            output.push_str("Metadata:\n");
622            for (key, value) in &self.metadata {
623                output.push_str(&format!("  {key}: {value}\n"));
624            }
625        }
626
627        output.push_str("\nColumns:\n");
628        for column in &self.columns {
629            output.push_str(&format!("  {} ({:?})", column.name, column.data_type));
630            if column.nullable {
631                output.push_str(&format!(" - nullable, {} nulls", column.null_count));
632            }
633            if let Some(unique) = column.unique_values {
634                output.push_str(&format!(", {unique} unique"));
635            }
636            output.push('\n');
637        }
638
639        // Show first few rows
640        if self.row_count() > 0 {
641            let sample_size = 5.min(self.row_count());
642            output.push_str(&format!("\nFirst {sample_size} rows:\n"));
643
644            for row_idx in 0..sample_size {
645                output.push_str(&format!("  [{row_idx}]: "));
646                for (col_idx, value) in self.rows[row_idx].values.iter().enumerate() {
647                    if col_idx > 0 {
648                        output.push_str(", ");
649                    }
650                    output.push_str(&value.to_string());
651                }
652                output.push('\n');
653            }
654        }
655
656        output
657    }
658
659    #[must_use]
660    pub fn estimate_memory_size(&self) -> usize {
661        // Base structure size
662        let mut size = std::mem::size_of::<Self>();
663
664        // Column metadata
665        size += self.columns.len() * std::mem::size_of::<DataColumn>();
666        for col in &self.columns {
667            size += col.name.len();
668        }
669
670        // Row structure overhead
671        size += self.rows.len() * std::mem::size_of::<DataRow>();
672
673        // Actual data values
674        for row in &self.rows {
675            for value in &row.values {
676                // Base enum size
677                size += std::mem::size_of::<DataValue>();
678                // Add string content size
679                match value {
680                    DataValue::String(s) | DataValue::DateTime(s) => size += s.len(),
681                    DataValue::Vector(v) => size += v.len() * std::mem::size_of::<f64>(),
682                    _ => {} // Numbers and booleans are inline
683                }
684            }
685        }
686
687        size
688    }
689
690    /// Convert DataTable to CSV format
691    pub fn to_csv(&self) -> String {
692        let mut csv_output = String::new();
693
694        // Write headers
695        let headers: Vec<String> = self
696            .columns
697            .iter()
698            .map(|col| {
699                if col.name.contains(',') || col.name.contains('"') || col.name.contains('\n') {
700                    format!("\"{}\"", col.name.replace('"', "\"\""))
701                } else {
702                    col.name.clone()
703                }
704            })
705            .collect();
706        csv_output.push_str(&headers.join(","));
707        csv_output.push('\n');
708
709        // Write data rows
710        for row in &self.rows {
711            let row_values: Vec<String> = row
712                .values
713                .iter()
714                .map(|value| {
715                    let str_val = value.to_string();
716                    if str_val.contains(',') || str_val.contains('"') || str_val.contains('\n') {
717                        format!("\"{}\"", str_val.replace('"', "\"\""))
718                    } else {
719                        str_val
720                    }
721                })
722                .collect();
723            csv_output.push_str(&row_values.join(","));
724            csv_output.push('\n');
725        }
726
727        csv_output
728    }
729
730    /// V46: Create `DataTable` from `QueryResponse`
731    /// This is the key conversion function that bridges old and new systems
732    pub fn from_query_response(response: &QueryResponse, table_name: &str) -> Result<Self, String> {
733        debug!(
734            "V46: Converting QueryResponse to DataTable for table '{}'",
735            table_name
736        );
737
738        // Track memory before conversion
739        crate::utils::memory_tracker::track_memory("start_from_query_response");
740
741        let mut table = DataTable::new(table_name);
742
743        // Extract column names and types from first row
744        if let Some(first_row) = response.data.first() {
745            if let Some(obj) = first_row.as_object() {
746                // Create columns based on the keys in the JSON object
747                for key in obj.keys() {
748                    let column = DataColumn::new(key.clone());
749                    table.add_column(column);
750                }
751
752                // Now convert all rows
753                for json_row in &response.data {
754                    if let Some(row_obj) = json_row.as_object() {
755                        let mut values = Vec::new();
756
757                        // Ensure we get values in the same order as columns
758                        for column in &table.columns {
759                            let value = row_obj
760                                .get(&column.name)
761                                .map_or(DataValue::Null, json_value_to_data_value);
762                            values.push(value);
763                        }
764
765                        table.add_row(DataRow::new(values))?;
766                    }
767                }
768
769                // Infer column types from the data
770                table.infer_column_types();
771
772                // Add metadata
773                if let Some(source) = &response.source {
774                    table.metadata.insert("source".to_string(), source.clone());
775                }
776                if let Some(cached) = response.cached {
777                    table
778                        .metadata
779                        .insert("cached".to_string(), cached.to_string());
780                }
781                table
782                    .metadata
783                    .insert("original_count".to_string(), response.count.to_string());
784
785                debug!(
786                    "V46: Created DataTable with {} columns and {} rows",
787                    table.column_count(),
788                    table.row_count()
789                );
790            } else {
791                // Handle non-object JSON (single values)
792                table.add_column(DataColumn::new("value"));
793                for json_value in &response.data {
794                    let value = json_value_to_data_value(json_value);
795                    table.add_row(DataRow::new(vec![value]))?;
796                }
797            }
798        }
799
800        Ok(table)
801    }
802
803    /// Get a single row by index
804    #[must_use]
805    pub fn get_row(&self, index: usize) -> Option<&DataRow> {
806        self.rows.get(index)
807    }
808
809    /// V50: Get a single row as strings
810    #[must_use]
811    pub fn get_row_as_strings(&self, index: usize) -> Option<Vec<String>> {
812        self.rows.get(index).map(|row| {
813            row.values
814                .iter()
815                .map(DataValue::to_string_optimized)
816                .collect()
817        })
818    }
819
820    /// Pretty print the `DataTable` with a nice box drawing
821    #[must_use]
822    pub fn pretty_print(&self) -> String {
823        let mut output = String::new();
824
825        // Header
826        output.push_str("╔═══════════════════════════════════════════════════════╗\n");
827        output.push_str(&format!("║ DataTable: {:^41} ║\n", self.name));
828        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
829
830        // Summary stats
831        output.push_str(&format!(
832            "║ Rows: {:6} | Columns: {:3} | Memory: ~{:6} bytes ║\n",
833            self.row_count(),
834            self.column_count(),
835            self.get_stats().memory_size
836        ));
837
838        // Metadata if any
839        if !self.metadata.is_empty() {
840            output.push_str("╠═══════════════════════════════════════════════════════╣\n");
841            output.push_str("║ Metadata:                                             ║\n");
842            for (key, value) in &self.metadata {
843                let truncated_value = if value.len() > 35 {
844                    format!("{}...", &value[..32])
845                } else {
846                    value.clone()
847                };
848                output.push_str(&format!(
849                    "║   {:15} : {:35} ║\n",
850                    Self::truncate_string(key, 15),
851                    truncated_value
852                ));
853            }
854        }
855
856        // Column details
857        output.push_str("╠═══════════════════════════════════════════════════════╣\n");
858        output.push_str("║ Columns:                                              ║\n");
859        output.push_str("╟───────────────────┬──────────┬─────────┬──────┬──────╢\n");
860        output.push_str("║ Name              │ Type     │ Nullable│ Nulls│Unique║\n");
861        output.push_str("╟───────────────────┼──────────┼─────────┼──────┼──────╢\n");
862
863        for column in &self.columns {
864            let type_str = match &column.data_type {
865                DataType::String => "String",
866                DataType::Integer => "Integer",
867                DataType::Float => "Float",
868                DataType::Boolean => "Boolean",
869                DataType::DateTime => "DateTime",
870                DataType::Null => "Null",
871                DataType::Mixed => "Mixed",
872            };
873
874            output.push_str(&format!(
875                "║ {:17} │ {:8} │ {:7} │ {:4} │ {:4} ║\n",
876                Self::truncate_string(&column.name, 17),
877                type_str,
878                if column.nullable { "Yes" } else { "No" },
879                column.null_count,
880                column.unique_values.unwrap_or(0)
881            ));
882        }
883
884        output.push_str("╚═══════════════════════════════════════════════════════╝\n");
885
886        // Sample data (first 5 rows)
887        output.push_str("\nSample Data (first 5 rows):\n");
888        let sample_count = self.rows.len().min(5);
889
890        if sample_count > 0 {
891            // Column headers
892            output.push('┌');
893            for (i, _col) in self.columns.iter().enumerate() {
894                if i > 0 {
895                    output.push('┬');
896                }
897                output.push_str(&"─".repeat(20));
898            }
899            output.push_str("┐\n");
900
901            output.push('│');
902            for col in &self.columns {
903                output.push_str(&format!(" {:^18} │", Self::truncate_string(&col.name, 18)));
904            }
905            output.push('\n');
906
907            output.push('├');
908            for (i, _) in self.columns.iter().enumerate() {
909                if i > 0 {
910                    output.push('┼');
911                }
912                output.push_str(&"─".repeat(20));
913            }
914            output.push_str("┤\n");
915
916            // Data rows
917            for row_idx in 0..sample_count {
918                if let Some(row) = self.rows.get(row_idx) {
919                    output.push('│');
920                    for value in &row.values {
921                        let value_str = value.to_string();
922                        output
923                            .push_str(&format!(" {:18} │", Self::truncate_string(&value_str, 18)));
924                    }
925                    output.push('\n');
926                }
927            }
928
929            output.push('└');
930            for (i, _) in self.columns.iter().enumerate() {
931                if i > 0 {
932                    output.push('┴');
933                }
934                output.push_str(&"─".repeat(20));
935            }
936            output.push_str("┘\n");
937        }
938
939        output
940    }
941
942    fn truncate_string(s: &str, max_len: usize) -> String {
943        if s.len() > max_len {
944            format!("{}...", &s[..max_len - 3])
945        } else {
946            s.to_string()
947        }
948    }
949
950    /// Get a schema summary of the `DataTable`
951    #[must_use]
952    pub fn get_schema_summary(&self) -> String {
953        let mut summary = String::new();
954        summary.push_str(&format!(
955            "DataTable Schema ({} columns, {} rows):\n",
956            self.columns.len(),
957            self.rows.len()
958        ));
959
960        for (idx, column) in self.columns.iter().enumerate() {
961            let type_str = match &column.data_type {
962                DataType::String => "String",
963                DataType::Integer => "Integer",
964                DataType::Float => "Float",
965                DataType::Boolean => "Boolean",
966                DataType::DateTime => "DateTime",
967                DataType::Null => "Null",
968                DataType::Mixed => "Mixed",
969            };
970
971            let nullable_str = if column.nullable {
972                "nullable"
973            } else {
974                "not null"
975            };
976            let null_info = if column.null_count > 0 {
977                format!(", {} nulls", column.null_count)
978            } else {
979                String::new()
980            };
981
982            summary.push_str(&format!(
983                "  [{:3}] {} : {} ({}{})\n",
984                idx, column.name, type_str, nullable_str, null_info
985            ));
986        }
987
988        summary
989    }
990
991    /// Get detailed schema information as a structured format
992    #[must_use]
993    pub fn get_schema_info(&self) -> Vec<(String, String, bool, usize)> {
994        self.columns
995            .iter()
996            .map(|col| {
997                let type_name = format!("{:?}", col.data_type);
998                (col.name.clone(), type_name, col.nullable, col.null_count)
999            })
1000            .collect()
1001    }
1002
1003    /// Reserve capacity for rows to avoid reallocations
1004    pub fn reserve_rows(&mut self, additional: usize) {
1005        self.rows.reserve(additional);
1006    }
1007
1008    /// Shrink vectors to fit actual data (removes excess capacity)
1009    pub fn shrink_to_fit(&mut self) {
1010        self.rows.shrink_to_fit();
1011        for _column in &mut self.columns {
1012            // Shrink any column-specific data if needed
1013        }
1014    }
1015
1016    /// Get actual memory usage estimate (more accurate than `estimate_memory_size`)
1017    #[must_use]
1018    pub fn get_memory_usage(&self) -> usize {
1019        let mut size = std::mem::size_of::<Self>();
1020
1021        // Account for string allocations
1022        size += self.name.capacity();
1023
1024        // Account for columns
1025        size += self.columns.capacity() * std::mem::size_of::<DataColumn>();
1026        for col in &self.columns {
1027            size += col.name.capacity();
1028        }
1029
1030        // Account for rows and their capacity
1031        size += self.rows.capacity() * std::mem::size_of::<DataRow>();
1032
1033        // Account for actual data values
1034        for row in &self.rows {
1035            size += row.values.capacity() * std::mem::size_of::<DataValue>();
1036            for value in &row.values {
1037                match value {
1038                    DataValue::String(s) => size += s.capacity(),
1039                    DataValue::InternedString(_) => size += std::mem::size_of::<Arc<String>>(),
1040                    DataValue::DateTime(s) => size += s.capacity(),
1041                    DataValue::Vector(v) => size += v.capacity() * std::mem::size_of::<f64>(),
1042                    _ => {} // Other types are inline
1043                }
1044            }
1045        }
1046
1047        // Account for metadata
1048        size += self.metadata.capacity() * std::mem::size_of::<(String, String)>();
1049        for (k, v) in &self.metadata {
1050            size += k.capacity() + v.capacity();
1051        }
1052
1053        size
1054    }
1055
1056    /// Serialize DataTable to bytes for caching (using MessagePack for now, can be upgraded to Parquet)
1057    pub fn to_parquet_bytes(&self) -> Result<Vec<u8>, String> {
1058        // For now, use MessagePack which is binary-safe and fast
1059        // Later we can upgrade to actual Parquet format
1060        rmp_serde::to_vec(self).map_err(|e| format!("Failed to serialize DataTable: {}", e))
1061    }
1062
1063    /// Deserialize DataTable from cached bytes
1064    pub fn from_parquet_bytes(bytes: &[u8]) -> Result<Self, String> {
1065        // For now, use MessagePack
1066        // Later we can upgrade to actual Parquet format
1067        rmp_serde::from_slice(bytes).map_err(|e| format!("Failed to deserialize DataTable: {}", e))
1068    }
1069}
1070
1071/// V46: Helper function to convert JSON value to `DataValue`
1072fn json_value_to_data_value(json: &JsonValue) -> DataValue {
1073    match json {
1074        JsonValue::Null => DataValue::Null,
1075        JsonValue::Bool(b) => DataValue::Boolean(*b),
1076        JsonValue::Number(n) => {
1077            if let Some(i) = n.as_i64() {
1078                DataValue::Integer(i)
1079            } else if let Some(f) = n.as_f64() {
1080                DataValue::Float(f)
1081            } else {
1082                DataValue::String(n.to_string())
1083            }
1084        }
1085        JsonValue::String(s) => {
1086            // Try to detect if it's a date/time
1087            if s.contains('-') && s.len() >= 8 && s.len() <= 30 {
1088                // Simple heuristic for dates
1089                DataValue::DateTime(s.clone())
1090            } else {
1091                DataValue::String(s.clone())
1092            }
1093        }
1094        JsonValue::Array(_) | JsonValue::Object(_) => {
1095            // Store complex types as JSON string
1096            DataValue::String(json.to_string())
1097        }
1098    }
1099}
1100
/// Summary figures describing a `DataTable`.
#[derive(Debug, Clone)]
pub struct DataTableStats {
    /// Row count recorded for the table.
    pub row_count: usize,
    /// Column count recorded for the table.
    pub column_count: usize,
    /// Memory size recorded for the table (presumably bytes; confirm against the producer).
    pub memory_size: usize,
    /// Null count recorded for the table.
    pub null_count: usize,
}
1109
1110/// Implementation of `DataProvider` for `DataTable`
1111/// This allows `DataTable` to be used wherever `DataProvider` trait is expected
1112impl DataProvider for DataTable {
1113    fn get_row(&self, index: usize) -> Option<Vec<String>> {
1114        self.rows.get(index).map(|row| {
1115            row.values
1116                .iter()
1117                .map(DataValue::to_string_optimized)
1118                .collect()
1119        })
1120    }
1121
1122    fn get_column_names(&self) -> Vec<String> {
1123        self.column_names()
1124    }
1125
1126    fn get_row_count(&self) -> usize {
1127        self.row_count()
1128    }
1129
1130    fn get_column_count(&self) -> usize {
1131        self.column_count()
1132    }
1133}
1134
#[cfg(test)]
mod tests {
    use super::*;

    /// String inputs map to the expected inferred `DataType`.
    #[test]
    fn test_data_type_inference() {
        let expectations = vec![
            ("123", DataType::Integer),
            ("123.45", DataType::Float),
            ("true", DataType::Boolean),
            ("hello", DataType::String),
            ("", DataType::Null),
            ("2024-01-01", DataType::DateTime),
        ];

        for (input, expected) in expectations {
            assert_eq!(DataType::infer_from_string(input), expected);
        }
    }

    /// Columns and rows can be added, counted and read back by column name.
    #[test]
    fn test_datatable_creation() {
        let mut table = DataTable::new("test");

        let columns = vec![
            DataColumn::new("id").with_type(DataType::Integer),
            DataColumn::new("name").with_type(DataType::String),
            DataColumn::new("active").with_type(DataType::Boolean),
        ];
        for column in columns {
            table.add_column(column);
        }

        assert_eq!(table.column_count(), 3);
        assert_eq!(table.row_count(), 0);

        let alice = DataRow::new(vec![
            DataValue::Integer(1),
            DataValue::String("Alice".to_string()),
            DataValue::Boolean(true),
        ]);
        table.add_row(alice).unwrap();
        assert_eq!(table.row_count(), 1);

        let name_cell = table.get_value_by_name(0, "name").unwrap();
        assert_eq!(name_cell.to_string(), "Alice");
    }

    /// An untyped column holding an integer, a float and a null is inferred as
    /// a nullable `Float` column with one null.
    #[test]
    fn test_type_inference() {
        let mut table = DataTable::new("test");

        // Column declared without an explicit type
        table.add_column(DataColumn::new("mixed"));

        // One single-cell row per value kind
        let cells = vec![
            DataValue::Integer(1),
            DataValue::Float(2.5),
            DataValue::Null,
        ];
        for cell in cells {
            table.add_row(DataRow::new(vec![cell])).unwrap();
        }

        table.infer_column_types();

        // Integer and Float together widen to Float
        let column = &table.columns[0];
        assert_eq!(column.data_type, DataType::Float);
        assert_eq!(column.null_count, 1);
        assert!(column.nullable);
    }

    /// A `QueryResponse` converts into a table with matching name, shape,
    /// column names, metadata and cell values (including JSON nulls).
    #[test]
    fn test_from_query_response() {
        use crate::api_client::{QueryInfo, QueryResponse};
        use serde_json::json;

        let selected = vec!["id".to_string(), "name".to_string(), "age".to_string()];
        let records = vec![
            json!({ "id": 1, "name": "Alice", "age": 30 }),
            json!({ "id": 2, "name": "Bob", "age": 25 }),
            json!({ "id": 3, "name": "Carol", "age": null }),
        ];

        let response = QueryResponse {
            query: QueryInfo {
                select: selected,
                where_clause: None,
                order_by: None,
            },
            data: records,
            count: 3,
            source: Some("test.csv".to_string()),
            table: Some("test".to_string()),
            cached: Some(false),
        };

        let table = DataTable::from_query_response(&response, "test").unwrap();

        assert_eq!(table.name, "test");
        assert_eq!(table.row_count(), 3);
        assert_eq!(table.column_count(), 3);

        // Every selected column is present
        let col_names = table.column_names();
        for expected in vec!["id", "name", "age"] {
            assert!(col_names.contains(&expected.to_string()));
        }

        // Response metadata is carried over as strings
        assert_eq!(table.metadata.get("source"), Some(&"test.csv".to_string()));
        assert_eq!(table.metadata.get("cached"), Some(&"false".to_string()));

        // First row values
        let first_row = vec![
            ("id", DataValue::Integer(1)),
            ("name", DataValue::String("Alice".to_string())),
            ("age", DataValue::Integer(30)),
        ];
        for (column, expected) in &first_row {
            assert_eq!(table.get_value_by_name(0, *column), Some(expected));
        }

        // JSON null becomes DataValue::Null
        assert_eq!(table.get_value_by_name(2, "age"), Some(&DataValue::Null));
    }
}