sql_cli/data/
direct_csv_loader.rs

//! Direct CSV to `DataTable` loader - bypasses the JSON intermediate format.
use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
use anyhow::{Context, Result};
use csv;
use std::fs::File;
use std::path::Path;
use tracing::{debug, info};
8
/// Stateless entry point for loading CSV files straight into a `DataTable`.
///
/// The type carries no data; its functionality is exposed as associated
/// functions.
pub struct DirectCsvLoader;
10
11impl DirectCsvLoader {
12    /// Load CSV directly into `DataTable` without JSON intermediate
13    pub fn load_csv_direct<P: AsRef<Path>>(path: P, table_name: &str) -> Result<DataTable> {
14        let path = path.as_ref();
15        info!("Direct CSV load: Loading {} into DataTable", path.display());
16
17        // Track memory before loading
18        crate::utils::memory_tracker::track_memory("direct_csv_start");
19
20        let file = File::open(path)?;
21        let mut reader = csv::Reader::from_reader(file);
22
23        // Get headers and create columns
24        let headers = reader.headers()?.clone(); // Clone to release the borrow
25        let mut table = DataTable::new(table_name);
26
27        for header in &headers {
28            table.add_column(DataColumn::new(header.to_string()));
29        }
30
31        crate::utils::memory_tracker::track_memory("direct_csv_headers");
32
33        // Read rows directly into DataTable
34        let mut row_count = 0;
35        for result in reader.records() {
36            let record = result?;
37            let mut values = Vec::with_capacity(headers.len());
38
39            for field in &record {
40                // Simple type inference - can be improved later
41                let value = if field.is_empty() {
42                    DataValue::Null
43                } else if let Ok(b) = field.parse::<bool>() {
44                    DataValue::Boolean(b)
45                } else if let Ok(i) = field.parse::<i64>() {
46                    DataValue::Integer(i)
47                } else if let Ok(f) = field.parse::<f64>() {
48                    DataValue::Float(f)
49                } else {
50                    // Check for date-like strings
51                    if field.contains('-') && field.len() >= 8 && field.len() <= 30 {
52                        DataValue::DateTime(field.to_string())
53                    } else {
54                        DataValue::String(field.to_string())
55                    }
56                };
57                values.push(value);
58            }
59
60            table
61                .add_row(DataRow::new(values))
62                .map_err(|e| anyhow::anyhow!(e))?;
63            row_count += 1;
64
65            // Track memory every 5000 rows
66            if row_count % 5000 == 0 {
67                crate::utils::memory_tracker::track_memory(&format!("direct_csv_{row_count}rows"));
68            }
69        }
70
71        // Infer column types from the data
72        table.infer_column_types();
73
74        crate::utils::memory_tracker::track_memory("direct_csv_complete");
75
76        info!(
77            "Direct CSV load complete: {} rows, {} columns, ~{} MB",
78            table.row_count(),
79            table.column_count(),
80            table.estimate_memory_size() / 1024 / 1024
81        );
82
83        Ok(table)
84    }
85
86    /// Execute a SQL query directly on a `DataTable` (no JSON)
87    pub fn query_datatable(table: &DataTable, sql: &str) -> Result<DataTable> {
88        // For now, just return a reference/clone of the table
89        // In the future, this would apply WHERE/ORDER BY/etc directly on DataTable
90        debug!("Direct query on DataTable: {}", sql);
91
92        // Simple SELECT * for now
93        if sql.trim().to_uppercase().starts_with("SELECT *") {
94            Ok(table.clone())
95        } else {
96            // TODO: Implement proper SQL execution on DataTable
97            Ok(table.clone())
98        }
99    }
100}