sql_cli/data/
direct_csv_loader.rs

//! Direct CSV to `DataTable` loader: bypasses the JSON intermediate format.
2use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
3use anyhow::Result;
4use csv;
5use std::fs::File;
6use std::path::Path;
7use tracing::{debug, info};
8
/// Stateless namespace for loading CSV files straight into a `DataTable`.
///
/// The type carries no data; all functionality is exposed through associated
/// functions, so it never needs to be instantiated.
#[derive(Debug, Clone, Copy, Default)]
pub struct DirectCsvLoader;
10
11impl DirectCsvLoader {
12    /// Load CSV directly into DataTable without JSON intermediate
13    pub fn load_csv_direct<P: AsRef<Path>>(path: P, table_name: &str) -> Result<DataTable> {
14        let path = path.as_ref();
15        info!("Direct CSV load: Loading {} into DataTable", path.display());
16
17        // Track memory before loading
18        crate::utils::memory_tracker::track_memory("direct_csv_start");
19
20        let file = File::open(path)?;
21        let mut reader = csv::Reader::from_reader(file);
22
23        // Get headers and create columns
24        let headers = reader.headers()?.clone(); // Clone to release the borrow
25        let mut table = DataTable::new(table_name);
26
27        for header in headers.iter() {
28            table.add_column(DataColumn::new(header.to_string()));
29        }
30
31        crate::utils::memory_tracker::track_memory("direct_csv_headers");
32
33        // Read rows directly into DataTable
34        let mut row_count = 0;
35        for result in reader.records() {
36            let record = result?;
37            let mut values = Vec::with_capacity(headers.len());
38
39            for field in record.iter() {
40                // Simple type inference - can be improved later
41                let value = if field.is_empty() {
42                    DataValue::Null
43                } else if let Ok(b) = field.parse::<bool>() {
44                    DataValue::Boolean(b)
45                } else if let Ok(i) = field.parse::<i64>() {
46                    DataValue::Integer(i)
47                } else if let Ok(f) = field.parse::<f64>() {
48                    DataValue::Float(f)
49                } else {
50                    // Check for date-like strings
51                    if field.contains('-') && field.len() >= 8 && field.len() <= 30 {
52                        DataValue::DateTime(field.to_string())
53                    } else {
54                        DataValue::String(field.to_string())
55                    }
56                };
57                values.push(value);
58            }
59
60            table
61                .add_row(DataRow::new(values))
62                .map_err(|e| anyhow::anyhow!(e))?;
63            row_count += 1;
64
65            // Track memory every 5000 rows
66            if row_count % 5000 == 0 {
67                crate::utils::memory_tracker::track_memory(&format!(
68                    "direct_csv_{}rows",
69                    row_count
70                ));
71            }
72        }
73
74        // Infer column types from the data
75        table.infer_column_types();
76
77        crate::utils::memory_tracker::track_memory("direct_csv_complete");
78
79        info!(
80            "Direct CSV load complete: {} rows, {} columns, ~{} MB",
81            table.row_count(),
82            table.column_count(),
83            table.estimate_memory_size() / 1024 / 1024
84        );
85
86        Ok(table)
87    }
88
89    /// Execute a SQL query directly on a DataTable (no JSON)
90    pub fn query_datatable(table: &DataTable, sql: &str) -> Result<DataTable> {
91        // For now, just return a reference/clone of the table
92        // In the future, this would apply WHERE/ORDER BY/etc directly on DataTable
93        debug!("Direct query on DataTable: {}", sql);
94
95        // Simple SELECT * for now
96        if sql.trim().to_uppercase().starts_with("SELECT *") {
97            Ok(table.clone())
98        } else {
99            // TODO: Implement proper SQL execution on DataTable
100            Ok(table.clone())
101        }
102    }
103}