Skip to main content

sql_cli/services/
data_loader_service.rs

1use crate::data::csv_datasource::CsvApiClient;
2use crate::data::data_view::DataView;
3use crate::data::datatable::DataTable;
4use crate::ui::utils::enhanced_tui_helpers;
5use anyhow::Result;
6use std::path::Path;
7use std::sync::Arc;
8use tracing::{info, warn};
9
/// Service responsible for loading data from various sources.
///
/// This encapsulates all file loading logic that was previously in the TUI.
/// `Debug` and `Clone` are derived so the service can be logged and embedded
/// in other derivable types; it only holds a single `bool`.
#[derive(Debug, Clone)]
pub struct DataLoaderService {
    /// Case-sensitivity setting supplied at construction time.
    case_insensitive: bool,
}
15
16impl DataLoaderService {
17    #[must_use]
18    pub fn new(case_insensitive: bool) -> Self {
19        Self { case_insensitive }
20    }
21
22    /// Load a file and return a `DataView`
23    /// The TUI doesn't need to know about file types or loading strategies
24    pub fn load_file(&self, file_path: &str) -> Result<DataLoadResult> {
25        let path = Path::new(file_path);
26        let extension = path
27            .extension()
28            .and_then(|e| e.to_str())
29            .ok_or_else(|| anyhow::anyhow!("File has no extension: {}", file_path))?;
30
31        let raw_table_name = path
32            .file_stem()
33            .and_then(|s| s.to_str())
34            .unwrap_or("data")
35            .to_string();
36
37        // Sanitize the table name to be SQL-friendly
38        let table_name = enhanced_tui_helpers::sanitize_table_name(&raw_table_name);
39
40        match extension.to_lowercase().as_str() {
41            // .tsv/.psv share the CSV loader; delimiter is auto-detected from the path.
42            "csv" | "tsv" | "psv" => self.load_csv(file_path, &table_name, &raw_table_name),
43            "json" | "jsonl" | "ndjson" => self.load_json(file_path, &table_name, &raw_table_name),
44            _ => Err(anyhow::anyhow!(
45                "Unsupported file type: {}. Use .csv, .tsv, .psv, .json, .jsonl, or .ndjson files.",
46                extension
47            )),
48        }
49    }
50
51    /// Load a CSV file
52    fn load_csv(
53        &self,
54        file_path: &str,
55        table_name: &str,
56        raw_table_name: &str,
57    ) -> Result<DataLoadResult> {
58        info!("Loading CSV file: {}", file_path);
59        let start = std::time::Instant::now();
60
61        // Try advanced loader first (with string interning)
62        let datatable = match crate::data::advanced_csv_loader::AdvancedCsvLoader::new()
63            .load_csv_optimized(file_path, table_name)
64        {
65            Ok(dt) => {
66                info!("Successfully loaded CSV with advanced optimizations");
67                dt
68            }
69            Err(e) => {
70                warn!(
71                    "Advanced CSV loader failed: {}, falling back to standard loader",
72                    e
73                );
74                crate::data::datatable_loaders::load_csv_to_datatable(file_path, table_name)?
75            }
76        };
77
78        self.create_result(
79            datatable,
80            file_path.to_string(),
81            table_name.to_string(),
82            raw_table_name.to_string(),
83            start.elapsed(),
84        )
85    }
86
87    /// Load a JSON file
88    fn load_json(
89        &self,
90        file_path: &str,
91        table_name: &str,
92        raw_table_name: &str,
93    ) -> Result<DataLoadResult> {
94        info!("Loading JSON file: {}", file_path);
95        let start = std::time::Instant::now();
96
97        let datatable =
98            crate::data::datatable_loaders::load_json_to_datatable(file_path, table_name)?;
99
100        self.create_result(
101            datatable,
102            file_path.to_string(),
103            table_name.to_string(),
104            raw_table_name.to_string(),
105            start.elapsed(),
106        )
107    }
108
109    /// Load data using `CsvApiClient` (for additional files)
110    pub fn load_with_client(&self, file_path: &str) -> Result<DataLoadResult> {
111        let mut csv_client = CsvApiClient::new();
112        csv_client.set_case_insensitive(self.case_insensitive);
113
114        let path = Path::new(file_path);
115        let raw_table_name = path
116            .file_stem()
117            .and_then(|s| s.to_str())
118            .unwrap_or("data")
119            .to_string();
120
121        // Sanitize the table name to be SQL-friendly
122        let table_name = enhanced_tui_helpers::sanitize_table_name(&raw_table_name);
123
124        let extension = path
125            .extension()
126            .and_then(|e| e.to_str())
127            .ok_or_else(|| anyhow::anyhow!("File has no extension: {}", file_path))?;
128
129        let start = std::time::Instant::now();
130
131        match extension.to_lowercase().as_str() {
132            "csv" => csv_client.load_csv(file_path, &table_name)?,
133            "json" | "jsonl" | "ndjson" => csv_client.load_json(file_path, &table_name)?,
134            _ => return Err(anyhow::anyhow!("Unsupported file type: {}", extension)),
135        }
136
137        // Get the DataTable from the client
138        let datatable = csv_client
139            .get_datatable()
140            .ok_or_else(|| anyhow::anyhow!("Failed to load data from {}", file_path))?;
141
142        self.create_result(
143            datatable,
144            file_path.to_string(),
145            table_name,
146            raw_table_name,
147            start.elapsed(),
148        )
149    }
150
151    /// Create a `DataLoadResult` from a `DataTable`
152    fn create_result(
153        &self,
154        datatable: DataTable,
155        source_path: String,
156        table_name: String,
157        raw_table_name: String,
158        load_time: std::time::Duration,
159    ) -> Result<DataLoadResult> {
160        // Create initial DataView
161        let initial_row_count = datatable.row_count();
162        let initial_column_count = datatable.column_count();
163
164        // Create DataView
165        let dataview = DataView::new(Arc::new(datatable));
166
167        Ok(DataLoadResult {
168            dataview,
169            source_path,
170            table_name,
171            raw_table_name,
172            initial_row_count,
173            initial_column_count,
174            load_time,
175        })
176    }
177
178    /// Update configuration
179    pub fn set_case_insensitive(&mut self, case_insensitive: bool) {
180        self.case_insensitive = case_insensitive;
181    }
182}
183
184/// Result of loading data
185pub struct DataLoadResult {
186    /// The loaded `DataView` ready for display
187    pub dataview: DataView,
188
189    /// Path to the source file
190    pub source_path: String,
191
192    /// SQL-friendly table name
193    pub table_name: String,
194
195    /// Original table name (before sanitization)
196    pub raw_table_name: String,
197
198    /// Initial row count (before any filtering)
199    pub initial_row_count: usize,
200
201    /// Initial column count
202    pub initial_column_count: usize,
203
204    /// Time taken to load the file
205    pub load_time: std::time::Duration,
206}
207
208impl DataLoadResult {
209    /// Generate a status message for the load operation
210    #[must_use]
211    pub fn status_message(&self) -> String {
212        format!(
213            "Loaded {} ({} rows, {} columns) in {} ms",
214            self.source_path,
215            self.initial_row_count,
216            self.initial_column_count,
217            self.load_time.as_millis()
218        )
219    }
220}